1 /* 2 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "vnode_store.h" 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 13 #include <KernelExport.h> 14 #include <fs_cache.h> 15 16 #include <condition_variable.h> 17 #include <file_cache.h> 18 #include <generic_syscall.h> 19 #include <low_resource_manager.h> 20 #include <thread.h> 21 #include <util/AutoLock.h> 22 #include <util/kernel_cpp.h> 23 #include <vfs.h> 24 #include <vm.h> 25 #include <vm_page.h> 26 #include <vm_cache.h> 27 28 #include "IORequest.h" 29 30 31 //#define TRACE_FILE_CACHE 32 #ifdef TRACE_FILE_CACHE 33 # define TRACE(x) dprintf x 34 #else 35 # define TRACE(x) ; 36 #endif 37 38 // maximum number of iovecs per request 39 #define MAX_IO_VECS 32 // 128 kB 40 #define MAX_FILE_IO_VECS 32 41 42 #define BYPASS_IO_SIZE 65536 43 #define LAST_ACCESSES 3 44 45 struct file_cache_ref { 46 vm_cache *cache; 47 struct vnode *vnode; 48 off_t last_access[LAST_ACCESSES]; 49 // TODO: it would probably be enough to only store the least 50 // significant 31 bits, and make this uint32 (one bit for 51 // write vs. read) 52 int32 last_access_index; 53 uint16 disabled_count; 54 55 inline void SetLastAccess(int32 index, off_t access, bool isWrite) 56 { 57 // we remember writes as negative offsets 58 last_access[index] = isWrite ? -access : access; 59 } 60 61 inline off_t LastAccess(int32 index, bool isWrite) 62 { 63 return isWrite ? -last_access[index] : last_access[index]; 64 } 65 66 inline uint32 LastAccessPageOffset(int32 index, bool isWrite) 67 { 68 return LastAccess(index, isWrite) >> PAGE_SHIFT; 69 } 70 }; 71 72 class PrecacheIO : public AsyncIOCallback { 73 public: 74 PrecacheIO(file_cache_ref* ref, off_t offset, 75 size_t size); 76 ~PrecacheIO(); 77 78 status_t Prepare(); 79 void ReadAsync(); 80 81 virtual void IOFinished(status_t status, 82 bool partialTransfer, 83 size_t bytesTransferred); 84 85 private: 86 file_cache_ref* fRef; 87 VMCache* fCache; 88 vm_page** fPages; 89 size_t fPageCount; 90 ConditionVariable* fBusyConditions; 91 iovec* fVecs; 92 off_t fOffset; 93 uint32 fVecCount; 94 size_t fSize; 95 }; 96 97 typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset, 98 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 99 size_t lastReservedPages, size_t reservePages); 100 101 static void add_to_iovec(iovec* vecs, uint32 &index, uint32 max, addr_t address, 102 size_t size); 103 104 105 static struct cache_module_info* sCacheModule; 106 static const uint8 kZeroBuffer[4096] = {}; 107 108 109 // #pragma mark - 110 111 112 PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, size_t size) 113 : 114 fRef(ref), 115 fCache(ref->cache), 116 fPages(NULL), 117 fBusyConditions(NULL), 118 fVecs(NULL), 119 fOffset(offset), 120 fVecCount(0), 121 fSize(size) 122 { 123 fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE; 124 fCache->AcquireRefLocked(); 125 } 126 127 128 PrecacheIO::~PrecacheIO() 129 { 130 delete[] fPages; 131 delete[] fBusyConditions; 132 delete[] fVecs; 133 fCache->ReleaseRefLocked(); 134 } 135 136 137 status_t 138 PrecacheIO::Prepare() 139 { 140 if (fPageCount == 0) 141 return B_BAD_VALUE; 142 143 fPages = new(std::nothrow) vm_page*[fPageCount]; 144 if (fPages == NULL) 145 return B_NO_MEMORY; 146 147 fBusyConditions = new(std::nothrow) ConditionVariable[fPageCount]; 148 if (fBusyConditions == NULL) 149 return B_NO_MEMORY; 150 151 fVecs = new(std::nothrow) iovec[fPageCount]; 152 if (fVecs == NULL) 153 return B_NO_MEMORY; 154 155 // allocate pages for the cache and mark them busy 156 uint32 i = 0; 157 for (size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) { 158 vm_page* page = vm_page_allocate_page(PAGE_STATE_FREE, true); 159 if (page == NULL) 160 break; 161 162 fBusyConditions[i].Publish(page, "page"); 163 fCache->InsertPage(page, fOffset + pos); 164 165 add_to_iovec(fVecs, fVecCount, fPageCount, 166 page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE); 167 fPages[i++] = page; 168 } 169 170 if (i != fPageCount) { 171 // allocating pages failed 172 while (i-- > 0) { 173 fBusyConditions[i].Unpublish(); 174 fCache->RemovePage(fPages[i]); 175 vm_page_set_state(fPages[i], PAGE_STATE_FREE); 176 } 177 return B_NO_MEMORY; 178 } 179 180 return B_OK; 181 } 182 183 184 void 185 PrecacheIO::ReadAsync() 186 { 187 // This object is going to be deleted after the I/O request has been 188 // fulfilled 189 vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount, 190 fSize, B_PHYSICAL_IO_REQUEST, this); 191 } 192 193 194 void 195 PrecacheIO::IOFinished(status_t status, bool partialTransfer, 196 size_t bytesTransferred) 197 { 198 AutoLocker<VMCache> locker(fCache); 199 200 // Make successfully loaded pages accessible again (partially 201 // transferred pages are considered failed) 202 size_t pagesTransferred 203 = (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE; 204 205 if (fOffset + bytesTransferred > fCache->virtual_end) 206 bytesTransferred = fCache->virtual_end - fOffset; 207 208 for (uint32 i = 0; i < pagesTransferred; i++) { 209 if (i == pagesTransferred - 1 210 && (bytesTransferred % B_PAGE_SIZE) != 0) { 211 // clear partial page 212 size_t bytesTouched = bytesTransferred % B_PAGE_SIZE; 213 vm_memset_physical((fPages[i]->physical_page_number << PAGE_SHIFT) 214 + bytesTouched, 0, B_PAGE_SIZE - bytesTouched); 215 } 216 217 fPages[i]->state = PAGE_STATE_ACTIVE; 218 fBusyConditions[i].Unpublish(); 219 } 220 221 // Free pages after failed I/O 222 for (uint32 i = pagesTransferred; i < fPageCount; i++) { 223 fBusyConditions[i].Unpublish(); 224 fCache->RemovePage(fPages[i]); 225 vm_page_set_state(fPages[i], PAGE_STATE_FREE); 226 } 227 228 delete this; 229 } 230 231 232 // #pragma mark - 233 234 235 static void 236 add_to_iovec(iovec* vecs, uint32 &index, uint32 max, addr_t address, 237 size_t size) 238 { 239 if (index > 0 && (addr_t)vecs[index - 1].iov_base 240 + vecs[index - 1].iov_len == address) { 241 // the iovec can be combined with the previous one 242 vecs[index - 1].iov_len += size; 243 return; 244 } 245 246 if (index == max) 247 panic("no more space for iovecs!"); 248 249 // we need to start a new iovec 250 vecs[index].iov_base = (void*)address; 251 vecs[index].iov_len = size; 252 index++; 253 } 254 255 256 static inline bool 257 access_is_sequential(file_cache_ref* ref) 258 { 259 return ref->last_access[ref->last_access_index] != 0; 260 } 261 262 263 static inline void 264 push_access(file_cache_ref* ref, off_t offset, size_t bytes, bool isWrite) 265 { 266 TRACE(("%p: push %Ld, %ld, %s\n", ref, offset, bytes, 267 isWrite ? "write" : "read")); 268 269 int32 index = ref->last_access_index; 270 int32 previous = index - 1; 271 if (previous < 0) 272 previous = LAST_ACCESSES - 1; 273 274 if (offset != ref->LastAccess(previous, isWrite)) 275 ref->last_access[previous] = 0; 276 277 ref->SetLastAccess(index, offset + bytes, isWrite); 278 279 if (++index >= LAST_ACCESSES) 280 index = 0; 281 ref->last_access_index = index; 282 } 283 284 285 static void 286 reserve_pages(file_cache_ref* ref, size_t reservePages, bool isWrite) 287 { 288 if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) { 289 vm_cache* cache = ref->cache; 290 cache->Lock(); 291 292 if (list_is_empty(&cache->consumers) && cache->areas == NULL 293 && access_is_sequential(ref)) { 294 // we are not mapped, and we're accessed sequentially 295 296 if (isWrite) { 297 // just schedule some pages to be written back 298 int32 index = ref->last_access_index; 299 int32 previous = index - 1; 300 if (previous < 0) 301 previous = LAST_ACCESSES - 1; 302 303 vm_page_schedule_write_page_range(cache, 304 ref->LastAccessPageOffset(previous, true), 305 ref->LastAccessPageOffset(index, true)); 306 } else { 307 // free some pages from our cache 308 // TODO: start with oldest 309 uint32 left = reservePages; 310 vm_page* page; 311 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 312 (page = it.Next()) != NULL && left > 0;) { 313 if (page->state != PAGE_STATE_MODIFIED 314 && page->state != PAGE_STATE_BUSY) { 315 cache->RemovePage(page); 316 vm_page_set_state(page, PAGE_STATE_FREE); 317 left--; 318 } 319 } 320 } 321 } 322 cache->Unlock(); 323 } 324 325 vm_page_reserve_pages(reservePages); 326 } 327 328 329 static inline status_t 330 read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset, 331 const iovec* vecs, size_t count, uint32 flags, size_t* _numBytes) 332 { 333 size_t bytesUntouched = *_numBytes; 334 335 status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count, 336 flags, _numBytes); 337 338 size_t bytesEnd = *_numBytes; 339 340 if (offset + bytesEnd > ref->cache->virtual_end) 341 bytesEnd = ref->cache->virtual_end - offset; 342 343 if (status == B_OK && bytesEnd < bytesUntouched) { 344 // Clear out any leftovers that were not touched by the above read. 345 // We're doing this here so that not every file system/device has to 346 // implement this. 347 bytesUntouched -= bytesEnd; 348 349 for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) { 350 size_t length = min_c(bytesUntouched, vecs[i].iov_len); 351 vm_memset_physical((addr_t)vecs[i].iov_base + vecs[i].iov_len 352 - length, 0, length); 353 354 bytesUntouched -= length; 355 } 356 } 357 358 return status; 359 } 360 361 362 /*! Reads the requested amount of data into the cache, and allocates 363 pages needed to fulfill that request. This function is called by cache_io(). 364 It can only handle a certain amount of bytes, and the caller must make 365 sure that it matches that criterion. 366 The cache_ref lock must be hold when calling this function; during 367 operation it will unlock the cache, though. 368 */ 369 static status_t 370 read_into_cache(file_cache_ref* ref, void* cookie, off_t offset, 371 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 372 size_t lastReservedPages, size_t reservePages) 373 { 374 TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, " 375 "bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize)); 376 377 vm_cache* cache = ref->cache; 378 379 // TODO: We're using way too much stack! Rather allocate a sufficiently 380 // large chunk on the heap. 381 iovec vecs[MAX_IO_VECS]; 382 uint32 vecCount = 0; 383 384 size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize); 385 vm_page* pages[MAX_IO_VECS]; 386 ConditionVariable busyConditions[MAX_IO_VECS]; 387 int32 pageIndex = 0; 388 389 // allocate pages for the cache and mark them busy 390 for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) { 391 vm_page* page = pages[pageIndex++] = vm_page_allocate_page( 392 PAGE_STATE_FREE, true); 393 if (page == NULL) 394 panic("no more pages!"); 395 396 busyConditions[pageIndex - 1].Publish(page, "page"); 397 398 cache->InsertPage(page, offset + pos); 399 400 add_to_iovec(vecs, vecCount, MAX_IO_VECS, 401 page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE); 402 // TODO: check if the array is large enough (currently panics)! 403 } 404 405 push_access(ref, offset, bufferSize, false); 406 cache->Unlock(); 407 vm_page_unreserve_pages(lastReservedPages); 408 409 // read file into reserved pages 410 status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs, 411 vecCount, B_PHYSICAL_IO_REQUEST, &numBytes); 412 if (status != B_OK) { 413 // reading failed, free allocated pages 414 415 dprintf("file_cache: read pages failed: %s\n", strerror(status)); 416 417 cache->Lock(); 418 419 for (int32 i = 0; i < pageIndex; i++) { 420 busyConditions[i].Unpublish(); 421 cache->RemovePage(pages[i]); 422 vm_page_set_state(pages[i], PAGE_STATE_FREE); 423 } 424 425 return status; 426 } 427 428 // copy the pages if needed and unmap them again 429 430 for (int32 i = 0; i < pageIndex; i++) { 431 if (useBuffer && bufferSize != 0) { 432 size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset); 433 434 vm_memcpy_from_physical((void*)buffer, 435 pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset, 436 bytes, true); 437 438 buffer += bytes; 439 bufferSize -= bytes; 440 pageOffset = 0; 441 } 442 } 443 444 reserve_pages(ref, reservePages, false); 445 cache->Lock(); 446 447 // make the pages accessible in the cache 448 for (int32 i = pageIndex; i-- > 0;) { 449 pages[i]->state = PAGE_STATE_ACTIVE; 450 451 busyConditions[i].Unpublish(); 452 } 453 454 return B_OK; 455 } 456 457 458 static status_t 459 read_from_file(file_cache_ref* ref, void* cookie, off_t offset, 460 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 461 size_t lastReservedPages, size_t reservePages) 462 { 463 TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, " 464 "bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize)); 465 466 if (!useBuffer) 467 return B_OK; 468 469 iovec vec; 470 vec.iov_base = (void*)buffer; 471 vec.iov_len = bufferSize; 472 473 push_access(ref, offset, bufferSize, false); 474 ref->cache->Unlock(); 475 vm_page_unreserve_pages(lastReservedPages); 476 477 status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset, 478 &vec, 1, 0, &bufferSize); 479 480 if (status == B_OK) 481 reserve_pages(ref, reservePages, false); 482 483 ref->cache->Lock(); 484 485 return status; 486 } 487 488 489 /*! Like read_into_cache() but writes data into the cache. 490 To preserve data consistency, it might also read pages into the cache, 491 though, if only a partial page gets written. 492 The same restrictions apply. 493 */ 494 static status_t 495 write_to_cache(file_cache_ref* ref, void* cookie, off_t offset, 496 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 497 size_t lastReservedPages, size_t reservePages) 498 { 499 // TODO: We're using way too much stack! Rather allocate a sufficiently 500 // large chunk on the heap. 501 iovec vecs[MAX_IO_VECS]; 502 uint32 vecCount = 0; 503 size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize); 504 vm_page* pages[MAX_IO_VECS]; 505 int32 pageIndex = 0; 506 status_t status = B_OK; 507 ConditionVariable busyConditions[MAX_IO_VECS]; 508 509 // ToDo: this should be settable somewhere 510 bool writeThrough = false; 511 512 // allocate pages for the cache and mark them busy 513 for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) { 514 // TODO: if space is becoming tight, and this cache is already grown 515 // big - shouldn't we better steal the pages directly in that case? 516 // (a working set like approach for the file cache) 517 // TODO: the pages we allocate here should have been reserved upfront 518 // in cache_io() 519 vm_page* page = pages[pageIndex++] = vm_page_allocate_page( 520 PAGE_STATE_FREE, true); 521 busyConditions[pageIndex - 1].Publish(page, "page"); 522 523 ref->cache->InsertPage(page, offset + pos); 524 525 add_to_iovec(vecs, vecCount, MAX_IO_VECS, 526 page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE); 527 } 528 529 push_access(ref, offset, bufferSize, true); 530 ref->cache->Unlock(); 531 vm_page_unreserve_pages(lastReservedPages); 532 533 // copy contents (and read in partially written pages first) 534 535 if (pageOffset != 0) { 536 // This is only a partial write, so we have to read the rest of the page 537 // from the file to have consistent data in the cache 538 iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE }; 539 size_t bytesRead = B_PAGE_SIZE; 540 541 status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1, 542 B_PHYSICAL_IO_REQUEST, &bytesRead); 543 // ToDo: handle errors for real! 544 if (status < B_OK) 545 panic("1. vfs_read_pages() failed: %s!\n", strerror(status)); 546 } 547 548 addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1); 549 if (lastPageOffset != 0) { 550 // get the last page in the I/O vectors 551 addr_t last = (addr_t)vecs[vecCount - 1].iov_base 552 + vecs[vecCount - 1].iov_len - B_PAGE_SIZE; 553 554 if (offset + pageOffset + bufferSize == ref->cache->virtual_end) { 555 // the space in the page after this write action needs to be cleaned 556 vm_memset_physical(last + lastPageOffset, 0, 557 B_PAGE_SIZE - lastPageOffset); 558 } else { 559 // the end of this write does not happen on a page boundary, so we 560 // need to fetch the last page before we can update it 561 iovec readVec = { (void*)last, B_PAGE_SIZE }; 562 size_t bytesRead = B_PAGE_SIZE; 563 564 status = vfs_read_pages(ref->vnode, cookie, 565 PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE, 566 &readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead); 567 // ToDo: handle errors for real! 568 if (status < B_OK) 569 panic("vfs_read_pages() failed: %s!\n", strerror(status)); 570 571 if (bytesRead < B_PAGE_SIZE) { 572 // the space beyond the file size needs to be cleaned 573 vm_memset_physical(last + bytesRead, 0, 574 B_PAGE_SIZE - bytesRead); 575 } 576 } 577 } 578 579 for (uint32 i = 0; i < vecCount; i++) { 580 addr_t base = (addr_t)vecs[i].iov_base; 581 size_t bytes = min_c(bufferSize, 582 size_t(vecs[i].iov_len - pageOffset)); 583 584 if (useBuffer) { 585 // copy data from user buffer 586 vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes, 587 true); 588 } else { 589 // clear buffer instead 590 vm_memset_physical(base + pageOffset, 0, bytes); 591 } 592 593 bufferSize -= bytes; 594 if (bufferSize == 0) 595 break; 596 597 buffer += bytes; 598 pageOffset = 0; 599 } 600 601 if (writeThrough) { 602 // write cached pages back to the file if we were asked to do that 603 status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs, 604 vecCount, B_PHYSICAL_IO_REQUEST, &numBytes); 605 if (status < B_OK) { 606 // ToDo: remove allocated pages, ...? 607 panic("file_cache: remove allocated pages! write pages failed: %s\n", 608 strerror(status)); 609 } 610 } 611 612 if (status == B_OK) 613 reserve_pages(ref, reservePages, true); 614 615 ref->cache->Lock(); 616 617 // make the pages accessible in the cache 618 for (int32 i = pageIndex; i-- > 0;) { 619 busyConditions[i].Unpublish(); 620 621 if (writeThrough) 622 pages[i]->state = PAGE_STATE_ACTIVE; 623 else 624 vm_page_set_state(pages[i], PAGE_STATE_MODIFIED); 625 } 626 627 return status; 628 } 629 630 631 static status_t 632 write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset, 633 addr_t buffer, size_t bufferSize, bool useBuffer, size_t lastReservedPages, 634 size_t reservePages) 635 { 636 size_t chunkSize = 0; 637 if (!useBuffer) { 638 // we need to allocate a zero buffer 639 // TODO: use smaller buffers if this fails 640 chunkSize = min_c(bufferSize, B_PAGE_SIZE); 641 buffer = (addr_t)malloc(chunkSize); 642 if (buffer == 0) 643 return B_NO_MEMORY; 644 645 memset((void*)buffer, 0, chunkSize); 646 } 647 648 iovec vec; 649 vec.iov_base = (void*)buffer; 650 vec.iov_len = bufferSize; 651 652 push_access(ref, offset, bufferSize, true); 653 ref->cache->Unlock(); 654 vm_page_unreserve_pages(lastReservedPages); 655 656 status_t status = B_OK; 657 658 if (!useBuffer) { 659 while (bufferSize > 0) { 660 if (bufferSize < chunkSize) 661 chunkSize = bufferSize; 662 663 status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset, 664 &vec, 1, 0, &chunkSize); 665 if (status < B_OK) 666 break; 667 668 bufferSize -= chunkSize; 669 pageOffset += chunkSize; 670 } 671 672 free((void*)buffer); 673 } else { 674 status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset, 675 &vec, 1, 0, &bufferSize); 676 } 677 678 if (status == B_OK) 679 reserve_pages(ref, reservePages, true); 680 681 ref->cache->Lock(); 682 683 return status; 684 } 685 686 687 static inline status_t 688 satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function, 689 off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset, 690 size_t bytesLeft, size_t &reservePages, off_t &lastOffset, 691 addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft, 692 size_t &lastReservedPages) 693 { 694 if (lastBuffer == buffer) 695 return B_OK; 696 697 size_t requestSize = buffer - lastBuffer; 698 reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize 699 + lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 700 701 status_t status = function(ref, cookie, lastOffset, lastPageOffset, 702 lastBuffer, requestSize, useBuffer, lastReservedPages, reservePages); 703 if (status == B_OK) { 704 lastReservedPages = reservePages; 705 lastBuffer = buffer; 706 lastLeft = bytesLeft; 707 lastOffset = offset; 708 lastPageOffset = 0; 709 pageOffset = 0; 710 } 711 return status; 712 } 713 714 715 static status_t 716 cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer, 717 size_t* _size, bool doWrite) 718 { 719 if (_cacheRef == NULL) 720 panic("cache_io() called with NULL ref!\n"); 721 722 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 723 vm_cache* cache = ref->cache; 724 off_t fileSize = cache->virtual_end; 725 bool useBuffer = buffer != 0; 726 727 TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n", 728 ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read")); 729 730 // out of bounds access? 731 if (offset >= fileSize || offset < 0) { 732 *_size = 0; 733 return B_OK; 734 } 735 736 int32 pageOffset = offset & (B_PAGE_SIZE - 1); 737 size_t size = *_size; 738 offset -= pageOffset; 739 740 if (offset + pageOffset + size > fileSize) { 741 // adapt size to be within the file's offsets 742 size = fileSize - pageOffset - offset; 743 *_size = size; 744 } 745 if (size == 0) 746 return B_OK; 747 748 cache_func function; 749 if (doWrite) { 750 // in low memory situations, we bypass the cache beyond a 751 // certain I/O size 752 if (size >= BYPASS_IO_SIZE 753 && low_resource_state(B_KERNEL_RESOURCE_PAGES) 754 != B_NO_LOW_RESOURCE) { 755 function = write_to_file; 756 } else 757 function = write_to_cache; 758 } else { 759 if (size >= BYPASS_IO_SIZE 760 && low_resource_state(B_KERNEL_RESOURCE_PAGES) 761 != B_NO_LOW_RESOURCE) { 762 function = read_from_file; 763 } else 764 function = read_into_cache; 765 } 766 767 // "offset" and "lastOffset" are always aligned to B_PAGE_SIZE, 768 // the "last*" variables always point to the end of the last 769 // satisfied request part 770 771 const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE; 772 size_t bytesLeft = size, lastLeft = size; 773 int32 lastPageOffset = pageOffset; 774 addr_t lastBuffer = buffer; 775 off_t lastOffset = offset; 776 size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft 777 + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 778 size_t reservePages = 0; 779 780 reserve_pages(ref, lastReservedPages, doWrite); 781 AutoLocker<VMCache> locker(cache); 782 783 while (bytesLeft > 0) { 784 // check if this page is already in memory 785 vm_page* page = cache->LookupPage(offset); 786 if (page != NULL) { 787 // The page may be busy - since we need to unlock the cache sometime 788 // in the near future, we need to satisfy the request of the pages 789 // we didn't get yet (to make sure no one else interferes in the 790 // mean time). 791 status_t status = satisfy_cache_io(ref, cookie, function, offset, 792 buffer, useBuffer, pageOffset, bytesLeft, reservePages, 793 lastOffset, lastBuffer, lastPageOffset, lastLeft, 794 lastReservedPages); 795 if (status != B_OK) 796 return status; 797 798 if (page->state == PAGE_STATE_BUSY) { 799 ConditionVariableEntry entry; 800 entry.Add(page); 801 locker.Unlock(); 802 entry.Wait(); 803 locker.Lock(); 804 continue; 805 } 806 } 807 808 size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft); 809 810 TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset " 811 "= %lu\n", offset, page, bytesLeft, pageOffset)); 812 813 if (page != NULL) { 814 // Since we don't actually map pages as part of an area, we have 815 // to manually maintain their usage_count 816 page->usage_count = 2; 817 818 if (doWrite || useBuffer) { 819 // Since the following user_mem{cpy,set}() might cause a page 820 // fault, which in turn might cause pages to be reserved, we 821 // need to unlock the cache temporarily to avoid a potential 822 // deadlock. To make sure that our page doesn't go away, we mark 823 // it busy for the time. 824 uint8 oldPageState = page->state; 825 page->state = PAGE_STATE_BUSY; 826 locker.Unlock(); 827 828 // copy the contents of the page already in memory 829 addr_t pageAddress = page->physical_page_number * B_PAGE_SIZE 830 + pageOffset; 831 if (doWrite) { 832 if (useBuffer) { 833 vm_memcpy_to_physical(pageAddress, (void*)buffer, 834 bytesInPage, true); 835 } else { 836 vm_memset_physical(pageAddress, 0, bytesInPage); 837 } 838 } else if (useBuffer) { 839 vm_memcpy_from_physical((void*)buffer, pageAddress, 840 bytesInPage, true); 841 } 842 843 locker.Lock(); 844 845 page->state = oldPageState; 846 if (doWrite && page->state != PAGE_STATE_MODIFIED) 847 vm_page_set_state(page, PAGE_STATE_MODIFIED); 848 } 849 850 if (bytesLeft <= bytesInPage) { 851 // we've read the last page, so we're done! 852 locker.Unlock(); 853 vm_page_unreserve_pages(lastReservedPages); 854 return B_OK; 855 } 856 857 // prepare a potential gap request 858 lastBuffer = buffer + bytesInPage; 859 lastLeft = bytesLeft - bytesInPage; 860 lastOffset = offset + B_PAGE_SIZE; 861 lastPageOffset = 0; 862 } 863 864 if (bytesLeft <= bytesInPage) 865 break; 866 867 buffer += bytesInPage; 868 bytesLeft -= bytesInPage; 869 pageOffset = 0; 870 offset += B_PAGE_SIZE; 871 872 if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) { 873 status_t status = satisfy_cache_io(ref, cookie, function, offset, 874 buffer, useBuffer, pageOffset, bytesLeft, reservePages, 875 lastOffset, lastBuffer, lastPageOffset, lastLeft, 876 lastReservedPages); 877 if (status != B_OK) 878 return status; 879 } 880 } 881 882 // fill the last remaining bytes of the request (either write or read) 883 884 return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer, 885 lastLeft, useBuffer, lastReservedPages, 0); 886 } 887 888 889 static status_t 890 file_cache_control(const char* subsystem, uint32 function, void* buffer, 891 size_t bufferSize) 892 { 893 switch (function) { 894 case CACHE_CLEAR: 895 // ToDo: clear the cache 896 dprintf("cache_control: clear cache!\n"); 897 return B_OK; 898 899 case CACHE_SET_MODULE: 900 { 901 cache_module_info* module = sCacheModule; 902 903 // unset previous module 904 905 if (sCacheModule != NULL) { 906 sCacheModule = NULL; 907 snooze(100000); // 0.1 secs 908 put_module(module->info.name); 909 } 910 911 // get new module, if any 912 913 if (buffer == NULL) 914 return B_OK; 915 916 char name[B_FILE_NAME_LENGTH]; 917 if (!IS_USER_ADDRESS(buffer) 918 || user_strlcpy(name, (char*)buffer, 919 B_FILE_NAME_LENGTH) < B_OK) 920 return B_BAD_ADDRESS; 921 922 if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME))) 923 return B_BAD_VALUE; 924 925 dprintf("cache_control: set module %s!\n", name); 926 927 status_t status = get_module(name, (module_info**)&module); 928 if (status == B_OK) 929 sCacheModule = module; 930 931 return status; 932 } 933 } 934 935 return B_BAD_HANDLER; 936 } 937 938 939 // #pragma mark - private kernel API 940 941 942 extern "C" void 943 cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size) 944 { 945 if (size == 0) 946 return; 947 948 vm_cache* cache; 949 if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK) 950 return; 951 952 file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef(); 953 off_t fileSize = cache->virtual_end; 954 955 if (offset + size > fileSize) 956 size = fileSize - offset; 957 size_t reservePages = size / B_PAGE_SIZE; 958 959 // Don't do anything if we don't have the resources left, or the cache 960 // already contains more than 2/3 of its pages 961 if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages 962 || 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) { 963 cache->ReleaseRef(); 964 return; 965 } 966 967 // "offset" and "size" are always aligned to B_PAGE_SIZE, 968 offset &= ~(B_PAGE_SIZE - 1); 969 size = ROUNDUP(size, B_PAGE_SIZE); 970 971 size_t bytesToRead = 0; 972 off_t lastOffset = offset; 973 974 vm_page_reserve_pages(reservePages); 975 976 cache->Lock(); 977 978 while (true) { 979 // check if this page is already in memory 980 if (size > 0) { 981 vm_page* page = cache->LookupPage(offset); 982 983 offset += B_PAGE_SIZE; 984 size -= B_PAGE_SIZE; 985 986 if (page == NULL) { 987 bytesToRead += B_PAGE_SIZE; 988 continue; 989 } 990 } 991 if (bytesToRead != 0) { 992 // read the part before the current page (or the end of the request) 993 PrecacheIO* io 994 = new(std::nothrow) PrecacheIO(ref, lastOffset, bytesToRead); 995 if (io == NULL || io->Prepare() != B_OK) { 996 delete io; 997 break; 998 } 999 1000 // we must not have the cache locked during I/O 1001 cache->Unlock(); 1002 io->ReadAsync(); 1003 cache->Lock(); 1004 1005 bytesToRead = 0; 1006 } 1007 1008 if (size == 0) { 1009 // we have reached the end of the request 1010 break; 1011 } 1012 1013 lastOffset = offset; 1014 } 1015 1016 cache->ReleaseRefAndUnlock(); 1017 vm_page_unreserve_pages(reservePages); 1018 } 1019 1020 1021 extern "C" void 1022 cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size) 1023 { 1024 // ToDo: schedule prefetch 1025 1026 TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID)); 1027 1028 // get the vnode for the object, this also grabs a ref to it 1029 struct vnode* vnode; 1030 if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK) 1031 return; 1032 1033 cache_prefetch_vnode(vnode, offset, size); 1034 vfs_put_vnode(vnode); 1035 } 1036 1037 1038 extern "C" void 1039 cache_node_opened(struct vnode* vnode, int32 fdType, vm_cache* cache, 1040 dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name) 1041 { 1042 if (sCacheModule == NULL || sCacheModule->node_opened == NULL) 1043 return; 1044 1045 off_t size = -1; 1046 if (cache != NULL) { 1047 file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef(); 1048 if (ref != NULL) 1049 size = cache->virtual_end; 1050 } 1051 1052 sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name, 1053 size); 1054 } 1055 1056 1057 extern "C" void 1058 cache_node_closed(struct vnode* vnode, int32 fdType, vm_cache* cache, 1059 dev_t mountID, ino_t vnodeID) 1060 { 1061 if (sCacheModule == NULL || sCacheModule->node_closed == NULL) 1062 return; 1063 1064 int32 accessType = 0; 1065 if (cache != NULL) { 1066 // ToDo: set accessType 1067 } 1068 1069 sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType); 1070 } 1071 1072 1073 extern "C" void 1074 cache_node_launched(size_t argCount, char* const* args) 1075 { 1076 if (sCacheModule == NULL || sCacheModule->node_launched == NULL) 1077 return; 1078 1079 sCacheModule->node_launched(argCount, args); 1080 } 1081 1082 1083 extern "C" status_t 1084 file_cache_init_post_boot_device(void) 1085 { 1086 // ToDo: get cache module out of driver settings 1087 1088 if (get_module("file_cache/launch_speedup/v1", 1089 (module_info**)&sCacheModule) == B_OK) { 1090 dprintf("** opened launch speedup: %Ld\n", system_time()); 1091 } 1092 return B_OK; 1093 } 1094 1095 1096 extern "C" status_t 1097 file_cache_init(void) 1098 { 1099 register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0); 1100 return B_OK; 1101 } 1102 1103 1104 // #pragma mark - public FS API 1105 1106 1107 extern "C" void* 1108 file_cache_create(dev_t mountID, ino_t vnodeID, off_t size) 1109 { 1110 TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld)\n", 1111 mountID, vnodeID, size)); 1112 1113 file_cache_ref* ref = new file_cache_ref; 1114 if (ref == NULL) 1115 return NULL; 1116 1117 memset(ref->last_access, 0, sizeof(ref->last_access)); 1118 ref->last_access_index = 0; 1119 ref->disabled_count = 0; 1120 1121 // TODO: delay vm_cache creation until data is 1122 // requested/written for the first time? Listing lots of 1123 // files in Tracker (and elsewhere) could be slowed down. 1124 // Since the file_cache_ref itself doesn't have a lock, 1125 // we would need to "rent" one during construction, possibly 1126 // the vnode lock, maybe a dedicated one. 1127 // As there shouldn't be too much contention, we could also 1128 // use atomic_test_and_set(), and free the resources again 1129 // when that fails... 1130 1131 // Get the vnode for the object 1132 // (note, this does not grab a reference to the node) 1133 if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK) 1134 goto err1; 1135 1136 // Gets (usually creates) the cache for the node 1137 if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK) 1138 goto err1; 1139 1140 ref->cache->virtual_end = size; 1141 ((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref); 1142 return ref; 1143 1144 err1: 1145 delete ref; 1146 return NULL; 1147 } 1148 1149 1150 extern "C" void 1151 file_cache_delete(void* _cacheRef) 1152 { 1153 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1154 1155 if (ref == NULL) 1156 return; 1157 1158 TRACE(("file_cache_delete(ref = %p)\n", ref)); 1159 1160 ref->cache->ReleaseRef(); 1161 delete ref; 1162 } 1163 1164 1165 extern "C" void 1166 file_cache_enable(void* _cacheRef) 1167 { 1168 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1169 1170 AutoLocker<VMCache> _(ref->cache); 1171 1172 if (ref->disabled_count == 0) { 1173 panic("Unbalanced file_cache_enable()!"); 1174 return; 1175 } 1176 1177 ref->disabled_count--; 1178 } 1179 1180 1181 extern "C" status_t 1182 file_cache_disable(void* _cacheRef) 1183 { 1184 // TODO: This function only removes all pages from the cache and prevents 1185 // that the file cache functions add any new ones until re-enabled. The 1186 // VM (on page fault) can still add pages, if the file is mmap()ed. We 1187 // should mark the cache to prevent shared mappings of the file and fix 1188 // the page fault code to deal correctly with private mappings (i.e. only 1189 // insert pages in consumer caches). 1190 1191 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1192 1193 AutoLocker<VMCache> _(ref->cache); 1194 1195 // If already disabled, there's nothing to do for us. 1196 if (ref->disabled_count > 0) { 1197 ref->disabled_count++; 1198 return B_OK; 1199 } 1200 1201 // The file cache is not yet disabled. We need to evict all cached pages. 1202 status_t error = ref->cache->FlushAndRemoveAllPages(); 1203 if (error != B_OK) 1204 return error; 1205 1206 ref->disabled_count++; 1207 return B_OK; 1208 } 1209 1210 1211 extern "C" bool 1212 file_cache_is_enabled(void* _cacheRef) 1213 { 1214 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1215 AutoLocker<VMCache> _(ref->cache); 1216 1217 return ref->disabled_count == 0; 1218 } 1219 1220 1221 extern "C" status_t 1222 file_cache_set_size(void* _cacheRef, off_t newSize) 1223 { 1224 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1225 1226 TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, newSize)); 1227 1228 if (ref == NULL) 1229 return B_OK; 1230 1231 VMCache* cache = ref->cache; 1232 AutoLocker<VMCache> _(cache); 1233 1234 off_t oldSize = cache->virtual_end; 1235 status_t status = cache->Resize(newSize); 1236 if (status == B_OK && newSize < oldSize) { 1237 // We may have a new partial page at the end of the cache that must be 1238 // cleared. 1239 uint32 partialBytes = newSize % B_PAGE_SIZE; 1240 if (partialBytes != 0) { 1241 vm_page* page = cache->LookupPage(newSize - partialBytes); 1242 if (page != NULL) { 1243 vm_memset_physical(page->physical_page_number * B_PAGE_SIZE 1244 + partialBytes, 0, B_PAGE_SIZE - partialBytes); 1245 } 1246 } 1247 } 1248 1249 return status; 1250 } 1251 1252 1253 extern "C" status_t 1254 file_cache_sync(void* _cacheRef) 1255 { 1256 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1257 if (ref == NULL) 1258 return B_BAD_VALUE; 1259 1260 return ref->cache->WriteModified(); 1261 } 1262 1263 1264 extern "C" status_t 1265 file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer, 1266 size_t* _size) 1267 { 1268 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1269 1270 TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n", 1271 ref, offset, buffer, *_size)); 1272 1273 if (ref->disabled_count > 0) { 1274 // Caching is disabled -- read directly from the file. 1275 iovec vec; 1276 vec.iov_base = buffer; 1277 vec.iov_len = *_size; 1278 return vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0, _size); 1279 } 1280 1281 return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false); 1282 } 1283 1284 1285 extern "C" status_t 1286 file_cache_write(void* _cacheRef, void* cookie, off_t offset, 1287 const void* buffer, size_t* _size) 1288 { 1289 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1290 1291 if (ref->disabled_count > 0) { 1292 // Caching is disabled -- write directly to the file. 1293 1294 if (buffer != NULL) { 1295 iovec vec; 1296 vec.iov_base = (void*)buffer; 1297 vec.iov_len = *_size; 1298 return vfs_write_pages(ref->vnode, cookie, offset, &vec, 1, 0, 1299 _size); 1300 } 1301 1302 // NULL buffer -- use a dummy buffer to write zeroes 1303 // TODO: This is not particularly efficient! 1304 iovec vec; 1305 vec.iov_base = (void*)kZeroBuffer; 1306 vec.iov_len = sizeof(kZeroBuffer); 1307 size_t size = *_size; 1308 while (size > 0) { 1309 size_t toWrite = min_c(size, vec.iov_len); 1310 size_t written = toWrite; 1311 status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec, 1312 1, 0, &written); 1313 if (error != B_OK) 1314 return error; 1315 if (written == 0) 1316 break; 1317 1318 offset += written; 1319 size -= written; 1320 } 1321 1322 *_size -= size; 1323 return B_OK; 1324 } 1325 1326 status_t status = cache_io(ref, cookie, offset, 1327 (addr_t)const_cast<void*>(buffer), _size, true); 1328 1329 TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu)" 1330 " = %ld\n", ref, offset, buffer, *_size, status)); 1331 1332 return status; 1333 } 1334 1335