/*
 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#include <KernelExport.h>
#include <fs_cache.h>

#include <condition_variable.h>
#include <file_cache.h>
#include <generic_syscall.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <util/AutoLock.h>
#include <util/kernel_cpp.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/VMCache.h>

#include "IORequest.h"


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			32	// 128 kB

#define BYPASS_IO_SIZE		65536
#define LAST_ACCESSES		3

struct file_cache_ref {
	VMCache			*cache;
	struct vnode	*vnode;
	off_t			last_access[LAST_ACCESSES];
		// TODO: it would probably be enough to only store the least
		//	significant 31 bits, and make this uint32 (one bit for
		//	write vs. read)
	int32			last_access_index;
	uint16			disabled_count;

	inline void SetLastAccess(int32 index, off_t access, bool isWrite)
	{
		// we remember writes as negative offsets
		last_access[index] = isWrite ? -access : access;
	}

	inline off_t LastAccess(int32 index, bool isWrite) const
	{
		return isWrite ? -last_access[index] : last_access[index];
	}

	inline uint32 LastAccessPageOffset(int32 index, bool isWrite)
	{
		return LastAccess(index, isWrite) >> PAGE_SHIFT;
	}
};

class PrecacheIO : public AsyncIOCallback {
public:
								PrecacheIO(file_cache_ref* ref, off_t offset,
									generic_size_t size);
								~PrecacheIO();

			status_t			Prepare(vm_page_reservation* reservation);
			void				ReadAsync();

	virtual	void				IOFinished(status_t status,
									bool partialTransfer,
									generic_size_t bytesTransferred);

private:
			file_cache_ref*		fRef;
			VMCache*			fCache;
			vm_page**			fPages;
			size_t				fPageCount;
			ConditionVariable*	fBusyConditions;
			generic_io_vec*		fVecs;
			off_t				fOffset;
			uint32				fVecCount;
			generic_size_t		fSize;
#if DEBUG_PAGE_ACCESS
			thread_id			fAllocatingThread;
#endif
};

typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages);

static void add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
	generic_addr_t address, generic_size_t size);


static struct cache_module_info* sCacheModule;


static const uint32 kZeroVecCount = 32;
static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE;
static phys_addr_t sZeroPage;
static generic_io_vec sZeroVecs[kZeroVecCount];


//	#pragma mark -


PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, generic_size_t size)
	:
	fRef(ref),
	fCache(ref->cache),
	fPages(NULL),
	fVecs(NULL),
	fOffset(offset),
	fVecCount(0),
	fSize(size)
{
	fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
	fCache->AcquireRefLocked();
	fCache->AcquireStoreRef();
}


PrecacheIO::~PrecacheIO()
{
	delete[] fPages;
	delete[] fVecs;
	fCache->ReleaseStoreRef();
	fCache->ReleaseRefLocked();
}


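// Allocates busy pages covering this object's range from the given
// reservation and inserts them into the cache. The caller must hold the
// cache lock and must have reserved enough pages beforehand.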
status_t
PrecacheIO::Prepare(vm_page_reservation* reservation)
{
	if (fPageCount == 0)
		return B_BAD_VALUE;

	fPages = new(std::nothrow) vm_page*[fPageCount];
	if (fPages == NULL)
		return B_NO_MEMORY;

	fVecs = new(std::nothrow) generic_io_vec[fPageCount];
	if (fVecs == NULL)
		return B_NO_MEMORY;

	// allocate pages for the cache and mark them busy
	uint32 i = 0;
	for (generic_size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) {
		vm_page* page = vm_page_allocate_page(reservation,
			PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		fCache->InsertPage(page, fOffset + pos);

		add_to_iovec(fVecs, fVecCount, fPageCount,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
		fPages[i++] = page;
	}

#if DEBUG_PAGE_ACCESS
	fAllocatingThread = find_thread(NULL);
#endif

	return B_OK;
}


void
PrecacheIO::ReadAsync()
{
	// This object is going to be deleted after the I/O request has been
	// fulfilled
	vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount,
		fSize, B_PHYSICAL_IO_REQUEST, this);
}


void
PrecacheIO::IOFinished(status_t status, bool partialTransfer,
	generic_size_t bytesTransferred)
{
	AutoLocker<VMCache> locker(fCache);

	// Make successfully loaded pages accessible again (partially
	// transferred pages are considered failed)
	phys_size_t pagesTransferred
		= (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE;

	if ((fOffset + (off_t)bytesTransferred) > fCache->virtual_end)
		bytesTransferred = fCache->virtual_end - fOffset;

	for (uint32 i = 0; i < pagesTransferred; i++) {
		if (i == pagesTransferred - 1
			&& (bytesTransferred % B_PAGE_SIZE) != 0) {
			// clear partial page
			size_t bytesTouched = bytesTransferred % B_PAGE_SIZE;
			vm_memset_physical(
				((phys_addr_t)fPages[i]->physical_page_number << PAGE_SHIFT)
					+ bytesTouched,
				0, B_PAGE_SIZE - bytesTouched);
		}

		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);

		fCache->MarkPageUnbusy(fPages[i]);

		DEBUG_PAGE_ACCESS_END(fPages[i]);
	}

	// Free pages after failed I/O
	for (uint32 i = pagesTransferred; i < fPageCount; i++) {
		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
		fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY);
		fCache->RemovePage(fPages[i]);
		vm_page_set_state(fPages[i], PAGE_STATE_FREE);
	}

	delete this;
}


//	#pragma mark -


static void
add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
	generic_addr_t address, generic_size_t size)
{
	if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].length += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].base = address;
	vecs[index].length = size;
	index++;
}


static inline bool
access_is_sequential(file_cache_ref* ref)
{
	return ref->last_access[ref->last_access_index] != 0;
}


// Records the end offset of an access in the ref's small ring buffer of
// recent accesses. Writes are stored as negative offsets so that they can be
// told apart from reads (see file_cache_ref::SetLastAccess()).
static inline void
push_access(file_cache_ref* ref, off_t offset, generic_size_t bytes,
	bool isWrite)
{
	TRACE(("%p: push %lld, %ld, %s\n", ref, offset, bytes,
		isWrite ? "write" : "read"));

	int32 index = ref->last_access_index;
	int32 previous = index - 1;
	if (previous < 0)
		previous = LAST_ACCESSES - 1;

	if (offset != ref->LastAccess(previous, isWrite))
		ref->last_access[previous] = 0;

	ref->SetLastAccess(index, offset + bytes, isWrite);

	if (++index >= LAST_ACCESSES)
		index = 0;
	ref->last_access_index = index;
}


static void
reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation,
	size_t reservePages, bool isWrite)
{
	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
		VMCache* cache = ref->cache;
		cache->Lock();

		if (cache->consumers.IsEmpty() && cache->areas == NULL
			&& access_is_sequential(ref)) {
			// we are not mapped, and we're accessed sequentially

			if (isWrite) {
				// Just write some pages back, and actually wait until they
				// have been written back in order to relieve the page pressure
				// a bit.
				int32 index = ref->last_access_index;
				int32 previous = index - 1;
				if (previous < 0)
					previous = LAST_ACCESSES - 1;

				vm_page_write_modified_page_range(cache,
					ref->LastAccessPageOffset(previous, true),
					ref->LastAccessPageOffset(index, true));
			} else {
				// free some pages from our cache
				// TODO: start with oldest
				uint32 left = reservePages;
				vm_page* page;
				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
						(page = it.Next()) != NULL && left > 0;) {
					if (page->State() == PAGE_STATE_CACHED && !page->busy) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT(!page->IsMapped());
						ASSERT(!page->modified);
						cache->RemovePage(page);
						vm_page_set_state(page, PAGE_STATE_FREE);
						left--;
					}
				}
			}
		}
		cache->Unlock();
	}

	vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER);
}


static inline status_t
read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
	const generic_io_vec* vecs, size_t count, uint32 flags,
	generic_size_t* _numBytes)
{
	generic_size_t bytesUntouched = *_numBytes;

	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
		flags, _numBytes);

	generic_size_t bytesEnd = *_numBytes;

	if (offset + (off_t)bytesEnd > ref->cache->virtual_end)
		bytesEnd = ref->cache->virtual_end - offset;

	if (status == B_OK && bytesEnd < bytesUntouched) {
		// Clear out any leftovers that were not touched by the above read.
		// We're doing this here so that not every file system/device has to
		// implement this.
		bytesUntouched -= bytesEnd;

		for (int32 i = count; i-- > 0 && bytesUntouched != 0;) {
			generic_size_t length = min_c(bytesUntouched, vecs[i].length);
			vm_memset_physical(vecs[i].base + vecs[i].length - length, 0,
				length);

			bytesUntouched -= length;
		}
	}

	return status;
}


/*!	Reads the requested amount of data into the cache, and allocates
	pages needed to fulfill that request. This function is called by cache_io().
	It can only handle a limited number of bytes per call (at most
	MAX_IO_VECS pages worth), and the caller must make sure the request
	stays within that limit.
	The cache_ref lock must be held when calling this function; during
	operation it will unlock the cache, though.
*/
static status_t
read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_into_cache(offset = %lld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	VMCache* cache = ref->cache;

	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;

	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
			// TODO: check if the array is large enough (currently panics)!
	}

	push_access(ref, offset, bufferSize, false);
	cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// read file into reserved pages
	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
	if (status != B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		cache->Lock();

		for (int32 i = 0; i < pageIndex; i++) {
			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
			cache->RemovePage(pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages if needed and unmap them again

	for (int32 i = 0; i < pageIndex; i++) {
		if (useBuffer && bufferSize != 0) {
			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);

			vm_memcpy_from_physical((void*)buffer,
				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
				bytes, IS_USER_ADDRESS(buffer));

			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}
	}

	reserve_pages(ref, reservation, reservePages, false);
	cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		DEBUG_PAGE_ACCESS_END(pages[i]);

		cache->MarkPageUnbusy(pages[i]);
	}

	return B_OK;
}


static status_t
read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_from_file(offset = %lld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	if (!useBuffer)
		return B_OK;

	generic_io_vec vec;
	vec.base = buffer;
	vec.length = bufferSize;

	push_access(ref, offset, bufferSize, false);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	generic_size_t toRead = bufferSize;
	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
		&vec, 1, 0, &toRead);

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, false);

	ref->cache->Lock();

	return status;
}


/*!	Like read_into_cache(), but writes data into the cache.
	To preserve data consistency, it may also read pages into the cache
	if only part of a page gets written.
	The same restrictions apply.
*/
static status_t
write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;
	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		// TODO: if space is becoming tight, and this cache is already grown
		//	big - shouldn't we better steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		// TODO: the pages we allocate here should have been reserved upfront
		//	in cache_io()
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation,
			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
				| VM_PAGE_ALLOC_BUSY);

		page->modified = !writeThrough;

		ref->cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
	}

	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE };
		generic_size_t bytesRead = B_PAGE_SIZE;

		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
			B_PHYSICAL_IO_REQUEST, &bytesRead);
		// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
	}

	size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE;
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		generic_addr_t last = vecs[vecCount - 1].base
			+ vecs[vecCount - 1].length - B_PAGE_SIZE;

		if ((off_t)(offset + pageOffset + bufferSize) == ref->cache->virtual_end) {
			// the space in the page after this write action needs to be cleaned
			vm_memset_physical(last + lastPageOffset, 0,
				B_PAGE_SIZE - lastPageOffset);
		} else {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			generic_io_vec readVec = { last, B_PAGE_SIZE };
			generic_size_t bytesRead = B_PAGE_SIZE;

			status = vfs_read_pages(ref->vnode, cookie,
				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
			// ToDo: handle errors for real!
			if (status < B_OK)
				panic("vfs_read_pages() failed: %s!\n", strerror(status));

			if (bytesRead < B_PAGE_SIZE) {
				// the space beyond the file size needs to be cleaned
				vm_memset_physical(last + bytesRead, 0,
					B_PAGE_SIZE - bytesRead);
			}
		}
	}

	for (uint32 i = 0; i < vecCount; i++) {
		generic_addr_t base = vecs[i].base;
		generic_size_t bytes = min_c((generic_size_t)bufferSize,
			generic_size_t(vecs[i].length - pageOffset));

		if (useBuffer) {
			// copy data from user buffer
			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
				IS_USER_ADDRESS(buffer));
		} else {
			// clear buffer instead
			vm_memset_physical(base + pageOffset, 0, bytes);
		}

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		ref->cache->MarkPageUnbusy(pages[i]);

		DEBUG_PAGE_ACCESS_END(pages[i]);
	}

	return status;
}


static status_t
write_zeros_to_file(struct vnode* vnode, void* cookie, off_t offset,
	size_t* _size)
{
	size_t size = *_size;
	status_t status = B_OK;
	while (size > 0) {
		generic_size_t length = min_c(size, kZeroVecSize);
		generic_io_vec* vecs = sZeroVecs;
		generic_io_vec vec;
		size_t count = kZeroVecCount;
		if (length != kZeroVecSize) {
			if (length > B_PAGE_SIZE) {
				length = ROUNDDOWN(length, B_PAGE_SIZE);
				count = length / B_PAGE_SIZE;
			} else {
				vec.base = sZeroPage;
				vec.length = length;
				vecs = &vec;
				count = 1;
			}
		}

		status = vfs_write_pages(vnode, cookie, offset,
			vecs, count, B_PHYSICAL_IO_REQUEST, &length);
		if (status != B_OK || length == 0)
			break;

		offset += length;
		size -= length;
	}

	*_size = *_size - size;
	return status;
}


static status_t
write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
	addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	status_t status = B_OK;

	if (!useBuffer) {
		status = write_zeros_to_file(ref->vnode, cookie, offset + pageOffset,
			&bufferSize);
	} else {
		generic_io_vec vec;
		vec.base = buffer;
		vec.length = bufferSize;
		generic_size_t toWrite = bufferSize;
		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
			&vec, 1, 0, &toWrite);
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	return status;
}


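// Performs the still outstanding part of the current request, i.e. everything
// between "lastBuffer" and "buffer", using the given cache hook. This lets
// cache_io() unlock the cache or wait for a busy page without losing its
// place; on success, the "last*" bookkeeping is advanced accordingly.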
static inline status_t
satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function,
	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
	size_t &lastReservedPages, vm_page_reservation* reservation)
{
	if (lastBuffer == buffer)
		return B_OK;

	size_t requestSize = buffer - lastBuffer;
	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);

	status_t status = function(ref, cookie, lastOffset, lastPageOffset,
		lastBuffer, requestSize, useBuffer, reservation, reservePages);
	if (status == B_OK) {
		lastReservedPages = reservePages;
		lastBuffer = buffer;
		lastLeft = bytesLeft;
		lastOffset = offset;
		lastPageOffset = 0;
		pageOffset = 0;
	}
	return status;
}


static status_t
cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
	size_t* _size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	VMCache* cache = ref->cache;
	bool useBuffer = buffer != 0;

	TRACE(("cache_io(ref = %p, offset = %lld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;
	size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft
		+ B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	size_t reservePages = 0;
	size_t pagesProcessed = 0;
	cache_func function = NULL;

	vm_page_reservation reservation;
	reserve_pages(ref, &reservation, lastReservedPages, doWrite);

	AutoLocker<VMCache> locker(cache);

	while (bytesLeft > 0) {
		// Periodically reevaluate the low memory situation and select the
		// read/write hook accordingly
		if (pagesProcessed % 32 == 0) {
			if (size >= BYPASS_IO_SIZE
				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
					!= B_NO_LOW_RESOURCE) {
				// In low memory situations we bypass the cache beyond a
				// certain I/O size.
				function = doWrite ? write_to_file : read_from_file;
			} else
				function = doWrite ? write_to_cache : read_into_cache;
		}

		// check if this page is already in memory
		vm_page* page = cache->LookupPage(offset);
		if (page != NULL) {
			// The page may be busy - since we need to unlock the cache
			// sometime in the near future, we need to satisfy the request
			// for the pages we didn't get yet (to make sure no one else
			// interferes in the meantime).
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;

			// Since satisfy_cache_io() unlocks the cache, we need to look up
			// the page again.
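			// It may have been removed or replaced while the cache was
			// unlocked.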
			page = cache->LookupPage(offset);
			if (page != NULL && page->busy) {
				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
				continue;
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);

		TRACE(("lookup page from offset %lld: %p, size = %lu, pageOffset "
			"= %lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			if (doWrite || useBuffer) {
				// Since the following user_mem{cpy,set}() might cause a page
				// fault, which in turn might cause pages to be reserved, we
				// need to unlock the cache temporarily to avoid a potential
				// deadlock. To make sure that our page doesn't go away, we
				// mark it busy for the time.
				page->busy = true;
				locker.Unlock();

				// copy the contents of the page already in memory
				phys_addr_t pageAddress
					= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
						+ pageOffset;
				bool userBuffer = IS_USER_ADDRESS(buffer);
				if (doWrite) {
					if (useBuffer) {
						vm_memcpy_to_physical(pageAddress, (void*)buffer,
							bytesInPage, userBuffer);
					} else {
						vm_memset_physical(pageAddress, 0, bytesInPage);
					}
				} else if (useBuffer) {
					vm_memcpy_from_physical((void*)buffer, pageAddress,
						bytesInPage, userBuffer);
				}

				locker.Lock();

				if (doWrite) {
					DEBUG_PAGE_ACCESS_START(page);

					page->modified = true;

					if (page->State() != PAGE_STATE_MODIFIED)
						vm_page_set_state(page, PAGE_STATE_MODIFIED);

					DEBUG_PAGE_ACCESS_END(page);
				}

				cache->MarkPageUnbusy(page);
			}

			// If it is cached only, requeue the page, so the respective queue
			// roughly remains LRU first sorted.
			if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_MODIFIED) {
				DEBUG_PAGE_ACCESS_START(page);
				vm_page_requeue(page, true);
				DEBUG_PAGE_ACCESS_END(page);
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				locker.Unlock();
				vm_page_unreserve_pages(&reservation);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
		offset += B_PAGE_SIZE;
		pagesProcessed++;

		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;
		}
	}

	// fill the last remaining bytes of the request (either write or read)

	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
		lastLeft, useBuffer, &reservation, 0);
}


static status_t
file_cache_control(const char* subsystem, uint32 function, void* buffer,
	size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			return B_OK;

		case CACHE_SET_MODULE:
		{
			cache_module_info* module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				return B_OK;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char*)buffer,
						B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			status_t status = get_module(name, (module_info**)&module);
			if (status == B_OK)
				sCacheModule = module;

			return status;
		}
	}

	return B_BAD_HANDLER;
}


//	#pragma mark - private kernel API


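/*!	Asynchronously reads the given range of the vnode into its cache, skipping
	parts that are already cached. This is best effort: nothing is read if
	free memory is scarce or the cache already holds most of the file's pages.
*/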
extern "C" void
cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size)
{
	if (size == 0)
		return;

	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;
	if (cache->type != CACHE_TYPE_VNODE) {
		cache->ReleaseRef();
		return;
	}

	file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
	off_t fileSize = cache->virtual_end;

	if ((off_t)(offset + size) > fileSize)
		size = fileSize - offset;

	// round "offset" down and "size" up to B_PAGE_SIZE boundaries
	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = ROUNDUP(size, B_PAGE_SIZE);

	const size_t pagesCount = size / B_PAGE_SIZE;

	// Don't do anything if we don't have the resources left, or the cache
	// already contains more than 2/3 of its pages
	if (offset >= fileSize || vm_page_num_unused_pages() < 2 * pagesCount
		|| (3 * cache->page_count) > (2 * fileSize / B_PAGE_SIZE)) {
		cache->ReleaseRef();
		return;
	}

	size_t bytesToRead = 0;
	off_t lastOffset = offset;

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, pagesCount, VM_PRIORITY_USER);

	cache->Lock();

	while (true) {
		// check if this page is already in memory
		if (size > 0) {
			vm_page* page = cache->LookupPage(offset);

			offset += B_PAGE_SIZE;
			size -= B_PAGE_SIZE;

			if (page == NULL) {
				bytesToRead += B_PAGE_SIZE;
				continue;
			}
		}
		if (bytesToRead != 0) {
			// read the part before the current page (or the end of the request)
			PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset,
				bytesToRead);
			if (io == NULL || io->Prepare(&reservation) != B_OK) {
				delete io;
				break;
			}

			// we must not have the cache locked during I/O
			cache->Unlock();
			io->ReadAsync();
			cache->Lock();

			bytesToRead = 0;
		}

		if (size == 0) {
			// we have reached the end of the request
			break;
		}

		lastOffset = offset;
	}

	cache->ReleaseRefAndUnlock();
	vm_page_unreserve_pages(&reservation);
}


extern "C" void
cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
{
	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%lld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode;
	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(struct vnode* vnode, VMCache* cache,
	dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL && cache->type == CACHE_TYPE_VNODE) {
		file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
		if (ref != NULL)
			size = cache->virtual_end;
	}

	sCacheModule->node_opened(vnode, mountID, parentID, vnodeID, name,
		size);
}


extern "C" void
cache_node_closed(struct vnode* vnode, VMCache* cache,
	dev_t mountID, ino_t vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL && cache->type == CACHE_TYPE_VNODE) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char* const* args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1",
			(module_info**)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %" B_PRId64 "\n", system_time());
	}
	return B_OK;
}


extern "C" status_t
file_cache_init(void)
{
	// allocate a clean page we can use for writing zeroes
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
	vm_page* page = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	vm_page_unreserve_pages(&reservation);

	sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;

	for (uint32 i = 0; i < kZeroVecCount; i++) {
		sZeroVecs[i].base = sZeroPage;
		sZeroVecs[i].length = B_PAGE_SIZE;
	}

	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}


//	#pragma mark - public FS API


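/*	Illustrative sketch (not part of this file's API): a file system typically
	wires up the functions below roughly as follows. "volume", "node" and their
	fields are placeholders, and error handling is omitted.

		// when publishing a node (e.g. in the FS's get_vnode()):
		node->file_cache = file_cache_create(volume->id, node->id, node->size);

		// in the FS read/write hooks ("_length" is the in/out size pointer
		// handed to the hook):
		return file_cache_read(node->file_cache, cookie, pos, buffer, _length);
		return file_cache_write(node->file_cache, cookie, pos, buffer, _length);

		// whenever the file size changes (truncation or growth):
		file_cache_set_size(node->file_cache, node->size);

		// when the node is destroyed (e.g. in put_vnode()):
		file_cache_delete(node->file_cache);
*/

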
extern "C" void*
file_cache_create(dev_t mountID, ino_t vnodeID, off_t size)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %lld, size = %lld)\n",
		mountID, vnodeID, size));

	file_cache_ref* ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	memset(ref->last_access, 0, sizeof(ref->last_access));
	ref->last_access_index = 0;
	ref->disabled_count = 0;

	// TODO: delay VMCache creation until data is
	//	requested/written for the first time? Listing lots of
	//	files in Tracker (and elsewhere) could be slowed down.
	//	Since the file_cache_ref itself doesn't have a lock,
	//	we would need to "rent" one during construction, possibly
	//	the vnode lock, maybe a dedicated one.
	//	As there shouldn't be too much contention, we could also
	//	use atomic_test_and_set(), and free the resources again
	//	when that fails...

	// Get the vnode for the object
	// (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err1;

	// Gets (usually creates) the cache for the node
	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err1;

	ref->cache->virtual_end = size;
	((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref);
	return ref;

err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	ref->cache->ReleaseRef();
	delete ref;
}


extern "C" void
file_cache_enable(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	if (ref->disabled_count == 0) {
		panic("Unbalanced file_cache_enable()!");
		return;
	}

	ref->disabled_count--;
}


extern "C" status_t
file_cache_disable(void* _cacheRef)
{
	// TODO: This function only removes all pages from the cache and prevents
	// the file cache functions from adding any new ones until re-enabled.
	// The VM (on page fault) can still add pages, if the file is mmap()ed. We
	// should mark the cache to prevent shared mappings of the file and fix
	// the page fault code to deal correctly with private mappings (i.e. only
	// insert pages in consumer caches).

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	// If already disabled, there's nothing to do for us.
	if (ref->disabled_count > 0) {
		ref->disabled_count++;
		return B_OK;
	}

	// The file cache is not yet disabled. We need to evict all cached pages.
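	// FlushAndRemoveAllPages() writes modified pages back before dropping
	// them; if it fails, we return the error and leave caching enabled.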
	status_t error = ref->cache->FlushAndRemoveAllPages();
	if (error != B_OK)
		return error;

	ref->disabled_count++;
	return B_OK;
}


extern "C" bool
file_cache_is_enabled(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	AutoLocker<VMCache> _(ref->cache);

	return ref->disabled_count == 0;
}


extern "C" status_t
file_cache_set_size(void* _cacheRef, off_t newSize)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %lld)\n", ref, newSize));

	if (ref == NULL)
		return B_OK;

	VMCache* cache = ref->cache;
	AutoLocker<VMCache> _(cache);

	off_t oldSize = cache->virtual_end;
	status_t status = cache->Resize(newSize, VM_PRIORITY_USER);
		// Note, the priority doesn't really matter, since this cache doesn't
		// reserve any memory.
	if (status == B_OK && newSize < oldSize) {
		// We may have a new partial page at the end of the cache that must be
		// cleared.
		uint32 partialBytes = newSize % B_PAGE_SIZE;
		if (partialBytes != 0) {
			vm_page* page = cache->LookupPage(newSize - partialBytes);
			if (page != NULL) {
				vm_memset_physical(page->physical_page_number * B_PAGE_SIZE
					+ partialBytes, 0, B_PAGE_SIZE - partialBytes);
			}
		}
	}

	return status;
}


extern "C" status_t
file_cache_sync(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	if (ref == NULL)
		return B_BAD_VALUE;

	return ref->cache->WriteModified();
}


extern "C" status_t
file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
	size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %lld, buffer = %p, size = %lu)\n",
		ref, offset, buffer, *_size));

	// Bounds checking. We do this here so it applies to uncached I/O.
	if (offset < 0)
		return B_BAD_VALUE;
	const off_t fileSize = ref->cache->virtual_end;
	if (offset >= fileSize || *_size == 0) {
		*_size = 0;
		return B_OK;
	}
	if ((off_t)(offset + *_size) > fileSize)
		*_size = fileSize - offset;

	if (ref->disabled_count > 0) {
		// Caching is disabled -- read directly from the file.
		generic_io_vec vec;
		vec.base = (addr_t)buffer;
		generic_size_t size = vec.length = *_size;
		status_t error = vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0,
			&size);
		*_size = size;
		return error;
	}

	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
}


extern "C" status_t
file_cache_write(void* _cacheRef, void* cookie, off_t offset,
	const void* buffer, size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	// We don't do bounds checking here, as we are relying on the
	// file system which called us to already have done that and made
	// adjustments as necessary, unlike in read().

	if (ref->disabled_count > 0) {
		// Caching is disabled -- write directly to the file.
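		// (A NULL buffer means the caller wants zeroes written, see
		// write_zeros_to_file().)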
		if (buffer != NULL) {
			generic_io_vec vec;
			vec.base = (addr_t)buffer;
			generic_size_t size = vec.length = *_size;

			status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec,
				1, 0, &size);
			*_size = size;
			return error;
		}
		return write_zeros_to_file(ref->vnode, cookie, offset, _size);
	}

	status_t status = cache_io(ref, cookie, offset,
		(addr_t)const_cast<void*>(buffer), _size, true);

	TRACE(("file_cache_write(ref = %p, offset = %lld, buffer = %p, size = %lu)"
		" = %ld\n", ref, offset, buffer, *_size, status));

	return status;
}