/*
 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#include <KernelExport.h>
#include <fs_cache.h>

#include <condition_variable.h>
#include <file_cache.h>
#include <generic_syscall.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <util/AutoLock.h>
#include <util/kernel_cpp.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/VMCache.h>

#include "IORequest.h"


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			32	// 128 kB
#define MAX_FILE_IO_VECS	32

#define BYPASS_IO_SIZE		65536
#define LAST_ACCESSES		3

struct file_cache_ref {
	VMCache			*cache;
	struct vnode	*vnode;
	off_t			last_access[LAST_ACCESSES];
		// TODO: it would probably be enough to only store the least
		//	significant 31 bits, and make this uint32 (one bit for
		//	write vs. read)
	int32			last_access_index;
	uint16			disabled_count;

	inline void SetLastAccess(int32 index, off_t access, bool isWrite)
	{
		// we remember writes as negative offsets
		last_access[index] = isWrite ? -access : access;
	}

	inline off_t LastAccess(int32 index, bool isWrite) const
	{
		return isWrite ? -last_access[index] : last_access[index];
	}

	inline uint32 LastAccessPageOffset(int32 index, bool isWrite)
	{
		return LastAccess(index, isWrite) >> PAGE_SHIFT;
	}
};

class PrecacheIO : public AsyncIOCallback {
public:
								PrecacheIO(file_cache_ref* ref, off_t offset,
									generic_size_t size);
								~PrecacheIO();

			status_t			Prepare(vm_page_reservation* reservation);
			void				ReadAsync();

	virtual	void				IOFinished(status_t status,
									bool partialTransfer,
									generic_size_t bytesTransferred);

private:
			file_cache_ref*		fRef;
			VMCache*			fCache;
			vm_page**			fPages;
			size_t				fPageCount;
			ConditionVariable*	fBusyConditions;
			generic_io_vec*		fVecs;
			off_t				fOffset;
			uint32				fVecCount;
			generic_size_t		fSize;
#if DEBUG_PAGE_ACCESS
			thread_id			fAllocatingThread;
#endif
};

typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages);

static void add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
	generic_addr_t address, generic_size_t size);


static struct cache_module_info* sCacheModule;


static const uint32 kZeroVecCount = 32;
static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE;
static phys_addr_t sZeroPage;	// physical address
static generic_io_vec sZeroVecs[kZeroVecCount];


// #pragma mark -


PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, generic_size_t size)
	:
	fRef(ref),
	fCache(ref->cache),
	fPages(NULL),
	fVecs(NULL),
	fOffset(offset),
	fVecCount(0),
	fSize(size)
{
	fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
	fCache->AcquireRefLocked();
}


PrecacheIO::~PrecacheIO()
{
	delete[] fPages;
	delete[] fVecs;
	fCache->ReleaseRefLocked();
}


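/*!	Allocates the page and I/O vector arrays, allocates busy, cached pages
	from the given reservation for the whole range, and inserts them into the
	cache starting at fOffset. The cache must be locked when calling this
	method.
*/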
status_t
PrecacheIO::Prepare(vm_page_reservation* reservation)
{
	if (fPageCount == 0)
		return B_BAD_VALUE;

	fPages = new(std::nothrow) vm_page*[fPageCount];
	if (fPages == NULL)
		return B_NO_MEMORY;

	fVecs = new(std::nothrow) generic_io_vec[fPageCount];
	if (fVecs == NULL)
		return B_NO_MEMORY;

	// allocate pages for the cache and mark them busy
	uint32 i = 0;
	for (generic_size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) {
		vm_page* page = vm_page_allocate_page(reservation,
			PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		fCache->InsertPage(page, fOffset + pos);

		add_to_iovec(fVecs, fVecCount, fPageCount,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
		fPages[i++] = page;
	}

#if DEBUG_PAGE_ACCESS
	fAllocatingThread = find_thread(NULL);
#endif

	return B_OK;
}


void
PrecacheIO::ReadAsync()
{
	// This object is going to be deleted after the I/O request has been
	// fulfilled
	vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount,
		fSize, B_PHYSICAL_IO_REQUEST, this);
}


void
PrecacheIO::IOFinished(status_t status, bool partialTransfer,
	generic_size_t bytesTransferred)
{
	AutoLocker<VMCache> locker(fCache);

	// Make successfully loaded pages accessible again (partially
	// transferred pages are considered failed)
	phys_size_t pagesTransferred
		= (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE;

	if (fOffset + (off_t)bytesTransferred > fCache->virtual_end)
		bytesTransferred = fCache->virtual_end - fOffset;

	for (uint32 i = 0; i < pagesTransferred; i++) {
		if (i == pagesTransferred - 1
			&& (bytesTransferred % B_PAGE_SIZE) != 0) {
			// clear partial page
			size_t bytesTouched = bytesTransferred % B_PAGE_SIZE;
			vm_memset_physical(
				((phys_addr_t)fPages[i]->physical_page_number << PAGE_SHIFT)
					+ bytesTouched,
				0, B_PAGE_SIZE - bytesTouched);
		}

		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);

		fCache->MarkPageUnbusy(fPages[i]);

		DEBUG_PAGE_ACCESS_END(fPages[i]);
	}

	// Free pages after failed I/O
	for (uint32 i = pagesTransferred; i < fPageCount; i++) {
		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
		fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY);
		fCache->RemovePage(fPages[i]);
		vm_page_set_state(fPages[i], PAGE_STATE_FREE);
	}

	delete this;
}


// #pragma mark -


static void
add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
	generic_addr_t address, generic_size_t size)
{
	if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].length += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].base = address;
	vecs[index].length = size;
	index++;
}


static inline bool
access_is_sequential(file_cache_ref* ref)
{
	return ref->last_access[ref->last_access_index] != 0;
}


static inline void
push_access(file_cache_ref* ref, off_t offset, generic_size_t bytes,
	bool isWrite)
{
	TRACE(("%p: push %Ld, %ld, %s\n", ref, offset, bytes,
		isWrite ? "write" : "read"));

	int32 index = ref->last_access_index;
	int32 previous = index - 1;
	if (previous < 0)
		previous = LAST_ACCESSES - 1;

	if (offset != ref->LastAccess(previous, isWrite))
		ref->last_access[previous] = 0;

	ref->SetLastAccess(index, offset + bytes, isWrite);

	if (++index >= LAST_ACCESSES)
		index = 0;
	ref->last_access_index = index;
}


static void
reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation,
	size_t reservePages, bool isWrite)
{
	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
		VMCache* cache = ref->cache;
		cache->Lock();

		if (cache->consumers.IsEmpty() && cache->areas == NULL
			&& access_is_sequential(ref)) {
			// we are not mapped, and we're accessed sequentially

			if (isWrite) {
				// Just write some pages back, and actually wait until they
				// have been written back in order to relieve the page pressure
				// a bit.
				int32 index = ref->last_access_index;
				int32 previous = index - 1;
				if (previous < 0)
					previous = LAST_ACCESSES - 1;

				vm_page_write_modified_page_range(cache,
					ref->LastAccessPageOffset(previous, true),
					ref->LastAccessPageOffset(index, true));
			} else {
				// free some pages from our cache
				// TODO: start with oldest
				uint32 left = reservePages;
				vm_page* page;
				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
						(page = it.Next()) != NULL && left > 0;) {
					if (page->State() == PAGE_STATE_CACHED && !page->busy) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT(!page->IsMapped());
						ASSERT(!page->modified);
						cache->RemovePage(page);
						vm_page_set_state(page, PAGE_STATE_FREE);
						left--;
					}
				}
			}
		}
		cache->Unlock();
	}

	vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER);
}


static inline status_t
read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
	const generic_io_vec* vecs, size_t count, uint32 flags,
	generic_size_t* _numBytes)
{
	generic_size_t bytesUntouched = *_numBytes;

	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
		flags, _numBytes);

	generic_size_t bytesEnd = *_numBytes;

	if (offset + (off_t)bytesEnd > ref->cache->virtual_end)
		bytesEnd = ref->cache->virtual_end - offset;

	if (status == B_OK && bytesEnd < bytesUntouched) {
		// Clear out any leftovers that were not touched by the above read.
		// We're doing this here so that not every file system/device has to
		// implement this.
		bytesUntouched -= bytesEnd;

		for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) {
			generic_size_t length = min_c(bytesUntouched, vecs[i].length);
			vm_memset_physical(vecs[i].base + vecs[i].length - length, 0,
				length);

			bytesUntouched -= length;
		}
	}

	return status;
}


/*!	Reads the requested amount of data into the cache, and allocates
	pages needed to fulfill that request. This function is called by cache_io().
	It can only handle a limited number of bytes per call, and the caller must
	make sure the request does not exceed that limit.
	The cache_ref lock must be held when calling this function; during
	operation it will unlock the cache, though.
*/
static status_t
read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	VMCache* cache = ref->cache;

	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;

	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
			// TODO: check if the array is large enough (currently panics)!
	}

	push_access(ref, offset, bufferSize, false);
	cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// read file into reserved pages
	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
	if (status != B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		cache->Lock();

		for (int32 i = 0; i < pageIndex; i++) {
			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
			cache->RemovePage(pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages if needed and unmap them again

	for (int32 i = 0; i < pageIndex; i++) {
		if (useBuffer && bufferSize != 0) {
			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);

			vm_memcpy_from_physical((void*)buffer,
				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
				bytes, IS_USER_ADDRESS(buffer));

			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}
	}

	reserve_pages(ref, reservation, reservePages, false);
	cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		DEBUG_PAGE_ACCESS_END(pages[i]);

		cache->MarkPageUnbusy(pages[i]);
	}

	return B_OK;
}


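/*!	Used in place of read_into_cache() when the system is low on memory:
	reads the data directly from the file into the buffer without inserting
	any pages into the cache. The same restrictions as for read_into_cache()
	apply.
*/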
static status_t
read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	if (!useBuffer)
		return B_OK;

	generic_io_vec vec;
	vec.base = buffer;
	vec.length = bufferSize;

	push_access(ref, offset, bufferSize, false);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	generic_size_t toRead = bufferSize;
	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
		&vec, 1, 0, &toRead);

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, false);

	ref->cache->Lock();

	return status;
}


/*!	Like read_into_cache(), but writes data into the cache.
	To preserve data consistency, it may also read pages into the cache
	if only a partial page gets written.
	The same restrictions apply.
*/
static status_t
write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;
	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		// TODO: if space is becoming tight, and this cache is already grown
		//	big - shouldn't we better steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		// TODO: the pages we allocate here should have been reserved upfront
		//	in cache_io()
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation,
			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
				| VM_PAGE_ALLOC_BUSY);

		page->modified = !writeThrough;

		ref->cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
	}

	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE };
		generic_size_t bytesRead = B_PAGE_SIZE;

		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
			B_PHYSICAL_IO_REQUEST, &bytesRead);
		// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
	}

	size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE;
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		generic_addr_t last = vecs[vecCount - 1].base
			+ vecs[vecCount - 1].length - B_PAGE_SIZE;

		if ((off_t)(offset + pageOffset + bufferSize) == ref->cache->virtual_end) {
			// the space in the page after this write action needs to be cleaned
			vm_memset_physical(last + lastPageOffset, 0,
				B_PAGE_SIZE - lastPageOffset);
		} else {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			generic_io_vec readVec = { last, B_PAGE_SIZE };
			generic_size_t bytesRead = B_PAGE_SIZE;

			status = vfs_read_pages(ref->vnode, cookie,
				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
			// ToDo: handle errors for real!
			if (status < B_OK)
				panic("vfs_read_pages() failed: %s!\n", strerror(status));

			if (bytesRead < B_PAGE_SIZE) {
				// the space beyond the file size needs to be cleaned
				vm_memset_physical(last + bytesRead, 0,
					B_PAGE_SIZE - bytesRead);
			}
		}
	}

	for (uint32 i = 0; i < vecCount; i++) {
		generic_addr_t base = vecs[i].base;
		generic_size_t bytes = min_c((generic_size_t)bufferSize,
			generic_size_t(vecs[i].length - pageOffset));

		if (useBuffer) {
			// copy data from user buffer
			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
				IS_USER_ADDRESS(buffer));
		} else {
			// clear buffer instead
			vm_memset_physical(base + pageOffset, 0, bytes);
		}

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		ref->cache->MarkPageUnbusy(pages[i]);

		DEBUG_PAGE_ACCESS_END(pages[i]);
	}

	return status;
}


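/*!	Used in place of write_to_cache() when the system is low on memory:
	writes the data (or zeroes, if there is no buffer) directly to the file
	without inserting any pages into the cache.
*/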
static status_t
write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
	addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	status_t status = B_OK;

	if (!useBuffer) {
		while (bufferSize > 0) {
			generic_size_t written = min_c(bufferSize, kZeroVecSize);
			status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (status != B_OK)
				return status;
			if (written == 0)
				return B_ERROR;

			bufferSize -= written;
			pageOffset += written;
		}
	} else {
		generic_io_vec vec;
		vec.base = buffer;
		vec.length = bufferSize;
		generic_size_t toWrite = bufferSize;
		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
			&vec, 1, 0, &toWrite);
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	return status;
}


static inline status_t
satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function,
	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
	size_t &lastReservedPages, vm_page_reservation* reservation)
{
	if (lastBuffer == buffer)
		return B_OK;

	size_t requestSize = buffer - lastBuffer;
	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);

	status_t status = function(ref, cookie, lastOffset, lastPageOffset,
		lastBuffer, requestSize, useBuffer, reservation, reservePages);
	if (status == B_OK) {
		lastReservedPages = reservePages;
		lastBuffer = buffer;
		lastLeft = bytesLeft;
		lastOffset = offset;
		lastPageOffset = 0;
		pageOffset = 0;
	}
	return status;
}


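/*!	Central function of the file cache, used by both file_cache_read() and
	file_cache_write(). The request is processed in chunks of at most
	MAX_IO_VECS pages: pages that are already cached are copied directly
	to/from the buffer, the remaining gaps are delegated to
	read_into_cache()/write_to_cache(), or to read_from_file()/write_to_file()
	when the cache is bypassed in low memory situations. A buffer of 0 means
	"write zeroes" for writes, and "only populate the cache" for reads.
*/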
static status_t
cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
	size_t* _size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	VMCache* cache = ref->cache;
	off_t fileSize = cache->virtual_end;
	bool useBuffer = buffer != 0;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if ((off_t)(offset + pageOffset + size) > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}
	if (size == 0)
		return B_OK;

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;
	size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft
		+ B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	size_t reservePages = 0;
	size_t pagesProcessed = 0;
	cache_func function = NULL;

	vm_page_reservation reservation;
	reserve_pages(ref, &reservation, lastReservedPages, doWrite);

	AutoLocker<VMCache> locker(cache);

	while (bytesLeft > 0) {
		// Periodically reevaluate the low memory situation and select the
		// read/write hook accordingly
		if (pagesProcessed % 32 == 0) {
			if (size >= BYPASS_IO_SIZE
				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
					!= B_NO_LOW_RESOURCE) {
				// In low memory situations we bypass the cache beyond a
				// certain I/O size.
				function = doWrite ? write_to_file : read_from_file;
			} else
				function = doWrite ? write_to_cache : read_into_cache;
		}

		// check if this page is already in memory
		vm_page* page = cache->LookupPage(offset);
		if (page != NULL) {
			// The page may be busy - since we need to unlock the cache sometime
			// in the near future, we need to satisfy the request for the pages
			// we didn't get yet (to make sure no one else interferes in the
			// meantime).
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;

			// Since satisfy_cache_io() unlocks the cache, we need to look up
			// the page again.
			page = cache->LookupPage(offset);
			if (page != NULL && page->busy) {
				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
				continue;
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset "
			"= %lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			if (doWrite || useBuffer) {
				// Since the following user_mem{cpy,set}() might cause a page
				// fault, which in turn might cause pages to be reserved, we
				// need to unlock the cache temporarily to avoid a potential
				// deadlock. To make sure that our page doesn't go away, we mark
				// it busy for the time.
				page->busy = true;
				locker.Unlock();

				// copy the contents of the page already in memory
				phys_addr_t pageAddress
					= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
						+ pageOffset;
				bool userBuffer = IS_USER_ADDRESS(buffer);
				if (doWrite) {
					if (useBuffer) {
						vm_memcpy_to_physical(pageAddress, (void*)buffer,
							bytesInPage, userBuffer);
					} else {
						vm_memset_physical(pageAddress, 0, bytesInPage);
					}
				} else if (useBuffer) {
					vm_memcpy_from_physical((void*)buffer, pageAddress,
						bytesInPage, userBuffer);
				}

				locker.Lock();

				if (doWrite) {
					DEBUG_PAGE_ACCESS_START(page);

					page->modified = true;

					if (page->State() != PAGE_STATE_MODIFIED)
						vm_page_set_state(page, PAGE_STATE_MODIFIED);

					DEBUG_PAGE_ACCESS_END(page);
				}

				cache->MarkPageUnbusy(page);
			}

			// If it is cached only, requeue the page, so the respective queue
			// roughly remains LRU first sorted.
			if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_MODIFIED) {
				DEBUG_PAGE_ACCESS_START(page);
				vm_page_requeue(page, true);
				DEBUG_PAGE_ACCESS_END(page);
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				locker.Unlock();
				vm_page_unreserve_pages(&reservation);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
		offset += B_PAGE_SIZE;
		pagesProcessed++;

		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;
		}
	}

	// fill the last remaining bytes of the request (either write or read)

	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
		lastLeft, useBuffer, &reservation, 0);
}


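/*!	Handler for the file cache's generic syscall: CACHE_CLEAR is currently
	only acknowledged (clearing the cache is still a TODO), CACHE_SET_MODULE
	unloads the active cache module and optionally loads the module named in
	the buffer.
*/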
static status_t
file_cache_control(const char* subsystem, uint32 function, void* buffer,
	size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			return B_OK;

		case CACHE_SET_MODULE:
		{
			cache_module_info* module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				return B_OK;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char*)buffer,
						B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			status_t status = get_module(name, (module_info**)&module);
			if (status == B_OK)
				sCacheModule = module;

			return status;
		}
	}

	return B_BAD_HANDLER;
}


// #pragma mark - private kernel API


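/*!	Asynchronously reads the given range of the vnode into the file cache.
	Ranges that are already cached are skipped; everything else is read in
	via PrecacheIO. The prefetch is skipped entirely if free pages are scarce
	or if the cache already contains more than 2/3 of the file's pages.
*/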
extern "C" void
cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size)
{
	if (size == 0)
		return;

	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;

	file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
	off_t fileSize = cache->virtual_end;

	if ((off_t)(offset + size) > fileSize)
		size = fileSize - offset;

	// round "offset" and "size" to B_PAGE_SIZE boundaries
	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = ROUNDUP(size, B_PAGE_SIZE);

	size_t reservePages = size / B_PAGE_SIZE;

	// Don't do anything if we don't have the resources left, or the cache
	// already contains more than 2/3 of the file's pages
	if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages
		|| 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) {
		cache->ReleaseRef();
		return;
	}

	size_t bytesToRead = 0;
	off_t lastOffset = offset;

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER);

	cache->Lock();

	while (true) {
		// check if this page is already in memory
		if (size > 0) {
			vm_page* page = cache->LookupPage(offset);

			offset += B_PAGE_SIZE;
			size -= B_PAGE_SIZE;

			if (page == NULL) {
				bytesToRead += B_PAGE_SIZE;
				continue;
			}
		}
		if (bytesToRead != 0) {
			// read the part before the current page (or the end of the request)
			PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset,
				bytesToRead);
			if (io == NULL || io->Prepare(&reservation) != B_OK) {
				delete io;
				break;
			}

			// we must not have the cache locked during I/O
			cache->Unlock();
			io->ReadAsync();
			cache->Lock();

			bytesToRead = 0;
		}

		if (size == 0) {
			// we have reached the end of the request
			break;
		}

		lastOffset = offset;
	}

	cache->ReleaseRefAndUnlock();
	vm_page_unreserve_pages(&reservation);
}


extern "C" void
cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
{
	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode;
	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(struct vnode* vnode, int32 fdType, VMCache* cache,
	dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL) {
		file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
		if (ref != NULL)
			size = cache->virtual_end;
	}

	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name,
		size);
}


extern "C" void
cache_node_closed(struct vnode* vnode, int32 fdType, VMCache* cache,
	dev_t mountID, ino_t vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char* const* args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1",
			(module_info**)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %" B_PRId64 "\n", system_time());
	}
	return B_OK;
}


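/*!	Allocates the global zero page backing sZeroVecs (used for writing
	zeroes) and registers the cache control syscall.
*/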
extern "C" status_t
file_cache_init(void)
{
	// allocate a clean page we can use for writing zeroes
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
	vm_page* page = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	vm_page_unreserve_pages(&reservation);

	sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;

	for (uint32 i = 0; i < kZeroVecCount; i++) {
		sZeroVecs[i].base = sZeroPage;
		sZeroVecs[i].length = B_PAGE_SIZE;
	}

	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}


// #pragma mark - public FS API


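/*!	Creates the file cache for a node: allocates the file_cache_ref, attaches
	it to the node's VMCache, and sets the cache's size. Returns NULL on
	failure.
*/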
extern "C" void*
file_cache_create(dev_t mountID, ino_t vnodeID, off_t size)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld)\n",
		mountID, vnodeID, size));

	file_cache_ref* ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	memset(ref->last_access, 0, sizeof(ref->last_access));
	ref->last_access_index = 0;
	ref->disabled_count = 0;

	// TODO: delay VMCache creation until data is
	// requested/written for the first time? Listing lots of
	// files in Tracker (and elsewhere) could be slowed down.
	// Since the file_cache_ref itself doesn't have a lock,
	// we would need to "rent" one during construction, possibly
	// the vnode lock, maybe a dedicated one.
	// As there shouldn't be too much contention, we could also
	// use atomic_test_and_set(), and free the resources again
	// when that fails...

	// Get the vnode for the object
	// (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err1;

	// Gets (usually creates) the cache for the node
	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err1;

	ref->cache->virtual_end = size;
	((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref);
	return ref;

err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	ref->cache->ReleaseRef();
	delete ref;
}


extern "C" void
file_cache_enable(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	if (ref->disabled_count == 0) {
		panic("Unbalanced file_cache_enable()!");
		return;
	}

	ref->disabled_count--;
}


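/*!	Disables the file cache for this node: all cached pages are flushed and
	evicted, and subsequent file_cache_read()/file_cache_write() calls access
	the file directly. Calls may be nested and must be balanced by
	file_cache_enable().
*/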
extern "C" status_t
file_cache_disable(void* _cacheRef)
{
	// TODO: This function only removes all pages from the cache and prevents
	// the file cache functions from adding any new ones until re-enabled. The
	// VM (on page fault) can still add pages, if the file is mmap()ed. We
	// should mark the cache to prevent shared mappings of the file and fix
	// the page fault code to deal correctly with private mappings (i.e. only
	// insert pages in consumer caches).

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	// If already disabled, there's nothing to do for us.
	if (ref->disabled_count > 0) {
		ref->disabled_count++;
		return B_OK;
	}

	// The file cache is not yet disabled. We need to evict all cached pages.
	status_t error = ref->cache->FlushAndRemoveAllPages();
	if (error != B_OK)
		return error;

	ref->disabled_count++;
	return B_OK;
}


extern "C" bool
file_cache_is_enabled(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	AutoLocker<VMCache> _(ref->cache);

	return ref->disabled_count == 0;
}


extern "C" status_t
file_cache_set_size(void* _cacheRef, off_t newSize)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, newSize));

	if (ref == NULL)
		return B_OK;

	VMCache* cache = ref->cache;
	AutoLocker<VMCache> _(cache);

	off_t oldSize = cache->virtual_end;
	status_t status = cache->Resize(newSize, VM_PRIORITY_USER);
		// Note, the priority doesn't really matter, since this cache doesn't
		// reserve any memory.
	if (status == B_OK && newSize < oldSize) {
		// We may have a new partial page at the end of the cache that must be
		// cleared.
		uint32 partialBytes = newSize % B_PAGE_SIZE;
		if (partialBytes != 0) {
			vm_page* page = cache->LookupPage(newSize - partialBytes);
			if (page != NULL) {
				vm_memset_physical(page->physical_page_number * B_PAGE_SIZE
					+ partialBytes, 0, B_PAGE_SIZE - partialBytes);
			}
		}
	}

	return status;
}


extern "C" status_t
file_cache_sync(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	if (ref == NULL)
		return B_BAD_VALUE;

	return ref->cache->WriteModified();
}


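/*!	Reads data from the file into the buffer by means of the cache. If the
	cache is currently disabled for this node, the data is read directly from
	the file instead.
*/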
extern "C" status_t
file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
	size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
		ref, offset, buffer, *_size));

	if (ref->disabled_count > 0) {
		// Caching is disabled -- read directly from the file.
		generic_io_vec vec;
		vec.base = (addr_t)buffer;
		generic_size_t size = vec.length = *_size;
		status_t error = vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0,
			&size);
		*_size = size;
		return error;
	}

	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
}


extern "C" status_t
file_cache_write(void* _cacheRef, void* cookie, off_t offset,
	const void* buffer, size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref->disabled_count > 0) {
		// Caching is disabled -- write directly to the file.

		if (buffer != NULL) {
			generic_io_vec vec;
			vec.base = (addr_t)buffer;
			generic_size_t size = vec.length = *_size;

			status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec,
				1, 0, &size);
			*_size = size;
			return error;
		}

		// NULL buffer -- use a dummy buffer to write zeroes
		size_t size = *_size;
		while (size > 0) {
			size_t toWrite = min_c(size, kZeroVecSize);
			generic_size_t written = toWrite;
			status_t error = vfs_write_pages(ref->vnode, cookie, offset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (error != B_OK)
				return error;
			if (written == 0)
				break;

			offset += written;
			size -= written;
		}

		*_size -= size;
		return B_OK;
	}

	status_t status = cache_io(ref, cookie, offset,
		(addr_t)const_cast<void*>(buffer), _size, true);

	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu)"
		" = %ld\n", ref, offset, buffer, *_size, status));

	return status;
}