1 /* 2 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "vnode_store.h" 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 13 #include <KernelExport.h> 14 #include <fs_cache.h> 15 16 #include <condition_variable.h> 17 #include <file_cache.h> 18 #include <generic_syscall.h> 19 #include <low_resource_manager.h> 20 #include <thread.h> 21 #include <util/AutoLock.h> 22 #include <util/kernel_cpp.h> 23 #include <vfs.h> 24 #include <vm/vm.h> 25 #include <vm/vm_page.h> 26 #include <vm/VMCache.h> 27 28 #include "IORequest.h" 29 30 31 //#define TRACE_FILE_CACHE 32 #ifdef TRACE_FILE_CACHE 33 # define TRACE(x) dprintf x 34 #else 35 # define TRACE(x) ; 36 #endif 37 38 // maximum number of iovecs per request 39 #define MAX_IO_VECS 32 // 128 kB 40 41 #define BYPASS_IO_SIZE 65536 42 #define LAST_ACCESSES 3 43 44 struct file_cache_ref { 45 VMCache *cache; 46 struct vnode *vnode; 47 off_t last_access[LAST_ACCESSES]; 48 // TODO: it would probably be enough to only store the least 49 // significant 31 bits, and make this uint32 (one bit for 50 // write vs. read) 51 int32 last_access_index; 52 uint16 disabled_count; 53 54 inline void SetLastAccess(int32 index, off_t access, bool isWrite) 55 { 56 // we remember writes as negative offsets 57 last_access[index] = isWrite ? -access : access; 58 } 59 60 inline off_t LastAccess(int32 index, bool isWrite) const 61 { 62 return isWrite ? -last_access[index] : last_access[index]; 63 } 64 65 inline uint32 LastAccessPageOffset(int32 index, bool isWrite) 66 { 67 return LastAccess(index, isWrite) >> PAGE_SHIFT; 68 } 69 }; 70 71 class PrecacheIO : public AsyncIOCallback { 72 public: 73 PrecacheIO(file_cache_ref* ref, off_t offset, 74 generic_size_t size); 75 ~PrecacheIO(); 76 77 status_t Prepare(vm_page_reservation* reservation); 78 void ReadAsync(); 79 80 virtual void IOFinished(status_t status, 81 bool partialTransfer, 82 generic_size_t bytesTransferred); 83 84 private: 85 file_cache_ref* fRef; 86 VMCache* fCache; 87 vm_page** fPages; 88 size_t fPageCount; 89 ConditionVariable* fBusyConditions; 90 generic_io_vec* fVecs; 91 off_t fOffset; 92 uint32 fVecCount; 93 generic_size_t fSize; 94 #if DEBUG_PAGE_ACCESS 95 thread_id fAllocatingThread; 96 #endif 97 }; 98 99 typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset, 100 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 101 vm_page_reservation* reservation, size_t reservePages); 102 103 static void add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max, 104 generic_addr_t address, generic_size_t size); 105 106 107 static struct cache_module_info* sCacheModule; 108 109 110 static const uint32 kZeroVecCount = 32; 111 static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE; 112 static phys_addr_t sZeroPage; 113 static generic_io_vec sZeroVecs[kZeroVecCount]; 114 115 116 // #pragma mark - 117 118 119 PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, generic_size_t size) 120 : 121 fRef(ref), 122 fCache(ref->cache), 123 fPages(NULL), 124 fVecs(NULL), 125 fOffset(offset), 126 fVecCount(0), 127 fSize(size) 128 { 129 fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE; 130 fCache->AcquireRefLocked(); 131 } 132 133 134 PrecacheIO::~PrecacheIO() 135 { 136 delete[] fPages; 137 delete[] fVecs; 138 fCache->ReleaseRefLocked(); 139 } 140 141 142 status_t 143 PrecacheIO::Prepare(vm_page_reservation* reservation) 144 { 145 if (fPageCount == 0) 146 return B_BAD_VALUE; 147 148 fPages = new(std::nothrow) vm_page*[fPageCount]; 149 if (fPages == NULL) 150 return B_NO_MEMORY; 151 152 fVecs = new(std::nothrow) generic_io_vec[fPageCount]; 153 if (fVecs == NULL) 154 return B_NO_MEMORY; 155 156 // allocate pages for the cache and mark them busy 157 uint32 i = 0; 158 for (generic_size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) { 159 vm_page* page = vm_page_allocate_page(reservation, 160 PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY); 161 162 fCache->InsertPage(page, fOffset + pos); 163 164 add_to_iovec(fVecs, fVecCount, fPageCount, 165 page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE); 166 fPages[i++] = page; 167 } 168 169 #if DEBUG_PAGE_ACCESS 170 fAllocatingThread = find_thread(NULL); 171 #endif 172 173 return B_OK; 174 } 175 176 177 void 178 PrecacheIO::ReadAsync() 179 { 180 // This object is going to be deleted after the I/O request has been 181 // fulfilled 182 vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount, 183 fSize, B_PHYSICAL_IO_REQUEST, this); 184 } 185 186 187 void 188 PrecacheIO::IOFinished(status_t status, bool partialTransfer, 189 generic_size_t bytesTransferred) 190 { 191 AutoLocker<VMCache> locker(fCache); 192 193 // Make successfully loaded pages accessible again (partially 194 // transferred pages are considered failed) 195 phys_size_t pagesTransferred 196 = (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE; 197 198 if (fOffset + (off_t)bytesTransferred > fCache->virtual_end) 199 bytesTransferred = fCache->virtual_end - fOffset; 200 201 for (uint32 i = 0; i < pagesTransferred; i++) { 202 if (i == pagesTransferred - 1 203 && (bytesTransferred % B_PAGE_SIZE) != 0) { 204 // clear partial page 205 size_t bytesTouched = bytesTransferred % B_PAGE_SIZE; 206 vm_memset_physical( 207 ((phys_addr_t)fPages[i]->physical_page_number << PAGE_SHIFT) 208 + bytesTouched, 209 0, B_PAGE_SIZE - bytesTouched); 210 } 211 212 DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread); 213 214 fCache->MarkPageUnbusy(fPages[i]); 215 216 DEBUG_PAGE_ACCESS_END(fPages[i]); 217 } 218 219 // Free pages after failed I/O 220 for (uint32 i = pagesTransferred; i < fPageCount; i++) { 221 DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread); 222 fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY); 223 fCache->RemovePage(fPages[i]); 224 vm_page_set_state(fPages[i], PAGE_STATE_FREE); 225 } 226 227 delete this; 228 } 229 230 231 // #pragma mark - 232 233 234 static void 235 add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max, 236 generic_addr_t address, generic_size_t size) 237 { 238 if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) { 239 // the iovec can be combined with the previous one 240 vecs[index - 1].length += size; 241 return; 242 } 243 244 if (index == max) 245 panic("no more space for iovecs!"); 246 247 // we need to start a new iovec 248 vecs[index].base = address; 249 vecs[index].length = size; 250 index++; 251 } 252 253 254 static inline bool 255 access_is_sequential(file_cache_ref* ref) 256 { 257 return ref->last_access[ref->last_access_index] != 0; 258 } 259 260 261 static inline void 262 push_access(file_cache_ref* ref, off_t offset, generic_size_t bytes, 263 bool isWrite) 264 { 265 TRACE(("%p: push %lld, %ld, %s\n", ref, offset, bytes, 266 isWrite ? "write" : "read")); 267 268 int32 index = ref->last_access_index; 269 int32 previous = index - 1; 270 if (previous < 0) 271 previous = LAST_ACCESSES - 1; 272 273 if (offset != ref->LastAccess(previous, isWrite)) 274 ref->last_access[previous] = 0; 275 276 ref->SetLastAccess(index, offset + bytes, isWrite); 277 278 if (++index >= LAST_ACCESSES) 279 index = 0; 280 ref->last_access_index = index; 281 } 282 283 284 static void 285 reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation, 286 size_t reservePages, bool isWrite) 287 { 288 if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) { 289 VMCache* cache = ref->cache; 290 cache->Lock(); 291 292 if (cache->consumers.IsEmpty() && cache->areas == NULL 293 && access_is_sequential(ref)) { 294 // we are not mapped, and we're accessed sequentially 295 296 if (isWrite) { 297 // Just write some pages back, and actually wait until they 298 // have been written back in order to relieve the page pressure 299 // a bit. 300 int32 index = ref->last_access_index; 301 int32 previous = index - 1; 302 if (previous < 0) 303 previous = LAST_ACCESSES - 1; 304 305 vm_page_write_modified_page_range(cache, 306 ref->LastAccessPageOffset(previous, true), 307 ref->LastAccessPageOffset(index, true)); 308 } else { 309 // free some pages from our cache 310 // TODO: start with oldest 311 uint32 left = reservePages; 312 vm_page* page; 313 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 314 (page = it.Next()) != NULL && left > 0;) { 315 if (page->State() == PAGE_STATE_CACHED && !page->busy) { 316 DEBUG_PAGE_ACCESS_START(page); 317 ASSERT(!page->IsMapped()); 318 ASSERT(!page->modified); 319 cache->RemovePage(page); 320 vm_page_set_state(page, PAGE_STATE_FREE); 321 left--; 322 } 323 } 324 } 325 } 326 cache->Unlock(); 327 } 328 329 vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER); 330 } 331 332 333 static inline status_t 334 read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset, 335 const generic_io_vec* vecs, size_t count, uint32 flags, 336 generic_size_t* _numBytes) 337 { 338 generic_size_t bytesUntouched = *_numBytes; 339 340 status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count, 341 flags, _numBytes); 342 343 generic_size_t bytesEnd = *_numBytes; 344 345 if (offset + (off_t)bytesEnd > ref->cache->virtual_end) 346 bytesEnd = ref->cache->virtual_end - offset; 347 348 if (status == B_OK && bytesEnd < bytesUntouched) { 349 // Clear out any leftovers that were not touched by the above read. 350 // We're doing this here so that not every file system/device has to 351 // implement this. 352 bytesUntouched -= bytesEnd; 353 354 for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) { 355 generic_size_t length = min_c(bytesUntouched, vecs[i].length); 356 vm_memset_physical(vecs[i].base + vecs[i].length - length, 0, 357 length); 358 359 bytesUntouched -= length; 360 } 361 } 362 363 return status; 364 } 365 366 367 /*! Reads the requested amount of data into the cache, and allocates 368 pages needed to fulfill that request. This function is called by cache_io(). 369 It can only handle a certain amount of bytes, and the caller must make 370 sure that it matches that criterion. 371 The cache_ref lock must be held when calling this function; during 372 operation it will unlock the cache, though. 373 */ 374 static status_t 375 read_into_cache(file_cache_ref* ref, void* cookie, off_t offset, 376 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 377 vm_page_reservation* reservation, size_t reservePages) 378 { 379 TRACE(("read_into_cache(offset = %lld, pageOffset = %ld, buffer = %#lx, " 380 "bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize)); 381 382 VMCache* cache = ref->cache; 383 384 // TODO: We're using way too much stack! Rather allocate a sufficiently 385 // large chunk on the heap. 386 generic_io_vec vecs[MAX_IO_VECS]; 387 uint32 vecCount = 0; 388 389 generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize); 390 vm_page* pages[MAX_IO_VECS]; 391 int32 pageIndex = 0; 392 393 // allocate pages for the cache and mark them busy 394 for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) { 395 vm_page* page = pages[pageIndex++] = vm_page_allocate_page( 396 reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY); 397 398 cache->InsertPage(page, offset + pos); 399 400 add_to_iovec(vecs, vecCount, MAX_IO_VECS, 401 page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE); 402 // TODO: check if the array is large enough (currently panics)! 403 } 404 405 push_access(ref, offset, bufferSize, false); 406 cache->Unlock(); 407 vm_page_unreserve_pages(reservation); 408 409 // read file into reserved pages 410 status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs, 411 vecCount, B_PHYSICAL_IO_REQUEST, &numBytes); 412 if (status != B_OK) { 413 // reading failed, free allocated pages 414 415 dprintf("file_cache: read pages failed: %s\n", strerror(status)); 416 417 cache->Lock(); 418 419 for (int32 i = 0; i < pageIndex; i++) { 420 cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY); 421 cache->RemovePage(pages[i]); 422 vm_page_set_state(pages[i], PAGE_STATE_FREE); 423 } 424 425 return status; 426 } 427 428 // copy the pages if needed and unmap them again 429 430 for (int32 i = 0; i < pageIndex; i++) { 431 if (useBuffer && bufferSize != 0) { 432 size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset); 433 434 vm_memcpy_from_physical((void*)buffer, 435 pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset, 436 bytes, IS_USER_ADDRESS(buffer)); 437 438 buffer += bytes; 439 bufferSize -= bytes; 440 pageOffset = 0; 441 } 442 } 443 444 reserve_pages(ref, reservation, reservePages, false); 445 cache->Lock(); 446 447 // make the pages accessible in the cache 448 for (int32 i = pageIndex; i-- > 0;) { 449 DEBUG_PAGE_ACCESS_END(pages[i]); 450 451 cache->MarkPageUnbusy(pages[i]); 452 } 453 454 return B_OK; 455 } 456 457 458 static status_t 459 read_from_file(file_cache_ref* ref, void* cookie, off_t offset, 460 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 461 vm_page_reservation* reservation, size_t reservePages) 462 { 463 TRACE(("read_from_file(offset = %lld, pageOffset = %ld, buffer = %#lx, " 464 "bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize)); 465 466 if (!useBuffer) 467 return B_OK; 468 469 generic_io_vec vec; 470 vec.base = buffer; 471 vec.length = bufferSize; 472 473 push_access(ref, offset, bufferSize, false); 474 ref->cache->Unlock(); 475 vm_page_unreserve_pages(reservation); 476 477 generic_size_t toRead = bufferSize; 478 status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset, 479 &vec, 1, 0, &toRead); 480 481 if (status == B_OK) 482 reserve_pages(ref, reservation, reservePages, false); 483 484 ref->cache->Lock(); 485 486 return status; 487 } 488 489 490 /*! Like read_into_cache() but writes data into the cache. 491 To preserve data consistency, it might also read pages into the cache, 492 though, if only a partial page gets written. 493 The same restrictions apply. 494 */ 495 static status_t 496 write_to_cache(file_cache_ref* ref, void* cookie, off_t offset, 497 int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer, 498 vm_page_reservation* reservation, size_t reservePages) 499 { 500 // TODO: We're using way too much stack! Rather allocate a sufficiently 501 // large chunk on the heap. 502 generic_io_vec vecs[MAX_IO_VECS]; 503 uint32 vecCount = 0; 504 generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize); 505 vm_page* pages[MAX_IO_VECS]; 506 int32 pageIndex = 0; 507 status_t status = B_OK; 508 509 // ToDo: this should be settable somewhere 510 bool writeThrough = false; 511 512 // allocate pages for the cache and mark them busy 513 for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) { 514 // TODO: if space is becoming tight, and this cache is already grown 515 // big - shouldn't we better steal the pages directly in that case? 516 // (a working set like approach for the file cache) 517 // TODO: the pages we allocate here should have been reserved upfront 518 // in cache_io() 519 vm_page* page = pages[pageIndex++] = vm_page_allocate_page( 520 reservation, 521 (writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED) 522 | VM_PAGE_ALLOC_BUSY); 523 524 page->modified = !writeThrough; 525 526 ref->cache->InsertPage(page, offset + pos); 527 528 add_to_iovec(vecs, vecCount, MAX_IO_VECS, 529 page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE); 530 } 531 532 push_access(ref, offset, bufferSize, true); 533 ref->cache->Unlock(); 534 vm_page_unreserve_pages(reservation); 535 536 // copy contents (and read in partially written pages first) 537 538 if (pageOffset != 0) { 539 // This is only a partial write, so we have to read the rest of the page 540 // from the file to have consistent data in the cache 541 generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE }; 542 generic_size_t bytesRead = B_PAGE_SIZE; 543 544 status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1, 545 B_PHYSICAL_IO_REQUEST, &bytesRead); 546 // ToDo: handle errors for real! 547 if (status < B_OK) 548 panic("1. vfs_read_pages() failed: %s!\n", strerror(status)); 549 } 550 551 size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE; 552 if (lastPageOffset != 0) { 553 // get the last page in the I/O vectors 554 generic_addr_t last = vecs[vecCount - 1].base 555 + vecs[vecCount - 1].length - B_PAGE_SIZE; 556 557 if ((off_t)(offset + pageOffset + bufferSize) == ref->cache->virtual_end) { 558 // the space in the page after this write action needs to be cleaned 559 vm_memset_physical(last + lastPageOffset, 0, 560 B_PAGE_SIZE - lastPageOffset); 561 } else { 562 // the end of this write does not happen on a page boundary, so we 563 // need to fetch the last page before we can update it 564 generic_io_vec readVec = { last, B_PAGE_SIZE }; 565 generic_size_t bytesRead = B_PAGE_SIZE; 566 567 status = vfs_read_pages(ref->vnode, cookie, 568 PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE, 569 &readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead); 570 // ToDo: handle errors for real! 571 if (status < B_OK) 572 panic("vfs_read_pages() failed: %s!\n", strerror(status)); 573 574 if (bytesRead < B_PAGE_SIZE) { 575 // the space beyond the file size needs to be cleaned 576 vm_memset_physical(last + bytesRead, 0, 577 B_PAGE_SIZE - bytesRead); 578 } 579 } 580 } 581 582 for (uint32 i = 0; i < vecCount; i++) { 583 generic_addr_t base = vecs[i].base; 584 generic_size_t bytes = min_c((generic_size_t)bufferSize, 585 generic_size_t(vecs[i].length - pageOffset)); 586 587 if (useBuffer) { 588 // copy data from user buffer 589 vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes, 590 IS_USER_ADDRESS(buffer)); 591 } else { 592 // clear buffer instead 593 vm_memset_physical(base + pageOffset, 0, bytes); 594 } 595 596 bufferSize -= bytes; 597 if (bufferSize == 0) 598 break; 599 600 buffer += bytes; 601 pageOffset = 0; 602 } 603 604 if (writeThrough) { 605 // write cached pages back to the file if we were asked to do that 606 status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs, 607 vecCount, B_PHYSICAL_IO_REQUEST, &numBytes); 608 if (status < B_OK) { 609 // ToDo: remove allocated pages, ...? 610 panic("file_cache: remove allocated pages! write pages failed: %s\n", 611 strerror(status)); 612 } 613 } 614 615 if (status == B_OK) 616 reserve_pages(ref, reservation, reservePages, true); 617 618 ref->cache->Lock(); 619 620 // make the pages accessible in the cache 621 for (int32 i = pageIndex; i-- > 0;) { 622 ref->cache->MarkPageUnbusy(pages[i]); 623 624 DEBUG_PAGE_ACCESS_END(pages[i]); 625 } 626 627 return status; 628 } 629 630 631 static status_t 632 write_zeros_to_file(struct vnode* vnode, void* cookie, off_t offset, 633 size_t* _size) 634 { 635 size_t size = *_size; 636 status_t status = B_OK; 637 while (size > 0) { 638 generic_size_t length = min_c(size, kZeroVecSize); 639 generic_io_vec* vecs = sZeroVecs; 640 generic_io_vec vec; 641 size_t count = kZeroVecCount; 642 if (length != kZeroVecSize) { 643 if (length > B_PAGE_SIZE) { 644 length = ROUNDDOWN(length, B_PAGE_SIZE); 645 count = length / B_PAGE_SIZE; 646 } else { 647 vec.base = sZeroPage; 648 vec.length = length; 649 vecs = &vec; 650 count = 1; 651 } 652 } 653 654 status = vfs_write_pages(vnode, cookie, offset, 655 vecs, count, B_PHYSICAL_IO_REQUEST, &length); 656 if (status != B_OK || length == 0) 657 break; 658 659 offset += length; 660 size -= length; 661 } 662 663 *_size = *_size - size; 664 return status; 665 } 666 667 668 static status_t 669 write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset, 670 addr_t buffer, size_t bufferSize, bool useBuffer, 671 vm_page_reservation* reservation, size_t reservePages) 672 { 673 push_access(ref, offset, bufferSize, true); 674 ref->cache->Unlock(); 675 vm_page_unreserve_pages(reservation); 676 677 status_t status = B_OK; 678 679 if (!useBuffer) { 680 status = write_zeros_to_file(ref->vnode, cookie, offset + pageOffset, 681 &bufferSize); 682 } else { 683 generic_io_vec vec; 684 vec.base = buffer; 685 vec.length = bufferSize; 686 generic_size_t toWrite = bufferSize; 687 status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset, 688 &vec, 1, 0, &toWrite); 689 } 690 691 if (status == B_OK) 692 reserve_pages(ref, reservation, reservePages, true); 693 694 ref->cache->Lock(); 695 696 return status; 697 } 698 699 700 static inline status_t 701 satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function, 702 off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset, 703 size_t bytesLeft, size_t &reservePages, off_t &lastOffset, 704 addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft, 705 size_t &lastReservedPages, vm_page_reservation* reservation) 706 { 707 if (lastBuffer == buffer) 708 return B_OK; 709 710 size_t requestSize = buffer - lastBuffer; 711 reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize 712 + lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 713 714 status_t status = function(ref, cookie, lastOffset, lastPageOffset, 715 lastBuffer, requestSize, useBuffer, reservation, reservePages); 716 if (status == B_OK) { 717 lastReservedPages = reservePages; 718 lastBuffer = buffer; 719 lastLeft = bytesLeft; 720 lastOffset = offset; 721 lastPageOffset = 0; 722 pageOffset = 0; 723 } 724 return status; 725 } 726 727 728 static status_t 729 cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer, 730 size_t* _size, bool doWrite) 731 { 732 if (_cacheRef == NULL) 733 panic("cache_io() called with NULL ref!\n"); 734 735 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 736 VMCache* cache = ref->cache; 737 bool useBuffer = buffer != 0; 738 739 TRACE(("cache_io(ref = %p, offset = %lld, buffer = %p, size = %lu, %s)\n", 740 ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read")); 741 742 int32 pageOffset = offset & (B_PAGE_SIZE - 1); 743 size_t size = *_size; 744 offset -= pageOffset; 745 746 // "offset" and "lastOffset" are always aligned to B_PAGE_SIZE, 747 // the "last*" variables always point to the end of the last 748 // satisfied request part 749 750 const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE; 751 size_t bytesLeft = size, lastLeft = size; 752 int32 lastPageOffset = pageOffset; 753 addr_t lastBuffer = buffer; 754 off_t lastOffset = offset; 755 size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft 756 + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 757 size_t reservePages = 0; 758 size_t pagesProcessed = 0; 759 cache_func function = NULL; 760 761 vm_page_reservation reservation; 762 reserve_pages(ref, &reservation, lastReservedPages, doWrite); 763 764 AutoLocker<VMCache> locker(cache); 765 766 while (bytesLeft > 0) { 767 // Periodically reevaluate the low memory situation and select the 768 // read/write hook accordingly 769 if (pagesProcessed % 32 == 0) { 770 if (size >= BYPASS_IO_SIZE 771 && low_resource_state(B_KERNEL_RESOURCE_PAGES) 772 != B_NO_LOW_RESOURCE) { 773 // In low memory situations we bypass the cache beyond a 774 // certain I/O size. 775 function = doWrite ? write_to_file : read_from_file; 776 } else 777 function = doWrite ? write_to_cache : read_into_cache; 778 } 779 780 // check if this page is already in memory 781 vm_page* page = cache->LookupPage(offset); 782 if (page != NULL) { 783 // The page may be busy - since we need to unlock the cache sometime 784 // in the near future, we need to satisfy the request of the pages 785 // we didn't get yet (to make sure no one else interferes in the 786 // meantime). 787 status_t status = satisfy_cache_io(ref, cookie, function, offset, 788 buffer, useBuffer, pageOffset, bytesLeft, reservePages, 789 lastOffset, lastBuffer, lastPageOffset, lastLeft, 790 lastReservedPages, &reservation); 791 if (status != B_OK) 792 return status; 793 794 // Since satisfy_cache_io() unlocks the cache, we need to look up 795 // the page again. 796 page = cache->LookupPage(offset); 797 if (page != NULL && page->busy) { 798 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true); 799 continue; 800 } 801 } 802 803 size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft); 804 805 TRACE(("lookup page from offset %lld: %p, size = %lu, pageOffset " 806 "= %lu\n", offset, page, bytesLeft, pageOffset)); 807 808 if (page != NULL) { 809 if (doWrite || useBuffer) { 810 // Since the following user_mem{cpy,set}() might cause a page 811 // fault, which in turn might cause pages to be reserved, we 812 // need to unlock the cache temporarily to avoid a potential 813 // deadlock. To make sure that our page doesn't go away, we mark 814 // it busy for the time. 815 page->busy = true; 816 locker.Unlock(); 817 818 // copy the contents of the page already in memory 819 phys_addr_t pageAddress 820 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 821 + pageOffset; 822 bool userBuffer = IS_USER_ADDRESS(buffer); 823 if (doWrite) { 824 if (useBuffer) { 825 vm_memcpy_to_physical(pageAddress, (void*)buffer, 826 bytesInPage, userBuffer); 827 } else { 828 vm_memset_physical(pageAddress, 0, bytesInPage); 829 } 830 } else if (useBuffer) { 831 vm_memcpy_from_physical((void*)buffer, pageAddress, 832 bytesInPage, userBuffer); 833 } 834 835 locker.Lock(); 836 837 if (doWrite) { 838 DEBUG_PAGE_ACCESS_START(page); 839 840 page->modified = true; 841 842 if (page->State() != PAGE_STATE_MODIFIED) 843 vm_page_set_state(page, PAGE_STATE_MODIFIED); 844 845 DEBUG_PAGE_ACCESS_END(page); 846 } 847 848 cache->MarkPageUnbusy(page); 849 } 850 851 // If it is cached only, requeue the page, so the respective queue 852 // roughly remains LRU first sorted. 853 if (page->State() == PAGE_STATE_CACHED 854 || page->State() == PAGE_STATE_MODIFIED) { 855 DEBUG_PAGE_ACCESS_START(page); 856 vm_page_requeue(page, true); 857 DEBUG_PAGE_ACCESS_END(page); 858 } 859 860 if (bytesLeft <= bytesInPage) { 861 // we've read the last page, so we're done! 862 locker.Unlock(); 863 vm_page_unreserve_pages(&reservation); 864 return B_OK; 865 } 866 867 // prepare a potential gap request 868 lastBuffer = buffer + bytesInPage; 869 lastLeft = bytesLeft - bytesInPage; 870 lastOffset = offset + B_PAGE_SIZE; 871 lastPageOffset = 0; 872 } 873 874 if (bytesLeft <= bytesInPage) 875 break; 876 877 buffer += bytesInPage; 878 bytesLeft -= bytesInPage; 879 pageOffset = 0; 880 offset += B_PAGE_SIZE; 881 pagesProcessed++; 882 883 if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) { 884 status_t status = satisfy_cache_io(ref, cookie, function, offset, 885 buffer, useBuffer, pageOffset, bytesLeft, reservePages, 886 lastOffset, lastBuffer, lastPageOffset, lastLeft, 887 lastReservedPages, &reservation); 888 if (status != B_OK) 889 return status; 890 } 891 } 892 893 // fill the last remaining bytes of the request (either write or read) 894 895 return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer, 896 lastLeft, useBuffer, &reservation, 0); 897 } 898 899 900 static status_t 901 file_cache_control(const char* subsystem, uint32 function, void* buffer, 902 size_t bufferSize) 903 { 904 switch (function) { 905 case CACHE_CLEAR: 906 // ToDo: clear the cache 907 dprintf("cache_control: clear cache!\n"); 908 return B_OK; 909 910 case CACHE_SET_MODULE: 911 { 912 cache_module_info* module = sCacheModule; 913 914 // unset previous module 915 916 if (sCacheModule != NULL) { 917 sCacheModule = NULL; 918 snooze(100000); // 0.1 secs 919 put_module(module->info.name); 920 } 921 922 // get new module, if any 923 924 if (buffer == NULL) 925 return B_OK; 926 927 char name[B_FILE_NAME_LENGTH]; 928 if (!IS_USER_ADDRESS(buffer) 929 || user_strlcpy(name, (char*)buffer, 930 B_FILE_NAME_LENGTH) < B_OK) 931 return B_BAD_ADDRESS; 932 933 if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME))) 934 return B_BAD_VALUE; 935 936 dprintf("cache_control: set module %s!\n", name); 937 938 status_t status = get_module(name, (module_info**)&module); 939 if (status == B_OK) 940 sCacheModule = module; 941 942 return status; 943 } 944 } 945 946 return B_BAD_HANDLER; 947 } 948 949 950 // #pragma mark - private kernel API 951 952 953 extern "C" void 954 cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size) 955 { 956 if (size == 0) 957 return; 958 959 VMCache* cache; 960 if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK) 961 return; 962 if (cache->type != CACHE_TYPE_VNODE) { 963 cache->ReleaseRef(); 964 return; 965 } 966 967 file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef(); 968 off_t fileSize = cache->virtual_end; 969 970 if ((off_t)(offset + size) > fileSize) 971 size = fileSize - offset; 972 973 // "offset" and "size" are always aligned to B_PAGE_SIZE, 974 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 975 size = ROUNDUP(size, B_PAGE_SIZE); 976 977 const size_t pagesCount = size / B_PAGE_SIZE; 978 979 // Don't do anything if we don't have the resources left, or the cache 980 // already contains more than 2/3 of its pages 981 if (offset >= fileSize || vm_page_num_unused_pages() < 2 * pagesCount 982 || (3 * cache->page_count) > (2 * fileSize / B_PAGE_SIZE)) { 983 cache->ReleaseRef(); 984 return; 985 } 986 987 size_t bytesToRead = 0; 988 off_t lastOffset = offset; 989 990 vm_page_reservation reservation; 991 vm_page_reserve_pages(&reservation, pagesCount, VM_PRIORITY_USER); 992 993 cache->Lock(); 994 995 while (true) { 996 // check if this page is already in memory 997 if (size > 0) { 998 vm_page* page = cache->LookupPage(offset); 999 1000 offset += B_PAGE_SIZE; 1001 size -= B_PAGE_SIZE; 1002 1003 if (page == NULL) { 1004 bytesToRead += B_PAGE_SIZE; 1005 continue; 1006 } 1007 } 1008 if (bytesToRead != 0) { 1009 // read the part before the current page (or the end of the request) 1010 PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset, 1011 bytesToRead); 1012 if (io == NULL || io->Prepare(&reservation) != B_OK) { 1013 delete io; 1014 break; 1015 } 1016 1017 // we must not have the cache locked during I/O 1018 cache->Unlock(); 1019 io->ReadAsync(); 1020 cache->Lock(); 1021 1022 bytesToRead = 0; 1023 } 1024 1025 if (size == 0) { 1026 // we have reached the end of the request 1027 break; 1028 } 1029 1030 lastOffset = offset; 1031 } 1032 1033 cache->ReleaseRefAndUnlock(); 1034 vm_page_unreserve_pages(&reservation); 1035 } 1036 1037 1038 extern "C" void 1039 cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size) 1040 { 1041 // ToDo: schedule prefetch 1042 1043 TRACE(("cache_prefetch(vnode %ld:%lld)\n", mountID, vnodeID)); 1044 1045 // get the vnode for the object, this also grabs a ref to it 1046 struct vnode* vnode; 1047 if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK) 1048 return; 1049 1050 cache_prefetch_vnode(vnode, offset, size); 1051 vfs_put_vnode(vnode); 1052 } 1053 1054 1055 extern "C" void 1056 cache_node_opened(struct vnode* vnode, VMCache* cache, 1057 dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name) 1058 { 1059 if (sCacheModule == NULL || sCacheModule->node_opened == NULL) 1060 return; 1061 1062 off_t size = -1; 1063 if (cache != NULL && cache->type == CACHE_TYPE_VNODE) { 1064 file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef(); 1065 if (ref != NULL) 1066 size = cache->virtual_end; 1067 } 1068 1069 sCacheModule->node_opened(vnode, mountID, parentID, vnodeID, name, 1070 size); 1071 } 1072 1073 1074 extern "C" void 1075 cache_node_closed(struct vnode* vnode, VMCache* cache, 1076 dev_t mountID, ino_t vnodeID) 1077 { 1078 if (sCacheModule == NULL || sCacheModule->node_closed == NULL) 1079 return; 1080 1081 int32 accessType = 0; 1082 if (cache != NULL && cache->type == CACHE_TYPE_VNODE) { 1083 // ToDo: set accessType 1084 } 1085 1086 sCacheModule->node_closed(vnode, mountID, vnodeID, accessType); 1087 } 1088 1089 1090 extern "C" void 1091 cache_node_launched(size_t argCount, char* const* args) 1092 { 1093 if (sCacheModule == NULL || sCacheModule->node_launched == NULL) 1094 return; 1095 1096 sCacheModule->node_launched(argCount, args); 1097 } 1098 1099 1100 extern "C" status_t 1101 file_cache_init_post_boot_device(void) 1102 { 1103 // ToDo: get cache module out of driver settings 1104 1105 if (get_module("file_cache/launch_speedup/v1", 1106 (module_info**)&sCacheModule) == B_OK) { 1107 dprintf("** opened launch speedup: %" B_PRId64 "\n", system_time()); 1108 } 1109 return B_OK; 1110 } 1111 1112 1113 extern "C" status_t 1114 file_cache_init(void) 1115 { 1116 // allocate a clean page we can use for writing zeroes 1117 vm_page_reservation reservation; 1118 vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM); 1119 vm_page* page = vm_page_allocate_page(&reservation, 1120 PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR); 1121 vm_page_unreserve_pages(&reservation); 1122 1123 sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1124 1125 for (uint32 i = 0; i < kZeroVecCount; i++) { 1126 sZeroVecs[i].base = sZeroPage; 1127 sZeroVecs[i].length = B_PAGE_SIZE; 1128 } 1129 1130 register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0); 1131 return B_OK; 1132 } 1133 1134 1135 // #pragma mark - public FS API 1136 1137 1138 extern "C" void* 1139 file_cache_create(dev_t mountID, ino_t vnodeID, off_t size) 1140 { 1141 TRACE(("file_cache_create(mountID = %ld, vnodeID = %lld, size = %lld)\n", 1142 mountID, vnodeID, size)); 1143 1144 file_cache_ref* ref = new file_cache_ref; 1145 if (ref == NULL) 1146 return NULL; 1147 1148 memset(ref->last_access, 0, sizeof(ref->last_access)); 1149 ref->last_access_index = 0; 1150 ref->disabled_count = 0; 1151 1152 // TODO: delay VMCache creation until data is 1153 // requested/written for the first time? Listing lots of 1154 // files in Tracker (and elsewhere) could be slowed down. 1155 // Since the file_cache_ref itself doesn't have a lock, 1156 // we would need to "rent" one during construction, possibly 1157 // the vnode lock, maybe a dedicated one. 1158 // As there shouldn't be too much contention, we could also 1159 // use atomic_test_and_set(), and free the resources again 1160 // when that fails... 1161 1162 // Get the vnode for the object 1163 // (note, this does not grab a reference to the node) 1164 if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK) 1165 goto err1; 1166 1167 // Gets (usually creates) the cache for the node 1168 if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK) 1169 goto err1; 1170 1171 ref->cache->virtual_end = size; 1172 ((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref); 1173 return ref; 1174 1175 err1: 1176 delete ref; 1177 return NULL; 1178 } 1179 1180 1181 extern "C" void 1182 file_cache_delete(void* _cacheRef) 1183 { 1184 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1185 1186 if (ref == NULL) 1187 return; 1188 1189 TRACE(("file_cache_delete(ref = %p)\n", ref)); 1190 1191 ref->cache->ReleaseRef(); 1192 delete ref; 1193 } 1194 1195 1196 extern "C" void 1197 file_cache_enable(void* _cacheRef) 1198 { 1199 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1200 1201 AutoLocker<VMCache> _(ref->cache); 1202 1203 if (ref->disabled_count == 0) { 1204 panic("Unbalanced file_cache_enable()!"); 1205 return; 1206 } 1207 1208 ref->disabled_count--; 1209 } 1210 1211 1212 extern "C" status_t 1213 file_cache_disable(void* _cacheRef) 1214 { 1215 // TODO: This function only removes all pages from the cache and prevents 1216 // that the file cache functions add any new ones until re-enabled. The 1217 // VM (on page fault) can still add pages, if the file is mmap()ed. We 1218 // should mark the cache to prevent shared mappings of the file and fix 1219 // the page fault code to deal correctly with private mappings (i.e. only 1220 // insert pages in consumer caches). 1221 1222 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1223 1224 AutoLocker<VMCache> _(ref->cache); 1225 1226 // If already disabled, there's nothing to do for us. 1227 if (ref->disabled_count > 0) { 1228 ref->disabled_count++; 1229 return B_OK; 1230 } 1231 1232 // The file cache is not yet disabled. We need to evict all cached pages. 1233 status_t error = ref->cache->FlushAndRemoveAllPages(); 1234 if (error != B_OK) 1235 return error; 1236 1237 ref->disabled_count++; 1238 return B_OK; 1239 } 1240 1241 1242 extern "C" bool 1243 file_cache_is_enabled(void* _cacheRef) 1244 { 1245 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1246 AutoLocker<VMCache> _(ref->cache); 1247 1248 return ref->disabled_count == 0; 1249 } 1250 1251 1252 extern "C" status_t 1253 file_cache_set_size(void* _cacheRef, off_t newSize) 1254 { 1255 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1256 1257 TRACE(("file_cache_set_size(ref = %p, size = %lld)\n", ref, newSize)); 1258 1259 if (ref == NULL) 1260 return B_OK; 1261 1262 VMCache* cache = ref->cache; 1263 AutoLocker<VMCache> _(cache); 1264 1265 off_t oldSize = cache->virtual_end; 1266 status_t status = cache->Resize(newSize, VM_PRIORITY_USER); 1267 // Note, the priority doesn't really matter, since this cache doesn't 1268 // reserve any memory. 1269 if (status == B_OK && newSize < oldSize) { 1270 // We may have a new partial page at the end of the cache that must be 1271 // cleared. 1272 uint32 partialBytes = newSize % B_PAGE_SIZE; 1273 if (partialBytes != 0) { 1274 vm_page* page = cache->LookupPage(newSize - partialBytes); 1275 if (page != NULL) { 1276 vm_memset_physical(page->physical_page_number * B_PAGE_SIZE 1277 + partialBytes, 0, B_PAGE_SIZE - partialBytes); 1278 } 1279 } 1280 } 1281 1282 return status; 1283 } 1284 1285 1286 extern "C" status_t 1287 file_cache_sync(void* _cacheRef) 1288 { 1289 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1290 if (ref == NULL) 1291 return B_BAD_VALUE; 1292 1293 return ref->cache->WriteModified(); 1294 } 1295 1296 1297 extern "C" status_t 1298 file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer, 1299 size_t* _size) 1300 { 1301 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1302 1303 TRACE(("file_cache_read(ref = %p, offset = %lld, buffer = %p, size = %lu)\n", 1304 ref, offset, buffer, *_size)); 1305 1306 // Bounds checking. We do this here so it applies to uncached I/O. 1307 if (offset < 0) 1308 return B_BAD_VALUE; 1309 const off_t fileSize = ref->cache->virtual_end; 1310 if (offset >= fileSize || *_size == 0) { 1311 *_size = 0; 1312 return B_OK; 1313 } 1314 if ((off_t)(offset + *_size) > fileSize) 1315 *_size = fileSize - offset; 1316 1317 if (ref->disabled_count > 0) { 1318 // Caching is disabled -- read directly from the file. 1319 generic_io_vec vec; 1320 vec.base = (addr_t)buffer; 1321 generic_size_t size = vec.length = *_size; 1322 status_t error = vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0, 1323 &size); 1324 *_size = size; 1325 return error; 1326 } 1327 1328 return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false); 1329 } 1330 1331 1332 extern "C" status_t 1333 file_cache_write(void* _cacheRef, void* cookie, off_t offset, 1334 const void* buffer, size_t* _size) 1335 { 1336 file_cache_ref* ref = (file_cache_ref*)_cacheRef; 1337 1338 // We don't do bounds checking here, as we are relying on the 1339 // file system which called us to already have done that and made 1340 // adjustments as necessary, unlike in read(). 1341 1342 if (ref->disabled_count > 0) { 1343 // Caching is disabled -- write directly to the file. 1344 if (buffer != NULL) { 1345 generic_io_vec vec; 1346 vec.base = (addr_t)buffer; 1347 generic_size_t size = vec.length = *_size; 1348 1349 status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec, 1350 1, 0, &size); 1351 *_size = size; 1352 return error; 1353 } 1354 return write_zeros_to_file(ref->vnode, cookie, offset, _size); 1355 } 1356 1357 status_t status = cache_io(ref, cookie, offset, 1358 (addr_t)const_cast<void*>(buffer), _size, true); 1359 1360 TRACE(("file_cache_write(ref = %p, offset = %lld, buffer = %p, size = %lu)" 1361 " = %ld\n", ref, offset, buffer, *_size, status)); 1362 1363 return status; 1364 } 1365