/*
 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#include <KernelExport.h>
#include <fs_cache.h>

#include <condition_variable.h>
#include <file_cache.h>
#include <generic_syscall.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <util/AutoLock.h>
#include <util/kernel_cpp.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/VMCache.h>

#include "IORequest.h"


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			32	// 128 kB
#define MAX_FILE_IO_VECS	32

#define BYPASS_IO_SIZE		65536
#define LAST_ACCESSES		3

struct file_cache_ref {
	VMCache *cache;
	struct vnode *vnode;
	off_t last_access[LAST_ACCESSES];
		// TODO: it would probably be enough to only store the least
		// significant 31 bits, and make this uint32 (one bit for
		// write vs. read)
	int32 last_access_index;
	uint16 disabled_count;

	inline void SetLastAccess(int32 index, off_t access, bool isWrite)
	{
		// we remember writes as negative offsets
		last_access[index] = isWrite ? -access : access;
	}

	inline off_t LastAccess(int32 index, bool isWrite) const
	{
		return isWrite ? -last_access[index] : last_access[index];
	}

	inline uint32 LastAccessPageOffset(int32 index, bool isWrite)
	{
		return LastAccess(index, isWrite) >> PAGE_SHIFT;
	}
};

class PrecacheIO : public AsyncIOCallback {
public:
	PrecacheIO(file_cache_ref* ref, off_t offset,
		size_t size);
	~PrecacheIO();

	status_t Prepare(vm_page_reservation* reservation);
	void ReadAsync();

	virtual void IOFinished(status_t status,
		bool partialTransfer,
		size_t bytesTransferred);

private:
	file_cache_ref* fRef;
	VMCache* fCache;
	vm_page** fPages;
	size_t fPageCount;
	ConditionVariable* fBusyConditions;
	iovec* fVecs;
	off_t fOffset;
	uint32 fVecCount;
	size_t fSize;
#if DEBUG_PAGE_ACCESS
	thread_id fAllocatingThread;
#endif
};

typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages);

static void add_to_iovec(iovec* vecs, uint32 &index, uint32 max, addr_t address,
	size_t size);


static struct cache_module_info* sCacheModule;


static const uint32 kZeroVecCount = 32;
static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE;
static addr_t sZeroPage;	// physical address
static iovec sZeroVecs[kZeroVecCount];


// #pragma mark -


PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, size_t size)
	:
	fRef(ref),
	fCache(ref->cache),
	fPages(NULL),
	fVecs(NULL),
	fOffset(offset),
	fVecCount(0),
	fSize(size)
{
	fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
	fCache->AcquireRefLocked();
}


PrecacheIO::~PrecacheIO()
{
	delete[] fPages;
	delete[] fVecs;
	fCache->ReleaseRefLocked();
}


status_t
PrecacheIO::Prepare(vm_page_reservation* reservation)
{
	if (fPageCount == 0)
		return B_BAD_VALUE;

	fPages = new(std::nothrow) vm_page*[fPageCount];
	if (fPages == NULL)
		return B_NO_MEMORY;

	fVecs = new(std::nothrow) iovec[fPageCount];
	if (fVecs == NULL)
		return B_NO_MEMORY;

	// allocate pages for the cache and mark them busy
	uint32 i = 0;
	for (size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) {
		vm_page* page = vm_page_allocate_page(reservation,
			PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		fCache->InsertPage(page, fOffset + pos);

		add_to_iovec(fVecs, fVecCount, fPageCount,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
		fPages[i++] = page;
	}

#if DEBUG_PAGE_ACCESS
	fAllocatingThread = find_thread(NULL);
#endif

	return B_OK;
}


void
PrecacheIO::ReadAsync()
{
	// This object is going to be deleted after the I/O request has been
	// fulfilled
	vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount,
		fSize, B_PHYSICAL_IO_REQUEST, this);
}


void
PrecacheIO::IOFinished(status_t status, bool partialTransfer,
	size_t bytesTransferred)
{
	AutoLocker<VMCache> locker(fCache);

	// Make successfully loaded pages accessible again (partially
	// transferred pages are considered failed)
	size_t pagesTransferred
		= (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE;

	if (fOffset + bytesTransferred > fCache->virtual_end)
		bytesTransferred = fCache->virtual_end - fOffset;

	for (uint32 i = 0; i < pagesTransferred; i++) {
		if (i == pagesTransferred - 1
			&& (bytesTransferred % B_PAGE_SIZE) != 0) {
			// clear partial page
			size_t bytesTouched = bytesTransferred % B_PAGE_SIZE;
			vm_memset_physical((fPages[i]->physical_page_number << PAGE_SHIFT)
				+ bytesTouched, 0, B_PAGE_SIZE - bytesTouched);
		}

		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);

		fCache->MarkPageUnbusy(fPages[i]);

		DEBUG_PAGE_ACCESS_END(fPages[i]);
	}

	// Free pages after failed I/O
	for (uint32 i = pagesTransferred; i < fPageCount; i++) {
		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
		fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY);
		fCache->RemovePage(fPages[i]);
		vm_page_set_state(fPages[i], PAGE_STATE_FREE);
	}

	delete this;
}


// #pragma mark -


static void
add_to_iovec(iovec* vecs, uint32 &index, uint32 max, addr_t address,
	size_t size)
{
	if (index > 0 && (addr_t)vecs[index - 1].iov_base
			+ vecs[index - 1].iov_len == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].iov_len += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].iov_base = (void*)address;
	vecs[index].iov_len = size;
	index++;
}


static inline bool
access_is_sequential(file_cache_ref* ref)
{
	return ref->last_access[ref->last_access_index] != 0;
}


static inline void
push_access(file_cache_ref* ref, off_t offset, size_t bytes, bool isWrite)
{
	TRACE(("%p: push %Ld, %ld, %s\n", ref, offset, bytes,
		isWrite ? "write" : "read"));

	int32 index = ref->last_access_index;
	int32 previous = index - 1;
	if (previous < 0)
		previous = LAST_ACCESSES - 1;

	if (offset != ref->LastAccess(previous, isWrite))
		ref->last_access[previous] = 0;

	ref->SetLastAccess(index, offset + bytes, isWrite);

	if (++index >= LAST_ACCESSES)
		index = 0;
	ref->last_access_index = index;
}


static void
reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation,
	size_t reservePages, bool isWrite)
{
	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
		VMCache* cache = ref->cache;
		cache->Lock();

		if (list_is_empty(&cache->consumers) && cache->areas == NULL
			&& access_is_sequential(ref)) {
			// we are not mapped, and we're accessed sequentially

			if (isWrite) {
				// Just write some pages back, and actually wait until they
				// have been written back in order to relieve the page pressure
				// a bit.
				int32 index = ref->last_access_index;
				int32 previous = index - 1;
				if (previous < 0)
					previous = LAST_ACCESSES - 1;

				vm_page_write_modified_page_range(cache,
					ref->LastAccessPageOffset(previous, true),
					ref->LastAccessPageOffset(index, true));
			} else {
				// free some pages from our cache
				// TODO: start with oldest
				uint32 left = reservePages;
				vm_page* page;
				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
						(page = it.Next()) != NULL && left > 0;) {
					if (page->State() == PAGE_STATE_CACHED && !page->busy) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT(!page->IsMapped());
						ASSERT(!page->modified);
						cache->RemovePage(page);
						vm_page_set_state(page, PAGE_STATE_FREE);
						left--;
					}
				}
			}
		}
		cache->Unlock();
	}

	vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER);
}


static inline status_t
read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
	const iovec* vecs, size_t count, uint32 flags, size_t* _numBytes)
{
	size_t bytesUntouched = *_numBytes;

	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
		flags, _numBytes);

	size_t bytesEnd = *_numBytes;

	if (offset + bytesEnd > ref->cache->virtual_end)
		bytesEnd = ref->cache->virtual_end - offset;

	if (status == B_OK && bytesEnd < bytesUntouched) {
		// Clear out any leftovers that were not touched by the above read.
		// We're doing this here so that not every file system/device has to
		// implement this.
		bytesUntouched -= bytesEnd;

		for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) {
			size_t length = min_c(bytesUntouched, vecs[i].iov_len);
			vm_memset_physical((addr_t)vecs[i].iov_base + vecs[i].iov_len
				- length, 0, length);

			bytesUntouched -= length;
		}
	}

	return status;
}


/*!	Reads the requested amount of data into the cache, and allocates
	pages needed to fulfill that request. This function is called by cache_io().
	It can only handle a limited number of bytes per call (at most
	MAX_IO_VECS pages), and the caller must make sure that this limit is
	not exceeded.
	The cache_ref lock must be held when calling this function; during
	operation it will unlock the cache, though.
*/
static status_t
read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	VMCache* cache = ref->cache;

	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	iovec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;

	size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
			// TODO: check if the array is large enough (currently panics)!
	}

	push_access(ref, offset, bufferSize, false);
	cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// read file into reserved pages
	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
	if (status != B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		cache->Lock();

		for (int32 i = 0; i < pageIndex; i++) {
			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
			cache->RemovePage(pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages if needed and unmap them again

	for (int32 i = 0; i < pageIndex; i++) {
		if (useBuffer && bufferSize != 0) {
			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);

			vm_memcpy_from_physical((void*)buffer,
				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
				bytes, true);

			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}
	}

	reserve_pages(ref, reservation, reservePages, false);
	cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		DEBUG_PAGE_ACCESS_END(pages[i]);

		cache->MarkPageUnbusy(pages[i]);
	}

	return B_OK;
}


static status_t
read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	if (!useBuffer)
		return B_OK;

	iovec vec;
	vec.iov_base = (void*)buffer;
	vec.iov_len = bufferSize;

	push_access(ref, offset, bufferSize, false);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
		&vec, 1, 0, &bufferSize);

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, false);

	ref->cache->Lock();

	return status;
}


/*!	Like read_into_cache() but writes data into the cache.
	To preserve data consistency, it might also read pages into the cache,
	though, if only a partial page gets written.
	The same restrictions apply.
*/
static status_t
write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	iovec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;
	size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		// TODO: if space is becoming tight, and this cache is already grown
		//	big - shouldn't we better steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		// TODO: the pages we allocate here should have been reserved upfront
		//	in cache_io()
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation,
			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
				| VM_PAGE_ALLOC_BUSY);

		page->modified = !writeThrough;

		ref->cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
	}

	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE };
		size_t bytesRead = B_PAGE_SIZE;

		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
			B_PHYSICAL_IO_REQUEST, &bytesRead);
		// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
	}

	addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1);
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		addr_t last = (addr_t)vecs[vecCount - 1].iov_base
			+ vecs[vecCount - 1].iov_len - B_PAGE_SIZE;

		if (offset + pageOffset + bufferSize == ref->cache->virtual_end) {
			// the space in the page after this write action needs to be cleaned
			vm_memset_physical(last + lastPageOffset, 0,
				B_PAGE_SIZE - lastPageOffset);
		} else {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			iovec readVec = { (void*)last, B_PAGE_SIZE };
			size_t bytesRead = B_PAGE_SIZE;

			status = vfs_read_pages(ref->vnode, cookie,
				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
			// ToDo: handle errors for real!
			if (status < B_OK)
				panic("vfs_read_pages() failed: %s!\n", strerror(status));

			if (bytesRead < B_PAGE_SIZE) {
				// the space beyond the file size needs to be cleaned
				vm_memset_physical(last + bytesRead, 0,
					B_PAGE_SIZE - bytesRead);
			}
		}
	}

	for (uint32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t bytes = min_c(bufferSize,
			size_t(vecs[i].iov_len - pageOffset));

		if (useBuffer) {
			// copy data from user buffer
			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
				true);
		} else {
			// clear buffer instead
			vm_memset_physical(base + pageOffset, 0, bytes);
		}

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		ref->cache->MarkPageUnbusy(pages[i]);

		DEBUG_PAGE_ACCESS_END(pages[i]);
	}

	return status;
}


static status_t
write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
	addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	status_t status = B_OK;

	if (!useBuffer) {
		while (bufferSize > 0) {
			size_t written = min_c(bufferSize, kZeroVecSize);
			status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (status != B_OK)
				return status;
			if (written == 0)
				return B_ERROR;

			bufferSize -= written;
			pageOffset += written;
		}
	} else {
		iovec vec;
		vec.iov_base = (void*)buffer;
		vec.iov_len = bufferSize;
		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
			&vec, 1, 0, &bufferSize);
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	return status;
}


static inline status_t
satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function,
	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
	size_t &lastReservedPages, vm_page_reservation* reservation)
{
	if (lastBuffer == buffer)
		return B_OK;

	size_t requestSize = buffer - lastBuffer;
	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);

	status_t status = function(ref, cookie, lastOffset, lastPageOffset,
		lastBuffer, requestSize, useBuffer, reservation, reservePages);
	if (status == B_OK) {
		lastReservedPages = reservePages;
		lastBuffer = buffer;
		lastLeft = bytesLeft;
		lastOffset = offset;
		lastPageOffset = 0;
		pageOffset = 0;
	}
	return status;
}


static status_t
cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
	size_t* _size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	VMCache* cache = ref->cache;
	off_t fileSize = cache->virtual_end;
	bool useBuffer = buffer != 0;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if (offset + pageOffset + size > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}
	if (size == 0)
		return B_OK;

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;
	size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft
		+ B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	size_t reservePages = 0;
	size_t pagesProcessed = 0;
	cache_func function = NULL;

	vm_page_reservation reservation;
	reserve_pages(ref, &reservation, lastReservedPages, doWrite);

	AutoLocker<VMCache> locker(cache);

	while (bytesLeft > 0) {
		// Periodically reevaluate the low memory situation and select the
		// read/write hook accordingly
		if (pagesProcessed % 32 == 0) {
			if (size >= BYPASS_IO_SIZE
				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
					!= B_NO_LOW_RESOURCE) {
				// In low memory situations we bypass the cache beyond a
				// certain I/O size.
				function = doWrite ? write_to_file : read_from_file;
			} else
				function = doWrite ? write_to_cache : read_into_cache;
		}

		// check if this page is already in memory
		vm_page* page = cache->LookupPage(offset);
		if (page != NULL) {
			// The page may be busy - since we need to unlock the cache sometime
			// in the near future, we need to satisfy the request of the pages
			// we didn't get yet (to make sure no one else interferes in the
			// meantime).
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;

			if (page->busy) {
				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
				continue;
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset "
			"= %lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			if (doWrite || useBuffer) {
				// Since the following user_mem{cpy,set}() might cause a page
				// fault, which in turn might cause pages to be reserved, we
				// need to unlock the cache temporarily to avoid a potential
				// deadlock. To make sure that our page doesn't go away, we mark
				// it busy for the time.
				page->busy = true;
				locker.Unlock();

				// copy the contents of the page already in memory
				addr_t pageAddress = page->physical_page_number * B_PAGE_SIZE
					+ pageOffset;
				if (doWrite) {
					if (useBuffer) {
						vm_memcpy_to_physical(pageAddress, (void*)buffer,
							bytesInPage, true);
					} else {
						vm_memset_physical(pageAddress, 0, bytesInPage);
					}
				} else if (useBuffer) {
					vm_memcpy_from_physical((void*)buffer, pageAddress,
						bytesInPage, true);
				}

				locker.Lock();

				if (doWrite) {
					DEBUG_PAGE_ACCESS_START(page);

					page->modified = true;

					if (page->State() != PAGE_STATE_MODIFIED)
						vm_page_set_state(page, PAGE_STATE_MODIFIED);

					DEBUG_PAGE_ACCESS_END(page);
				}

				cache->MarkPageUnbusy(page);
			}

			// If it is cached only, requeue the page, so the respective queue
			// roughly remains LRU first sorted.
			if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_MODIFIED) {
				DEBUG_PAGE_ACCESS_START(page);
				vm_page_requeue(page, true);
				DEBUG_PAGE_ACCESS_END(page);
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				locker.Unlock();
				vm_page_unreserve_pages(&reservation);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
		offset += B_PAGE_SIZE;
		pagesProcessed++;

		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;
		}
	}

	// fill the last remaining bytes of the request (either write or read)

	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
		lastLeft, useBuffer, &reservation, 0);
}


static status_t
file_cache_control(const char* subsystem, uint32 function, void* buffer,
	size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			return B_OK;

		case CACHE_SET_MODULE:
		{
			cache_module_info* module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				return B_OK;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char*)buffer,
						B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			status_t status = get_module(name, (module_info**)&module);
			if (status == B_OK)
				sCacheModule = module;

			return status;
		}
	}

	return B_BAD_HANDLER;
}


// #pragma mark - private kernel API


extern "C" void
cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size)
{
	if (size == 0)
		return;

	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;

	file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
	off_t fileSize = cache->virtual_end;

	if (offset + size > fileSize)
		size = fileSize - offset;

	// "offset" and "size" are always aligned to B_PAGE_SIZE,
	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = ROUNDUP(size, B_PAGE_SIZE);

	size_t reservePages = size / B_PAGE_SIZE;

	// Don't do anything if we don't have the resources left, or the cache
	// already contains more than 2/3 of its pages
	if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages
		|| 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) {
		cache->ReleaseRef();
		return;
	}

	size_t bytesToRead = 0;
	off_t lastOffset = offset;

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER);

	cache->Lock();

	while (true) {
		// check if this page is already in memory
		if (size > 0) {
			vm_page* page = cache->LookupPage(offset);

			offset += B_PAGE_SIZE;
			size -= B_PAGE_SIZE;

			if (page == NULL) {
				bytesToRead += B_PAGE_SIZE;
				continue;
			}
		}
		if (bytesToRead != 0) {
			// read the part before the current page (or the end of the request)
			PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset,
				bytesToRead);
			if (io == NULL || io->Prepare(&reservation) != B_OK) {
				delete io;
				break;
			}

			// we must not have the cache locked during I/O
			cache->Unlock();
			io->ReadAsync();
			cache->Lock();

			bytesToRead = 0;
		}

		if (size == 0) {
			// we have reached the end of the request
			break;
		}

		lastOffset = offset;
	}

	cache->ReleaseRefAndUnlock();
	vm_page_unreserve_pages(&reservation);
}


extern "C" void
cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
{
	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode;
	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(struct vnode* vnode, int32 fdType, VMCache* cache,
	dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL) {
		file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
		if (ref != NULL)
			size = cache->virtual_end;
	}

	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name,
		size);
}


extern "C" void
cache_node_closed(struct vnode* vnode, int32 fdType, VMCache* cache,
	dev_t mountID, ino_t vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char* const* args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1",
			(module_info**)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %Ld\n", system_time());
	}
	return B_OK;
}


extern "C" status_t
file_cache_init(void)
{
	// allocate a clean page we can use for writing zeroes
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
	vm_page* page = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	vm_page_unreserve_pages(&reservation);

	sZeroPage = (addr_t)page->physical_page_number * B_PAGE_SIZE;

	for (uint32 i = 0; i < kZeroVecCount; i++) {
		sZeroVecs[i].iov_base = (void*)sZeroPage;
		sZeroVecs[i].iov_len = B_PAGE_SIZE;
	}

	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}


// #pragma mark - public FS API


extern "C" void*
file_cache_create(dev_t mountID, ino_t vnodeID, off_t size)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld)\n",
		mountID, vnodeID, size));

	file_cache_ref* ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	memset(ref->last_access, 0, sizeof(ref->last_access));
	ref->last_access_index = 0;
	ref->disabled_count = 0;

	// TODO: delay VMCache creation until data is
	// requested/written for the first time? Listing lots of
	// files in Tracker (and elsewhere) could be slowed down.
	// Since the file_cache_ref itself doesn't have a lock,
	// we would need to "rent" one during construction, possibly
	// the vnode lock, maybe a dedicated one.
	// As there shouldn't be too much contention, we could also
	// use atomic_test_and_set(), and free the resources again
	// when that fails...

	// Get the vnode for the object
	// (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err1;

	// Gets (usually creates) the cache for the node
	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err1;

	ref->cache->virtual_end = size;
	((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref);
	return ref;

err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	ref->cache->ReleaseRef();
	delete ref;
}


extern "C" void
file_cache_enable(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	if (ref->disabled_count == 0) {
		panic("Unbalanced file_cache_enable()!");
		return;
	}

	ref->disabled_count--;
}


extern "C" status_t
file_cache_disable(void* _cacheRef)
{
	// TODO: This function only removes all pages from the cache and prevents
	// the file cache functions from adding any new ones until re-enabled.
	// The VM (on page fault) can still add pages if the file is mmap()ed. We
	// should mark the cache to prevent shared mappings of the file and fix
	// the page fault code to deal correctly with private mappings (i.e. only
	// insert pages in consumer caches).

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	// If already disabled, there's nothing to do for us.
	if (ref->disabled_count > 0) {
		ref->disabled_count++;
		return B_OK;
	}

	// The file cache is not yet disabled. We need to evict all cached pages.
	status_t error = ref->cache->FlushAndRemoveAllPages();
	if (error != B_OK)
		return error;

	ref->disabled_count++;
	return B_OK;
}


extern "C" bool
file_cache_is_enabled(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	AutoLocker<VMCache> _(ref->cache);

	return ref->disabled_count == 0;
}


extern "C" status_t
file_cache_set_size(void* _cacheRef, off_t newSize)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, newSize));

	if (ref == NULL)
		return B_OK;

	VMCache* cache = ref->cache;
	AutoLocker<VMCache> _(cache);

	off_t oldSize = cache->virtual_end;
	status_t status = cache->Resize(newSize, VM_PRIORITY_USER);
		// Note, the priority doesn't really matter, since this cache doesn't
		// reserve any memory.
	if (status == B_OK && newSize < oldSize) {
		// We may have a new partial page at the end of the cache that must be
		// cleared.
		uint32 partialBytes = newSize % B_PAGE_SIZE;
		if (partialBytes != 0) {
			vm_page* page = cache->LookupPage(newSize - partialBytes);
			if (page != NULL) {
				vm_memset_physical(page->physical_page_number * B_PAGE_SIZE
					+ partialBytes, 0, B_PAGE_SIZE - partialBytes);
			}
		}
	}

	return status;
}


extern "C" status_t
file_cache_sync(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	if (ref == NULL)
		return B_BAD_VALUE;

	return ref->cache->WriteModified();
}


extern "C" status_t
file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
	size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
		ref, offset, buffer, *_size));

	if (ref->disabled_count > 0) {
		// Caching is disabled -- read directly from the file.
		iovec vec;
		vec.iov_base = buffer;
		vec.iov_len = *_size;
		return vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0, _size);
	}

	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
}


extern "C" status_t
file_cache_write(void* _cacheRef, void* cookie, off_t offset,
	const void* buffer, size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref->disabled_count > 0) {
		// Caching is disabled -- write directly to the file.

		if (buffer != NULL) {
			iovec vec;
			vec.iov_base = (void*)buffer;
			vec.iov_len = *_size;
			return vfs_write_pages(ref->vnode, cookie, offset, &vec, 1, 0,
				_size);
		}

		// NULL buffer -- use a dummy buffer to write zeroes
		size_t size = *_size;
		while (size > 0) {
			size_t toWrite = min_c(size, kZeroVecSize);
			size_t written = toWrite;
			status_t error = vfs_write_pages(ref->vnode, cookie, offset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (error != B_OK)
				return error;
			if (written == 0)
				break;

			offset += written;
			size -= written;
		}

		*_size -= size;
		return B_OK;
	}

	status_t status = cache_io(ref, cookie, offset,
		(addr_t)const_cast<void*>(buffer), _size, true);

	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu)"
		" = %ld\n", ref, offset, buffer, *_size, status));

	return status;
}
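
/*
	Usage sketch (illustrative only, kept as a comment so it has no effect on
	this translation unit): how a file system typically wires the public FS
	API above into its hooks. The types my_volume/my_node, their fields, and
	my_fs_read() are hypothetical; the file_cache_*() calls and their
	signatures are the ones defined in this file.

	static status_t
	my_fs_read(my_volume* volume, my_node* node, void* cookie, off_t pos,
		void* buffer, size_t* _length)
	{
		// the cache is usually created when the node is published, e.g.
		// node->cache = file_cache_create(volume->id, node->id, node->size);
		if (node->cache == NULL)
			return B_NO_INIT;

		return file_cache_read(node->cache, cookie, pos, buffer, _length);
	}

	A write hook would call file_cache_write() the same way. The file system
	would additionally call file_cache_set_size() when the file is resized,
	file_cache_sync() on fsync(), and file_cache_delete() when the vnode is
	removed from memory.
*/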