/*
 * Copyright 2004-2007, Haiku Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Axel Dörfler <axeld@pinc-software.de>
 *		Ingo Weinhold <bonefish@cs.tu-berlin.de>
 */

#include "fssh_fs_cache.h"

#include <new>

#include <stdlib.h>

#include "DoublyLinkedList.h"
#include "fssh_kernel_export.h"
#include "fssh_stdio.h"
#include "fssh_string.h"
#include "fssh_uio.h"
#include "fssh_unistd.h"
#include "hash.h"
#include "lock.h"
#include "vfs.h"


#undef TRACE
//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) fssh_dprintf x
#else
#	define TRACE(x) ;
#endif

using std::nothrow;


// This is a hacked version of the kernel file cache implementation. The main
// part of the implementation didn't change that much -- some code not needed
// in userland has been removed, file_cache_ref, vm_cache_ref and vm_cache
// have been unified, and the complete interface to the VM (vm_*()) and the
// VFS (vfs_*()) has been re-implemented to suit our needs.
//
// The PagePool class is a data structure used for managing the pages
// (vm_page) allocated and assigned to caches (file_cache_ref). It has a list
// for free pages, i.e. those that are not assigned to any cache and can be
// reused at once. A second list contains those pages that belong to a cache,
// but are not in use by any of the functions. These pages have a reference
// count of 0. They will be stolen from the owning cache when a usable page
// is needed, there are no free pages anymore, and the maximum number of
// pages that may be allocated has already been reached.
//
// The only purpose of the page reference count (vm_page::ref_count) is to
// indicate whether the page is in use (i.e. known to one or more of the
// cache functions currently being executed). vm_page_get_page(),
// vm_page_allocate_page(), and vm_cache_lookup_page() acquire a reference to
// the page, vm_page_put_page() releases a reference.
//
// vm_page::state indicates in what state a page currently is.
// PAGE_STATE_FREE is only encountered for pages not belonging to a cache
// (being in the page pool's free pages list, or just having been removed or
// not yet added). PAGE_STATE_ACTIVE/MODIFIED indicate that the page contains
// valid file data, in the latter case not yet written to disk.
// PAGE_STATE_BUSY means that the page is currently being manipulated by an
// operation, usually meaning that page data are being read from/written to
// disk. The required behavior when encountering a busy page is to put the
// page, release the page pool and cache locks, wait a short time, and retry
// later.
//
// Changing the page state requires having a reference to the page, holding
// the lock of the owning cache (if any), and the lock of the page pool (in
// case the page is not newly allocated).
//
// A page is in up to three lists: the free or unused list in the page pool
// (guarded by the page pool lock), the pages list of the cache the page
// belongs to, and the modified pages list of the cache (both cache lists
// are guarded by both the page pool and the cache lock). The modified pages
// list only contains PAGE_STATE_MODIFIED or PAGE_STATE_BUSY pages.
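

// Illustrative sketch only (not part of the implementation): the typical
// usage pattern implied by the reference-count protocol described above.
// A caller holding the cache lock looks up a page (which acquires a
// reference), skips it if it is busy, and drops the reference when done:
//
//		vm_page* page = vm_cache_lookup_page(cache, offset);	// +1 reference
//		if (page != NULL && page->state != PAGE_STATE_BUSY) {
//			// ... access page->data ...
//		}
//		vm_page_put_page(page);
//			// -1 reference; a page whose count drops to 0 moves to the
//			// pool's unused (or free) list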


// maximum number of iovecs per request
#define MAX_IO_VECS			64	// 256 kB
#define MAX_FILE_IO_VECS	32
#define MAX_TEMP_IO_VECS	8

#define CACHED_FILE_EXTENTS	2
	// must be smaller than MAX_FILE_IO_VECS
	// ToDo: find out how many of these are typically used

#define user_memcpy(a, b, c) fssh_memcpy(a, b, c)

#define PAGE_ALIGN(x) (((x) + (FSSH_B_PAGE_SIZE - 1)) & ~(FSSH_B_PAGE_SIZE - 1))
#define ASSERT(x)

namespace FSShell {

enum {
	PAGE_STATE_ACTIVE = 0,
	PAGE_STATE_BUSY,
	PAGE_STATE_MODIFIED,
	PAGE_STATE_FREE
};

enum {
	PHYSICAL_PAGE_NO_WAIT = 0,
	PHYSICAL_PAGE_CAN_WAIT
};

struct vm_page;
struct file_cache_ref;

typedef DoublyLinkedListLink<vm_page> page_link;

// vm page
struct vm_page {
	vm_page*			next;
	page_link			link_cache;
	page_link			link_cache_modified;
	page_link			link_pool;
	file_cache_ref*		cache;
	fssh_off_t			offset;
	void*				data;
	uint8_t				state;
	uint32_t			ref_count;

	vm_page()
		: next(NULL),
		  cache(NULL),
		  offset(0),
		  data(malloc(FSSH_B_PAGE_SIZE)),
		  state(PAGE_STATE_FREE),
		  ref_count(1)
	{
	}

	~vm_page()
	{
		free(data);
	}

	fssh_addr_t Address() const { return (fssh_addr_t)data; }

	static int Compare(void *_cacheEntry, const void *_key);
	static uint32_t Hash(void *_cacheEntry, const void *_key, uint32_t range);
};

struct file_extent {
	fssh_off_t			offset;
	fssh_file_io_vec	disk;
};

struct file_map {
	file_map();
	~file_map();

	file_extent *operator[](uint32_t index);
	file_extent *ExtentAt(uint32_t index);
	fssh_status_t Add(fssh_file_io_vec *vecs, fssh_size_t vecCount,
		fssh_off_t &lastOffset);
	void Free();

	union {
		file_extent	direct[CACHED_FILE_EXTENTS];
		file_extent	*array;
	};
	fssh_size_t count;
};


static vm_page *vm_page_allocate_page(int state);
static void vm_page_get_page(vm_page* page);
static fssh_status_t vm_page_put_page(vm_page* page);
static fssh_status_t vm_page_set_state(vm_page *page, int state);

static void vm_cache_insert_page(file_cache_ref *cacheRef, vm_page *page,
	fssh_off_t offset);
static void vm_cache_remove_page(file_cache_ref *cacheRef, vm_page *page);
static vm_page *vm_cache_lookup_page(file_cache_ref *cacheRef,
	fssh_off_t page);
static fssh_status_t vm_cache_resize(file_cache_ref *cacheRef,
	fssh_off_t newSize);
static fssh_status_t vm_cache_write_modified(file_cache_ref *ref,
	bool fsReenter);

static fssh_status_t vfs_read_pages(int fd, fssh_off_t pos,
	const fssh_iovec *vecs, fssh_size_t count, fssh_size_t *_numBytes,
	bool fsReenter);
static fssh_status_t vfs_write_pages(int fd, fssh_off_t pos,
	const fssh_iovec *vecs, fssh_size_t count, fssh_size_t *_numBytes,
	bool fsReenter);

static fssh_status_t pages_io(file_cache_ref *ref, fssh_off_t offset,
	const fssh_iovec *vecs, fssh_size_t count, fssh_size_t *_numBytes,
	bool doWrite);


typedef DoublyLinkedList<vm_page,
	DoublyLinkedListMemberGetLink<vm_page,
		&vm_page::link_cache_modified> > cache_modified_page_list;

typedef DoublyLinkedList<vm_page,
	DoublyLinkedListMemberGetLink<vm_page,
		&vm_page::link_cache> > cache_page_list;
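

// Note (editorial, not from the original sources): each vm_page carries
// three separate DoublyLinkedListLink members, so the same page can sit on
// a cache's page list (link_cache), on that cache's modified list
// (link_cache_modified), and on one of the page pool's lists (link_pool)
// at the same time, without any per-list allocations.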


struct file_cache_ref {
	mutex						lock;
	fssh_mount_id				mountID;
	fssh_vnode_id				nodeID;
	void*						node;
	int							deviceFD;
	fssh_off_t					virtual_size;

	cache_page_list				pages;
	cache_modified_page_list	modifiedPages;

	file_map					map;
};

struct page_hash_key {
	page_hash_key() {}
	page_hash_key(int fd, fssh_vnode_id id, fssh_off_t offset)
		: deviceFD(fd),
		  nodeID(id),
		  offset(offset)
	{
	}

	int				deviceFD;
	fssh_vnode_id	nodeID;
	fssh_off_t		offset;
};


typedef DoublyLinkedList<vm_page,
	DoublyLinkedListMemberGetLink<vm_page,
		&vm_page::link_pool> > pool_page_list;

struct PagePool {
	PagePool()
		: pageHash(NULL),
		  unusedPages(),
		  freePages(),
		  allocatedPages(0)
	{
	}

	~PagePool()
	{
	}

	fssh_status_t Init()
	{
		fssh_status_t error = recursive_lock_init(&lock, "page pool");
		if (error != FSSH_B_OK) {
			fssh_panic("PagePool: Failed to init lock\n");
			return error;
		}

		pageHash = hash_init(256, 0, &vm_page::Compare, &vm_page::Hash);
		if (pageHash == NULL) {
			fssh_panic("PagePool: Failed to create page hash\n");
			return FSSH_B_NO_MEMORY;
		}

		return FSSH_B_OK;
	}

	bool Lock()		{ return (recursive_lock_lock(&lock) == FSSH_B_OK); }
	void Unlock()	{ recursive_lock_unlock(&lock); }

	recursive_lock	lock;
	hash_table*		pageHash;
	pool_page_list	unusedPages;
	pool_page_list	freePages;
	uint32_t		allocatedPages;
};

struct PagePutter {
	PagePutter(vm_page* page)
		: fPage(page)
	{
	}

	~PagePutter()
	{
		if (fPage)
			vm_page_put_page(fPage);
	}

private:
	vm_page*	fPage;
};

static PagePool sPagePool;
static const uint32_t kMaxAllocatedPages = 1024;


int
vm_page::Compare(void *_cacheEntry, const void *_key)
{
	vm_page *page = (vm_page*)_cacheEntry;
	const page_hash_key *key = (const page_hash_key *)_key;

	// device FD
	if (page->cache->deviceFD != key->deviceFD)
		return page->cache->deviceFD - key->deviceFD;

	// node ID
	if (page->cache->nodeID < key->nodeID)
		return -1;
	if (page->cache->nodeID > key->nodeID)
		return 1;

	// offset
	if (page->offset < key->offset)
		return -1;
	if (page->offset > key->offset)
		return 1;

	return 0;
}


uint32_t
vm_page::Hash(void *_cacheEntry, const void *_key, uint32_t range)
{
	vm_page *page = (vm_page*)_cacheEntry;
	const page_hash_key *key = (const page_hash_key *)_key;

	int fd = (page ? page->cache->deviceFD : key->deviceFD);
	fssh_vnode_id id = (page ? page->cache->nodeID : key->nodeID);
	fssh_off_t offset = (page ? page->offset : key->offset);

	uint32_t value = fd;
	value = value * 17 + id;
	value = value * 17 + offset;

	return value % range;
}
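

// Worked example (illustrative only): for a page with deviceFD = 3,
// nodeID = 12, offset = 8192 and a table of 256 slots, the hash above
// computes ((3 * 17 + 12) * 17 + 8192) % 256 = 9263 % 256 = 47. A vm_page
// and a page_hash_key with the same (fd, node, offset) triple land in the
// same slot, which is what lets vm_cache_lookup_page() find pages via a
// stack-allocated key.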


vm_page *
vm_page_allocate_page(int state)
{
	AutoLocker<PagePool> poolLocker(sPagePool);

	// is a queued free page available?
	vm_page* page = sPagePool.freePages.RemoveHead();
	if (page) {
		page->ref_count++;
		return page;
	}

	// no free page

#if 0
// TODO: Nice idea in principle, but causes a serious locking problem.
// vm_page_allocate_page() is always invoked with some cache locked at the
// same time. Thus locking a cache here to steal a page can cause a deadlock.

	// If the limit for allocated pages has been reached, we try to steal an
	// unused page.
	if (sPagePool.allocatedPages >= kMaxAllocatedPages
		&& !sPagePool.unusedPages.IsEmpty()) {
		// we grab the first non-busy page
		for (pool_page_list::Iterator it(&sPagePool.unusedPages);
				vm_page* currentPage = it.Next();) {
			if (currentPage->state != PAGE_STATE_BUSY) {
				it.Remove();
				page = currentPage;
				page->ref_count++;
				break;
			}
		}

		// If we've found a suitable page, we need to mark it busy, write it
		// if it was modified, and finally remove it from its cache.
		if (page != NULL) {
			bool modified = (page->state == PAGE_STATE_MODIFIED);

			// mark the page busy
			page->state = PAGE_STATE_BUSY;

			// We don't need the pool lock anymore.
			poolLocker.Unlock();

			file_cache_ref* cache = page->cache;

			// If the page is modified, we write it to the disk.
			if (modified) {
				// find out how much to write, and remove the page from the
				// cache's modified pages list
				MutexLocker cacheLocker(cache->lock);
				fssh_size_t bytes = fssh_min_c(FSSH_B_PAGE_SIZE,
					cache->virtual_size - page->offset);
				cache->modifiedPages.Remove(page);
				cacheLocker.Unlock();

				// if we need to write anything, do it now
				if (bytes > 0) {
					fssh_iovec vecs[1];
					vecs[0].iov_base = page->data;
					vecs[0].iov_len = bytes;
					fssh_status_t error = pages_io(cache, page->offset, vecs,
						1, &bytes, true);
					if (error != FSSH_B_OK) {
						// failed to write the page: bad, but we can't do
						// much about it
						fssh_dprintf("vm_page_allocate_page(): Failed to "
							"write page %p (cache %p, offset: %lld).\n", page,
							cache, page->offset);
					}
				}
			}

			// remove the page from the cache
			MutexLocker cacheLocker(cache->lock);
			vm_cache_remove_page(cache, page);

			// now it's ours
			page->state = PAGE_STATE_FREE;

			return page;
		}
	}
#endif	// 0

	// no page yet -- allocate a new one

	page = new(nothrow) vm_page;
	if (!page || !page->data) {
		delete page;
		return NULL;
	}

	sPagePool.allocatedPages++;

	return page;
}


static void
vm_page_get_page(vm_page* page)
{
	if (page) {
		AutoLocker<PagePool> _(sPagePool);

		// increase ref count
		page->ref_count++;

		// if the page was unused before, remove it from the unused pages list
		if (page->ref_count == 1)
			sPagePool.unusedPages.Remove(page);
	}
}


static fssh_status_t
vm_page_put_page(vm_page* page)
{
	if (!page)
		return FSSH_B_BAD_VALUE;

	AutoLocker<PagePool> _(sPagePool);

	if (page->ref_count <= 0) {
		fssh_panic("vm_page_put_page(): page %p already unreferenced!\n",
			page);
		return FSSH_B_BAD_VALUE;
	}

	// decrease ref count
	page->ref_count--;

	if (page->ref_count > 0)
		return FSSH_B_OK;

	// the page is unreferenced now: add it to the unused or free pages list

	if (page->state == PAGE_STATE_FREE) {
		// page is free
		// if we've maxed out the allowed number of allocated pages, delete
		// this one, otherwise add it to the free list
		if (sPagePool.allocatedPages > kMaxAllocatedPages) {
			delete page;
			sPagePool.allocatedPages--;
		} else
			sPagePool.freePages.Add(page);
	} else {
		// page is not free; add it to the unused pages list
		sPagePool.unusedPages.Add(page);
	}

	return FSSH_B_OK;
}
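

// Illustrative sketch (mirrors how cache_io() below uses it): PagePutter
// pairs a vm_cache_lookup_page() reference with an automatic
// vm_page_put_page() when the scope is left, so early returns cannot leak
// a page reference:
//
//		vm_page* page = vm_cache_lookup_page(ref, offset);	// +1 reference
//		PagePutter pagePutter(page);	// puts the page on scope exit
//		if (page != NULL) {
//			// ... use the page; any return path releases the reference ...
//		}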


fssh_status_t
vm_page_set_state(vm_page *page, int state)
{
	AutoLocker<PagePool> _(sPagePool);

	if (page->ref_count <= 0) {
		fssh_panic("vm_page_set_state(): page %p is already unreferenced!\n",
			page);
		return FSSH_B_BAD_VALUE;
	}

	if (state == page->state)
		return FSSH_B_OK;

	// If it was modified before, remove the page from the cache's modified
	// pages list.
	if (page->state == PAGE_STATE_MODIFIED && page->cache)
		page->cache->modifiedPages.Remove(page);

	switch (state) {
		case PAGE_STATE_ACTIVE:
		case PAGE_STATE_BUSY:
		case PAGE_STATE_FREE:
			page->state = state;
			break;

		case PAGE_STATE_MODIFIED:
		{
			page->state = state;

			// add page to the modified list of the cache
			if (!page->cache) {
				fssh_panic("vm_page_set_state(): setting page state of page "
					"%p to PAGE_STATE_MODIFIED, but page is not in a cache\n",
					page);
				return FSSH_B_BAD_VALUE;
			}

			page->cache->modifiedPages.Add(page);
			break;
		}

		default:
			fssh_panic("vm_page_set_state(): invalid page state: %d\n",
				state);
			return FSSH_B_BAD_VALUE;
	}

	return FSSH_B_OK;
}


// cache must be locked
void
vm_cache_insert_page(file_cache_ref *cache, vm_page *page, fssh_off_t offset)
{
	AutoLocker<PagePool> _(sPagePool);

	if (page->cache != NULL) {
		fssh_panic("vm_cache_insert_page(%p, %p): page already in cache %p\n",
			cache, page, page->cache);
		return;
	}

	page->cache = cache;
	page->offset = offset;

	// insert page into hash
	fssh_status_t error = hash_insert(sPagePool.pageHash, page);
	if (error != FSSH_B_OK) {
		fssh_panic("vm_cache_insert_page(): Failed to insert page %p into "
			"hash!\n", page);
		page->cache = NULL;
		page->offset = 0;
		return;
	}

	// add page to cache page list
	cache->pages.Add(page);
}


// cache must be locked
void
vm_cache_remove_page(file_cache_ref *cache, vm_page *page)
{
	if (cache != page->cache) {
		fssh_panic("vm_cache_remove_page(%p, %p): page is in cache %p\n",
			cache, page, page->cache);
		return;
	}

	AutoLocker<PagePool> _(sPagePool);

	if (page->state == PAGE_STATE_MODIFIED)
		cache->modifiedPages.Remove(page);

	cache->pages.Remove(page);

	hash_remove(sPagePool.pageHash, page);

	page->cache = NULL;
	page->offset = 0;
}


vm_page *
vm_cache_lookup_page(file_cache_ref *cache, fssh_off_t offset)
{
	if (!cache)
		return NULL;

	AutoLocker<PagePool> _(sPagePool);

	page_hash_key key(cache->deviceFD, cache->nodeID, offset);

	vm_page* page = (vm_page*)hash_lookup(sPagePool.pageHash, &key);

	if (page)
		vm_page_get_page(page);

	return page;
}
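

// Illustrative sketch of the busy-page protocol from the header comment
// (this is the pattern vm_cache_resize() and cache_io() below follow, not
// additional API): when a looked-up page is busy, drop the reference and
// all locks, sleep briefly, and retry:
//
//		while (true) {
//			vm_page* page = vm_cache_lookup_page(cache, offset);	// +1 ref
//			if (page == NULL || page->state != PAGE_STATE_BUSY)
//				break;			// missing or usable -- keep the reference
//			vm_page_put_page(page);
//			mutex_unlock(&cache->lock);
//			fssh_snooze(20000);	// 20 ms, the interval used throughout
//			mutex_lock(&cache->lock);
//		}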


// cache must be locked
fssh_status_t
vm_cache_resize(file_cache_ref *cache, fssh_off_t newSize)
{
	fssh_off_t oldAlignedSize = PAGE_ALIGN(cache->virtual_size);
	fssh_off_t newAlignedSize = PAGE_ALIGN(newSize);

	cache->virtual_size = newSize;

	// if the aligned cache size increased or remained the same, we're done
	if (newAlignedSize >= oldAlignedSize)
		return FSSH_B_OK;

	// the file shrinks, so we need to get rid of excess pages

	// Hold the page pool lock virtually all the time from now on.
	AutoLocker<PagePool> poolLocker(sPagePool);

	// For the sake of efficiency we sort the cache's list of pages so that
	// all pages that need to be removed are at the beginning of the list.
	vm_page* page = cache->pages.Head();
	if (newAlignedSize > 0) {
		while (page) {
			vm_page* nextPage = cache->pages.GetNext(page);

			if (page->offset >= newAlignedSize) {
				// move to the beginning of the list
				cache->pages.Remove(page);
				cache->pages.Add(page, false);
			}

			page = nextPage;
		}
	}

	// now we remove and free the excess pages one by one
	while (true) {
		// get the first page in the list to remove
		// (since we sorted the list, this is usually very cheap)
		for (cache_page_list::Iterator it(&cache->pages);
				(page = it.Next());) {
			if (page->offset >= newAlignedSize)
				break;
		}

		// no more pages? -- then we're done
		if (!page)
			return FSSH_B_OK;

		if (page->state == PAGE_STATE_BUSY) {
			// the page is busy -- wait a while and try again
			poolLocker.Unlock();
			mutex_unlock(&cache->lock);
			fssh_snooze(20000);
			mutex_lock(&cache->lock);
			poolLocker.Lock();
		} else {
			// the page isn't busy -- get rid of it
			vm_page_get_page(page);
			vm_cache_remove_page(cache, page);
			vm_page_set_state(page, PAGE_STATE_FREE);
			vm_page_put_page(page);
		}
	}

	return FSSH_B_OK;
}


fssh_status_t
vm_cache_write_modified(file_cache_ref *cache, bool fsReenter)
{
	// TODO: Write more than one page at a time. To avoid deadlocks, when a
	// busy page is encountered, the previously collected pages need to be
	// written. Otherwise as many pages as our on-stack array can contain
	// can be processed at once.
	MutexLocker locker(cache->lock);

	while (true) {
		// get the next modified page and mark it busy
		vm_page* page = NULL;

		while (true) {
			// get the first modified page
			AutoLocker<PagePool> poolLocker(sPagePool);
			page = cache->modifiedPages.Head();

			if (!page)
				return FSSH_B_OK;

			// if not marked busy, remove it and mark it busy
			if (page->state != PAGE_STATE_BUSY) {
				cache->modifiedPages.Remove(page);
				vm_page_get_page(page);
				page->state = PAGE_STATE_BUSY;
				break;
			}

			// page is busy -- wait a while (we never acquired a reference
			// to the busy page, so there is nothing to put here)
			poolLocker.Unlock();
			locker.Unlock();
			fssh_snooze(20000);
			locker.Lock();
		}

		locker.Unlock();

		// write the page
		fssh_size_t bytes = fssh_min_c(FSSH_B_PAGE_SIZE,
			cache->virtual_size - page->offset);
		fssh_iovec vecs[1];
		vecs[0].iov_base = page->data;
		vecs[0].iov_len = bytes;
		fssh_status_t error = pages_io(cache, page->offset, vecs, 1, &bytes,
			true);
		if (error != FSSH_B_OK)
			return error;

		locker.Lock();

		vm_page_set_state(page, PAGE_STATE_ACTIVE);
		vm_page_put_page(page);
	}

	return FSSH_B_OK;
}
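

// Usage sketch (mirrors fssh_file_cache_delete() below): flushing and
// emptying a cache combines the two functions above -- write back all
// modified pages first, then shrink the cache to 0 to release every page:
//
//		vm_cache_write_modified(ref, false);
//		mutex_lock(&ref->lock);
//		vm_cache_resize(ref, 0);
//		mutex_unlock(&ref->lock);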


fssh_status_t
vfs_read_pages(int fd, fssh_off_t pos, const fssh_iovec *vecs,
	fssh_size_t count, fssh_size_t *_numBytes, bool fsReenter)
{
	// check how much the iovecs allow us to read
	fssh_size_t toRead = 0;
	for (fssh_size_t i = 0; i < count; i++)
		toRead += vecs[i].iov_len;

	fssh_iovec* newVecs = NULL;
	if (*_numBytes < toRead) {
		// We're supposed to read less than specified by the vecs. Since
		// readv_pos() doesn't support this, we need to clone the vecs.
		newVecs = new(nothrow) fssh_iovec[count];
		if (!newVecs)
			return FSSH_B_NO_MEMORY;

		// only read as much as was actually requested
		toRead = *_numBytes;

		fssh_size_t newCount = 0;
		for (fssh_size_t i = 0; i < count && toRead > 0; i++) {
			fssh_size_t vecLen = fssh_min_c(vecs[i].iov_len, toRead);
			newVecs[i].iov_base = vecs[i].iov_base;
			newVecs[i].iov_len = vecLen;
			toRead -= vecLen;
			newCount++;
		}

		vecs = newVecs;
		count = newCount;
	}

	fssh_ssize_t bytesRead = fssh_readv_pos(fd, pos, vecs, count);
	delete[] newVecs;
	if (bytesRead < 0)
		return bytesRead;

	*_numBytes = bytesRead;
	return FSSH_B_OK;
}


fssh_status_t
vfs_write_pages(int fd, fssh_off_t pos, const fssh_iovec *vecs,
	fssh_size_t count, fssh_size_t *_numBytes, bool fsReenter)
{
	// check how much the iovecs allow us to write
	fssh_size_t toWrite = 0;
	for (fssh_size_t i = 0; i < count; i++)
		toWrite += vecs[i].iov_len;

	fssh_iovec* newVecs = NULL;
	if (*_numBytes < toWrite) {
		// We're supposed to write less than specified by the vecs. Since
		// writev_pos() doesn't support this, we need to clone the vecs.
		newVecs = new(nothrow) fssh_iovec[count];
		if (!newVecs)
			return FSSH_B_NO_MEMORY;

		// only write as much as was actually requested
		toWrite = *_numBytes;

		fssh_size_t newCount = 0;
		for (fssh_size_t i = 0; i < count && toWrite > 0; i++) {
			fssh_size_t vecLen = fssh_min_c(vecs[i].iov_len, toWrite);
			newVecs[i].iov_base = vecs[i].iov_base;
			newVecs[i].iov_len = vecLen;
			toWrite -= vecLen;
			newCount++;
		}

		vecs = newVecs;
		count = newCount;
	}

	fssh_ssize_t bytesWritten = fssh_writev_pos(fd, pos, vecs, count);
	delete[] newVecs;
	if (bytesWritten < 0)
		return bytesWritten;

	*_numBytes = bytesWritten;
	return FSSH_B_OK;
}


fssh_status_t
file_cache_init()
{
	fssh_status_t error = sPagePool.Init();
	if (error != FSSH_B_OK)
		return error;

	return FSSH_B_OK;
}
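

// Worked example (illustrative only): with two 4096-byte vecs and
// *_numBytes = 6144, the clone loops above produce newVecs of lengths
// {4096, 2048} -- the total is clamped to the requested byte count before
// fssh_readv_pos()/fssh_writev_pos() is called, since those interfaces
// have no separate length parameter.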


// #pragma mark -


file_map::file_map()
{
	array = NULL;
	count = 0;
}


file_map::~file_map()
{
	Free();
}


file_extent *
file_map::operator[](uint32_t index)
{
	return ExtentAt(index);
}


file_extent *
file_map::ExtentAt(uint32_t index)
{
	if (index >= count)
		return NULL;

	if (count > CACHED_FILE_EXTENTS)
		return &array[index];

	return &direct[index];
}


fssh_status_t
file_map::Add(fssh_file_io_vec *vecs, fssh_size_t vecCount,
	fssh_off_t &lastOffset)
{
	TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));

	fssh_off_t offset = 0;

	if (vecCount <= CACHED_FILE_EXTENTS && count == 0) {
		// just use the reserved area in the file_cache_ref structure
	} else {
		// TODO: once we can invalidate only parts of the file map,
		// we might need to copy the previously cached file extents
		// from the direct range
		file_extent *newMap = (file_extent *)realloc(array,
			(count + vecCount) * sizeof(file_extent));
		if (newMap == NULL)
			return FSSH_B_NO_MEMORY;

		array = newMap;

		if (count != 0) {
			file_extent *extent = ExtentAt(count - 1);
			offset = extent->offset + extent->disk.length;
		}
	}

	int32_t start = count;
	count += vecCount;

	for (uint32_t i = 0; i < vecCount; i++) {
		file_extent *extent = ExtentAt(start + i);

		extent->offset = offset;
		extent->disk = vecs[i];

		offset += extent->disk.length;
	}

#ifdef TRACE_FILE_CACHE
	for (uint32_t i = 0; i < count; i++) {
		file_extent *extent = ExtentAt(i);
		fssh_dprintf("[%ld] extent offset %Ld, disk offset %Ld, length %Ld\n",
			i, extent->offset, extent->disk.offset, extent->disk.length);
	}
#endif

	lastOffset = offset;
	return FSSH_B_OK;
}


void
file_map::Free()
{
	if (count > CACHED_FILE_EXTENTS)
		free(array);

	array = NULL;
	count = 0;
}


// #pragma mark -


static void
add_to_iovec(fssh_iovec *vecs, int32_t &index, int32_t max,
	fssh_addr_t address, fssh_size_t size)
{
	if (index > 0
		&& (fssh_addr_t)vecs[index - 1].iov_base + vecs[index - 1].iov_len
			== address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].iov_len += size;
		return;
	}

	if (index == max)
		fssh_panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].iov_base = (void *)address;
	vecs[index].iov_len = size;
	index++;
}


static file_extent *
find_file_extent(file_cache_ref *ref, fssh_off_t offset, uint32_t *_index)
{
	// TODO: do binary search

	for (uint32_t index = 0; index < ref->map.count; index++) {
		file_extent *extent = ref->map[index];

		if (extent->offset <= offset
			&& extent->offset + extent->disk.length > offset) {
			if (_index)
				*_index = index;
			return extent;
		}
	}

	return NULL;
}
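

// Sketch for the binary-search TODO above (illustrative only, not wired
// in): the extents are stored sorted by file offset, so the linear scan
// could be replaced by something along these lines:
//
//		uint32_t low = 0, high = ref->map.count;
//		while (low < high) {
//			uint32_t mid = (low + high) / 2;
//			file_extent* extent = ref->map[mid];
//			if (extent->offset + extent->disk.length <= offset)
//				low = mid + 1;		// extent ends before the offset
//			else if (extent->offset > offset)
//				high = mid;			// extent starts after the offset
//			else
//				return extent;		// offset falls inside this extent
//		}
//		return NULL;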


static fssh_status_t
get_file_map(file_cache_ref *ref, fssh_off_t offset, fssh_size_t size,
	fssh_file_io_vec *vecs, fssh_size_t *_count)
{
	fssh_size_t maxVecs = *_count;
	fssh_status_t status = FSSH_B_OK;

	if (ref->map.count == 0) {
		// we don't yet have the map of this file, so let's grab it
		// (ordered by offset, so that we can do a binary search on them)

		mutex_lock(&ref->lock);

		// the file map could have been requested in the mean time
		if (ref->map.count == 0) {
			fssh_size_t vecCount = maxVecs;
			fssh_off_t mapOffset = 0;

			while (true) {
				status = vfs_get_file_map(ref->node, mapOffset, ~0UL, vecs,
					&vecCount);
				if (status < FSSH_B_OK && status != FSSH_B_BUFFER_OVERFLOW) {
					mutex_unlock(&ref->lock);
					return status;
				}

				fssh_status_t addStatus = ref->map.Add(vecs, vecCount,
					mapOffset);
				if (addStatus != FSSH_B_OK) {
					// only clobber the status in case of failure
					status = addStatus;
				}

				if (status != FSSH_B_BUFFER_OVERFLOW)
					break;

				// when we are here, the map has been stored in the array,
				// and the array size was still too small to cover the whole
				// file
				vecCount = maxVecs;
			}
		}

		mutex_unlock(&ref->lock);
	}

	if (status != FSSH_B_OK) {
		// We must invalidate the (part of the) map we already
		// have, as we cannot know if it's complete or not
		ref->map.Free();
		return status;
	}

	// We now have cached the map of this file, we now need to
	// translate it for the requested access.

	uint32_t index;
	file_extent *fileExtent = find_file_extent(ref, offset, &index);
	if (fileExtent == NULL) {
		// access outside file bounds? But that's not our problem
		*_count = 0;
		return FSSH_B_OK;
	}

	offset -= fileExtent->offset;
	vecs[0].offset = fileExtent->disk.offset + offset;
	vecs[0].length = fileExtent->disk.length - offset;

	if (vecs[0].length >= size || index >= ref->map.count - 1) {
		*_count = 1;
		return FSSH_B_OK;
	}

	// copy the rest of the vecs

	size -= vecs[0].length;

	for (index = 1; index < ref->map.count;) {
		fileExtent++;

		vecs[index] = fileExtent->disk;
		index++;

		if (size <= fileExtent->disk.length)
			break;

		if (index >= maxVecs) {
			*_count = index;
			return FSSH_B_BUFFER_OVERFLOW;
		}

		size -= fileExtent->disk.length;
	}

	*_count = index;
	return FSSH_B_OK;
}
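

// Worked example (illustrative only): assume the cached map holds two
// extents, {offset 0, disk 1000, length 8192} and {offset 8192,
// disk 50000, length 8192}. A request for offset = 4096, size = 8192 finds
// the first extent, yielding vecs[0] = {disk offset 5096, length 4096};
// since that does not cover the full size, the second extent's disk run is
// appended as vecs[1] = {disk offset 50000, length 8192}, and *_count
// becomes 2.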


/*!
	Does the dirty work of translating the request into actual disk offsets
	and reads to or writes from the supplied iovecs as specified by
	\a doWrite.
*/
static fssh_status_t
pages_io(file_cache_ref *ref, fssh_off_t offset, const fssh_iovec *vecs,
	fssh_size_t count, fssh_size_t *_numBytes, bool doWrite)
{
	TRACE(("pages_io: ref = %p, offset = %Ld, size = %lu, vecCount = %lu, "
		"%s\n", ref, offset, *_numBytes, count, doWrite ? "write" : "read"));

	// translate the iovecs into direct device accesses
	fssh_file_io_vec fileVecs[MAX_FILE_IO_VECS];
	fssh_size_t fileVecCount = MAX_FILE_IO_VECS;
	fssh_size_t numBytes = *_numBytes;

	fssh_status_t status = get_file_map(ref, offset, numBytes, fileVecs,
		&fileVecCount);
	if (status < FSSH_B_OK && status != FSSH_B_BUFFER_OVERFLOW) {
		TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed: %s\n",
			offset, numBytes, fssh_strerror(status)));
		return status;
	}

	bool bufferOverflow = status == FSSH_B_BUFFER_OVERFLOW;

#ifdef TRACE_FILE_CACHE
	fssh_dprintf("got %lu file vecs for %Ld:%lu%s:\n", fileVecCount, offset,
		numBytes, bufferOverflow ? " (array too small)" : "");
	for (fssh_size_t i = 0; i < fileVecCount; i++) {
		fssh_dprintf("  [%lu] offset = %Ld, size = %Ld\n",
			i, fileVecs[i].offset, fileVecs[i].length);
	}
#endif

	if (fileVecCount == 0) {
		// There are no file vecs at this offset, so we're obviously trying
		// to access the file outside of its bounds
		TRACE(("pages_io: access outside of vnode %p at offset %Ld\n",
			ref->node, offset));
		return FSSH_B_BAD_VALUE;
	}

	uint32_t fileVecIndex;
	fssh_size_t size;

	if (!doWrite) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly

		size = fileVecs[0].length;
		if (size > numBytes)
			size = numBytes;

		status = vfs_read_pages(ref->deviceFD, fileVecs[0].offset, vecs,
			count, &size, false);
		if (status < FSSH_B_OK)
			return status;

		// TODO: this is a work-around for buggy device drivers!
		// When our own drivers honour the length, we can:
		// a) also use this direct I/O for writes (otherwise, it would
		//    overwrite precious data)
		// b) panic if the term below is true (at least for writes)
		if (size > fileVecs[0].length) {
			//fssh_dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
			size = fileVecs[0].length;
		}

		ASSERT(size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return FSSH_B_OK;

		// if we reached the end of the file, we can return as well
		if (size != fileVecs[0].length) {
			*_numBytes = size;
			return FSSH_B_OK;
		}

		fileVecIndex = 1;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	fssh_size_t totalSize = size;

	// first, find out where we have to continue in our iovecs
	uint32_t i = 0;
	for (; i < count; i++) {
		if (size < vecs[i].iov_len)
			break;

		size -= vecs[i].iov_len;
	}

	fssh_size_t vecOffset = size;
	fssh_size_t bytesLeft = numBytes - size;

	while (true) {
		for (; fileVecIndex < fileVecCount; fileVecIndex++) {
			fssh_file_io_vec &fileVec = fileVecs[fileVecIndex];
			fssh_off_t fileOffset = fileVec.offset;
			fssh_off_t fileLeft = fssh_min_c(fileVec.length, bytesLeft);

			TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));

			// process the complete fileVec
			while (fileLeft > 0) {
				fssh_iovec tempVecs[MAX_TEMP_IO_VECS];
				uint32_t tempCount = 0;

				// size tracks how much of what is left of the current
				// fileVec (fileLeft) has been assigned to tempVecs
				size = 0;

				// assign what is left of the current fileVec to the tempVecs
				for (size = 0; size < fileLeft && i < count
						&& tempCount < MAX_TEMP_IO_VECS;) {
					// try to satisfy one iovec per iteration (or as much as
					// possible)

					// bytes left of the current iovec
					fssh_size_t vecLeft = vecs[i].iov_len - vecOffset;
					if (vecLeft == 0) {
						vecOffset = 0;
						i++;
						continue;
					}

					TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
						i, vecOffset, size));

					// actually available bytes
					fssh_size_t tempVecSize = fssh_min_c(vecLeft,
						fileLeft - size);

					tempVecs[tempCount].iov_base
						= (void *)((fssh_addr_t)vecs[i].iov_base + vecOffset);
					tempVecs[tempCount].iov_len = tempVecSize;
					tempCount++;

					size += tempVecSize;
					vecOffset += tempVecSize;
				}

				fssh_size_t bytes = size;
				if (doWrite) {
					status = vfs_write_pages(ref->deviceFD, fileOffset,
						tempVecs, tempCount, &bytes, false);
				} else {
					status = vfs_read_pages(ref->deviceFD, fileOffset,
						tempVecs, tempCount, &bytes, false);
				}
				if (status < FSSH_B_OK)
					return status;

				totalSize += bytes;
				bytesLeft -= size;
				fileOffset += size;
				fileLeft -= size;
				//fssh_dprintf("-> file left = %Lu\n", fileLeft);

				if (size != bytes || i >= count) {
					// there are no more bytes or iovecs, let's bail out
					*_numBytes = totalSize;
					return FSSH_B_OK;
				}
			}
		}

		if (bufferOverflow) {
			status = get_file_map(ref, offset + totalSize, bytesLeft,
				fileVecs, &fileVecCount);
			if (status < FSSH_B_OK && status != FSSH_B_BUFFER_OVERFLOW) {
				TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed: "
					"%s\n", offset, numBytes, fssh_strerror(status)));
				return status;
			}

			bufferOverflow = status == FSSH_B_BUFFER_OVERFLOW;
			fileVecIndex = 0;

#ifdef TRACE_FILE_CACHE
			fssh_dprintf("got %lu file vecs for %Ld:%lu%s:\n", fileVecCount,
				offset + totalSize, numBytes,
				bufferOverflow ? " (array too small)" : "");
			for (fssh_size_t i = 0; i < fileVecCount; i++) {
				fssh_dprintf("  [%lu] offset = %Ld, size = %Ld\n",
					i, fileVecs[i].offset, fileVecs[i].length);
			}
#endif
		} else
			break;
	}

	*_numBytes = totalSize;
	return FSSH_B_OK;
}
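

// Worked example (illustrative only): writing 8192 bytes described by two
// 4096-byte iovecs to a range whose file map splits it into extents of
// 6144 and 2048 bytes. The first pass through the loops above builds
// tempVecs = {all of vecs[0], the first 2048 bytes of vecs[1]} for the
// first extent; the second pass builds tempVecs = {the last 2048 bytes of
// vecs[1]} (vecOffset = 2048) for the second extent. Each pass issues one
// vfs_write_pages() call against that extent's disk offset.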
TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed: %s\n", 1249 offset, numBytes, fssh_strerror(status))); 1250 return status; 1251 } 1252 1253 bufferOverflow = status == FSSH_B_BUFFER_OVERFLOW; 1254 fileVecIndex = 0; 1255 1256 #ifdef TRACE_FILE_CACHE 1257 fssh_dprintf("got %lu file vecs for %Ld:%lu%s:\n", fileVecCount, 1258 offset + totalSize, numBytes, 1259 bufferOverflow ? " (array too small)" : ""); 1260 for (fssh_size_t i = 0; i < fileVecCount; i++) { 1261 fssh_dprintf(" [%lu] offset = %Ld, size = %Ld\n", 1262 i, fileVecs[i].offset, fileVecs[i].length); 1263 } 1264 #endif 1265 } else 1266 break; 1267 } 1268 1269 *_numBytes = totalSize; 1270 return FSSH_B_OK; 1271 } 1272 1273 1274 /*! 1275 This function is called by read_into_cache() (and from there only) - it 1276 can only handle a certain amount of bytes, and read_into_cache() makes 1277 sure that it matches that criterion. 1278 */ 1279 static inline fssh_status_t 1280 read_chunk_into_cache(file_cache_ref *ref, fssh_off_t offset, fssh_size_t numBytes, 1281 int32_t pageOffset, fssh_addr_t buffer, fssh_size_t bufferSize) 1282 { 1283 TRACE(("read_chunk(offset = %Ld, size = %lu, pageOffset = %ld, buffer = %#lx, bufferSize = %lu\n", 1284 offset, size, pageOffset, buffer, bufferSize)); 1285 1286 fssh_iovec vecs[MAX_IO_VECS]; 1287 int32_t vecCount = 0; 1288 1289 vm_page *pages[MAX_IO_VECS]; 1290 int32_t pageIndex = 0; 1291 1292 // allocate pages for the cache and mark them busy 1293 for (fssh_size_t pos = 0; pos < numBytes; pos += FSSH_B_PAGE_SIZE) { 1294 vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE); 1295 if (page == NULL) 1296 fssh_panic("no more pages!"); 1297 1298 page->state = PAGE_STATE_BUSY; 1299 1300 vm_cache_insert_page(ref, page, offset + pos); 1301 1302 fssh_addr_t virtualAddress = page->Address(); 1303 1304 add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, FSSH_B_PAGE_SIZE); 1305 // ToDo: check if the array is large enough! 1306 } 1307 1308 mutex_unlock(&ref->lock); 1309 1310 // read file into reserved pages 1311 fssh_status_t status = pages_io(ref, offset, vecs, vecCount, &numBytes, false); 1312 if (status < FSSH_B_OK) { 1313 // reading failed, free allocated pages 1314 1315 fssh_dprintf("file_cache: read pages failed: %s\n", fssh_strerror(status)); 1316 1317 mutex_lock(&ref->lock); 1318 1319 for (int32_t i = 0; i < pageIndex; i++) { 1320 vm_cache_remove_page(ref, pages[i]); 1321 vm_page_set_state(pages[i], PAGE_STATE_FREE); 1322 vm_page_put_page(pages[i]); 1323 } 1324 1325 return status; 1326 } 1327 1328 // copy the pages and unmap them again 1329 1330 for (int32_t i = 0; i < vecCount; i++) { 1331 fssh_addr_t base = (fssh_addr_t)vecs[i].iov_base; 1332 fssh_size_t size = vecs[i].iov_len; 1333 1334 // copy to user buffer if necessary 1335 if (bufferSize != 0) { 1336 fssh_size_t bytes = fssh_min_c(bufferSize, size - pageOffset); 1337 1338 user_memcpy((void *)buffer, (void *)(base + pageOffset), bytes); 1339 buffer += bytes; 1340 bufferSize -= bytes; 1341 pageOffset = 0; 1342 } 1343 } 1344 1345 mutex_lock(&ref->lock); 1346 1347 // make the pages accessible in the cache 1348 for (int32_t i = pageIndex; i-- > 0;) { 1349 vm_page_set_state(pages[i], PAGE_STATE_ACTIVE); 1350 vm_page_put_page(pages[i]); 1351 } 1352 1353 return FSSH_B_OK; 1354 } 1355 1356 1357 /*! 1358 This function reads \a size bytes directly from the file into the cache. 1359 If \a bufferSize does not equal zero, \a bufferSize bytes from the data 1360 read in are also copied to the provided \a buffer. 


/*!
	This function reads \a size bytes directly from the file into the cache.
	If \a bufferSize does not equal zero, \a bufferSize bytes from the data
	read in are also copied to the provided \a buffer.
	This function always allocates all pages; it is the responsibility of the
	calling function to only ask for yet uncached ranges.
	The cache_ref lock must be held when calling this function.
*/
static fssh_status_t
read_into_cache(file_cache_ref *ref, fssh_off_t offset, fssh_size_t size,
	fssh_addr_t buffer, fssh_size_t bufferSize)
{
	TRACE(("read_into_cache: ref = %p, offset = %Ld, size = %lu, buffer = "
		"%p, bufferSize = %lu\n", ref, offset, size, (void *)buffer,
		bufferSize));

	// do we have to read in anything at all?
	if (size == 0)
		return FSSH_B_OK;

	// make sure "offset" is page aligned - but also remember the page offset
	int32_t pageOffset = offset & (FSSH_B_PAGE_SIZE - 1);
	size = PAGE_ALIGN(size + pageOffset);
	offset -= pageOffset;

	while (true) {
		fssh_size_t chunkSize = size;
		if (chunkSize > (MAX_IO_VECS * FSSH_B_PAGE_SIZE))
			chunkSize = MAX_IO_VECS * FSSH_B_PAGE_SIZE;

		fssh_status_t status = read_chunk_into_cache(ref, offset, chunkSize,
			pageOffset, buffer, bufferSize);
		if (status != FSSH_B_OK)
			return status;

		if ((size -= chunkSize) == 0)
			return FSSH_B_OK;

		if (chunkSize >= bufferSize) {
			bufferSize = 0;
			buffer = 0;
		} else {
			bufferSize -= chunkSize - pageOffset;
			buffer += chunkSize - pageOffset;
		}

		offset += chunkSize;
		pageOffset = 0;
	}

	return FSSH_B_OK;
}


/*!	Like read_chunk_into_cache() but writes data into the cache. */
static inline fssh_status_t
write_chunk_to_cache(file_cache_ref *ref, fssh_off_t offset,
	fssh_size_t numBytes, int32_t pageOffset, fssh_addr_t buffer,
	fssh_size_t bufferSize)
{
	fssh_iovec vecs[MAX_IO_VECS];
	int32_t vecCount = 0;
	vm_page *pages[MAX_IO_VECS];
	int32_t pageIndex = 0;
	fssh_status_t status = FSSH_B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (fssh_size_t pos = 0; pos < numBytes; pos += FSSH_B_PAGE_SIZE) {
		// ToDo: if space is becoming tight, and this cache is already grown
		// big - shouldn't we better steal the pages directly in that case?
		// (a working set like approach for the file cache)
		vm_page *page = pages[pageIndex++]
			= vm_page_allocate_page(PAGE_STATE_FREE);
		page->state = PAGE_STATE_BUSY;

		vm_cache_insert_page(ref, page, offset + pos);

		fssh_addr_t virtualAddress = page->Address();

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress,
			FSSH_B_PAGE_SIZE);
		// ToDo: check if the array is large enough!
	}

	mutex_unlock(&ref->lock);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the
		// page from the file to have consistent data in the cache
		fssh_iovec readVec = { vecs[0].iov_base, FSSH_B_PAGE_SIZE };
		fssh_size_t bytesRead = FSSH_B_PAGE_SIZE;

		status = pages_io(ref, offset, &readVec, 1, &bytesRead, false);
		// ToDo: handle errors for real!
		if (status < FSSH_B_OK)
			fssh_panic("1. pages_io() failed: %s!\n", fssh_strerror(status));
	}

	fssh_addr_t lastPageOffset = (pageOffset + bufferSize)
		& (FSSH_B_PAGE_SIZE - 1);
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		fssh_addr_t last = (fssh_addr_t)vecs[vecCount - 1].iov_base
			+ vecs[vecCount - 1].iov_len - FSSH_B_PAGE_SIZE;

		if (offset + pageOffset + bufferSize == ref->virtual_size) {
			// the space in the page after this write action needs to be
			// cleaned
			fssh_memset((void *)(last + lastPageOffset), 0,
				FSSH_B_PAGE_SIZE - lastPageOffset);
		} else if (vecCount > 1) {
			// the end of this write does not happen on a page boundary, so
			// we need to fetch the last page before we can update it
			fssh_iovec readVec = { (void *)last, FSSH_B_PAGE_SIZE };
			fssh_size_t bytesRead = FSSH_B_PAGE_SIZE;

			status = pages_io(ref, offset + numBytes - FSSH_B_PAGE_SIZE,
				&readVec, 1, &bytesRead, false);
			// ToDo: handle errors for real!
			if (status < FSSH_B_OK) {
				fssh_panic("2. pages_io() failed: %s!\n",
					fssh_strerror(status));
			}
		}
	}

	for (int32_t i = 0; i < vecCount; i++) {
		fssh_addr_t base = (fssh_addr_t)vecs[i].iov_base;
		fssh_size_t bytes = fssh_min_c(bufferSize,
			fssh_size_t(vecs[i].iov_len - pageOffset));

		// copy data from user buffer
		user_memcpy((void *)(base + pageOffset), (void *)buffer, bytes);

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		fssh_status_t status = pages_io(ref, offset, vecs, vecCount,
			&numBytes, true);
		if (status < FSSH_B_OK) {
			// ToDo: remove allocated pages, ...?
			fssh_panic("file_cache: remove allocated pages! write pages "
				"failed: %s\n", fssh_strerror(status));
		}
	}

	mutex_lock(&ref->lock);

	// unmap the pages again

	// make the pages accessible in the cache
	for (int32_t i = pageIndex; i-- > 0;) {
		if (writeThrough)
			vm_page_set_state(pages[i], PAGE_STATE_ACTIVE);
		else
			vm_page_set_state(pages[i], PAGE_STATE_MODIFIED);
		vm_page_put_page(pages[i]);
	}

	return status;
}
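

// Worked example (illustrative only): a write of 100 bytes at file offset
// 4196 with a 4096-byte page size gives pageOffset = 100 and (after the
// alignment in write_to_cache() below) a single-page chunk at cache offset
// 4096. Since pageOffset != 0, the whole page is first read from the file,
// then bytes 100..199 of it are overwritten from the caller's buffer, and
// the page is marked modified -- the classic read-modify-write needed to
// keep the untouched parts of the page consistent.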


/*!	Like read_into_cache() but writes data into the cache. To preserve data
	consistency, it might also read pages into the cache, though, if only a
	partial page gets written.
	The cache_ref lock must be held when calling this function.
*/
static fssh_status_t
write_to_cache(file_cache_ref *ref, fssh_off_t offset, fssh_size_t size,
	fssh_addr_t buffer, fssh_size_t bufferSize)
{
	TRACE(("write_to_cache: ref = %p, offset = %Ld, size = %lu, buffer = "
		"%p, bufferSize = %lu\n", ref, offset, size, (void *)buffer,
		bufferSize));

	// make sure "offset" is page aligned - but also remember the page offset
	int32_t pageOffset = offset & (FSSH_B_PAGE_SIZE - 1);
	size = PAGE_ALIGN(size + pageOffset);
	offset -= pageOffset;

	while (true) {
		fssh_size_t chunkSize = size;
		if (chunkSize > (MAX_IO_VECS * FSSH_B_PAGE_SIZE))
			chunkSize = MAX_IO_VECS * FSSH_B_PAGE_SIZE;

		fssh_status_t status = write_chunk_to_cache(ref, offset, chunkSize,
			pageOffset, buffer, bufferSize);
		if (status != FSSH_B_OK)
			return status;

		if ((size -= chunkSize) == 0)
			return FSSH_B_OK;

		if (chunkSize >= bufferSize) {
			bufferSize = 0;
			buffer = 0;
		} else {
			bufferSize -= chunkSize - pageOffset;
			buffer += chunkSize - pageOffset;
		}

		offset += chunkSize;
		pageOffset = 0;
	}

	return FSSH_B_OK;
}


static fssh_status_t
satisfy_cache_io(file_cache_ref *ref, fssh_off_t offset, fssh_addr_t buffer,
	fssh_addr_t lastBuffer, bool doWrite)
{
	fssh_size_t requestSize = buffer - lastBuffer;

	if (doWrite) {
		return write_to_cache(ref, offset, requestSize, lastBuffer,
			requestSize);
	}

	return read_into_cache(ref, offset, requestSize, lastBuffer, requestSize);
}
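

// Note on cache_io() below (editorial summary, not additional API): the
// "last*" variables track the start of the longest run of not-yet-cached
// pages seen so far. Whenever a cached page is hit, the accumulated gap
// [lastOffset + lastPageOffset, buffer) is satisfied in one go via
// satisfy_cache_io(), which reads or writes the whole run; the remainder
// left after the loop is handled the same way.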


static fssh_status_t
cache_io(void *_cacheRef, fssh_off_t offset, fssh_addr_t buffer,
	fssh_size_t *_size, bool doWrite)
{
	if (_cacheRef == NULL)
		fssh_panic("cache_io() called with NULL ref!\n");

	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	fssh_off_t fileSize = ref->virtual_size;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void *)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return FSSH_B_OK;
	}

	int32_t pageOffset = offset & (FSSH_B_PAGE_SIZE - 1);
	fssh_size_t size = *_size;
	offset -= pageOffset;

	if (offset + pageOffset + size > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}

	// "offset" and "lastOffset" are always aligned to FSSH_B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	fssh_size_t bytesLeft = size, lastLeft = size;
	int32_t lastPageOffset = pageOffset;
	fssh_addr_t lastBuffer = buffer;
	fssh_off_t lastOffset = offset;

	mutex_lock(&ref->lock);

	for (; bytesLeft > 0; offset += FSSH_B_PAGE_SIZE) {
		// check if this page is already in memory
	restart:
		vm_page *page = vm_cache_lookup_page(ref, offset);
		PagePutter pagePutter(page);

		if (page != NULL) {
			// The page is in memory. Since we may need to unlock the cache
			// sometime in the near future (e.g. to wait on a busy page), we
			// first satisfy the request for the pages we didn't get yet (to
			// make sure no one else interferes in the mean time).
			fssh_status_t status = FSSH_B_OK;

			if (lastBuffer != buffer) {
				status = satisfy_cache_io(ref, lastOffset + lastPageOffset,
					buffer, lastBuffer, doWrite);
				if (status == FSSH_B_OK) {
					lastBuffer = buffer;
					lastLeft = bytesLeft;
					lastOffset = offset;
					lastPageOffset = 0;
					pageOffset = 0;
				}
			}

			if (status != FSSH_B_OK) {
				mutex_unlock(&ref->lock);
				return status;
			}

			if (page->state == PAGE_STATE_BUSY) {
				mutex_unlock(&ref->lock);
				// ToDo: don't wait forever!
				fssh_snooze(20000);
				mutex_lock(&ref->lock);
				goto restart;
			}
		}

		fssh_size_t bytesInPage = fssh_min_c(
			fssh_size_t(FSSH_B_PAGE_SIZE - pageOffset), bytesLeft);
		fssh_addr_t virtualAddress;

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset = "
			"%lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			virtualAddress = page->Address();

			// and copy the contents of the page already in memory
			if (doWrite) {
				user_memcpy((void *)(virtualAddress + pageOffset),
					(void *)buffer, bytesInPage);

				// make sure the page is in the modified list
				if (page->state != PAGE_STATE_MODIFIED)
					vm_page_set_state(page, PAGE_STATE_MODIFIED);
			} else {
				user_memcpy((void *)buffer,
					(void *)(virtualAddress + pageOffset), bytesInPage);
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				mutex_unlock(&ref->lock);
				return FSSH_B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + FSSH_B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
	}

	// fill the last remaining bytes of the request (either write or read)

	fssh_status_t status;
	if (doWrite) {
		status = write_to_cache(ref, lastOffset + lastPageOffset, lastLeft,
			lastBuffer, lastLeft);
	} else {
		status = read_into_cache(ref, lastOffset + lastPageOffset, lastLeft,
			lastBuffer, lastLeft);
	}

	mutex_unlock(&ref->lock);
	return status;
}


}	// namespace FSShell


// #pragma mark - public FS API


using namespace FSShell;


void *
fssh_file_cache_create(fssh_mount_id mountID, fssh_vnode_id vnodeID,
	fssh_off_t size, int fd)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld, "
		"fd = %d)\n", mountID, vnodeID, size, fd));

	file_cache_ref *ref = new(nothrow) file_cache_ref;
	if (ref == NULL)
		return NULL;

	ref->mountID = mountID;
	ref->nodeID = vnodeID;
	ref->deviceFD = fd;
	ref->virtual_size = size;

	// get vnode
	fssh_status_t error = vfs_lookup_vnode(mountID, vnodeID, &ref->node);
	if (error != FSSH_B_OK) {
		fssh_dprintf("file_cache_create(): Failed to get vnode %d:%lld: %s\n",
			mountID, vnodeID, fssh_strerror(error));
		delete ref;
		return NULL;
	}

	// create lock
	char buffer[32];
	fssh_snprintf(buffer, sizeof(buffer), "file cache %d:%lld", (int)mountID,
		vnodeID);
	error = mutex_init(&ref->lock, buffer);
	if (error != FSSH_B_OK) {
		fssh_dprintf("file_cache_create(): Failed to init mutex: %s\n",
			fssh_strerror(error));
		delete ref;
		return NULL;
	}

	return ref;
}
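

// Usage sketch of the public API (illustrative only; "buffer" and the
// ID/fd values are hypothetical caller state, and error handling is
// omitted). A file system running in the FS shell would typically do:
//
//		void* cache = fssh_file_cache_create(mountID, vnodeID, fileSize, fd);
//		fssh_size_t length = sizeof(buffer);
//		fssh_file_cache_read(cache, 0, buffer, &length);	// via the cache
//		fssh_file_cache_write(cache, 0, buffer, &length);
//		fssh_file_cache_sync(cache);		// flush modified pages
//		fssh_file_cache_delete(cache);		// flush and free all pages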


void
fssh_file_cache_delete(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	// write all modified pages and resize the cache to 0 to free all pages
	// it contains
	vm_cache_write_modified(ref, false);

	mutex_lock(&ref->lock);
	vm_cache_resize(ref, 0);

	mutex_destroy(&ref->lock);

	delete ref;
}


fssh_status_t
fssh_file_cache_set_size(void *_cacheRef, fssh_off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, size));

	if (ref == NULL)
		return FSSH_B_OK;

	fssh_file_cache_invalidate_file_map(_cacheRef, 0, size);
		// ToDo: make this better (we would only need to extend or shrink
		// the map)

	mutex_lock(&ref->lock);
	fssh_status_t status = vm_cache_resize(ref, size);
	mutex_unlock(&ref->lock);

	return status;
}


fssh_status_t
fssh_file_cache_sync(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	if (ref == NULL)
		return FSSH_B_BAD_VALUE;

	return vm_cache_write_modified(ref, true);
}


fssh_status_t
fssh_file_cache_read_pages(void *_cacheRef, fssh_off_t offset,
	const fssh_iovec *vecs, fssh_size_t count, fssh_size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	return pages_io(ref, offset, vecs, count, _numBytes, false);
}


fssh_status_t
fssh_file_cache_write_pages(void *_cacheRef, fssh_off_t offset,
	const fssh_iovec *vecs, fssh_size_t count, fssh_size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	fssh_status_t status = pages_io(ref, offset, vecs, count, _numBytes,
		true);
	TRACE(("file_cache_write_pages(ref = %p, offset = %Ld, vecs = %p, "
		"count = %lu, bytes = %lu) = %ld\n", ref, offset, vecs, count,
		*_numBytes, status));

	return status;
}


fssh_status_t
fssh_file_cache_read(void *_cacheRef, fssh_off_t offset, void *bufferBase,
	fssh_size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = "
		"%lu)\n", ref, offset, bufferBase, *_size));

	return cache_io(ref, offset, (fssh_addr_t)bufferBase, _size, false);
}


fssh_status_t
fssh_file_cache_write(void *_cacheRef, fssh_off_t offset, const void *buffer,
	fssh_size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	fssh_status_t status = cache_io(ref, offset,
		(fssh_addr_t)const_cast<void *>(buffer), _size, true);
	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = "
		"%lu) = %ld\n", ref, offset, buffer, *_size, status));

	return status;
}


fssh_status_t
fssh_file_cache_invalidate_file_map(void *_cacheRef, fssh_off_t offset,
	fssh_off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	// ToDo: honour offset/size parameters

	TRACE(("file_cache_invalidate_file_map(offset = %Ld, size = %Ld)\n",
		offset, size));

	mutex_lock(&ref->lock);
	ref->map.Free();
	mutex_unlock(&ref->lock);

	return FSSH_B_OK;
}