/*
 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <KernelExport.h>
#include <fs_cache.h>

#include <util/kernel_cpp.h>
#include <file_cache.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_cache.h>
#include <generic_syscall.h>

#include <unistd.h>
#include <stdlib.h>
#include <string.h>


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS 64	// 256 kB
#define MAX_FILE_IO_VECS 32

#define CACHED_FILE_EXTENTS 2
	// must be smaller than MAX_FILE_IO_VECS
	// ToDo: find out how many of these are typically used

struct file_extent {
	off_t			offset;
	file_io_vec		disk;
};

struct file_map {
	file_map();
	~file_map();

	file_extent *operator[](uint32 index);
	file_extent *ExtentAt(uint32 index);
	status_t Add(file_io_vec *vecs, size_t vecCount);
	void Free();

	union {
		file_extent direct[CACHED_FILE_EXTENTS];
		file_extent *array;
	};
	size_t count;
};

struct file_cache_ref {
	vm_cache_ref	*cache;
	void			*vnode;
	void			*device;
	void			*cookie;
	file_map		map;
};


static struct cache_module_info *sCacheModule;


file_map::file_map()
{
	array = NULL;
	count = 0;
}


file_map::~file_map()
{
	Free();
}


file_extent *
file_map::operator[](uint32 index)
{
	return ExtentAt(index);
}


file_extent *
file_map::ExtentAt(uint32 index)
{
	if (index >= count)
		return NULL;

	if (count > CACHED_FILE_EXTENTS)
		return &array[index];

	return &direct[index];
}


status_t
file_map::Add(file_io_vec *vecs, size_t vecCount)
{
	off_t offset = 0;

	if (vecCount <= CACHED_FILE_EXTENTS && count == 0) {
		// just use the reserved area in the file_cache_ref structure
	} else {
		file_extent *newMap = (file_extent *)realloc(array,
			(count + vecCount) * sizeof(file_extent));
		if (newMap == NULL)
			return B_NO_MEMORY;

		array = newMap;

		if (count != 0) {
			file_extent *extent = ExtentAt(count - 1);
			offset = extent->offset + extent->disk.length;
		}
	}

	count += vecCount;

	for (uint32 i = 0; i < vecCount; i++) {
		file_extent *extent = ExtentAt(i);

		extent->offset = offset;
		extent->disk = vecs[i];

		offset += extent->disk.length;
	}

	return B_OK;
}


void
file_map::Free()
{
	if (count > CACHED_FILE_EXTENTS)
		free(array);

	array = NULL;
	count = 0;
}


// #pragma mark -


static void
add_to_iovec(iovec *vecs, int32 &index, int32 max, addr_t address, size_t size)
{
	if (index > 0
		&& (addr_t)vecs[index - 1].iov_base + vecs[index - 1].iov_len == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].iov_len += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].iov_base = (void *)address;
	vecs[index].iov_len = size;
	index++;
}


static file_extent *
find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
{
	// ToDo: do binary search

	for (uint32 index = 0; index < ref->map.count; index++) {
		file_extent *extent = ref->map[index];

		if (extent->offset <= offset
			&& extent->offset + extent->disk.length > offset) {
			if (_index)
				*_index = index;
			return extent;
		}
	}

	return NULL;
}


static status_t
get_file_map(file_cache_ref *ref, off_t offset, size_t size,
	file_io_vec *vecs, size_t *_count)
{
	size_t maxVecs = *_count;

	if (ref->map.count == 0) {
		// we don't yet have the map of this file, so let's grab it
		// (ordered by offset, so that we can do a binary search on them)

		mutex_lock(&ref->cache->lock);

		// the file map could have been requested in the mean time
		if (ref->map.count == 0) {
			size_t vecCount = maxVecs;
			status_t status;
			off_t mapOffset = 0;

			while (true) {
				status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount);
				if (status < B_OK && status != B_BUFFER_OVERFLOW) {
					mutex_unlock(&ref->cache->lock);
					return status;
				}

				ref->map.Add(vecs, vecCount);

				if (status != B_BUFFER_OVERFLOW)
					break;

				// when we are here, the map has been stored in the array, and
				// the array size was still too small to cover the whole file
				file_io_vec *last = &vecs[vecCount - 1];
				mapOffset += last->length;
				vecCount = maxVecs;
			}
		}

		mutex_unlock(&ref->cache->lock);
	}

	// Now that we have cached the map of this file, we need to
	// translate it for the requested access.

	uint32 index;
	file_extent *fileExtent = find_file_extent(ref, offset, &index);
	if (fileExtent == NULL) {
		// access outside file bounds? But that's not our problem
		*_count = 0;
		return B_OK;
	}

	offset -= fileExtent->offset;
	vecs[0].offset = fileExtent->disk.offset + offset;
	vecs[0].length = fileExtent->disk.length - offset;

	if (vecs[0].length >= size || index >= ref->map.count - 1) {
		*_count = 1;
		return B_OK;
	}

	// copy the rest of the vecs

	size -= vecs[0].length;

	for (index = 1; index < ref->map.count;) {
		fileExtent++;

		vecs[index] = fileExtent->disk;
		index++;

		if (index >= maxVecs) {
			*_count = index;
			return B_BUFFER_OVERFLOW;
		}

		if (size <= fileExtent->disk.length)
			break;

		size -= fileExtent->disk.length;
	}

	*_count = index;
	return B_OK;
}


static status_t
pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
	size_t *_numBytes, bool doWrite)
{
	TRACE(("pages_io: ref = %p, offset = %Ld, size = %lu, %s\n", ref, offset,
		*_numBytes, doWrite ? "write" : "read"));

	// translate the iovecs into direct device accesses
	file_io_vec fileVecs[MAX_FILE_IO_VECS];
	size_t fileVecCount = MAX_FILE_IO_VECS;
	size_t numBytes = *_numBytes;

	status_t status = get_file_map(ref, offset, numBytes, fileVecs, &fileVecCount);
	if (status < B_OK)
		return status;

	// ToDo: handle array overflow gracefully!
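	// fileVecs[] now holds fileVecCount (device offset, length) runs, as
	// translated by get_file_map() from the cached file_map extents; they
	// describe where on the underlying device the requested range lives
	// (possibly only a prefix of it, see the ToDo above).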

#ifdef TRACE_FILE_CACHE
	dprintf("got %lu file vecs for %Ld:%lu:\n", fileVecCount, offset, numBytes);
	for (size_t i = 0; i < fileVecCount; i++)
		dprintf("[%lu] offset = %Ld, size = %Ld\n", i, fileVecs[i].offset, fileVecs[i].length);
#endif

	uint32 fileVecIndex;
	size_t size;

	if (!doWrite) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly

		size = fileVecs[0].length;
		if (size > numBytes)
			size = numBytes;

		status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs, count, &size);
		if (status < B_OK)
			return status;

		// ToDo: this is a work-around for buggy device drivers!
		//	When our own drivers honour the length, we can:
		//	a) also use this direct I/O for writes (otherwise, it would overwrite precious data)
		//	b) panic if the term below is true (at least for writes)
		if (size > fileVecs[0].length) {
			dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
			size = fileVecs[0].length;
		}

		ASSERT(size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return B_OK;

		// if we reached the end of the file, we can return as well
		if (size != fileVecs[0].length) {
			*_numBytes = size;
			return B_OK;
		}

		fileVecIndex = 1;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	size_t totalSize = size;

	// first, find out where we have to continue in our iovecs
	uint32 i = 0;
	for (; i < count; i++) {
		if (size <= vecs[i].iov_len)
			break;

		size -= vecs[i].iov_len;
	}

	size_t vecOffset = size;

	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
		file_io_vec &fileVec = fileVecs[fileVecIndex];
		iovec tempVecs[8];
		uint32 tempCount = 1;

		tempVecs[0].iov_base = (void *)((addr_t)vecs[i].iov_base + vecOffset);

		size = min_c(vecs[i].iov_len - vecOffset, fileVec.length);
		tempVecs[0].iov_len = size;

		TRACE(("fill vec %ld, offset = %lu, size = %lu\n", i, vecOffset, size));

		if (size >= fileVec.length)
			vecOffset += size;
		else
			vecOffset = 0;

		while (size < fileVec.length && ++i < count) {
			tempVecs[tempCount].iov_base = vecs[i].iov_base;

			// is this iovec larger than the file_io_vec?
			if (vecs[i].iov_len + size > fileVec.length) {
				size += tempVecs[tempCount].iov_len = vecOffset = fileVec.length - size;
				tempCount++;
				break;
			}

			size += tempVecs[tempCount].iov_len = vecs[i].iov_len;
			tempCount++;
		}

		size_t bytes = size;
		if (doWrite)
			status = vfs_write_pages(ref->device, ref->cookie, fileVec.offset, tempVecs, tempCount, &bytes);
		else
			status = vfs_read_pages(ref->device, ref->cookie, fileVec.offset, tempVecs, tempCount, &bytes);
		if (status < B_OK)
			return status;

		totalSize += size;

		if (size != bytes) {
			// there are no more bytes, let's bail out
			*_numBytes = totalSize;
			return B_OK;
		}
	}

	return B_OK;
}


/**	This function is called by read_into_cache() (and from there only) - it
 *	can only handle a certain number of bytes, and read_into_cache() makes
 *	sure that it matches that criterion.
 */

static inline status_t
read_chunk_into_cache(file_cache_ref *ref, off_t offset, size_t size,
	int32 pageOffset, addr_t buffer, size_t bufferSize)
{
	TRACE(("read_chunk(offset = %Ld, size = %lu, pageOffset = %ld, buffer = %#lx, bufferSize = %lu\n",
		offset, size, pageOffset, buffer, bufferSize));

	vm_cache_ref *cache = ref->cache;

	iovec vecs[MAX_IO_VECS];
	int32 vecCount = 0;

	vm_page *pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE) {
		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE);
		if (page == NULL)
			panic("no more pages!");

		page->state = PAGE_STATE_BUSY;

		vm_cache_insert_page(cache, page, offset + pos);

		addr_t virtualAddress;
		if (vm_get_physical_page(page->ppn * B_PAGE_SIZE, &virtualAddress, PHYSICAL_PAGE_CAN_WAIT) < B_OK)
			panic("could not get physical page");

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE);
			// ToDo: check if the array is large enough!
	}

	mutex_unlock(&cache->lock);

	// read file into reserved pages
	status_t status = pages_io(ref, offset, vecs, vecCount, &size, false);
	if (status < B_OK) {
		// ToDo: remove allocated pages...
		panic("file_cache: remove allocated pages! read pages failed: %s\n", strerror(status));
		mutex_lock(&cache->lock);
		return status;
	}

	// copy the pages and unmap them again

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t size = vecs[i].iov_len;

		// copy to user buffer if necessary
		if (bufferSize != 0) {
			size_t bytes = min_c(bufferSize, size - pageOffset);

			user_memcpy((void *)buffer, (void *)(base + pageOffset), bytes);
			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}

		for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE)
			vm_put_physical_page(base);
	}

	mutex_lock(&cache->lock);

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;)
		pages[i]->state = PAGE_STATE_ACTIVE;

	return B_OK;
}


/**	This function reads \a size bytes directly from the file into the cache.
 *	If \a bufferSize does not equal zero, \a bufferSize bytes from the data
 *	read in are also copied to the provided \a buffer.
 *	This function always allocates all pages; it is the responsibility of the
 *	calling function to only ask for yet uncached ranges.
 *	The cache_ref lock must be held when calling this function.
 */

static status_t
read_into_cache(file_cache_ref *ref, off_t offset, size_t size, addr_t buffer, size_t bufferSize)
{
	TRACE(("read_into_cache: ref = %p, offset = %Ld, size = %lu, buffer = %p, bufferSize = %lu\n",
		ref, offset, size, (void *)buffer, bufferSize));

	// do we have to read in anything at all?
	if (size == 0)
		return B_OK;

	// make sure "offset" is page aligned - but also remember the page offset
	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size = PAGE_ALIGN(size + pageOffset);
	offset -= pageOffset;

	while (true) {
		size_t chunkSize = size;
		if (chunkSize > (MAX_IO_VECS * B_PAGE_SIZE))
			chunkSize = MAX_IO_VECS * B_PAGE_SIZE;

		status_t status = read_chunk_into_cache(ref, offset, chunkSize, pageOffset,
			buffer, bufferSize);
		if (status != B_OK)
			return status;

		if ((size -= chunkSize) == 0)
			return B_OK;

		if (chunkSize >= bufferSize) {
			bufferSize = 0;
			buffer = NULL;
		} else {
			bufferSize -= chunkSize - pageOffset;
			buffer += chunkSize - pageOffset;
		}

		offset += chunkSize;
		pageOffset = 0;
	}

	return B_OK;
}


/**	Like read_chunk_into_cache() but writes data into the cache */

static inline status_t
write_chunk_to_cache(file_cache_ref *ref, off_t offset, size_t size,
	int32 pageOffset, addr_t buffer, size_t bufferSize)
{
	iovec vecs[MAX_IO_VECS];
	int32 vecCount = 0;
	vm_page *pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE) {
		// ToDo: if space is becoming tight, and this cache has already grown
		//	big - shouldn't we better steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE);
		page->state = PAGE_STATE_BUSY;

		vm_cache_insert_page(ref->cache, page, offset + pos);

		addr_t virtualAddress;
		vm_get_physical_page(page->ppn * B_PAGE_SIZE, &virtualAddress,
			PHYSICAL_PAGE_CAN_WAIT);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE);
			// ToDo: check if the array is large enough!
	}

	mutex_unlock(&ref->cache->lock);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE };
		size_t bytesRead = B_PAGE_SIZE;

		status = pages_io(ref, offset, &readVec, 1, &bytesRead, false);
			// ToDo: handle errors for real!
		if (status < B_OK)
			panic("pages_io() failed!\n");
	}

	addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1);
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		addr_t last = (addr_t)vecs[vecCount - 1].iov_base
			+ vecs[vecCount - 1].iov_len - B_PAGE_SIZE;

		if (offset + pageOffset + bufferSize == ref->cache->cache->virtual_size) {
			// the space in the page after this write action needs to be cleaned
			memset((void *)(last + lastPageOffset), 0, B_PAGE_SIZE - lastPageOffset);
		} else if (vecCount > 1) {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			iovec readVec = { (void *)last, B_PAGE_SIZE };
			size_t bytesRead = B_PAGE_SIZE;

			status = pages_io(ref, offset + size - B_PAGE_SIZE, &readVec, 1,
				&bytesRead, false);
				// ToDo: handle errors for real!
			if (status < B_OK)
				panic("pages_io() failed!\n");
		}
	}

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t bytes = min_c(bufferSize, size_t(vecs[i].iov_len - pageOffset));

		// copy data from user buffer
		user_memcpy((void *)(base + pageOffset), (void *)buffer, bytes);

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = pages_io(ref, offset, vecs, vecCount, &size, true);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	mutex_lock(&ref->cache->lock);

	// unmap the pages again

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t size = vecs[i].iov_len;
		for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE)
			vm_put_physical_page(base);
	}

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		if (writeThrough)
			pages[i]->state = PAGE_STATE_ACTIVE;
		else
			vm_page_set_state(pages[i], PAGE_STATE_MODIFIED);
	}

	return status;
}


/**	Like read_into_cache() but writes data into the cache. To preserve data consistency,
 *	it might also read pages into the cache, though, if only a partial page gets written.
 *	The cache_ref lock must be held when calling this function.
 */

static status_t
write_to_cache(file_cache_ref *ref, off_t offset, size_t size, addr_t buffer, size_t bufferSize)
{
	TRACE(("write_to_cache: ref = %p, offset = %Ld, size = %lu, buffer = %p, bufferSize = %lu\n",
		ref, offset, size, (void *)buffer, bufferSize));

	// make sure "offset" is page aligned - but also remember the page offset
	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size = PAGE_ALIGN(size + pageOffset);
	offset -= pageOffset;

	while (true) {
		size_t chunkSize = size;
		if (chunkSize > (MAX_IO_VECS * B_PAGE_SIZE))
			chunkSize = MAX_IO_VECS * B_PAGE_SIZE;

		status_t status = write_chunk_to_cache(ref, offset, chunkSize, pageOffset, buffer, bufferSize);
		if (status != B_OK)
			return status;

		if ((size -= chunkSize) == 0)
			return B_OK;

		if (chunkSize >= bufferSize) {
			bufferSize = 0;
			buffer = NULL;
		} else {
			bufferSize -= chunkSize - pageOffset;
			buffer += chunkSize - pageOffset;
		}

		offset += chunkSize;
		pageOffset = 0;
	}

	return B_OK;
}


static status_t
cache_io(void *_cacheRef, off_t offset, addr_t buffer, size_t *_size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	vm_cache_ref *cache = ref->cache;
	off_t fileSize = cache->cache->virtual_size;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void *)buffer, *_size, doWrite ? "write" : "read"));

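	// The strategy below: walk the requested range one page at a time. Pages
	// already present in the cache are copied to/from directly; runs of
	// missing pages are collected into a single "gap" request and handed to
	// read_into_cache()/write_to_cache() in one go.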
	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if (offset + pageOffset + size > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;

	mutex_lock(&cache->lock);

	for (; bytesLeft > 0; offset += B_PAGE_SIZE) {
		// check if this page is already in memory
		addr_t virtualAddress;
	restart:
		vm_page *page = vm_cache_lookup_page(cache, offset);
		if (page != NULL && page->state == PAGE_STATE_BUSY) {
			// ToDo: don't wait forever!
			mutex_unlock(&cache->lock);
			snooze(20000);
			mutex_lock(&cache->lock);
			goto restart;
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset = %lu\n", offset, page, bytesLeft, pageOffset));
		if (page != NULL
			&& vm_get_physical_page(page->ppn * B_PAGE_SIZE,
					&virtualAddress, PHYSICAL_PAGE_CAN_WAIT) == B_OK) {
			// it is, so let's satisfy the first part of the request, if we have to
			if (lastBuffer != buffer) {
				size_t requestSize = buffer - lastBuffer;
				status_t status;
				if (doWrite) {
					status = write_to_cache(ref, lastOffset + lastPageOffset,
						requestSize, lastBuffer, requestSize);
				} else {
					status = read_into_cache(ref, lastOffset + lastPageOffset,
						requestSize, lastBuffer, requestSize);
				}
				if (status != B_OK) {
					vm_put_physical_page(virtualAddress);
					mutex_unlock(&cache->lock);
					return B_IO_ERROR;
				}
			}

			// and copy the contents of the page already in memory
			if (doWrite) {
				user_memcpy((void *)(virtualAddress + pageOffset), (void *)buffer, bytesInPage);

				// make sure the page is in the modified list
				if (page->state != PAGE_STATE_MODIFIED)
					vm_page_set_state(page, PAGE_STATE_MODIFIED);
			} else
				user_memcpy((void *)buffer, (void *)(virtualAddress + pageOffset), bytesInPage);

			vm_put_physical_page(virtualAddress);

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				mutex_unlock(&cache->lock);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
	}

	// fill the last remaining bytes of the request (either write or read)

	status_t status;
	if (doWrite)
		status = write_to_cache(ref, lastOffset + lastPageOffset, lastLeft, lastBuffer, lastLeft);
	else
		status = read_into_cache(ref, lastOffset + lastPageOffset, lastLeft, lastBuffer, lastLeft);

	mutex_unlock(&cache->lock);
	return status;
}


static status_t
file_cache_control(const char *subsystem, uint32 function, void *buffer, size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			break;
		case CACHE_SET_MODULE:
		{
			cache_module_info *module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				break;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char *)buffer, B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			if (get_module(name, (module_info **)&module) == B_OK)
				sCacheModule = module;
			break;
		}
	}

	return B_OK;
}


// #pragma mark -
// kernel public API


extern "C" void
cache_prefetch_vnode(void *vnode, off_t offset, size_t size)
{
	vm_cache_ref *cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;

	file_cache_ref *ref = (struct file_cache_ref *)((vnode_store *)cache->cache->store)->file_cache_ref;
	off_t fileSize = cache->cache->virtual_size;

	if (size > fileSize)
		size = fileSize;

	// we never fetch more than 4 MB at once
	if (size > 4 * 1024 * 1024)
		size = 4 * 1024 * 1024;

	size_t bytesLeft = size, lastLeft = size;
	off_t lastOffset = offset;
	size_t lastSize = 0;

	mutex_lock(&cache->lock);

	for (; bytesLeft > 0; offset += B_PAGE_SIZE) {
		// check if this page is already in memory
		addr_t virtualAddress;
	restart:
		vm_page *page = vm_cache_lookup_page(cache, offset);
		if (page != NULL) {
			// it is, so let's satisfy the first part of the request
			if (lastOffset < offset) {
				size_t requestSize = offset - lastOffset;
				read_into_cache(ref, lastOffset, requestSize, NULL, 0);
			}

			if (bytesLeft <= B_PAGE_SIZE) {
				// we've read the last page, so we're done!
				goto out;
			}

			// prepare a potential gap request
			lastOffset = offset + B_PAGE_SIZE;
			lastLeft = bytesLeft - B_PAGE_SIZE;
		}

		if (bytesLeft <= B_PAGE_SIZE)
			break;

		bytesLeft -= B_PAGE_SIZE;
	}

	// read in the last part
	read_into_cache(ref, lastOffset, lastLeft, NULL, 0);

out:
	mutex_unlock(&cache->lock);
}


extern "C" void
cache_prefetch(mount_id mountID, vnode_id vnodeID, off_t offset, size_t size)
{
	void *vnode;

	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	if (vfs_get_vnode(mountID, vnodeID, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(void *vnode, int32 fdType, vm_cache_ref *cache, mount_id mountID,
	vnode_id parentID, vnode_id vnodeID, const char *name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL) {
		file_cache_ref *ref = (file_cache_ref *)((vnode_store *)cache->cache->store)->file_cache_ref;
		if (ref != NULL)
			size = ref->cache->cache->virtual_size;
	}

	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name, size);
}


extern "C" void
cache_node_closed(void *vnode, int32 fdType, vm_cache_ref *cache,
	mount_id mountID, vnode_id vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char * const *args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1", (module_info **)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %Ld\n", system_time());
	} else
		dprintf("** could not open launch speedup!\n");

	return B_OK;
}


extern "C" status_t
file_cache_init(void)
{
	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}


// #pragma mark -
// public FS API


extern "C" void *
file_cache_create(mount_id mountID, vnode_id vnodeID, off_t size, int fd)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld, fd = %d)\n", mountID, vnodeID, size, fd));

	file_cache_ref *ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	// ToDo: delay vm_cache/vm_cache_ref creation until data is
	//	requested/written for the first time? Listing lots of
	//	files in Tracker (and elsewhere) could be slowed down.
	//	Since the file_cache_ref itself doesn't have a lock,
	//	we would need to "rent" one during construction, possibly
	//	the vnode lock, maybe a dedicated one.
	//	As there shouldn't be too much contention, we could also
	//	use atomic_test_and_set(), and free the resources again
	//	when that fails...

	// get the vnode of the underlying device
	if (vfs_get_vnode_from_fd(fd, true, &ref->device) != B_OK)
		goto err1;

	// we also need the cookie of the underlying device to properly access it
	if (vfs_get_cookie_from_fd(fd, &ref->cookie) != B_OK)
		goto err2;

	// get the vnode for the object (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err2;

	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err3;

	ref->cache->cache->virtual_size = size;
	((vnode_store *)ref->cache->cache->store)->file_cache_ref = ref;
	return ref;

err3:
	vfs_put_vnode(ref->vnode);
err2:
	vfs_put_vnode(ref->device);
err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	vfs_put_vnode(ref->device);
	delete ref;
}


extern "C" status_t
file_cache_set_size(void *_cacheRef, off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, size));

	if (ref == NULL)
		return B_OK;

	file_cache_invalidate_file_map(_cacheRef, 0, size);
		// ToDo: make this better (we would only need to extend or shrink the map)

	mutex_lock(&ref->cache->lock);
	status_t status = vm_cache_resize(ref->cache, size);
	mutex_unlock(&ref->cache->lock);

	return status;
}


extern "C" status_t
file_cache_sync(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	if (ref == NULL)
		return B_BAD_VALUE;

	return vm_cache_write_modified(ref->cache);
}


extern "C" status_t
file_cache_read_pages(void *_cacheRef, off_t offset, const iovec *vecs, size_t count, size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	return pages_io(ref, offset, vecs, count, _numBytes, false);
}


extern "C" status_t
file_cache_write_pages(void *_cacheRef, off_t offset, const iovec *vecs, size_t count, size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	status_t status = pages_io(ref, offset, vecs, count, _numBytes, true);
	TRACE(("file_cache_write_pages(ref = %p, offset = %Ld, vecs = %p, count = %lu, bytes = %lu) = %ld\n",
		ref, offset, vecs, count, *_numBytes, status));

	return status;
}


extern "C" status_t
file_cache_read(void *_cacheRef, off_t offset, void *bufferBase, size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
		ref, offset, bufferBase, *_size));

	return cache_io(ref, offset, (addr_t)bufferBase, _size, false);
}


extern "C" status_t
file_cache_write(void *_cacheRef, off_t offset, const void *buffer, size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	status_t status = cache_io(ref, offset, (addr_t)const_cast<void *>(buffer), _size, true);
	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu) = %ld\n",
		ref, offset, buffer, *_size, status));

	return status;
}


extern "C" status_t
file_cache_invalidate_file_map(void *_cacheRef, off_t offset, off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	// ToDo: honour offset/size parameters

	TRACE(("file_cache_invalidate_file_map(offset = %Ld, size = %Ld)\n", offset, size));
	mutex_lock(&ref->cache->lock);
	ref->map.Free();
	mutex_unlock(&ref->cache->lock);
	return B_OK;
}
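
/*
 * Usage sketch (illustrative only): a file system typically wires its node
 * I/O through the public FS API above. The fd, IDs, node fields and hook
 * placement below are placeholders, not part of this file.
 *
 *	// when a node is published / first accessed:
 *	node->cache = file_cache_create(mountID, vnodeID, node->size, deviceFD);
 *
 *	// in the FS read/write hooks:
 *	status_t status = file_cache_read(node->cache, pos, buffer, &length);
 *	status = file_cache_write(node->cache, pos, buffer, &length);
 *
 *	// on resize, flush, and teardown:
 *	file_cache_set_size(node->cache, newSize);
 *	file_cache_sync(node->cache);
 *	file_cache_delete(node->cache);
 */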