1 /* 2 * Copyright 2004-2006, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "vnode_store.h" 8 9 #include <KernelExport.h> 10 #include <fs_cache.h> 11 12 #include <util/kernel_cpp.h> 13 #include <file_cache.h> 14 #include <vfs.h> 15 #include <vm.h> 16 #include <vm_page.h> 17 #include <vm_cache.h> 18 #include <generic_syscall.h> 19 20 #include <unistd.h> 21 #include <stdlib.h> 22 #include <string.h> 23 24 25 //#define TRACE_FILE_CACHE 26 #ifdef TRACE_FILE_CACHE 27 # define TRACE(x) dprintf x 28 #else 29 # define TRACE(x) ; 30 #endif 31 32 // maximum number of iovecs per request 33 #define MAX_IO_VECS 64 // 256 kB 34 #define MAX_FILE_IO_VECS 32 35 36 #define CACHED_FILE_EXTENTS 2 37 // must be smaller than MAX_FILE_IO_VECS 38 // ToDo: find out how much of these are typically used 39 40 struct file_extent { 41 off_t offset; 42 file_io_vec disk; 43 }; 44 45 struct file_map { 46 file_map(); 47 ~file_map(); 48 49 file_extent *operator[](uint32 index); 50 file_extent *ExtentAt(uint32 index); 51 status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset); 52 void Free(); 53 54 union { 55 file_extent direct[CACHED_FILE_EXTENTS]; 56 file_extent *array; 57 }; 58 size_t count; 59 }; 60 61 struct file_cache_ref { 62 vm_cache_ref *cache; 63 void *vnode; 64 void *device; 65 void *cookie; 66 file_map map; 67 }; 68 69 70 static struct cache_module_info *sCacheModule; 71 72 73 file_map::file_map() 74 { 75 array = NULL; 76 count = 0; 77 } 78 79 80 file_map::~file_map() 81 { 82 Free(); 83 } 84 85 86 file_extent * 87 file_map::operator[](uint32 index) 88 { 89 return ExtentAt(index); 90 } 91 92 93 file_extent * 94 file_map::ExtentAt(uint32 index) 95 { 96 if (index >= count) 97 return NULL; 98 99 if (count > CACHED_FILE_EXTENTS) 100 return &array[index]; 101 102 return &direct[index]; 103 } 104 105 106 status_t 107 file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset) 108 { 109 TRACE(("file_map::Add(vecCount = %ld)\n", vecCount)); 110 111 off_t offset = 0; 112 113 if (vecCount <= CACHED_FILE_EXTENTS && count == 0) { 114 // just use the reserved area in the file_cache_ref structure 115 } else { 116 // TODO: once we can invalidate only parts of the file map, 117 // we might need to copy the previously cached file extends 118 // from the direct range 119 file_extent *newMap = (file_extent *)realloc(array, 120 (count + vecCount) * sizeof(file_extent)); 121 if (newMap == NULL) 122 return B_NO_MEMORY; 123 124 array = newMap; 125 126 if (count != 0) { 127 file_extent *extent = ExtentAt(count - 1); 128 offset = extent->offset + extent->disk.length; 129 } 130 } 131 132 int32 start = count; 133 count += vecCount; 134 135 for (uint32 i = 0; i < vecCount; i++) { 136 file_extent *extent = ExtentAt(start + i); 137 138 extent->offset = offset; 139 extent->disk = vecs[i]; 140 141 offset += extent->disk.length; 142 } 143 144 #ifdef TRACE_FILE_CACHE 145 for (uint32 i = 0; i < count; i++) { 146 file_extent *extent = ExtentAt(i); 147 dprintf("[%ld] extend offset %Ld, disk offset %Ld, length %Ld\n", 148 i, extent->offset, extent->disk.offset, extent->disk.length); 149 } 150 #endif 151 152 lastOffset = offset; 153 return B_OK; 154 } 155 156 157 void 158 file_map::Free() 159 { 160 if (count > CACHED_FILE_EXTENTS) 161 free(array); 162 163 array = NULL; 164 count = 0; 165 } 166 167 168 // #pragma mark - 169 170 171 static void 172 add_to_iovec(iovec *vecs, int32 &index, int32 max, addr_t address, size_t size) 173 { 174 if (index > 0 && (addr_t)vecs[index - 1].iov_base + vecs[index - 1].iov_len == address) { 175 // the iovec can be combined with the previous one 176 vecs[index - 1].iov_len += size; 177 return; 178 } 179 180 if (index == max) 181 panic("no more space for iovecs!"); 182 183 // we need to start a new iovec 184 vecs[index].iov_base = (void *)address; 185 vecs[index].iov_len = size; 186 index++; 187 } 188 189 190 static file_extent * 191 find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index) 192 { 193 // ToDo: do binary search 194 195 for (uint32 index = 0; index < ref->map.count; index++) { 196 file_extent *extent = ref->map[index]; 197 198 if (extent->offset <= offset 199 && extent->offset + extent->disk.length > offset) { 200 if (_index) 201 *_index = index; 202 return extent; 203 } 204 } 205 206 return NULL; 207 } 208 209 210 static status_t 211 get_file_map(file_cache_ref *ref, off_t offset, size_t size, 212 file_io_vec *vecs, size_t *_count) 213 { 214 size_t maxVecs = *_count; 215 status_t status = B_OK; 216 217 if (ref->map.count == 0) { 218 // we don't yet have the map of this file, so let's grab it 219 // (ordered by offset, so that we can do a binary search on them) 220 221 mutex_lock(&ref->cache->lock); 222 223 // the file map could have been requested in the mean time 224 if (ref->map.count == 0) { 225 size_t vecCount = maxVecs; 226 off_t mapOffset = 0; 227 228 while (true) { 229 status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount); 230 if (status < B_OK && status != B_BUFFER_OVERFLOW) { 231 mutex_unlock(&ref->cache->lock); 232 return status; 233 } 234 235 status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset); 236 if (addStatus != B_OK) { 237 // only clobber the status in case of failure 238 status = addStatus; 239 } 240 241 if (status != B_BUFFER_OVERFLOW) 242 break; 243 244 // when we are here, the map has been stored in the array, and 245 // the array size was still too small to cover the whole file 246 vecCount = maxVecs; 247 } 248 } 249 250 mutex_unlock(&ref->cache->lock); 251 } 252 253 if (status != B_OK) { 254 // We must invalidate the (part of the) map we already 255 // have, as we cannot know if it's complete or not 256 ref->map.Free(); 257 return status; 258 } 259 260 // We now have cached the map of this file, we now need to 261 // translate it for the requested access. 262 263 uint32 index; 264 file_extent *fileExtent = find_file_extent(ref, offset, &index); 265 if (fileExtent == NULL) { 266 // access outside file bounds? But that's not our problem 267 *_count = 0; 268 return B_OK; 269 } 270 271 offset -= fileExtent->offset; 272 vecs[0].offset = fileExtent->disk.offset + offset; 273 vecs[0].length = fileExtent->disk.length - offset; 274 275 if (vecs[0].length >= size || index >= ref->map.count - 1) { 276 *_count = 1; 277 return B_OK; 278 } 279 280 // copy the rest of the vecs 281 282 size -= vecs[0].length; 283 284 for (index = 1; index < ref->map.count;) { 285 fileExtent++; 286 287 vecs[index] = fileExtent->disk; 288 index++; 289 290 if (index >= maxVecs) { 291 *_count = index; 292 return B_BUFFER_OVERFLOW; 293 } 294 295 if (size <= fileExtent->disk.length) 296 break; 297 298 size -= fileExtent->disk.length; 299 } 300 301 *_count = index; 302 return B_OK; 303 } 304 305 306 static status_t 307 pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count, 308 size_t *_numBytes, bool doWrite) 309 { 310 TRACE(("pages_io: ref = %p, offset = %Ld, size = %lu, %s\n", ref, offset, 311 *_numBytes, doWrite ? "write" : "read")); 312 313 // translate the iovecs into direct device accesses 314 file_io_vec fileVecs[MAX_FILE_IO_VECS]; 315 size_t fileVecCount = MAX_FILE_IO_VECS; 316 size_t numBytes = *_numBytes; 317 318 status_t status = get_file_map(ref, offset, numBytes, fileVecs, &fileVecCount); 319 if (status < B_OK) { 320 TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed\n", offset, 321 numBytes)); 322 return status; 323 } 324 325 // ToDo: handle array overflow gracefully! 326 327 #ifdef TRACE_FILE_CACHE 328 dprintf("got %lu file vecs for %Ld:%lu:\n", fileVecCount, offset, numBytes); 329 for (size_t i = 0; i < fileVecCount; i++) 330 dprintf("[%lu] offset = %Ld, size = %Ld\n", i, fileVecs[i].offset, fileVecs[i].length); 331 #endif 332 333 if (fileVecCount == 0) { 334 // There are no file vecs at this offset, so we're obviously trying 335 // to access the file outside of its bounds 336 TRACE(("pages_io: access outside of vnode %p at offset %Ld\n", ref->vnode, offset)); 337 return B_BAD_VALUE; 338 } 339 340 uint32 fileVecIndex; 341 size_t size; 342 343 if (!doWrite) { 344 // now directly read the data from the device 345 // the first file_io_vec can be read directly 346 347 size = fileVecs[0].length; 348 if (size > numBytes) 349 size = numBytes; 350 351 status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs, 352 count, &size, false); 353 if (status < B_OK) 354 return status; 355 356 // ToDo: this is a work-around for buggy device drivers! 357 // When our own drivers honour the length, we can: 358 // a) also use this direct I/O for writes (otherwise, it would overwrite precious data) 359 // b) panic if the term below is true (at least for writes) 360 if (size > fileVecs[0].length) { 361 //dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device); 362 size = fileVecs[0].length; 363 } 364 365 ASSERT(size <= fileVecs[0].length); 366 367 // If the file portion was contiguous, we're already done now 368 if (size == numBytes) 369 return B_OK; 370 371 // if we reached the end of the file, we can return as well 372 if (size != fileVecs[0].length) { 373 *_numBytes = size; 374 return B_OK; 375 } 376 377 fileVecIndex = 1; 378 } else { 379 fileVecIndex = 0; 380 size = 0; 381 } 382 383 // Too bad, let's process the rest of the file_io_vecs 384 385 size_t totalSize = size; 386 387 // first, find out where we have to continue in our iovecs 388 uint32 i = 0; 389 for (; i < count; i++) { 390 if (size <= vecs[i].iov_len) 391 break; 392 393 size -= vecs[i].iov_len; 394 } 395 396 size_t vecOffset = size; 397 398 for (; fileVecIndex < fileVecCount; fileVecIndex++) { 399 file_io_vec &fileVec = fileVecs[fileVecIndex]; 400 iovec tempVecs[8]; 401 uint32 tempCount = 1; 402 403 tempVecs[0].iov_base = (void *)((addr_t)vecs[i].iov_base + vecOffset); 404 405 size = min_c(vecs[i].iov_len - vecOffset, fileVec.length); 406 tempVecs[0].iov_len = size; 407 408 TRACE(("fill vec %ld, offset = %lu, size = %lu\n", i, vecOffset, size)); 409 410 if (size >= fileVec.length) 411 vecOffset += size; 412 else 413 vecOffset = 0; 414 415 while (size < fileVec.length && ++i < count) { 416 tempVecs[tempCount].iov_base = vecs[i].iov_base; 417 tempCount++; 418 419 // is this iovec larger than the file_io_vec? 420 if (vecs[i].iov_len + size > fileVec.length) { 421 size += tempVecs[tempCount].iov_len = vecOffset = fileVec.length - size; 422 break; 423 } 424 425 size += tempVecs[tempCount].iov_len = vecs[i].iov_len; 426 } 427 428 size_t bytes = size; 429 if (doWrite) { 430 status = vfs_write_pages(ref->device, ref->cookie, fileVec.offset, tempVecs, 431 tempCount, &bytes, false); 432 } else { 433 status = vfs_read_pages(ref->device, ref->cookie, fileVec.offset, tempVecs, 434 tempCount, &bytes, false); 435 } 436 if (status < B_OK) 437 return status; 438 439 totalSize += size; 440 441 if (size != bytes) { 442 // there are no more bytes, let's bail out 443 *_numBytes = totalSize; 444 return B_OK; 445 } 446 } 447 448 return B_OK; 449 } 450 451 452 /** This function is called by read_into_cache() (and from there only) - it 453 * can only handle a certain amount of bytes, and read_into_cache() makes 454 * sure that it matches that criterion. 455 */ 456 457 static inline status_t 458 read_chunk_into_cache(file_cache_ref *ref, off_t offset, size_t size, 459 int32 pageOffset, addr_t buffer, size_t bufferSize) 460 { 461 TRACE(("read_chunk(offset = %Ld, size = %lu, pageOffset = %ld, buffer = %#lx, bufferSize = %lu\n", 462 offset, size, pageOffset, buffer, bufferSize)); 463 464 vm_cache_ref *cache = ref->cache; 465 466 iovec vecs[MAX_IO_VECS]; 467 int32 vecCount = 0; 468 469 vm_page *pages[MAX_IO_VECS]; 470 int32 pageIndex = 0; 471 472 // allocate pages for the cache and mark them busy 473 for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE) { 474 vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE); 475 if (page == NULL) 476 panic("no more pages!"); 477 478 page->state = PAGE_STATE_BUSY; 479 480 vm_cache_insert_page(cache, page, offset + pos); 481 482 addr_t virtualAddress; 483 if (vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE, &virtualAddress, PHYSICAL_PAGE_CAN_WAIT) < B_OK) 484 panic("could not get physical page"); 485 486 add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE); 487 // ToDo: check if the array is large enough! 488 } 489 490 mutex_unlock(&cache->lock); 491 492 // read file into reserved pages 493 status_t status = pages_io(ref, offset, vecs, vecCount, &size, false); 494 if (status < B_OK) { 495 // reading failed, free allocated pages 496 497 dprintf("file_cache: read pages failed: %s\n", strerror(status)); 498 499 for (int32 i = 0; i < vecCount; i++) { 500 addr_t base = (addr_t)vecs[i].iov_base; 501 size_t size = vecs[i].iov_len; 502 503 for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE) 504 vm_put_physical_page(base); 505 } 506 507 mutex_lock(&cache->lock); 508 509 for (int32 i = 0; i < pageIndex; i++) { 510 vm_cache_remove_page(cache, pages[i]); 511 vm_page_set_state(pages[i], PAGE_STATE_FREE); 512 } 513 514 return status; 515 } 516 517 // copy the pages and unmap them again 518 519 for (int32 i = 0; i < vecCount; i++) { 520 addr_t base = (addr_t)vecs[i].iov_base; 521 size_t size = vecs[i].iov_len; 522 523 // copy to user buffer if necessary 524 if (bufferSize != 0) { 525 size_t bytes = min_c(bufferSize, size - pageOffset); 526 527 user_memcpy((void *)buffer, (void *)(base + pageOffset), bytes); 528 buffer += bytes; 529 bufferSize -= bytes; 530 pageOffset = 0; 531 } 532 533 for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE) 534 vm_put_physical_page(base); 535 } 536 537 mutex_lock(&cache->lock); 538 539 // make the pages accessible in the cache 540 for (int32 i = pageIndex; i-- > 0;) 541 pages[i]->state = PAGE_STATE_ACTIVE; 542 543 return B_OK; 544 } 545 546 547 /** This function reads \a size bytes directly from the file into the cache. 548 * If \a bufferSize does not equal zero, \a bufferSize bytes from the data 549 * read in are also copied to the provided \a buffer. 550 * This function always allocates all pages; it is the responsibility of the 551 * calling function to only ask for yet uncached ranges. 552 * The cache_ref lock must be hold when calling this function. 553 */ 554 555 static status_t 556 read_into_cache(file_cache_ref *ref, off_t offset, size_t size, addr_t buffer, size_t bufferSize) 557 { 558 TRACE(("read_from_cache: ref = %p, offset = %Ld, size = %lu, buffer = %p, bufferSize = %lu\n", 559 ref, offset, size, (void *)buffer, bufferSize)); 560 561 // do we have to read in anything at all? 562 if (size == 0) 563 return B_OK; 564 565 // make sure "offset" is page aligned - but also remember the page offset 566 int32 pageOffset = offset & (B_PAGE_SIZE - 1); 567 size = PAGE_ALIGN(size + pageOffset); 568 offset -= pageOffset; 569 570 while (true) { 571 size_t chunkSize = size; 572 if (chunkSize > (MAX_IO_VECS * B_PAGE_SIZE)) 573 chunkSize = MAX_IO_VECS * B_PAGE_SIZE; 574 575 status_t status = read_chunk_into_cache(ref, offset, chunkSize, pageOffset, 576 buffer, bufferSize); 577 if (status != B_OK) 578 return status; 579 580 if ((size -= chunkSize) == 0) 581 return B_OK; 582 583 if (chunkSize >= bufferSize) { 584 bufferSize = 0; 585 buffer = NULL; 586 } else { 587 bufferSize -= chunkSize - pageOffset; 588 buffer += chunkSize - pageOffset; 589 } 590 591 offset += chunkSize; 592 pageOffset = 0; 593 } 594 595 return B_OK; 596 } 597 598 599 /** Like read_chunk_into_cache() but writes data into the cache */ 600 601 static inline status_t 602 write_chunk_to_cache(file_cache_ref *ref, off_t offset, size_t size, 603 int32 pageOffset, addr_t buffer, size_t bufferSize) 604 { 605 iovec vecs[MAX_IO_VECS]; 606 int32 vecCount = 0; 607 vm_page *pages[MAX_IO_VECS]; 608 int32 pageIndex = 0; 609 status_t status = B_OK; 610 611 // ToDo: this should be settable somewhere 612 bool writeThrough = false; 613 614 // allocate pages for the cache and mark them busy 615 for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE) { 616 // ToDo: if space is becoming tight, and this cache is already grown 617 // big - shouldn't we better steal the pages directly in that case? 618 // (a working set like approach for the file cache) 619 vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE); 620 page->state = PAGE_STATE_BUSY; 621 622 vm_cache_insert_page(ref->cache, page, offset + pos); 623 624 addr_t virtualAddress; 625 vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE, &virtualAddress, 626 PHYSICAL_PAGE_CAN_WAIT); 627 628 add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE); 629 // ToDo: check if the array is large enough! 630 } 631 632 mutex_unlock(&ref->cache->lock); 633 634 // copy contents (and read in partially written pages first) 635 636 if (pageOffset != 0) { 637 // This is only a partial write, so we have to read the rest of the page 638 // from the file to have consistent data in the cache 639 iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE }; 640 size_t bytesRead = B_PAGE_SIZE; 641 642 status = pages_io(ref, offset, &readVec, 1, &bytesRead, false); 643 // ToDo: handle errors for real! 644 if (status < B_OK) 645 panic("1. pages_io() failed: %s!\n", strerror(status)); 646 } 647 648 addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1); 649 if (lastPageOffset != 0) { 650 // get the last page in the I/O vectors 651 addr_t last = (addr_t)vecs[vecCount - 1].iov_base 652 + vecs[vecCount - 1].iov_len - B_PAGE_SIZE; 653 654 if (offset + pageOffset + bufferSize == ref->cache->cache->virtual_size) { 655 // the space in the page after this write action needs to be cleaned 656 memset((void *)(last + lastPageOffset), 0, B_PAGE_SIZE - lastPageOffset); 657 } else if (vecCount > 1) { 658 // the end of this write does not happen on a page boundary, so we 659 // need to fetch the last page before we can update it 660 iovec readVec = { (void *)last, B_PAGE_SIZE }; 661 size_t bytesRead = B_PAGE_SIZE; 662 663 status = pages_io(ref, offset + size - B_PAGE_SIZE, &readVec, 1, 664 &bytesRead, false); 665 // ToDo: handle errors for real! 666 if (status < B_OK) 667 panic("pages_io() failed: %s!\n", strerror(status)); 668 } 669 } 670 671 for (int32 i = 0; i < vecCount; i++) { 672 addr_t base = (addr_t)vecs[i].iov_base; 673 size_t bytes = min_c(bufferSize, size_t(vecs[i].iov_len - pageOffset)); 674 675 // copy data from user buffer 676 user_memcpy((void *)(base + pageOffset), (void *)buffer, bytes); 677 678 bufferSize -= bytes; 679 if (bufferSize == 0) 680 break; 681 682 buffer += bytes; 683 pageOffset = 0; 684 } 685 686 if (writeThrough) { 687 // write cached pages back to the file if we were asked to do that 688 status_t status = pages_io(ref, offset, vecs, vecCount, &size, true); 689 if (status < B_OK) { 690 // ToDo: remove allocated pages, ...? 691 panic("file_cache: remove allocated pages! write pages failed: %s\n", 692 strerror(status)); 693 } 694 } 695 696 mutex_lock(&ref->cache->lock); 697 698 // unmap the pages again 699 700 for (int32 i = 0; i < vecCount; i++) { 701 addr_t base = (addr_t)vecs[i].iov_base; 702 size_t size = vecs[i].iov_len; 703 for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE) 704 vm_put_physical_page(base); 705 } 706 707 // make the pages accessible in the cache 708 for (int32 i = pageIndex; i-- > 0;) { 709 if (writeThrough) 710 pages[i]->state = PAGE_STATE_ACTIVE; 711 else 712 vm_page_set_state(pages[i], PAGE_STATE_MODIFIED); 713 } 714 715 return status; 716 } 717 718 719 /** Like read_into_cache() but writes data into the cache. To preserve data consistency, 720 * it might also read pages into the cache, though, if only a partial page gets written. 721 * The cache_ref lock must be hold when calling this function. 722 */ 723 724 static status_t 725 write_to_cache(file_cache_ref *ref, off_t offset, size_t size, addr_t buffer, size_t bufferSize) 726 { 727 TRACE(("write_to_cache: ref = %p, offset = %Ld, size = %lu, buffer = %p, bufferSize = %lu\n", 728 ref, offset, size, (void *)buffer, bufferSize)); 729 730 // make sure "offset" is page aligned - but also remember the page offset 731 int32 pageOffset = offset & (B_PAGE_SIZE - 1); 732 size = PAGE_ALIGN(size + pageOffset); 733 offset -= pageOffset; 734 735 while (true) { 736 size_t chunkSize = size; 737 if (chunkSize > (MAX_IO_VECS * B_PAGE_SIZE)) 738 chunkSize = MAX_IO_VECS * B_PAGE_SIZE; 739 740 status_t status = write_chunk_to_cache(ref, offset, chunkSize, pageOffset, buffer, bufferSize); 741 if (status != B_OK) 742 return status; 743 744 if ((size -= chunkSize) == 0) 745 return B_OK; 746 747 if (chunkSize >= bufferSize) { 748 bufferSize = 0; 749 buffer = NULL; 750 } else { 751 bufferSize -= chunkSize - pageOffset; 752 buffer += chunkSize - pageOffset; 753 } 754 755 offset += chunkSize; 756 pageOffset = 0; 757 } 758 759 return B_OK; 760 } 761 762 763 static status_t 764 satisfy_cache_io(file_cache_ref *ref, off_t offset, addr_t buffer, addr_t lastBuffer, 765 bool doWrite) 766 { 767 size_t requestSize = buffer - lastBuffer; 768 769 if (doWrite) 770 return write_to_cache(ref, offset, requestSize, lastBuffer, requestSize); 771 772 return read_into_cache(ref, offset, requestSize, lastBuffer, requestSize); 773 } 774 775 776 static status_t 777 cache_io(void *_cacheRef, off_t offset, addr_t buffer, size_t *_size, bool doWrite) 778 { 779 if (_cacheRef == NULL) 780 panic("cache_io() called with NULL ref!\n"); 781 782 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 783 vm_cache_ref *cache = ref->cache; 784 off_t fileSize = cache->cache->virtual_size; 785 786 TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n", 787 ref, offset, (void *)buffer, *_size, doWrite ? "write" : "read")); 788 789 // out of bounds access? 790 if (offset >= fileSize || offset < 0) { 791 *_size = 0; 792 return B_OK; 793 } 794 795 int32 pageOffset = offset & (B_PAGE_SIZE - 1); 796 size_t size = *_size; 797 offset -= pageOffset; 798 799 if (offset + pageOffset + size > fileSize) { 800 // adapt size to be within the file's offsets 801 size = fileSize - pageOffset - offset; 802 *_size = size; 803 } 804 805 // "offset" and "lastOffset" are always aligned to B_PAGE_SIZE, 806 // the "last*" variables always point to the end of the last 807 // satisfied request part 808 809 size_t bytesLeft = size, lastLeft = size; 810 int32 lastPageOffset = pageOffset; 811 addr_t lastBuffer = buffer; 812 off_t lastOffset = offset; 813 814 mutex_lock(&cache->lock); 815 816 for (; bytesLeft > 0; offset += B_PAGE_SIZE) { 817 // check if this page is already in memory 818 restart: 819 vm_page *page = vm_cache_lookup_page(cache, offset); 820 vm_page *dummyPage = NULL; 821 if (page != NULL) { 822 // The page is busy - since we need to unlock the cache sometime 823 // in the near future, we need to satisfy the request of the pages 824 // we didn't get yet (to make sure no one else interferes in the 825 // mean time). 826 status_t status = B_OK; 827 828 if (lastBuffer != buffer) { 829 status = satisfy_cache_io(ref, lastOffset + lastPageOffset, 830 buffer, lastBuffer, doWrite); 831 if (status == B_OK) { 832 lastBuffer = buffer; 833 lastLeft = bytesLeft; 834 lastOffset = offset; 835 lastPageOffset = 0; 836 pageOffset = 0; 837 } 838 } 839 840 if (status != B_OK) { 841 mutex_unlock(&cache->lock); 842 return status; 843 } 844 845 if (page->state == PAGE_STATE_BUSY) { 846 if (page->type == PAGE_TYPE_DUMMY) { 847 dummyPage = page; 848 page = vm_page_allocate_page(PAGE_STATE_FREE); 849 if (page == NULL) { 850 mutex_unlock(&cache->lock); 851 return B_NO_MEMORY; 852 } 853 } else { 854 mutex_unlock(&cache->lock); 855 // ToDo: don't wait forever! 856 snooze(20000); 857 mutex_lock(&cache->lock); 858 goto restart; 859 } 860 } 861 } 862 863 size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft); 864 addr_t virtualAddress; 865 866 TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset = %lu\n", offset, page, bytesLeft, pageOffset)); 867 if (page != NULL) { 868 vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE, 869 &virtualAddress, PHYSICAL_PAGE_CAN_WAIT); 870 871 if (dummyPage != NULL && (!doWrite || bytesInPage != B_PAGE_SIZE)) { 872 // This page is currently in-use by someone else - since we cannot 873 // know if this someone does what we want, and if it even can do 874 // what we want (we may own a lock the blocks the other request), 875 // we need to handle this case specifically 876 iovec vec; 877 vec.iov_base = (void *)virtualAddress; 878 vec.iov_len = B_PAGE_SIZE; 879 880 size_t size = B_PAGE_SIZE; 881 status_t status = pages_io(ref, offset, &vec, 1, &size, false); 882 if (status != B_OK) { 883 vm_put_physical_page(virtualAddress); 884 mutex_unlock(&cache->lock); 885 return status; 886 } 887 } 888 889 // and copy the contents of the page already in memory 890 if (doWrite) { 891 user_memcpy((void *)(virtualAddress + pageOffset), (void *)buffer, bytesInPage); 892 893 // make sure the page is in the modified list 894 if (page->state != PAGE_STATE_MODIFIED) 895 vm_page_set_state(page, PAGE_STATE_MODIFIED); 896 } else 897 user_memcpy((void *)buffer, (void *)(virtualAddress + pageOffset), bytesInPage); 898 899 vm_put_physical_page(virtualAddress); 900 901 if (dummyPage != NULL) { 902 // check if the dummy page is still in place 903 restart_dummy_lookup: 904 vm_page *currentPage = vm_cache_lookup_page(cache, offset); 905 if (currentPage->state == PAGE_STATE_BUSY) { 906 if (currentPage->type == PAGE_TYPE_DUMMY) { 907 // we let the other party add our page 908 currentPage->queue_next = page; 909 } else { 910 mutex_unlock(&cache->lock); 911 // ToDo: don't wait forever! 912 snooze(20000); 913 mutex_lock(&cache->lock); 914 goto restart_dummy_lookup; 915 } 916 } else if (currentPage != NULL) { 917 // we need to copy our new page into the old one 918 addr_t destinationAddress; 919 vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE, 920 &virtualAddress, PHYSICAL_PAGE_CAN_WAIT); 921 vm_get_physical_page(currentPage->physical_page_number * B_PAGE_SIZE, 922 &destinationAddress, PHYSICAL_PAGE_CAN_WAIT); 923 924 memcpy((void *)destinationAddress, (void *)virtualAddress, B_PAGE_SIZE); 925 926 vm_put_physical_page(destinationAddress); 927 vm_put_physical_page(virtualAddress); 928 929 vm_page_set_state(page, PAGE_STATE_FREE); 930 } else { 931 // there is no page in place anymore, we'll put ours into it 932 vm_cache_insert_page(cache, page, offset); 933 } 934 } 935 936 if (bytesLeft <= bytesInPage) { 937 // we've read the last page, so we're done! 938 mutex_unlock(&cache->lock); 939 return B_OK; 940 } 941 942 // prepare a potential gap request 943 lastBuffer = buffer + bytesInPage; 944 lastLeft = bytesLeft - bytesInPage; 945 lastOffset = offset + B_PAGE_SIZE; 946 lastPageOffset = 0; 947 } 948 949 if (bytesLeft <= bytesInPage) 950 break; 951 952 buffer += bytesInPage; 953 bytesLeft -= bytesInPage; 954 pageOffset = 0; 955 } 956 957 // fill the last remaining bytes of the request (either write or read) 958 959 status_t status; 960 if (doWrite) 961 status = write_to_cache(ref, lastOffset + lastPageOffset, lastLeft, lastBuffer, lastLeft); 962 else 963 status = read_into_cache(ref, lastOffset + lastPageOffset, lastLeft, lastBuffer, lastLeft); 964 965 mutex_unlock(&cache->lock); 966 return status; 967 } 968 969 970 static status_t 971 file_cache_control(const char *subsystem, uint32 function, void *buffer, size_t bufferSize) 972 { 973 switch (function) { 974 case CACHE_CLEAR: 975 // ToDo: clear the cache 976 dprintf("cache_control: clear cache!\n"); 977 return B_OK; 978 979 case CACHE_SET_MODULE: 980 { 981 cache_module_info *module = sCacheModule; 982 983 // unset previous module 984 985 if (sCacheModule != NULL) { 986 sCacheModule = NULL; 987 snooze(100000); // 0.1 secs 988 put_module(module->info.name); 989 } 990 991 // get new module, if any 992 993 if (buffer == NULL) 994 return B_OK; 995 996 char name[B_FILE_NAME_LENGTH]; 997 if (!IS_USER_ADDRESS(buffer) 998 || user_strlcpy(name, (char *)buffer, B_FILE_NAME_LENGTH) < B_OK) 999 return B_BAD_ADDRESS; 1000 1001 if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME))) 1002 return B_BAD_VALUE; 1003 1004 dprintf("cache_control: set module %s!\n", name); 1005 1006 status_t status = get_module(name, (module_info **)&module); 1007 if (status == B_OK) 1008 sCacheModule = module; 1009 1010 return status; 1011 } 1012 } 1013 1014 return B_BAD_HANDLER; 1015 } 1016 1017 1018 // #pragma mark - 1019 // kernel public API 1020 1021 1022 extern "C" void 1023 cache_prefetch_vnode(void *vnode, off_t offset, size_t size) 1024 { 1025 vm_cache_ref *cache; 1026 if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK) 1027 return; 1028 1029 file_cache_ref *ref = (struct file_cache_ref *)((vnode_store *)cache->cache->store)->file_cache_ref; 1030 off_t fileSize = cache->cache->virtual_size; 1031 1032 if (size > fileSize) 1033 size = fileSize; 1034 1035 // we never fetch more than 4 MB at once 1036 if (size > 4 * 1024 * 1024) 1037 size = 4 * 1024 * 1024; 1038 1039 size_t bytesLeft = size, lastLeft = size; 1040 off_t lastOffset = offset; 1041 size_t lastSize = 0; 1042 1043 mutex_lock(&cache->lock); 1044 1045 for (; bytesLeft > 0; offset += B_PAGE_SIZE) { 1046 // check if this page is already in memory 1047 addr_t virtualAddress; 1048 restart: 1049 vm_page *page = vm_cache_lookup_page(cache, offset); 1050 if (page != NULL) { 1051 // it is, so let's satisfy in the first part of the request 1052 if (lastOffset < offset) { 1053 size_t requestSize = offset - lastOffset; 1054 read_into_cache(ref, lastOffset, requestSize, NULL, 0); 1055 } 1056 1057 if (bytesLeft <= B_PAGE_SIZE) { 1058 // we've read the last page, so we're done! 1059 goto out; 1060 } 1061 1062 // prepare a potential gap request 1063 lastOffset = offset + B_PAGE_SIZE; 1064 lastLeft = bytesLeft - B_PAGE_SIZE; 1065 } 1066 1067 if (bytesLeft <= B_PAGE_SIZE) 1068 break; 1069 1070 bytesLeft -= B_PAGE_SIZE; 1071 } 1072 1073 // read in the last part 1074 read_into_cache(ref, lastOffset, lastLeft, NULL, 0); 1075 1076 out: 1077 mutex_unlock(&cache->lock); 1078 vm_cache_release_ref(cache); 1079 } 1080 1081 1082 extern "C" void 1083 cache_prefetch(mount_id mountID, vnode_id vnodeID, off_t offset, size_t size) 1084 { 1085 void *vnode; 1086 1087 // ToDo: schedule prefetch 1088 1089 TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID)); 1090 1091 // get the vnode for the object, this also grabs a ref to it 1092 if (vfs_get_vnode(mountID, vnodeID, &vnode) != B_OK) 1093 return; 1094 1095 cache_prefetch_vnode(vnode, offset, size); 1096 vfs_put_vnode(vnode); 1097 } 1098 1099 1100 extern "C" void 1101 cache_node_opened(void *vnode, int32 fdType, vm_cache_ref *cache, mount_id mountID, 1102 vnode_id parentID, vnode_id vnodeID, const char *name) 1103 { 1104 if (sCacheModule == NULL || sCacheModule->node_opened == NULL) 1105 return; 1106 1107 off_t size = -1; 1108 if (cache != NULL) { 1109 file_cache_ref *ref = (file_cache_ref *)((vnode_store *)cache->cache->store)->file_cache_ref; 1110 if (ref != NULL) 1111 size = ref->cache->cache->virtual_size; 1112 } 1113 1114 sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name, size); 1115 } 1116 1117 1118 extern "C" void 1119 cache_node_closed(void *vnode, int32 fdType, vm_cache_ref *cache, 1120 mount_id mountID, vnode_id vnodeID) 1121 { 1122 if (sCacheModule == NULL || sCacheModule->node_closed == NULL) 1123 return; 1124 1125 int32 accessType = 0; 1126 if (cache != NULL) { 1127 // ToDo: set accessType 1128 } 1129 1130 sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType); 1131 } 1132 1133 1134 extern "C" void 1135 cache_node_launched(size_t argCount, char * const *args) 1136 { 1137 if (sCacheModule == NULL || sCacheModule->node_launched == NULL) 1138 return; 1139 1140 sCacheModule->node_launched(argCount, args); 1141 } 1142 1143 1144 extern "C" status_t 1145 file_cache_init_post_boot_device(void) 1146 { 1147 // ToDo: get cache module out of driver settings 1148 1149 if (get_module("file_cache/launch_speedup/v1", (module_info **)&sCacheModule) == B_OK) { 1150 dprintf("** opened launch speedup: %Ld\n", system_time()); 1151 } else 1152 dprintf("** could not open launch speedup!\n"); 1153 1154 return B_OK; 1155 } 1156 1157 1158 extern "C" status_t 1159 file_cache_init(void) 1160 { 1161 register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0); 1162 return B_OK; 1163 } 1164 1165 1166 // #pragma mark - 1167 // public FS API 1168 1169 1170 extern "C" void * 1171 file_cache_create(mount_id mountID, vnode_id vnodeID, off_t size, int fd) 1172 { 1173 TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld, fd = %d)\n", mountID, vnodeID, size, fd)); 1174 1175 file_cache_ref *ref = new file_cache_ref; 1176 if (ref == NULL) 1177 return NULL; 1178 1179 // TODO: delay vm_cache/vm_cache_ref creation until data is 1180 // requested/written for the first time? Listing lots of 1181 // files in Tracker (and elsewhere) could be slowed down. 1182 // Since the file_cache_ref itself doesn't have a lock, 1183 // we would need to "rent" one during construction, possibly 1184 // the vnode lock, maybe a dedicated one. 1185 // As there shouldn't be too much contention, we could also 1186 // use atomic_test_and_set(), and free the resources again 1187 // when that fails... 1188 1189 // Get the vnode of the underlying device 1190 if (vfs_get_vnode_from_fd(fd, true, &ref->device) != B_OK) 1191 goto err1; 1192 1193 // We also need the cookie of the underlying device to properly access it 1194 if (vfs_get_cookie_from_fd(fd, &ref->cookie) != B_OK) 1195 goto err2; 1196 1197 // Get the vnode for the object (note, this does not grab a reference to the node) 1198 if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK) 1199 goto err2; 1200 1201 // Gets (usually creates) the cache for the node - note, this does grab a 1202 // reference to the node... 1203 if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK) 1204 goto err2; 1205 1206 // ... that we don't need, and therefore release it again. 1207 // Our caller already holds a reference to the vnode; it will destroy us 1208 // when the last one goes away (which, of course, can only ever happen if 1209 // we don't grab an extra reference). 1210 vfs_put_vnode(ref->vnode); 1211 1212 ref->cache->cache->virtual_size = size; 1213 ((vnode_store *)ref->cache->cache->store)->file_cache_ref = ref; 1214 return ref; 1215 1216 err2: 1217 vfs_put_vnode(ref->device); 1218 err1: 1219 delete ref; 1220 return NULL; 1221 } 1222 1223 1224 extern "C" void 1225 file_cache_delete(void *_cacheRef) 1226 { 1227 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1228 1229 if (ref == NULL) 1230 return; 1231 1232 TRACE(("file_cache_delete(ref = %p)\n", ref)); 1233 1234 vfs_put_vnode(ref->device); 1235 delete ref; 1236 } 1237 1238 1239 extern "C" status_t 1240 file_cache_set_size(void *_cacheRef, off_t size) 1241 { 1242 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1243 1244 TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, size)); 1245 1246 if (ref == NULL) 1247 return B_OK; 1248 1249 file_cache_invalidate_file_map(_cacheRef, 0, size); 1250 // ToDo: make this better (we would only need to extend or shrink the map) 1251 1252 mutex_lock(&ref->cache->lock); 1253 status_t status = vm_cache_resize(ref->cache, size); 1254 mutex_unlock(&ref->cache->lock); 1255 1256 return status; 1257 } 1258 1259 1260 extern "C" status_t 1261 file_cache_sync(void *_cacheRef) 1262 { 1263 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1264 if (ref == NULL) 1265 return B_BAD_VALUE; 1266 1267 return vm_cache_write_modified(ref->cache, true); 1268 } 1269 1270 1271 extern "C" status_t 1272 file_cache_read_pages(void *_cacheRef, off_t offset, const iovec *vecs, size_t count, size_t *_numBytes) 1273 { 1274 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1275 1276 return pages_io(ref, offset, vecs, count, _numBytes, false); 1277 } 1278 1279 1280 extern "C" status_t 1281 file_cache_write_pages(void *_cacheRef, off_t offset, const iovec *vecs, size_t count, size_t *_numBytes) 1282 { 1283 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1284 1285 status_t status = pages_io(ref, offset, vecs, count, _numBytes, true); 1286 TRACE(("file_cache_write_pages(ref = %p, offset = %Ld, vecs = %p, count = %lu, bytes = %lu) = %ld\n", 1287 ref, offset, vecs, count, *_numBytes, status)); 1288 1289 return status; 1290 } 1291 1292 1293 extern "C" status_t 1294 file_cache_read(void *_cacheRef, off_t offset, void *bufferBase, size_t *_size) 1295 { 1296 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1297 1298 TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n", 1299 ref, offset, bufferBase, *_size)); 1300 1301 return cache_io(ref, offset, (addr_t)bufferBase, _size, false); 1302 } 1303 1304 1305 extern "C" status_t 1306 file_cache_write(void *_cacheRef, off_t offset, const void *buffer, size_t *_size) 1307 { 1308 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1309 1310 status_t status = cache_io(ref, offset, (addr_t)const_cast<void *>(buffer), _size, true); 1311 TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu) = %ld\n", 1312 ref, offset, buffer, *_size, status)); 1313 1314 return status; 1315 } 1316 1317 1318 extern "C" status_t 1319 file_cache_invalidate_file_map(void *_cacheRef, off_t offset, off_t size) 1320 { 1321 file_cache_ref *ref = (file_cache_ref *)_cacheRef; 1322 1323 // ToDo: honour offset/size parameters 1324 1325 TRACE(("file_cache_invalidate_file_map(offset = %Ld, size = %Ld)\n", offset, size)); 1326 mutex_lock(&ref->cache->lock); 1327 ref->map.Free(); 1328 mutex_unlock(&ref->cache->lock); 1329 return B_OK; 1330 } 1331