/*
 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <KernelExport.h>
#include <fs_cache.h>

#include <util/kernel_cpp.h>
#include <file_cache.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_cache.h>
#include <generic_syscall.h>

#include <unistd.h>
#include <stdlib.h>
#include <string.h>


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS 64	// 256 kB
#define MAX_FILE_IO_VECS 32

#define CACHED_FILE_EXTENTS 2
	// must be smaller than MAX_FILE_IO_VECS
	// ToDo: find out how many of these are typically used

struct file_extent {
	off_t			offset;
	file_io_vec		disk;
};

struct file_map {
	file_map();
	~file_map();

	file_extent *operator[](uint32 index);
	file_extent *ExtentAt(uint32 index);
	status_t Add(file_io_vec *vecs, size_t vecCount);
	void Free();

	union {
		file_extent	direct[CACHED_FILE_EXTENTS];
		file_extent	*array;
	};
	size_t			count;
};

struct file_cache_ref {
	vm_cache_ref	*cache;
	void			*vnode;
	void			*device;
	void			*cookie;
	file_map		map;
};


static struct cache_module_info *sCacheModule;


file_map::file_map()
{
	array = NULL;
	count = 0;
}


file_map::~file_map()
{
	Free();
}


file_extent *
file_map::operator[](uint32 index)
{
	return ExtentAt(index);
}


file_extent *
file_map::ExtentAt(uint32 index)
{
	if (index >= count)
		return NULL;

	if (count > CACHED_FILE_EXTENTS)
		return &array[index];

	return &direct[index];
}


status_t
file_map::Add(file_io_vec *vecs, size_t vecCount)
{
	off_t offset = 0;

	if (count != 0) {
		// the new extents are appended after the last one we already have
		file_extent *last = ExtentAt(count - 1);
		offset = last->offset + last->disk.length;
	}

	if (count + vecCount <= CACHED_FILE_EXTENTS) {
		// just use the reserved area in the file_cache_ref structure
	} else {
		// "array" and "direct" share their memory, so the old pointer may
		// only be passed on to realloc() if the array is already in use
		file_extent *newMap = (file_extent *)realloc(
			count > CACHED_FILE_EXTENTS ? array : NULL,
			(count + vecCount) * sizeof(file_extent));
		if (newMap == NULL)
			return B_NO_MEMORY;

		if (count != 0 && count <= CACHED_FILE_EXTENTS) {
			// copy the extents that were stored in the reserved area so far
			memcpy(newMap, direct, count * sizeof(file_extent));
		}

		array = newMap;
	}

	uint32 start = count;
	count += vecCount;

	for (uint32 i = 0; i < vecCount; i++) {
		file_extent *extent = ExtentAt(start + i);

		extent->offset = offset;
		extent->disk = vecs[i];

		offset += extent->disk.length;
	}

	return B_OK;
}


void
file_map::Free()
{
	if (count > CACHED_FILE_EXTENTS)
		free(array);

	array = NULL;
	count = 0;
}


//	#pragma mark -


static void
add_to_iovec(iovec *vecs, int32 &index, int32 max, addr_t address, size_t size)
{
	if (index > 0 && (addr_t)vecs[index - 1].iov_base + vecs[index - 1].iov_len == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].iov_len += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].iov_base = (void *)address;
	vecs[index].iov_len = size;
	index++;
}


static file_extent *
find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
{
	// ToDo: do binary search

	for (uint32 index = 0; index < ref->map.count; index++) {
		file_extent *extent = ref->map[index];

		if (extent->offset <= offset
			&& extent->offset + extent->disk.length > offset) {
			if (_index)
				*_index = index;
			return extent;
		}
	}

	return NULL;
}


static status_t
get_file_map(file_cache_ref *ref, off_t offset, size_t size,
	file_io_vec *vecs, size_t *_count)
{
	size_t maxVecs = *_count;

	if (ref->map.count == 0) {
		// we don't yet have the map of this file, so let's grab it
		// (ordered by offset, so that we can do a binary search on them)

		mutex_lock(&ref->cache->lock);

		// the file map could have been requested in the mean time
		if (ref->map.count == 0) {
			size_t vecCount = maxVecs;
			status_t status;
			off_t mapOffset = 0;

			while (true) {
				status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount);
				if (status < B_OK && status != B_BUFFER_OVERFLOW) {
					mutex_unlock(&ref->cache->lock);
					return status;
				}

				ref->map.Add(vecs, vecCount);

				if (status != B_BUFFER_OVERFLOW)
					break;

				// when we are here, the map has been stored in the array, and
				// the array size was still too small to cover the whole file;
				// we continue the query where the last cached extent ended
				file_extent *last = ref->map.ExtentAt(ref->map.count - 1);
				mapOffset = last->offset + last->disk.length;
				vecCount = maxVecs;
			}
		}

		mutex_unlock(&ref->cache->lock);
	}

	// The map of this file is now cached; we only need to translate it
	// for the requested access.

	uint32 index;
	file_extent *fileExtent = find_file_extent(ref, offset, &index);
	if (fileExtent == NULL) {
		// access outside file bounds? But that's not our problem
		*_count = 0;
		return B_OK;
	}

	offset -= fileExtent->offset;
	vecs[0].offset = fileExtent->disk.offset + offset;
	vecs[0].length = fileExtent->disk.length - offset;

	if (vecs[0].length >= size || index >= ref->map.count - 1) {
		*_count = 1;
		return B_OK;
	}

	// copy the rest of the vecs

	size -= vecs[0].length;

	for (index = 1; index < ref->map.count;) {
		fileExtent++;

		vecs[index] = fileExtent->disk;
		index++;

		if (index >= maxVecs) {
			*_count = index;
			return B_BUFFER_OVERFLOW;
		}

		if (size <= fileExtent->disk.length)
			break;

		size -= fileExtent->disk.length;
	}

	*_count = index;
	return B_OK;
}


static status_t
pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
	size_t *_numBytes, bool doWrite)
{
	TRACE(("pages_io: ref = %p, offset = %Ld, size = %lu, %s\n", ref, offset,
		*_numBytes, doWrite ? "write" : "read"));

	// translate the iovecs into direct device accesses
	file_io_vec fileVecs[MAX_FILE_IO_VECS];
	size_t fileVecCount = MAX_FILE_IO_VECS;
	size_t numBytes = *_numBytes;

	status_t status = get_file_map(ref, offset, numBytes, fileVecs, &fileVecCount);
	if (status < B_OK) {
		TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed\n", offset,
			numBytes));
		return status;
	}

	// ToDo: handle array overflow gracefully!
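	//	(get_file_map() reports B_BUFFER_OVERFLOW when the request spans more
	//	extents than fit into fileVecs; the check above currently just passes
	//	that error back to the caller, while graceful handling would presumably
	//	mean splitting the request and doing the I/O in several passes.)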

#ifdef TRACE_FILE_CACHE
	dprintf("got %lu file vecs for %Ld:%lu:\n", fileVecCount, offset, numBytes);
	for (size_t i = 0; i < fileVecCount; i++)
		dprintf("[%lu] offset = %Ld, size = %Ld\n", i, fileVecs[i].offset, fileVecs[i].length);
#endif

	uint32 fileVecIndex;
	size_t size;

	if (!doWrite) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly

		size = fileVecs[0].length;
		if (size > numBytes)
			size = numBytes;

		status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs,
			count, &size, false);
		if (status < B_OK)
			return status;

		// ToDo: this is a work-around for buggy device drivers!
		//	When our own drivers honour the length, we can:
		//	a) also use this direct I/O for writes (otherwise, it would overwrite precious data)
		//	b) panic if the term below is true (at least for writes)
		if (size > fileVecs[0].length) {
			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
			size = fileVecs[0].length;
		}

		ASSERT(size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return B_OK;

		// if we reached the end of the file, we can return as well
		if (size != fileVecs[0].length) {
			*_numBytes = size;
			return B_OK;
		}

		fileVecIndex = 1;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	size_t totalSize = size;

	// first, find out where we have to continue in our iovecs
	uint32 i = 0;
	for (; i < count; i++) {
		if (size <= vecs[i].iov_len)
			break;

		size -= vecs[i].iov_len;
	}

	size_t vecOffset = size;

	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
		file_io_vec &fileVec = fileVecs[fileVecIndex];
		iovec tempVecs[8];
		uint32 tempCount = 1;

		tempVecs[0].iov_base = (void *)((addr_t)vecs[i].iov_base + vecOffset);

		size = min_c(vecs[i].iov_len - vecOffset, fileVec.length);
		tempVecs[0].iov_len = size;

		TRACE(("fill vec %ld, offset = %lu, size = %lu\n", i, vecOffset, size));

		if (size >= fileVec.length)
			vecOffset += size;
		else
			vecOffset = 0;

		while (size < fileVec.length && ++i < count) {
			tempVecs[tempCount].iov_base = vecs[i].iov_base;

			// is this iovec larger than the file_io_vec?
			if (vecs[i].iov_len + size > fileVec.length) {
				size += tempVecs[tempCount].iov_len = vecOffset = fileVec.length - size;
				tempCount++;
				break;
			}

			size += tempVecs[tempCount].iov_len = vecs[i].iov_len;
			tempCount++;
		}

		size_t bytes = size;
		if (doWrite) {
			status = vfs_write_pages(ref->device, ref->cookie, fileVec.offset, tempVecs,
				tempCount, &bytes, false);
		} else {
			status = vfs_read_pages(ref->device, ref->cookie, fileVec.offset, tempVecs,
				tempCount, &bytes, false);
		}
		if (status < B_OK)
			return status;

		totalSize += size;

		if (size != bytes) {
			// there are no more bytes, let's bail out
			*_numBytes = totalSize;
			return B_OK;
		}
	}

	return B_OK;
}


/**	This function is called by read_into_cache() (and from there only) - it
 *	can only handle a certain amount of bytes, and read_into_cache() makes
 *	sure that it matches that criterion.
 */

static inline status_t
read_chunk_into_cache(file_cache_ref *ref, off_t offset, size_t size,
	int32 pageOffset, addr_t buffer, size_t bufferSize)
{
	TRACE(("read_chunk(offset = %Ld, size = %lu, pageOffset = %ld, buffer = %#lx, bufferSize = %lu\n",
		offset, size, pageOffset, buffer, bufferSize));

	vm_cache_ref *cache = ref->cache;

	iovec vecs[MAX_IO_VECS];
	int32 vecCount = 0;

	vm_page *pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE) {
		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE);
		if (page == NULL)
			panic("no more pages!");

		page->state = PAGE_STATE_BUSY;

		vm_cache_insert_page(cache, page, offset + pos);

		addr_t virtualAddress;
		if (vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE, &virtualAddress, PHYSICAL_PAGE_CAN_WAIT) < B_OK)
			panic("could not get physical page");

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE);
			// ToDo: check if the array is large enough!
	}

	mutex_unlock(&cache->lock);

	// read file into reserved pages
	status_t status = pages_io(ref, offset, vecs, vecCount, &size, false);
	if (status < B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		for (int32 i = 0; i < vecCount; i++) {
			addr_t base = (addr_t)vecs[i].iov_base;
			size_t size = vecs[i].iov_len;

			for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE)
				vm_put_physical_page(base);
		}

		mutex_lock(&cache->lock);

		for (int32 i = 0; i < pageIndex; i++) {
			vm_cache_remove_page(cache, pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages and unmap them again

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t size = vecs[i].iov_len;

		// copy to user buffer if necessary
		if (bufferSize != 0) {
			size_t bytes = min_c(bufferSize, size - pageOffset);

			user_memcpy((void *)buffer, (void *)(base + pageOffset), bytes);
			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}

		for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE)
			vm_put_physical_page(base);
	}

	mutex_lock(&cache->lock);

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;)
		pages[i]->state = PAGE_STATE_ACTIVE;

	return B_OK;
}


/**	This function reads \a size bytes directly from the file into the cache.
 *	If \a bufferSize does not equal zero, \a bufferSize bytes from the data
 *	read in are also copied to the provided \a buffer.
 *	This function always allocates all pages; it is the responsibility of the
 *	calling function to only ask for yet uncached ranges.
 *	The cache_ref lock must be held when calling this function.
 */

static status_t
read_into_cache(file_cache_ref *ref, off_t offset, size_t size, addr_t buffer, size_t bufferSize)
{
	TRACE(("read_into_cache: ref = %p, offset = %Ld, size = %lu, buffer = %p, bufferSize = %lu\n",
		ref, offset, size, (void *)buffer, bufferSize));

	// do we have to read in anything at all?
	if (size == 0)
		return B_OK;

	// make sure "offset" is page aligned - but also remember the page offset
	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size = PAGE_ALIGN(size + pageOffset);
	offset -= pageOffset;

	while (true) {
		size_t chunkSize = size;
		if (chunkSize > (MAX_IO_VECS * B_PAGE_SIZE))
			chunkSize = MAX_IO_VECS * B_PAGE_SIZE;

		status_t status = read_chunk_into_cache(ref, offset, chunkSize, pageOffset,
			buffer, bufferSize);
		if (status != B_OK)
			return status;

		if ((size -= chunkSize) == 0)
			return B_OK;

		if (chunkSize >= bufferSize) {
			bufferSize = 0;
			buffer = NULL;
		} else {
			bufferSize -= chunkSize - pageOffset;
			buffer += chunkSize - pageOffset;
		}

		offset += chunkSize;
		pageOffset = 0;
	}

	return B_OK;
}


/**	Like read_chunk_into_cache() but writes data into the cache */

static inline status_t
write_chunk_to_cache(file_cache_ref *ref, off_t offset, size_t size,
	int32 pageOffset, addr_t buffer, size_t bufferSize)
{
	iovec vecs[MAX_IO_VECS];
	int32 vecCount = 0;
	vm_page *pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE) {
		// ToDo: if space is becoming tight, and this cache has already grown
		//	big - wouldn't it be better to steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(PAGE_STATE_FREE);
		page->state = PAGE_STATE_BUSY;

		vm_cache_insert_page(ref->cache, page, offset + pos);

		addr_t virtualAddress;
		vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE, &virtualAddress,
			PHYSICAL_PAGE_CAN_WAIT);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE);
			// ToDo: check if the array is large enough!
	}

	mutex_unlock(&ref->cache->lock);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE };
		size_t bytesRead = B_PAGE_SIZE;

		status = pages_io(ref, offset, &readVec, 1, &bytesRead, false);
			// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. pages_io() failed: %s!\n", strerror(status));
	}

	addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1);
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		addr_t last = (addr_t)vecs[vecCount - 1].iov_base
			+ vecs[vecCount - 1].iov_len - B_PAGE_SIZE;

		if (offset + pageOffset + bufferSize == ref->cache->cache->virtual_size) {
			// the space in the page after this write action needs to be cleaned
			memset((void *)(last + lastPageOffset), 0, B_PAGE_SIZE - lastPageOffset);
		} else if (vecCount > 1) {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			iovec readVec = { (void *)last, B_PAGE_SIZE };
			size_t bytesRead = B_PAGE_SIZE;

			status = pages_io(ref, offset + size - B_PAGE_SIZE, &readVec, 1,
				&bytesRead, false);
				// ToDo: handle errors for real!
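				//	(as with the partial first page above, panicking is only a
				//	placeholder; proper handling would back out the allocated
				//	pages and return the error)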
			if (status < B_OK)
				panic("pages_io() failed: %s!\n", strerror(status));
		}
	}

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t bytes = min_c(bufferSize, size_t(vecs[i].iov_len - pageOffset));

		// copy data from user buffer
		user_memcpy((void *)(base + pageOffset), (void *)buffer, bytes);

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = pages_io(ref, offset, vecs, vecCount, &size, true);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	mutex_lock(&ref->cache->lock);

	// unmap the pages again

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t size = vecs[i].iov_len;
		for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE, base += B_PAGE_SIZE)
			vm_put_physical_page(base);
	}

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		if (writeThrough)
			pages[i]->state = PAGE_STATE_ACTIVE;
		else
			vm_page_set_state(pages[i], PAGE_STATE_MODIFIED);
	}

	return status;
}


/**	Like read_into_cache() but writes data into the cache. To preserve data consistency,
 *	it might also read pages into the cache, though, if only a partial page gets written.
 *	The cache_ref lock must be held when calling this function.
 */

static status_t
write_to_cache(file_cache_ref *ref, off_t offset, size_t size, addr_t buffer, size_t bufferSize)
{
	TRACE(("write_to_cache: ref = %p, offset = %Ld, size = %lu, buffer = %p, bufferSize = %lu\n",
		ref, offset, size, (void *)buffer, bufferSize));

	// make sure "offset" is page aligned - but also remember the page offset
	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size = PAGE_ALIGN(size + pageOffset);
	offset -= pageOffset;

	while (true) {
		size_t chunkSize = size;
		if (chunkSize > (MAX_IO_VECS * B_PAGE_SIZE))
			chunkSize = MAX_IO_VECS * B_PAGE_SIZE;

		status_t status = write_chunk_to_cache(ref, offset, chunkSize, pageOffset, buffer, bufferSize);
		if (status != B_OK)
			return status;

		if ((size -= chunkSize) == 0)
			return B_OK;

		if (chunkSize >= bufferSize) {
			bufferSize = 0;
			buffer = NULL;
		} else {
			bufferSize -= chunkSize - pageOffset;
			buffer += chunkSize - pageOffset;
		}

		offset += chunkSize;
		pageOffset = 0;
	}

	return B_OK;
}


static status_t
satisfy_cache_io(file_cache_ref *ref, off_t offset, addr_t buffer, addr_t lastBuffer,
	bool doWrite)
{
	size_t requestSize = buffer - lastBuffer;

	if (doWrite)
		return write_to_cache(ref, offset, requestSize, lastBuffer, requestSize);

	return read_into_cache(ref, offset, requestSize, lastBuffer, requestSize);
}


static status_t
cache_io(void *_cacheRef, off_t offset, addr_t buffer, size_t *_size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	vm_cache_ref *cache = ref->cache;
	off_t fileSize = cache->cache->virtual_size;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void *)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if (offset + pageOffset + size > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;

	mutex_lock(&cache->lock);

	for (; bytesLeft > 0; offset += B_PAGE_SIZE) {
		// check if this page is already in memory
	restart:
		vm_page *page = vm_cache_lookup_page(cache, offset);
		vm_page *dummyPage = NULL;
		if (page != NULL) {
			// The page is in the cache - since we might have to unlock the
			// cache or block on it in a moment, we first satisfy the request
			// for the pages we didn't get yet (to make sure no one else
			// interferes in the mean time).
			status_t status = B_OK;

			if (lastBuffer != buffer) {
				status = satisfy_cache_io(ref, lastOffset + lastPageOffset,
					buffer, lastBuffer, doWrite);
				if (status == B_OK) {
					lastBuffer = buffer;
					lastLeft = bytesLeft;
					lastOffset = offset;
					lastPageOffset = 0;
					pageOffset = 0;
				}
			}

			if (status != B_OK) {
				mutex_unlock(&cache->lock);
				return status;
			}

			if (page->state == PAGE_STATE_BUSY) {
				if (page->type == PAGE_TYPE_DUMMY) {
					dummyPage = page;
					page = vm_page_allocate_page(PAGE_STATE_FREE);
					if (page == NULL) {
						mutex_unlock(&cache->lock);
						return B_NO_MEMORY;
					}
				} else {
					mutex_unlock(&cache->lock);
					// ToDo: don't wait forever!
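					//	(the busy page belongs to another request; all we can
					//	do for now is drop the cache lock, wait a moment, and
					//	look the page up again)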
					snooze(20000);
					mutex_lock(&cache->lock);
					goto restart;
				}
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);
		addr_t virtualAddress;

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset = %lu\n", offset, page, bytesLeft, pageOffset));
		if (page != NULL) {
			vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE,
				&virtualAddress, PHYSICAL_PAGE_CAN_WAIT);

			if (dummyPage != NULL && (!doWrite || bytesInPage != B_PAGE_SIZE)) {
				// This page is currently in-use by someone else - since we cannot
				// know if this someone does what we want, and if it even can do
				// what we want (we may own a lock that blocks the other request),
				// we need to handle this case specifically
				iovec vec;
				vec.iov_base = (void *)virtualAddress;
				vec.iov_len = B_PAGE_SIZE;

				size_t size = B_PAGE_SIZE;
				status_t status = pages_io(ref, offset, &vec, 1, &size, false);
				if (status != B_OK) {
					vm_put_physical_page(virtualAddress);
					mutex_unlock(&cache->lock);
					return status;
				}
			}

			// and copy the contents of the page already in memory
			if (doWrite) {
				user_memcpy((void *)(virtualAddress + pageOffset), (void *)buffer, bytesInPage);

				// make sure the page is in the modified list
				if (page->state != PAGE_STATE_MODIFIED)
					vm_page_set_state(page, PAGE_STATE_MODIFIED);
			} else
				user_memcpy((void *)buffer, (void *)(virtualAddress + pageOffset), bytesInPage);

			vm_put_physical_page(virtualAddress);

			if (dummyPage != NULL) {
				// check if the dummy page is still in place
			restart_dummy_lookup:
				vm_page *currentPage = vm_cache_lookup_page(cache, offset);
				if (currentPage != NULL && currentPage->state == PAGE_STATE_BUSY) {
					if (currentPage->type == PAGE_TYPE_DUMMY) {
						// we let the other party add our page
						currentPage->queue_next = page;
					} else {
						mutex_unlock(&cache->lock);
						// ToDo: don't wait forever!
						snooze(20000);
						mutex_lock(&cache->lock);
						goto restart_dummy_lookup;
					}
				} else if (currentPage != NULL) {
					// we need to copy our new page into the old one
					addr_t destinationAddress;
					vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE,
						&virtualAddress, PHYSICAL_PAGE_CAN_WAIT);
					vm_get_physical_page(currentPage->physical_page_number * B_PAGE_SIZE,
						&destinationAddress, PHYSICAL_PAGE_CAN_WAIT);

					memcpy((void *)destinationAddress, (void *)virtualAddress, B_PAGE_SIZE);

					vm_put_physical_page(destinationAddress);
					vm_put_physical_page(virtualAddress);

					vm_page_set_state(page, PAGE_STATE_FREE);
				} else {
					// there is no page in place anymore, we'll put ours into it
					vm_cache_insert_page(cache, page, offset);
				}
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				mutex_unlock(&cache->lock);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
	}

	// fill the last remaining bytes of the request (either write or read)

	status_t status;
	if (doWrite)
		status = write_to_cache(ref, lastOffset + lastPageOffset, lastLeft, lastBuffer, lastLeft);
	else
		status = read_into_cache(ref, lastOffset + lastPageOffset, lastLeft, lastBuffer, lastLeft);

	mutex_unlock(&cache->lock);
	return status;
}


static status_t
file_cache_control(const char *subsystem, uint32 function, void *buffer, size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			return B_OK;

		case CACHE_SET_MODULE:
		{
			cache_module_info *module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				return B_OK;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char *)buffer, B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			status_t status = get_module(name, (module_info **)&module);
			if (status == B_OK)
				sCacheModule = module;

			return status;
		}
	}

	return B_BAD_HANDLER;
}


//	#pragma mark -
//	kernel public API


extern "C" void
cache_prefetch_vnode(void *vnode, off_t offset, size_t size)
{
	vm_cache_ref *cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;

	file_cache_ref *ref = (struct file_cache_ref *)((vnode_store *)cache->cache->store)->file_cache_ref;
	off_t fileSize = cache->cache->virtual_size;

	if (size > fileSize)
		size = fileSize;

	// we never fetch more than 4 MB at once
	if (size > 4 * 1024 * 1024)
		size = 4 * 1024 * 1024;

	size_t bytesLeft = size, lastLeft = size;
	off_t lastOffset = offset;

	mutex_lock(&cache->lock);

	for (; bytesLeft > 0; offset += B_PAGE_SIZE) {
		// check if this page is already in memory
		vm_page *page = vm_cache_lookup_page(cache, offset);
		if (page != NULL) {
			// it is, so let's satisfy the first part of the request
			if (lastOffset < offset) {
				size_t requestSize = offset - lastOffset;
				read_into_cache(ref, lastOffset, requestSize, NULL, 0);
			}

			if (bytesLeft <= B_PAGE_SIZE) {
				// we've read the last page, so we're done!
				goto out;
			}

			// prepare a potential gap request
			lastOffset = offset + B_PAGE_SIZE;
			lastLeft = bytesLeft - B_PAGE_SIZE;
		}

		if (bytesLeft <= B_PAGE_SIZE)
			break;

		bytesLeft -= B_PAGE_SIZE;
	}

	// read in the last part
	read_into_cache(ref, lastOffset, lastLeft, NULL, 0);

out:
	mutex_unlock(&cache->lock);
	vm_cache_release_ref(cache);
}


extern "C" void
cache_prefetch(mount_id mountID, vnode_id vnodeID, off_t offset, size_t size)
{
	void *vnode;

	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	if (vfs_get_vnode(mountID, vnodeID, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(void *vnode, int32 fdType, vm_cache_ref *cache, mount_id mountID,
	vnode_id parentID, vnode_id vnodeID, const char *name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL) {
		file_cache_ref *ref = (file_cache_ref *)((vnode_store *)cache->cache->store)->file_cache_ref;
		if (ref != NULL)
			size = ref->cache->cache->virtual_size;
	}

	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name, size);
}


extern "C" void
cache_node_closed(void *vnode, int32 fdType, vm_cache_ref *cache,
	mount_id mountID, vnode_id vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char * const *args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1", (module_info **)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %Ld\n", system_time());
	} else
		dprintf("** could not open launch speedup!\n");

	return B_OK;
}


extern "C" status_t
file_cache_init(void)
{
	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}


//	#pragma mark -
//	public FS API


extern "C" void *
file_cache_create(mount_id mountID, vnode_id vnodeID, off_t size, int fd)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld, fd = %d)\n", mountID, vnodeID, size, fd));

	file_cache_ref *ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	// ToDo: delay vm_cache/vm_cache_ref creation until data is
	//	requested/written for the first time? Listing lots of
	//	files in Tracker (and elsewhere) could be slowed down.
	//	Since the file_cache_ref itself doesn't have a lock,
	//	we would need to "rent" one during construction, possibly
	//	the vnode lock, maybe a dedicated one.
	//	As there shouldn't be too much contention, we could also
	//	use atomic_test_and_set(), and free the resources again
	//	when that fails...

	// get the vnode of the underlying device
	if (vfs_get_vnode_from_fd(fd, true, &ref->device) != B_OK)
		goto err1;

	// we also need the cookie of the underlying device to properly access it
	if (vfs_get_cookie_from_fd(fd, &ref->cookie) != B_OK)
		goto err2;

	// get the vnode for the object (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err2;

	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err2;

	ref->cache->cache->virtual_size = size;
	((vnode_store *)ref->cache->cache->store)->file_cache_ref = ref;
	return ref;

err2:
	vfs_put_vnode(ref->device);
err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	vfs_put_vnode(ref->device);
	delete ref;
}


extern "C" status_t
file_cache_set_size(void *_cacheRef, off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, size));

	if (ref == NULL)
		return B_OK;

	file_cache_invalidate_file_map(_cacheRef, 0, size);
		// ToDo: make this better (we would only need to extend or shrink the map)

	mutex_lock(&ref->cache->lock);
	status_t status = vm_cache_resize(ref->cache, size);
	mutex_unlock(&ref->cache->lock);

	return status;
}


extern "C" status_t
file_cache_sync(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	if (ref == NULL)
		return B_BAD_VALUE;

	return vm_cache_write_modified(ref->cache, true);
}


extern "C" status_t
file_cache_read_pages(void *_cacheRef, off_t offset, const iovec *vecs, size_t count, size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	return pages_io(ref, offset, vecs, count, _numBytes, false);
}


extern "C" status_t
file_cache_write_pages(void *_cacheRef, off_t offset, const iovec *vecs, size_t count, size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	status_t status = pages_io(ref, offset, vecs, count, _numBytes, true);
	TRACE(("file_cache_write_pages(ref = %p, offset = %Ld, vecs = %p, count = %lu, bytes = %lu) = %ld\n",
		ref, offset, vecs, count, *_numBytes, status));

	return status;
}


extern "C" status_t
file_cache_read(void *_cacheRef, off_t offset, void *bufferBase, size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
		ref, offset, bufferBase, *_size));

	return cache_io(ref, offset, (addr_t)bufferBase, _size, false);
}


extern "C" status_t
file_cache_write(void *_cacheRef, off_t offset, const void *buffer, size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	status_t status = cache_io(ref, offset, (addr_t)const_cast<void *>(buffer), _size, true);
	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu) = %ld\n",
		ref, offset, buffer, *_size, status));

	return status;
}


extern "C" status_t
file_cache_invalidate_file_map(void *_cacheRef, off_t offset, off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	// ToDo: honour offset/size parameters

	TRACE(("file_cache_invalidate_file_map(offset = %Ld, size = %Ld)\n", offset, size));
	mutex_lock(&ref->cache->lock);
	ref->map.Free();
	mutex_unlock(&ref->cache->lock);
	return B_OK;
}
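

/*	Rough usage sketch of the public FS API above. The file system hooks and
 *	variable names below are hypothetical and only illustrate the intended
 *	call order:
 *
 *		// when a node is put into memory - "deviceFD" is an open file
 *		// descriptor of the underlying device, "size" the current file size
 *		node->cache = file_cache_create(volume->id, node->id, node->size,
 *			volume->deviceFD);
 *
 *		// in the FS's read/write hooks
 *		size_t bytes = length;
 *		status_t status = file_cache_read(node->cache, pos, buffer, &bytes);
 *
 *		// after the file has been resized
 *		file_cache_set_size(node->cache, newSize);
 *
 *		// after the file's on-disk blocks have been rearranged
 *		file_cache_invalidate_file_map(node->cache, 0, node->size);
 *
 *		// when the node is removed from memory again
 *		file_cache_delete(node->cache);
 */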