/*
 * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#include <KernelExport.h>
#include <fs_cache.h>

#include <condition_variable.h>
#include <file_cache.h>
#include <generic_syscall.h>
#include <util/AutoLock.h>
#include <util/kernel_cpp.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_cache.h>


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			32	// 128 kB
#define MAX_FILE_IO_VECS	32
#define MAX_TEMP_IO_VECS	8

#define CACHED_FILE_EXTENTS	2
	// must be smaller than MAX_FILE_IO_VECS
	// ToDo: find out how many of these are typically used

struct file_extent {
	off_t			offset;
	file_io_vec		disk;
};

struct file_map {
	file_map();
	~file_map();

	file_extent *operator[](uint32 index);
	file_extent *ExtentAt(uint32 index);
	status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset);
	void Free();

	union {
		file_extent	direct[CACHED_FILE_EXTENTS];
		file_extent	*array;
	};
	size_t			count;
};

struct file_cache_ref {
	vm_cache		*cache;
	struct vnode	*vnode;
	struct vnode	*device;
	void			*cookie;
	file_map		map;
};


static struct cache_module_info *sCacheModule;


file_map::file_map()
{
	array = NULL;
	count = 0;
}


file_map::~file_map()
{
	Free();
}


file_extent *
file_map::operator[](uint32 index)
{
	return ExtentAt(index);
}


file_extent *
file_map::ExtentAt(uint32 index)
{
	if (index >= count)
		return NULL;

	if (count > CACHED_FILE_EXTENTS)
		return &array[index];

	return &direct[index];
}


status_t
file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset)
{
	TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));

	off_t offset = 0;

	if (vecCount <= CACHED_FILE_EXTENTS && count == 0) {
		// just use the reserved area in the file_cache_ref structure
	} else {
		// TODO: once we can invalidate only parts of the file map,
		// we might need to copy the previously cached file extents
		// from the direct range
		file_extent *newMap = (file_extent *)realloc(array,
			(count + vecCount) * sizeof(file_extent));
		if (newMap == NULL)
			return B_NO_MEMORY;

		array = newMap;

		if (count != 0) {
			file_extent *extent = ExtentAt(count - 1);
			offset = extent->offset + extent->disk.length;
		}
	}

	int32 start = count;
	count += vecCount;

	for (uint32 i = 0; i < vecCount; i++) {
		file_extent *extent = ExtentAt(start + i);

		extent->offset = offset;
		extent->disk = vecs[i];

		offset += extent->disk.length;
	}

#ifdef TRACE_FILE_CACHE
	for (uint32 i = 0; i < count; i++) {
		file_extent *extent = ExtentAt(i);
		dprintf("[%ld] extent offset %Ld, disk offset %Ld, length %Ld\n",
			i, extent->offset, extent->disk.offset, extent->disk.length);
	}
#endif

	lastOffset = offset;
	return B_OK;
}


void
file_map::Free()
{
	if (count > CACHED_FILE_EXTENTS)
		free(array);

	array = NULL;
	count = 0;
}


// #pragma mark -

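// A quick overview of the machinery below: cache_io() walks a request
// page by page and hands contiguous gaps to read_into_cache() and
// write_to_cache(), which in turn call pages_io(). pages_io() uses
// get_file_map() to translate file offsets into on-disk extents and
// add_to_iovec() to build the iovec arrays that are finally passed to
// vfs_read_pages()/vfs_write_pages().
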
static void
add_to_iovec(iovec *vecs, int32 &index, int32 max, addr_t address, size_t size)
{
	if (index > 0 && (addr_t)vecs[index - 1].iov_base
			+ vecs[index - 1].iov_len == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].iov_len += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].iov_base = (void *)address;
	vecs[index].iov_len = size;
	index++;
}


static file_extent *
find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
{
	// TODO: do binary search

	for (uint32 index = 0; index < ref->map.count; index++) {
		file_extent *extent = ref->map[index];

		if (extent->offset <= offset
			&& extent->offset + extent->disk.length > offset) {
			if (_index)
				*_index = index;
			return extent;
		}
	}

	return NULL;
}


static status_t
get_file_map(file_cache_ref *ref, off_t offset, size_t size,
	file_io_vec *vecs, size_t *_count)
{
	size_t maxVecs = *_count;
	status_t status = B_OK;

	if (ref->map.count == 0) {
		// we don't yet have the map of this file, so let's grab it
		// (ordered by offset, so that we can do a binary search on them)

		MutexLocker _(ref->cache->lock);

		// the file map could have been requested in the mean time
		if (ref->map.count == 0) {
			size_t vecCount = maxVecs;
			off_t mapOffset = 0;

			while (true) {
				status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs,
					&vecCount);
				if (status < B_OK && status != B_BUFFER_OVERFLOW)
					return status;

				status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset);
				if (addStatus != B_OK) {
					// only clobber the status in case of failure
					status = addStatus;
				}

				if (status != B_BUFFER_OVERFLOW)
					break;

				// when we are here, the map has been stored in the array, and
				// the array size was still too small to cover the whole file
				vecCount = maxVecs;
			}
		}
	}

	if (status != B_OK) {
		// We must invalidate the (part of the) map we already
		// have, as we cannot know if it's complete or not
		ref->map.Free();
		return status;
	}

	// We now have cached the map of this file, we now need to
	// translate it for the requested access.

	uint32 index;
	file_extent *fileExtent = find_file_extent(ref, offset, &index);
	if (fileExtent == NULL) {
		// access outside file bounds? But that's not our problem
		*_count = 0;
		return B_OK;
	}

	offset -= fileExtent->offset;
	vecs[0].offset = fileExtent->disk.offset + offset;
	vecs[0].length = fileExtent->disk.length - offset;

	if (vecs[0].length >= size || index >= ref->map.count - 1) {
		*_count = 1;
		return B_OK;
	}

	// copy the rest of the vecs

	size -= vecs[0].length;

	for (index = 1; index < ref->map.count;) {
		fileExtent++;

		vecs[index] = fileExtent->disk;
		index++;

		if (size <= fileExtent->disk.length)
			break;

		if (index >= maxVecs) {
			*_count = index;
			return B_BUFFER_OVERFLOW;
		}

		size -= fileExtent->disk.length;
	}

	*_count = index;
	return B_OK;
}

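// A worked example with hypothetical numbers: if the cached map holds the
// extents {offset 0, disk offset 1000, length 8192} and {offset 8192,
// disk offset 5000, length 4096}, a request for 6144 bytes at file offset
// 4096 yields vecs[0] = {5096, 4096} and vecs[1] = {5000, 4096}; the
// caller (pages_io()) then clips the last vec against the number of bytes
// it still needs.
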
/*!
	Does the dirty work of translating the request into actual disk offsets
	and reads to or writes from the supplied iovecs as specified by \a doWrite.
*/
static status_t
pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
	size_t *_numBytes, bool doWrite)
{
	TRACE(("pages_io: ref = %p, offset = %Ld, size = %lu, vecCount = %lu, %s\n",
		ref, offset, *_numBytes, count, doWrite ? "write" : "read"));

	// translate the iovecs into direct device accesses
	file_io_vec fileVecs[MAX_FILE_IO_VECS];
	size_t fileVecCount = MAX_FILE_IO_VECS;
	size_t numBytes = *_numBytes;

	status_t status = get_file_map(ref, offset, numBytes, fileVecs,
		&fileVecCount);
	if (status < B_OK && status != B_BUFFER_OVERFLOW) {
		TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed: %s\n",
			offset, numBytes, strerror(status)));
		return status;
	}

	bool bufferOverflow = status == B_BUFFER_OVERFLOW;

#ifdef TRACE_FILE_CACHE
	dprintf("got %lu file vecs for %Ld:%lu%s:\n", fileVecCount, offset,
		numBytes, bufferOverflow ? " (array too small)" : "");
	for (size_t i = 0; i < fileVecCount; i++) {
		dprintf(" [%lu] offset = %Ld, size = %Ld\n",
			i, fileVecs[i].offset, fileVecs[i].length);
	}
#endif

	if (fileVecCount == 0) {
		// There are no file vecs at this offset, so we're obviously trying
		// to access the file outside of its bounds
		TRACE(("pages_io: access outside of vnode %p at offset %Ld\n",
			ref->vnode, offset));
		return B_BAD_VALUE;
	}

	uint32 fileVecIndex;
	size_t size;

	if (!doWrite) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly

		size = fileVecs[0].length;
		if (size > numBytes)
			size = numBytes;

		status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset,
			vecs, count, &size, true, false);
		if (status < B_OK)
			return status;

		// TODO: this is a work-around for buggy device drivers!
		// When our own drivers honour the length, we can:
		// a) also use this direct I/O for writes (otherwise, it would
		//    overwrite precious data)
		// b) panic if the term below is true (at least for writes)
		if (size > fileVecs[0].length) {
			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
			size = fileVecs[0].length;
		}

		ASSERT(size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return B_OK;

		// if we reached the end of the file, we can return as well
		if (size != fileVecs[0].length) {
			*_numBytes = size;
			return B_OK;
		}

		fileVecIndex = 1;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	size_t totalSize = size;

	// first, find out where we have to continue in our iovecs
	uint32 i = 0;
	for (; i < count; i++) {
		if (size < vecs[i].iov_len)
			break;

		size -= vecs[i].iov_len;
	}

	size_t vecOffset = size;
	size_t bytesLeft = numBytes - size;

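	// The remaining transfer is a nested loop: the outer while refreshes
	// the file vec array whenever get_file_map() reported an overflow, the
	// middle for walks the file vecs, and the innermost loop packs up to
	// MAX_TEMP_IO_VECS pieces per device transfer, tracking the position
	// in the caller's iovecs via "i" and "vecOffset".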
	while (true) {
		for (; fileVecIndex < fileVecCount; fileVecIndex++) {
			file_io_vec &fileVec = fileVecs[fileVecIndex];
			off_t fileOffset = fileVec.offset;
			off_t fileLeft = min_c(fileVec.length, bytesLeft);

			TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));

			// process the complete fileVec
			while (fileLeft > 0) {
				iovec tempVecs[MAX_TEMP_IO_VECS];
				uint32 tempCount = 0;

				// size tracks how much of what is left of the current fileVec
				// (fileLeft) has been assigned to tempVecs
				size = 0;

				// assign what is left of the current fileVec to the tempVecs
				for (size = 0; size < fileLeft && i < count
						&& tempCount < MAX_TEMP_IO_VECS;) {
					// try to satisfy one iovec per iteration (or as much as
					// possible)

					// bytes left of the current iovec
					size_t vecLeft = vecs[i].iov_len - vecOffset;
					if (vecLeft == 0) {
						vecOffset = 0;
						i++;
						continue;
					}

					TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
						i, vecOffset, size));

					// actually available bytes
					size_t tempVecSize = min_c(vecLeft, fileLeft - size);

					tempVecs[tempCount].iov_base
						= (void *)((addr_t)vecs[i].iov_base + vecOffset);
					tempVecs[tempCount].iov_len = tempVecSize;
					tempCount++;

					size += tempVecSize;
					vecOffset += tempVecSize;
				}

				size_t bytes = size;
				if (doWrite) {
					status = vfs_write_pages(ref->device, ref->cookie,
						fileOffset, tempVecs, tempCount, &bytes, true, false);
				} else {
					status = vfs_read_pages(ref->device, ref->cookie,
						fileOffset, tempVecs, tempCount, &bytes, true, false);
				}
				if (status < B_OK)
					return status;

				totalSize += bytes;
				bytesLeft -= size;
				fileOffset += size;
				fileLeft -= size;
				//dprintf("-> file left = %Lu\n", fileLeft);

				if (size != bytes || i >= count) {
					// there are no more bytes or iovecs, let's bail out
					*_numBytes = totalSize;
					return B_OK;
				}
			}
		}

		if (bufferOverflow) {
			status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs,
				&fileVecCount);
			if (status < B_OK && status != B_BUFFER_OVERFLOW) {
				TRACE(("get_file_map(offset = %Ld, numBytes = %lu) failed: %s\n",
					offset, numBytes, strerror(status)));
				return status;
			}

			bufferOverflow = status == B_BUFFER_OVERFLOW;
			fileVecIndex = 0;

#ifdef TRACE_FILE_CACHE
			dprintf("got %lu file vecs for %Ld:%lu%s:\n", fileVecCount,
				offset + totalSize, numBytes,
				bufferOverflow ? " (array too small)" : "");
			for (size_t i = 0; i < fileVecCount; i++) {
				dprintf(" [%lu] offset = %Ld, size = %Ld\n",
					i, fileVecs[i].offset, fileVecs[i].length);
			}
#endif
		} else
			break;
	}

	*_numBytes = totalSize;
	return B_OK;
}

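// A minimal sketch of what a pages_io() call looks like (hypothetical
// buffer; the public entry points are file_cache_read_pages() and
// file_cache_write_pages() further down, which forward to it directly):
//
//	char buffer[8192];
//	iovec vec = { buffer, sizeof(buffer) };
//	size_t numBytes = sizeof(buffer);
//	status_t status = pages_io(ref, 0, &vec, 1, &numBytes, false);
//		// on return, numBytes holds how much was actually transferred
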
/*!	Reads the requested amount of data into the cache, and allocates
	pages needed to fulfill that request. This function is called by cache_io().
	It can only handle a certain number of bytes, and the caller must make
	sure that it matches that criterion.
	The cache_ref lock must be held when calling this function; during
	operation it will unlock the cache, though.
*/
static status_t
read_into_cache(file_cache_ref *ref, off_t offset, size_t numBytes,
	int32 pageOffset, addr_t buffer, size_t bufferSize,
	size_t lastReservedPages, size_t reservePages)
{
	TRACE(("read_into_cache(offset = %Ld, size = %lu, pageOffset = %ld, buffer "
		"= %#lx, bufferSize = %lu\n", offset, numBytes, pageOffset, buffer,
		bufferSize));

	vm_cache *cache = ref->cache;

	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	iovec vecs[MAX_IO_VECS];
	int32 vecCount = 0;

	vm_page *pages[MAX_IO_VECS];
	ConditionVariable<vm_page> busyConditions[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(
			PAGE_STATE_FREE, true);
		if (page == NULL)
			panic("no more pages!");

		busyConditions[pageIndex - 1].Publish(page, "page");

		vm_cache_insert_page(cache, page, offset + pos);

		addr_t virtualAddress;
		if (vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE,
				&virtualAddress, PHYSICAL_PAGE_CAN_WAIT) < B_OK)
			panic("could not get physical page");

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE);
			// TODO: check if the array is large enough (currently panics)!
	}

	mutex_unlock(&cache->lock);
	vm_page_unreserve_pages(lastReservedPages);

	// read file into reserved pages
	status_t status = pages_io(ref, offset, vecs, vecCount, &numBytes, false);
	if (status < B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		for (int32 i = 0; i < vecCount; i++) {
			addr_t base = (addr_t)vecs[i].iov_base;
			size_t size = vecs[i].iov_len;

			for (size_t pos = 0; pos < size;
					pos += B_PAGE_SIZE, base += B_PAGE_SIZE) {
				vm_put_physical_page(base);
			}
		}

		mutex_lock(&cache->lock);

		for (int32 i = 0; i < pageIndex; i++) {
			busyConditions[i].Unpublish();
			vm_cache_remove_page(cache, pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages and unmap them again

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t size = vecs[i].iov_len;

		// copy to user buffer if necessary
		if (bufferSize != 0) {
			size_t bytes = min_c(bufferSize, size - pageOffset);

			user_memcpy((void *)buffer, (void *)(base + pageOffset), bytes);
			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}

		for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE,
				base += B_PAGE_SIZE) {
			vm_put_physical_page(base);
		}
	}

	vm_page_reserve_pages(reservePages);
	mutex_lock(&cache->lock);

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		pages[i]->state = PAGE_STATE_ACTIVE;

		busyConditions[i].Unpublish();
	}

	return B_OK;
}

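// Both read_into_cache() and write_to_cache() follow the same protocol:
// freshly allocated pages are published as busy via a ConditionVariable,
// the cache lock is dropped for the actual disk transfer, and the pages
// are only unpublished (and given their final state) once the transfer is
// done. Whoever finds such a busy page in the meantime (see the busy check
// in cache_io()) is expected to wait on that condition variable rather
// than touch the page.
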
/*!	Like read_into_cache() but writes data into the cache.
	To preserve data consistency, it might also read pages into the cache,
	though, if only a partial page gets written.
	The same restrictions apply.
*/
static status_t
write_to_cache(file_cache_ref *ref, off_t offset, size_t numBytes,
	int32 pageOffset, addr_t buffer, size_t bufferSize,
	size_t lastReservedPages, size_t reservePages)
{
	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	iovec vecs[MAX_IO_VECS];
	int32 vecCount = 0;
	vm_page *pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;
	ConditionVariable<vm_page> busyConditions[MAX_IO_VECS];

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		// TODO: if space is becoming tight, and this cache is already grown
		// big - shouldn't we better steal the pages directly in that case?
		// (a working set like approach for the file cache)
		// TODO: the pages we allocate here should have been reserved upfront
		// in cache_io()
		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(
			PAGE_STATE_FREE, true);
		busyConditions[pageIndex - 1].Publish(page, "page");

		vm_cache_insert_page(ref->cache, page, offset + pos);

		addr_t virtualAddress;
		vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE,
			&virtualAddress, PHYSICAL_PAGE_CAN_WAIT);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS, virtualAddress, B_PAGE_SIZE);
			// ToDo: check if the array is large enough!
	}

	mutex_unlock(&ref->cache->lock);
	vm_page_unreserve_pages(lastReservedPages);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE };
		size_t bytesRead = B_PAGE_SIZE;

		status = pages_io(ref, offset, &readVec, 1, &bytesRead, false);
		// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. pages_io() failed: %s!\n", strerror(status));
	}

	addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1);
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		addr_t last = (addr_t)vecs[vecCount - 1].iov_base
			+ vecs[vecCount - 1].iov_len - B_PAGE_SIZE;

		if (offset + pageOffset + bufferSize == ref->cache->virtual_size) {
			// the space in the page after this write action needs to be cleaned
			memset((void *)(last + lastPageOffset), 0,
				B_PAGE_SIZE - lastPageOffset);
		} else if (vecCount > 1) {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			iovec readVec = { (void *)last, B_PAGE_SIZE };
			size_t bytesRead = B_PAGE_SIZE;

			status = pages_io(ref, offset + numBytes - B_PAGE_SIZE, &readVec, 1,
				&bytesRead, false);
			// ToDo: handle errors for real!
			if (status < B_OK)
				panic("pages_io() failed: %s!\n", strerror(status));
		}
	}

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t bytes = min_c(bufferSize, size_t(vecs[i].iov_len - pageOffset));

		// copy data from user buffer
		user_memcpy((void *)(base + pageOffset), (void *)buffer, bytes);

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = pages_io(ref, offset, vecs, vecCount, &numBytes,
			true);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	if (status == B_OK)
		vm_page_reserve_pages(reservePages);

	mutex_lock(&ref->cache->lock);

	// unmap the pages again

	for (int32 i = 0; i < vecCount; i++) {
		addr_t base = (addr_t)vecs[i].iov_base;
		size_t size = vecs[i].iov_len;
		for (size_t pos = 0; pos < size; pos += B_PAGE_SIZE,
				base += B_PAGE_SIZE) {
			vm_put_physical_page(base);
		}
	}

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		busyConditions[i].Unpublish();

		if (writeThrough)
			pages[i]->state = PAGE_STATE_ACTIVE;
		else
			vm_page_set_state(pages[i], PAGE_STATE_MODIFIED);
	}

	return status;
}

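// satisfy_cache_io() flushes the part of the request collected so far
// (everything between lastBuffer/lastOffset and the current position) to
// read_into_cache() or write_to_cache() and, on success, advances the
// "last*" bookkeeping. cache_io() calls it whenever it might have to wait
// on a busy page, and whenever the collected chunk reaches kMaxChunkSize.
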
static status_t
satisfy_cache_io(file_cache_ref *ref, off_t offset, addr_t buffer,
	int32 &pageOffset, size_t bytesLeft, size_t &reservePages,
	off_t &lastOffset, addr_t &lastBuffer, int32 &lastPageOffset,
	size_t &lastLeft, size_t &lastReservedPages, bool doWrite)
{
	if (lastBuffer == buffer)
		return B_OK;

	size_t requestSize = buffer - lastBuffer;
	reservePages = min_c(MAX_IO_VECS,
		(lastLeft - requestSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT);

	status_t status;
	if (doWrite) {
		status = write_to_cache(ref, lastOffset, requestSize, lastPageOffset,
			lastBuffer, requestSize, lastReservedPages, reservePages);
	} else {
		status = read_into_cache(ref, lastOffset, requestSize, lastPageOffset,
			lastBuffer, requestSize, lastReservedPages, reservePages);
	}
	if (status == B_OK) {
		lastReservedPages = reservePages;
		lastBuffer = buffer;
		lastLeft = bytesLeft;
		lastOffset = offset;
		lastPageOffset = 0;
		pageOffset = 0;
	}
	return status;
}

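// cache_io() is the common back end of file_cache_read() and
// file_cache_write(): it walks the request one page at a time, copies
// pages that are already cached directly, and collects runs of missing
// pages which it then hands to read_into_cache()/write_to_cache() in
// chunks of at most kMaxChunkSize (MAX_IO_VECS pages). For example, with
// a page-aligned offset and nothing cached, a 300 kB read is carried out
// as two 128 kB transfers followed by a 44 kB one.
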
static status_t
cache_io(void *_cacheRef, off_t offset, addr_t buffer, size_t *_size,
	bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	vm_cache *cache = ref->cache;
	off_t fileSize = cache->virtual_size;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void *)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if (offset + pageOffset + size > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}
	if (size == 0)
		return B_OK;

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;
	size_t lastReservedPages = min_c(MAX_IO_VECS,
		(bytesLeft + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	size_t reservePages = 0;

	vm_page_reserve_pages(lastReservedPages);
	MutexLocker locker(cache->lock);

	while (bytesLeft > 0) {
		// check if this page is already in memory
		vm_page *page = vm_cache_lookup_page(cache, offset);
		if (page != NULL) {
			// The page may be busy - since we need to unlock the cache sometime
			// in the near future, we need to satisfy the request of the pages
			// we didn't get yet (to make sure no one else interferes in the
			// mean time).
			status_t status = satisfy_cache_io(ref, offset, buffer, pageOffset,
				bytesLeft, reservePages, lastOffset, lastBuffer, lastPageOffset,
				lastLeft, lastReservedPages, doWrite);
			if (status != B_OK)
				return status;

			if (page->state == PAGE_STATE_BUSY) {
				ConditionVariableEntry<vm_page> entry;
				entry.Add(page);
				locker.Unlock();
				entry.Wait();
				locker.Lock();
				continue;
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);
		addr_t virtualAddress;

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset "
			"= %lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE,
				&virtualAddress, PHYSICAL_PAGE_CAN_WAIT);

			// Since we don't actually map pages as part of an area, we have
			// to manually maintain its usage_count
			page->usage_count = 2;

			// and copy the contents of the page already in memory
			if (doWrite) {
				user_memcpy((void *)(virtualAddress + pageOffset),
					(void *)buffer, bytesInPage);

				// make sure the page is in the modified list
				if (page->state != PAGE_STATE_MODIFIED)
					vm_page_set_state(page, PAGE_STATE_MODIFIED);
			} else {
				user_memcpy((void *)buffer,
					(void *)(virtualAddress + pageOffset), bytesInPage);
			}

			vm_put_physical_page(virtualAddress);

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				locker.Unlock();
				vm_page_unreserve_pages(lastReservedPages);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
		offset += B_PAGE_SIZE;

		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
			status_t status = satisfy_cache_io(ref, offset, buffer, pageOffset,
				bytesLeft, reservePages, lastOffset, lastBuffer, lastPageOffset,
				lastLeft, lastReservedPages, doWrite);
			if (status != B_OK)
				return status;
		}
	}

	// fill the last remaining bytes of the request (either write or read)

	status_t status;
	if (doWrite) {
		status = write_to_cache(ref, lastOffset, lastLeft, lastPageOffset,
			lastBuffer, lastLeft, lastReservedPages, 0);
	} else {
		status = read_into_cache(ref, lastOffset, lastLeft, lastPageOffset,
			lastBuffer, lastLeft, lastReservedPages, 0);
	}

	return status;
}

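// file_cache_control() implements the CACHE_SYSCALLS generic syscall that
// file_cache_init() registers below. For CACHE_SET_MODULE the buffer is
// expected to hold a userland string naming a cache module that passes the
// CACHE_MODULES_NAME prefix check - presumably something like
// "file_cache/launch_speedup/v1", the module that
// file_cache_init_post_boot_device() loads by default. A NULL buffer
// merely unloads the current module.
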
static status_t
file_cache_control(const char *subsystem, uint32 function, void *buffer,
	size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			return B_OK;

		case CACHE_SET_MODULE:
		{
			cache_module_info *module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				return B_OK;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char *)buffer,
						B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			status_t status = get_module(name, (module_info **)&module);
			if (status == B_OK)
				sCacheModule = module;

			return status;
		}
	}

	return B_BAD_HANDLER;
}


// #pragma mark - private kernel API


extern "C" void
cache_prefetch_vnode(struct vnode *vnode, off_t offset, size_t size)
{
#if 0
	vm_cache *cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;

	file_cache_ref *ref = (struct file_cache_ref *)
		((vnode_store *)cache->store)->file_cache_ref;
	off_t fileSize = cache->virtual_size;

	if (size > fileSize)
		size = fileSize;

	// we never fetch more than 4 MB at once
	if (size > 4 * 1024 * 1024)
		size = 4 * 1024 * 1024;

	size_t bytesLeft = size, lastLeft = size;
	off_t lastOffset = offset;
	size_t lastSize = 0;

	mutex_lock(&cache->lock);

	for (; bytesLeft > 0; offset += B_PAGE_SIZE) {
		// check if this page is already in memory
		addr_t virtualAddress;
	restart:
		vm_page *page = vm_cache_lookup_page(cache, offset);
		if (page != NULL) {
			if (page->state == PAGE_STATE_BUSY) {
				// if busy retry again later
				ConditionVariableEntry<vm_page> entry;
				entry.Add(page);
				mutex_unlock(&cache->lock);
				entry.Wait();
				mutex_lock(&cache->lock);

				goto restart;
			}

			// it is, so let's satisfy in the first part of the request
			if (lastOffset < offset) {
				size_t requestSize = offset - lastOffset;
				read_into_cache(ref, lastOffset, requestSize, NULL, 0);
			}

			if (bytesLeft <= B_PAGE_SIZE) {
				// we've read the last page, so we're done!
				goto out;
			}

			// prepare a potential gap request
			lastOffset = offset + B_PAGE_SIZE;
			lastLeft = bytesLeft - B_PAGE_SIZE;
		}

		if (bytesLeft <= B_PAGE_SIZE)
			break;

		bytesLeft -= B_PAGE_SIZE;
	}

	// read in the last part
	read_into_cache(ref, lastOffset, lastLeft, NULL, 0);

out:
	mutex_unlock(&cache->lock);
	vm_cache_release_ref(cache);
#endif
}


extern "C" void
cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
{
	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	struct vnode *vnode;
	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(struct vnode *vnode, int32 fdType, vm_cache *cache,
	dev_t mountID, ino_t parentID, ino_t vnodeID, const char *name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL) {
		file_cache_ref *ref = (file_cache_ref *)
			((vnode_store *)cache->store)->file_cache_ref;
		if (ref != NULL)
			size = cache->virtual_size;
	}

	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name,
		size);
}


extern "C" void
cache_node_closed(struct vnode *vnode, int32 fdType, vm_cache *cache,
	dev_t mountID, ino_t vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char * const *args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1",
			(module_info **)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %Ld\n", system_time());
	}
	return B_OK;
}


extern "C" status_t
file_cache_init(void)
{
	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}


// #pragma mark - public FS API

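// The functions below make up the interface used by file systems. A rough
// sketch of the expected life cycle, with a hypothetical "node" structure
// (names and error handling vary per file system):
//
//	// when the node is published, fd referring to the underlying device:
//	node->cache = file_cache_create(mountID, vnodeID, node->size, fd);
//
//	// in the read/write hooks:
//	status = file_cache_read(node->cache, pos, buffer, &length);
//
//	// when the file size changes:
//	file_cache_set_size(node->cache, newSize);
//
//	// when the node is torn down:
//	file_cache_delete(node->cache);
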
extern "C" void *
file_cache_create(dev_t mountID, ino_t vnodeID, off_t size, int fd)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld, "
		"fd = %d)\n", mountID, vnodeID, size, fd));

	file_cache_ref *ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	// TODO: delay vm_cache creation until data is
	// requested/written for the first time? Listing lots of
	// files in Tracker (and elsewhere) could be slowed down.
	// Since the file_cache_ref itself doesn't have a lock,
	// we would need to "rent" one during construction, possibly
	// the vnode lock, maybe a dedicated one.
	// As there shouldn't be too much contention, we could also
	// use atomic_test_and_set(), and free the resources again
	// when that fails...

	// Get the vnode of the underlying device
	if (vfs_get_vnode_from_fd(fd, true, &ref->device) != B_OK)
		goto err1;

	// We also need the cookie of the underlying device to properly access it
	if (vfs_get_cookie_from_fd(fd, &ref->cookie) != B_OK)
		goto err2;

	// Get the vnode for the object
	// (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err2;

	// Gets (usually creates) the cache for the node
	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err2;

	ref->cache->virtual_size = size;
	((vnode_store *)ref->cache->store)->file_cache_ref = ref;
	return ref;

err2:
	vfs_put_vnode(ref->device);
err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	vm_cache_release_ref(ref->cache);
	vfs_put_vnode(ref->device);
	delete ref;
}


extern "C" status_t
file_cache_set_size(void *_cacheRef, off_t newSize)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, newSize));

	if (ref == NULL)
		return B_OK;

	mutex_lock(&ref->cache->lock);

	off_t offset = ref->cache->virtual_size;
	off_t size = newSize;
	if (offset > newSize) {
		size = offset - newSize;
		offset = newSize;
	} else
		size = newSize - offset;

	status_t status = vm_cache_resize(ref->cache, newSize);
	mutex_unlock(&ref->cache->lock);

	file_cache_invalidate_file_map(_cacheRef, offset, size);

	return status;
}


extern "C" status_t
file_cache_sync(void *_cacheRef)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;
	if (ref == NULL)
		return B_BAD_VALUE;

	return vm_cache_write_modified(ref->cache, true);
}


extern "C" status_t
file_cache_read_pages(void *_cacheRef, off_t offset, const iovec *vecs,
	size_t count, size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	return pages_io(ref, offset, vecs, count, _numBytes, false);
}


extern "C" status_t
file_cache_write_pages(void *_cacheRef, off_t offset, const iovec *vecs,
	size_t count, size_t *_numBytes)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	status_t status = pages_io(ref, offset, vecs, count, _numBytes, true);

	TRACE(("file_cache_write_pages(ref = %p, offset = %Ld, vecs = %p, "
		"count = %lu, bytes = %lu) = %ld\n", ref, offset, vecs, count,
		*_numBytes, status));

	return status;
}


extern "C" status_t
file_cache_read(void *_cacheRef, off_t offset, void *bufferBase, size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
		ref, offset, bufferBase, *_size));

	return cache_io(ref, offset, (addr_t)bufferBase, _size, false);
}


extern "C" status_t
file_cache_write(void *_cacheRef, off_t offset, const void *buffer,
	size_t *_size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	status_t status = cache_io(ref, offset, (addr_t)const_cast<void *>(buffer),
		_size, true);

	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu)"
		" = %ld\n", ref, offset, buffer, *_size, status));

	return status;
}


extern "C" status_t
file_cache_invalidate_file_map(void *_cacheRef, off_t offset, off_t size)
{
	file_cache_ref *ref = (file_cache_ref *)_cacheRef;

	// ToDo: honour offset/size parameters

	TRACE(("file_cache_invalidate_file_map(offset = %Ld, size = %Ld)\n", offset,
		size));

	MutexLocker _(ref->cache->lock);
	ref->map.Free();
	return B_OK;
}