1 /* 2 * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <OS.h> 8 #include <fs_interface.h> 9 10 #include <stdarg.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <sys/uio.h> 15 16 #define TRACE_FILE_CACHE 17 #define TRACE(x) printf x 18 #define dprintf printf 19 20 #ifndef ASSERT 21 # define ASSERT(x) ; 22 #endif 23 24 // maximum number of iovecs per request 25 #define MAX_IO_VECS 64 // 256 kB 26 #define MAX_FILE_IO_VECS 4 27 #define MAX_TEMP_IO_VECS 8 28 29 #define CACHED_FILE_EXTENTS 2 30 // must be smaller than MAX_FILE_IO_VECS 31 // ToDo: find out how much of these are typically used 32 33 struct vm_cache_ref; 34 35 struct file_extent { 36 off_t offset; 37 file_io_vec disk; 38 }; 39 40 struct file_map { 41 file_map(); 42 ~file_map(); 43 44 file_extent *operator[](uint32 index); 45 file_extent *ExtentAt(uint32 index); 46 status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset); 47 void Free(); 48 49 union { 50 file_extent direct[CACHED_FILE_EXTENTS]; 51 file_extent *array; 52 }; 53 size_t count; 54 }; 55 56 struct file_cache_ref { 57 vm_cache_ref *cache; 58 void *vnode; 59 void *device; 60 void *cookie; 61 file_map map; 62 }; 63 64 65 const uint32 kMaxFileVecs = 1024; 66 67 file_io_vec gFileVecs[kMaxFileVecs]; 68 size_t gFileVecCount; 69 off_t gFileSize; 70 71 72 file_map::file_map() 73 { 74 array = NULL; 75 count = 0; 76 } 77 78 79 file_map::~file_map() 80 { 81 Free(); 82 } 83 84 85 file_extent * 86 file_map::operator[](uint32 index) 87 { 88 return ExtentAt(index); 89 } 90 91 92 file_extent * 93 file_map::ExtentAt(uint32 index) 94 { 95 if (index >= count) 96 return NULL; 97 98 if (count > CACHED_FILE_EXTENTS) 99 return &array[index]; 100 101 return &direct[index]; 102 } 103 104 105 status_t 106 file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset) 107 { 108 TRACE(("file_map::Add(vecCount = %ld)\n", vecCount)); 109 110 off_t offset = 0; 111 112 if (vecCount <= CACHED_FILE_EXTENTS && count == 0) { 113 // just use the reserved area in the file_cache_ref structure 114 } else { 115 // TODO: once we can invalidate only parts of the file map, 116 // we might need to copy the previously cached file extends 117 // from the direct range 118 file_extent *newMap = (file_extent *)realloc(array, 119 (count + vecCount) * sizeof(file_extent)); 120 if (newMap == NULL) 121 return B_NO_MEMORY; 122 123 array = newMap; 124 125 if (count != 0) { 126 file_extent *extent = ExtentAt(count - 1); 127 offset = extent->offset + extent->disk.length; 128 } 129 } 130 131 int32 start = count; 132 count += vecCount; 133 134 for (uint32 i = 0; i < vecCount; i++) { 135 file_extent *extent = ExtentAt(start + i); 136 137 extent->offset = offset; 138 extent->disk = vecs[i]; 139 140 offset += extent->disk.length; 141 } 142 143 #ifdef TRACE_FILE_CACHE 144 for (uint32 i = 0; i < count; i++) { 145 file_extent *extent = ExtentAt(i); 146 dprintf(" [%ld] extend offset %lld, disk offset %lld, length %lld\n", 147 i, extent->offset, extent->disk.offset, extent->disk.length); 148 } 149 #endif 150 151 lastOffset = offset; 152 return B_OK; 153 } 154 155 156 void 157 file_map::Free() 158 { 159 if (count > CACHED_FILE_EXTENTS) 160 free(array); 161 162 array = NULL; 163 count = 0; 164 } 165 166 167 // #pragma mark - 168 169 170 void 171 set_vecs(iovec *vecs, size_t *_count, ...) 172 { 173 uint32 base = 0; 174 size_t count = 0; 175 176 va_list args; 177 va_start(args, _count); 178 179 while (count < MAX_IO_VECS) { 180 int32 length = va_arg(args, int32); 181 if (length < 0) 182 break; 183 184 vecs[count].iov_base = (void *)base; 185 vecs[count].iov_len = length; 186 187 base += length; 188 count++; 189 } 190 191 va_end(args); 192 *_count = count; 193 } 194 195 196 void 197 set_file_map(int32 base, int32 length, ...) 198 { 199 gFileVecs[0].offset = base; 200 gFileVecs[0].length = length; 201 202 gFileSize = length; 203 gFileVecCount = 1; 204 205 va_list args; 206 va_start(args, length); 207 208 while (gFileVecCount < kMaxFileVecs) { 209 off_t offset = va_arg(args, int32); 210 if (offset < 0) 211 break; 212 213 length = va_arg(args, int32); 214 215 gFileVecs[gFileVecCount].offset = offset; 216 gFileVecs[gFileVecCount].length = length; 217 218 gFileSize += length; 219 gFileVecCount++; 220 } 221 222 va_end(args); 223 } 224 225 226 status_t 227 find_map_base(off_t offset, off_t &diskOffset, off_t &diskLength, 228 off_t &fileOffset) 229 { 230 fileOffset = 0; 231 232 for (uint32 i = 0; i < gFileVecCount; i++) { 233 if (offset < gFileVecs[i].length) { 234 diskOffset = gFileVecs[i].offset; 235 diskLength = gFileVecs[i].length; 236 return B_OK; 237 } 238 239 fileOffset += gFileVecs[i].length; 240 offset -= gFileVecs[i].length; 241 } 242 243 return B_ENTRY_NOT_FOUND; 244 } 245 246 247 // #pragma mark - VFS functions 248 249 250 static status_t 251 vfs_get_file_map(void *vnode, off_t offset, size_t size, file_io_vec *vecs, 252 size_t *_count) 253 { 254 off_t diskOffset, diskLength, fileOffset; 255 size_t max = *_count; 256 uint32 index = 0; 257 258 printf("vfs_get_file_map(offset = %lld, size = %lu, count = %lu)\n", 259 offset, size, *_count); 260 261 while (true) { 262 status_t status = find_map_base(offset, diskOffset, diskLength, fileOffset); 263 //status_t status = inode->FindBlockRun(offset, run, fileOffset); 264 if (status != B_OK) 265 return status; 266 267 vecs[index].offset = diskOffset + offset - fileOffset; 268 vecs[index].length = diskLength - offset + fileOffset; 269 offset += vecs[index].length; 270 271 // are we already done? 272 if (size <= vecs[index].length 273 || offset >= gFileSize) { 274 if (offset > gFileSize) { 275 // make sure the extent ends with the last official file 276 // block (without taking any preallocations into account) 277 vecs[index].length = gFileSize - fileOffset; 278 } 279 *_count = index + 1; 280 return B_OK; 281 } 282 283 size -= vecs[index].length; 284 index++; 285 286 if (index >= max) { 287 // we're out of file_io_vecs; let's bail out 288 *_count = index; 289 return B_BUFFER_OVERFLOW; 290 } 291 } 292 } 293 294 295 static status_t 296 vfs_read_pages(void *device, void *cookie, off_t offset, 297 const iovec *vecs, size_t count, size_t *bytes, bool kernel) 298 { 299 printf("read offset %lld, length %lu\n", offset, *bytes); 300 for (uint32 i = 0; i < count; i++) { 301 printf(" [%lu] base %lu, length %lu\n", 302 i, (uint32)vecs[i].iov_base, vecs[i].iov_len); 303 } 304 return B_OK; 305 } 306 307 308 static status_t 309 vfs_write_pages(void *device, void *cookie, off_t offset, 310 const iovec *vecs, size_t count, size_t *bytes, bool kernel) 311 { 312 printf("write offset %lld, length %lu\n", offset, *bytes); 313 for (uint32 i = 0; i < count; i++) { 314 printf(" [%lu] base %lu, length %lu\n", 315 i, (uint32)vecs[i].iov_base, vecs[i].iov_len); 316 } 317 return B_OK; 318 } 319 320 321 // #pragma mark - file_cache.cpp copies 322 323 324 static file_extent * 325 find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index) 326 { 327 // TODO: do binary search 328 329 for (uint32 index = 0; index < ref->map.count; index++) { 330 file_extent *extent = ref->map[index]; 331 332 if (extent->offset <= offset 333 && extent->offset + extent->disk.length > offset) { 334 if (_index) 335 *_index = index; 336 return extent; 337 } 338 } 339 340 return NULL; 341 } 342 343 344 static status_t 345 get_file_map(file_cache_ref *ref, off_t offset, size_t size, 346 file_io_vec *vecs, size_t *_count) 347 { 348 size_t maxVecs = *_count; 349 status_t status = B_OK; 350 351 if (ref->map.count == 0) { 352 // we don't yet have the map of this file, so let's grab it 353 // (ordered by offset, so that we can do a binary search on them) 354 355 //mutex_lock(&ref->cache->lock); 356 357 // the file map could have been requested in the mean time 358 if (ref->map.count == 0) { 359 size_t vecCount = maxVecs; 360 off_t mapOffset = 0; 361 362 while (true) { 363 status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount); 364 if (status < B_OK && status != B_BUFFER_OVERFLOW) { 365 //mutex_unlock(&ref->cache->lock); 366 return status; 367 } 368 369 status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset); 370 if (addStatus != B_OK) { 371 // only clobber the status in case of failure 372 status = addStatus; 373 } 374 375 if (status != B_BUFFER_OVERFLOW) 376 break; 377 378 // when we are here, the map has been stored in the array, and 379 // the array size was still too small to cover the whole file 380 vecCount = maxVecs; 381 } 382 } 383 384 //mutex_unlock(&ref->cache->lock); 385 } 386 387 if (status != B_OK) { 388 // We must invalidate the (part of the) map we already 389 // have, as we cannot know if it's complete or not 390 ref->map.Free(); 391 return status; 392 } 393 394 // We now have cached the map of this file, we now need to 395 // translate it for the requested access. 396 397 uint32 index; 398 file_extent *fileExtent = find_file_extent(ref, offset, &index); 399 if (fileExtent == NULL) { 400 // access outside file bounds? But that's not our problem 401 *_count = 0; 402 return B_OK; 403 } 404 405 offset -= fileExtent->offset; 406 vecs[0].offset = fileExtent->disk.offset + offset; 407 vecs[0].length = fileExtent->disk.length - offset; 408 409 if (vecs[0].length >= size || index >= ref->map.count - 1) { 410 *_count = 1; 411 return B_OK; 412 } 413 414 // copy the rest of the vecs 415 416 size -= vecs[0].length; 417 418 for (index = 1; index < ref->map.count;) { 419 fileExtent++; 420 421 vecs[index] = fileExtent->disk; 422 index++; 423 424 if (size <= fileExtent->disk.length) 425 break; 426 427 if (index >= maxVecs) { 428 *_count = index; 429 return B_BUFFER_OVERFLOW; 430 } 431 432 size -= fileExtent->disk.length; 433 } 434 435 *_count = index; 436 return B_OK; 437 } 438 439 440 /*! 441 Does the dirty work of translating the request into actual disk offsets 442 and reads to or writes from the supplied iovecs as specified by \a doWrite. 443 */ 444 static status_t 445 pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count, 446 size_t *_numBytes, bool doWrite) 447 { 448 TRACE(("pages_io: ref = %p, offset = %lld, size = %lu, vecCount = %lu, %s\n", ref, offset, 449 *_numBytes, count, doWrite ? "write" : "read")); 450 451 // translate the iovecs into direct device accesses 452 file_io_vec fileVecs[MAX_FILE_IO_VECS]; 453 size_t fileVecCount = MAX_FILE_IO_VECS; 454 size_t numBytes = *_numBytes; 455 456 status_t status = get_file_map(ref, offset, numBytes, fileVecs, 457 &fileVecCount); 458 if (status < B_OK && status != B_BUFFER_OVERFLOW) { 459 TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", offset, 460 numBytes, strerror(status))); 461 return status; 462 } 463 464 bool bufferOverflow = status == B_BUFFER_OVERFLOW; 465 466 #ifdef TRACE_FILE_CACHE 467 dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, offset, numBytes, 468 bufferOverflow ? " (array too small)" : ""); 469 for (size_t i = 0; i < fileVecCount; i++) { 470 dprintf(" [%lu] offset = %lld, size = %lld\n", 471 i, fileVecs[i].offset, fileVecs[i].length); 472 } 473 #endif 474 475 if (fileVecCount == 0) { 476 // There are no file vecs at this offset, so we're obviously trying 477 // to access the file outside of its bounds 478 TRACE(("pages_io: access outside of vnode %p at offset %lld\n", 479 ref->vnode, offset)); 480 return B_BAD_VALUE; 481 } 482 483 uint32 fileVecIndex; 484 size_t size; 485 486 if (!doWrite) { 487 // now directly read the data from the device 488 // the first file_io_vec can be read directly 489 490 size = fileVecs[0].length; 491 if (size > numBytes) 492 size = numBytes; 493 494 status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs, 495 count, &size, false); 496 if (status < B_OK) 497 return status; 498 499 // TODO: this is a work-around for buggy device drivers! 500 // When our own drivers honour the length, we can: 501 // a) also use this direct I/O for writes (otherwise, it would 502 // overwrite precious data) 503 // b) panic if the term below is true (at least for writes) 504 if (size > fileVecs[0].length) { 505 //dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device); 506 size = fileVecs[0].length; 507 } 508 509 ASSERT(size <= fileVecs[0].length); 510 511 // If the file portion was contiguous, we're already done now 512 if (size == numBytes) 513 return B_OK; 514 515 // if we reached the end of the file, we can return as well 516 if (size != fileVecs[0].length) { 517 *_numBytes = size; 518 return B_OK; 519 } 520 521 fileVecIndex = 1; 522 } else { 523 fileVecIndex = 0; 524 size = 0; 525 } 526 527 // Too bad, let's process the rest of the file_io_vecs 528 529 size_t totalSize = size; 530 531 // first, find out where we have to continue in our iovecs 532 uint32 i = 0; 533 for (; i < count; i++) { 534 if (size < vecs[i].iov_len) 535 break; 536 537 size -= vecs[i].iov_len; 538 } 539 540 size_t vecOffset = size; 541 size_t bytesLeft = numBytes - size; 542 543 while (true) { 544 for (; fileVecIndex < fileVecCount; fileVecIndex++) { 545 file_io_vec &fileVec = fileVecs[fileVecIndex]; 546 off_t fileOffset = fileVec.offset; 547 off_t fileLeft = min_c(fileVec.length, bytesLeft); 548 549 TRACE(("FILE VEC [%lu] length %lld\n", fileVecIndex, fileLeft)); 550 551 // process the complete fileVec 552 while (fileLeft > 0) { 553 iovec tempVecs[MAX_TEMP_IO_VECS]; 554 uint32 tempCount = 0; 555 556 // size tracks how much of what is left of the current fileVec 557 // (fileLeft) has been assigned to tempVecs 558 size = 0; 559 560 // assign what is left of the current fileVec to the tempVecs 561 for (size = 0; size < fileLeft && i < count 562 && tempCount < MAX_TEMP_IO_VECS;) { 563 // try to satisfy one iovec per iteration (or as much as 564 // possible) 565 566 // bytes left of the current iovec 567 size_t vecLeft = vecs[i].iov_len - vecOffset; 568 if (vecLeft == 0) { 569 vecOffset = 0; 570 i++; 571 continue; 572 } 573 574 TRACE(("fill vec %ld, offset = %lu, size = %lu\n", 575 i, vecOffset, size)); 576 577 // actually available bytes 578 size_t tempVecSize = min_c(vecLeft, fileLeft - size); 579 580 tempVecs[tempCount].iov_base 581 = (void *)((addr_t)vecs[i].iov_base + vecOffset); 582 tempVecs[tempCount].iov_len = tempVecSize; 583 tempCount++; 584 585 size += tempVecSize; 586 vecOffset += tempVecSize; 587 } 588 589 size_t bytes = size; 590 if (doWrite) { 591 status = vfs_write_pages(ref->device, ref->cookie, 592 fileOffset, tempVecs, tempCount, &bytes, false); 593 } else { 594 status = vfs_read_pages(ref->device, ref->cookie, 595 fileOffset, tempVecs, tempCount, &bytes, false); 596 } 597 if (status < B_OK) 598 return status; 599 600 totalSize += bytes; 601 bytesLeft -= size; 602 fileOffset += size; 603 fileLeft -= size; 604 //dprintf("-> file left = %Lu\n", fileLeft); 605 606 if (size != bytes || i >= count) { 607 // there are no more bytes or iovecs, let's bail out 608 *_numBytes = totalSize; 609 return B_OK; 610 } 611 } 612 } 613 614 if (bufferOverflow) { 615 status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs, 616 &fileVecCount); 617 if (status < B_OK && status != B_BUFFER_OVERFLOW) { 618 TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", 619 offset, numBytes, strerror(status))); 620 return status; 621 } 622 623 bufferOverflow = status == B_BUFFER_OVERFLOW; 624 fileVecIndex = 0; 625 626 #ifdef TRACE_FILE_CACHE 627 dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, 628 offset + totalSize, numBytes, 629 bufferOverflow ? " (array too small)" : ""); 630 for (size_t i = 0; i < fileVecCount; i++) { 631 dprintf(" [%lu] offset = %lld, size = %lld\n", 632 i, fileVecs[i].offset, fileVecs[i].length); 633 } 634 #endif 635 } else 636 break; 637 } 638 639 *_numBytes = totalSize; 640 return B_OK; 641 } 642 643 644 // #pragma mark - 645 646 647 int 648 main(int argc, char **argv) 649 { 650 file_cache_ref ref; 651 iovec vecs[MAX_IO_VECS]; 652 size_t count = 1; 653 size_t numBytes = 10000; 654 off_t offset = 4999; 655 656 set_vecs(vecs, &count, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 657 16, 4096, 8192, 16384, 4096, 4096, -1); 658 set_file_map(0, 2000, 5000, 3000, 10000, 800, 11000, 20, 12000, 30, 659 13000, 70, 14000, 100, 15000, 900, 20000, 30000, -1); 660 661 status_t status = pages_io(&ref, offset, vecs, count, &numBytes, false); 662 if (status < B_OK) 663 fprintf(stderr, "pages_io() returned: %s\n", strerror(status)); 664 665 return 0; 666 } 667 668