1 /*
2 * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7 #include <OS.h>
8 #include <fs_interface.h>
9
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/uio.h>
15
// Tracing is unconditionally enabled in this test; TRACE() takes a
// parenthesized printf() argument list, and dprintf() calls are simply
// mapped to printf().
#define TRACE_FILE_CACHE
#define TRACE(x) printf x
#define dprintf printf

// Fall back to a no-op if none of the headers above provided ASSERT().
#ifndef ASSERT
#	define ASSERT(x) ;
#endif
23
// maximum number of iovecs per request
#define MAX_IO_VECS			64	// 256 kB
// capacity of the on-stack file_io_vec array used by pages_io()
#define MAX_FILE_IO_VECS	4
// capacity of the on-stack iovec array pages_io() builds per device access
#define MAX_TEMP_IO_VECS	8

// number of extents a file_map stores inline before using the heap
#define CACHED_FILE_EXTENTS	2
	// must be smaller than MAX_FILE_IO_VECS
	// ToDo: find out how many of these are typically used
32
33 struct vm_cache_ref;
34
35 struct file_extent {
36 off_t offset;
37 file_io_vec disk;
38 };
39
40 struct file_map {
41 file_map();
42 ~file_map();
43
44 file_extent *operator[](uint32 index);
45 file_extent *ExtentAt(uint32 index);
46 status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset);
47 void Free();
48
49 union {
50 file_extent direct[CACHED_FILE_EXTENTS];
51 file_extent *array;
52 };
53 size_t count;
54 };
55
56 struct file_cache_ref {
57 vm_cache_ref *cache;
58 void *vnode;
59 void *device;
60 void *cookie;
61 file_map map;
62 };
63
64
65 const uint32 kMaxFileVecs = 1024;
66
67 file_io_vec gFileVecs[kMaxFileVecs];
68 size_t gFileVecCount;
69 off_t gFileSize;
70
71
file_map()72 file_map::file_map()
73 {
74 array = NULL;
75 count = 0;
76 }
77
78
~file_map()79 file_map::~file_map()
80 {
81 Free();
82 }
83
84
85 file_extent *
operator [](uint32 index)86 file_map::operator[](uint32 index)
87 {
88 return ExtentAt(index);
89 }
90
91
92 file_extent *
ExtentAt(uint32 index)93 file_map::ExtentAt(uint32 index)
94 {
95 if (index >= count)
96 return NULL;
97
98 if (count > CACHED_FILE_EXTENTS)
99 return &array[index];
100
101 return &direct[index];
102 }
103
104
105 status_t
Add(file_io_vec * vecs,size_t vecCount,off_t & lastOffset)106 file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset)
107 {
108 TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));
109
110 off_t offset = 0;
111
112 if (vecCount <= CACHED_FILE_EXTENTS && count == 0) {
113 // just use the reserved area in the file_cache_ref structure
114 } else {
115 // TODO: once we can invalidate only parts of the file map,
116 // we might need to copy the previously cached file extends
117 // from the direct range
118 file_extent *newMap = (file_extent *)realloc(array,
119 (count + vecCount) * sizeof(file_extent));
120 if (newMap == NULL)
121 return B_NO_MEMORY;
122
123 array = newMap;
124
125 if (count != 0) {
126 file_extent *extent = ExtentAt(count - 1);
127 offset = extent->offset + extent->disk.length;
128 }
129 }
130
131 int32 start = count;
132 count += vecCount;
133
134 for (uint32 i = 0; i < vecCount; i++) {
135 file_extent *extent = ExtentAt(start + i);
136
137 extent->offset = offset;
138 extent->disk = vecs[i];
139
140 offset += extent->disk.length;
141 }
142
143 #ifdef TRACE_FILE_CACHE
144 for (uint32 i = 0; i < count; i++) {
145 file_extent *extent = ExtentAt(i);
146 dprintf(" [%ld] extend offset %lld, disk offset %lld, length %lld\n",
147 i, extent->offset, extent->disk.offset, extent->disk.length);
148 }
149 #endif
150
151 lastOffset = offset;
152 return B_OK;
153 }
154
155
156 void
Free()157 file_map::Free()
158 {
159 if (count > CACHED_FILE_EXTENTS)
160 free(array);
161
162 array = NULL;
163 count = 0;
164 }
165
166
167 // #pragma mark -
168
169
170 void
set_vecs(iovec * vecs,size_t * _count,...)171 set_vecs(iovec *vecs, size_t *_count, ...)
172 {
173 uint32 base = 0;
174 size_t count = 0;
175
176 va_list args;
177 va_start(args, _count);
178
179 while (count < MAX_IO_VECS) {
180 int32 length = va_arg(args, int32);
181 if (length < 0)
182 break;
183
184 vecs[count].iov_base = (void *)base;
185 vecs[count].iov_len = length;
186
187 base += length;
188 count++;
189 }
190
191 va_end(args);
192 *_count = count;
193 }
194
195
196 void
set_file_map(int32 base,int32 length,...)197 set_file_map(int32 base, int32 length, ...)
198 {
199 gFileVecs[0].offset = base;
200 gFileVecs[0].length = length;
201
202 gFileSize = length;
203 gFileVecCount = 1;
204
205 va_list args;
206 va_start(args, length);
207
208 while (gFileVecCount < kMaxFileVecs) {
209 off_t offset = va_arg(args, int32);
210 if (offset < 0)
211 break;
212
213 length = va_arg(args, int32);
214
215 gFileVecs[gFileVecCount].offset = offset;
216 gFileVecs[gFileVecCount].length = length;
217
218 gFileSize += length;
219 gFileVecCount++;
220 }
221
222 va_end(args);
223 }
224
225
226 status_t
find_map_base(off_t offset,off_t & diskOffset,off_t & diskLength,off_t & fileOffset)227 find_map_base(off_t offset, off_t &diskOffset, off_t &diskLength,
228 off_t &fileOffset)
229 {
230 fileOffset = 0;
231
232 for (uint32 i = 0; i < gFileVecCount; i++) {
233 if (offset < gFileVecs[i].length) {
234 diskOffset = gFileVecs[i].offset;
235 diskLength = gFileVecs[i].length;
236 return B_OK;
237 }
238
239 fileOffset += gFileVecs[i].length;
240 offset -= gFileVecs[i].length;
241 }
242
243 return B_ENTRY_NOT_FOUND;
244 }
245
246
247 // #pragma mark - VFS functions
248
249
250 static status_t
vfs_get_file_map(void * vnode,off_t offset,size_t size,file_io_vec * vecs,size_t * _count)251 vfs_get_file_map(void *vnode, off_t offset, size_t size, file_io_vec *vecs,
252 size_t *_count)
253 {
254 off_t diskOffset, diskLength, fileOffset;
255 size_t max = *_count;
256 uint32 index = 0;
257
258 printf("vfs_get_file_map(offset = %lld, size = %lu, count = %lu)\n",
259 offset, size, *_count);
260
261 while (true) {
262 status_t status = find_map_base(offset, diskOffset, diskLength, fileOffset);
263 //status_t status = inode->FindBlockRun(offset, run, fileOffset);
264 if (status != B_OK)
265 return status;
266
267 vecs[index].offset = diskOffset + offset - fileOffset;
268 vecs[index].length = diskLength - offset + fileOffset;
269 offset += vecs[index].length;
270
271 // are we already done?
272 if (size <= vecs[index].length
273 || offset >= gFileSize) {
274 if (offset > gFileSize) {
275 // make sure the extent ends with the last official file
276 // block (without taking any preallocations into account)
277 vecs[index].length = gFileSize - fileOffset;
278 }
279 *_count = index + 1;
280 return B_OK;
281 }
282
283 size -= vecs[index].length;
284 index++;
285
286 if (index >= max) {
287 // we're out of file_io_vecs; let's bail out
288 *_count = index;
289 return B_BUFFER_OVERFLOW;
290 }
291 }
292 }
293
294
295 static status_t
vfs_read_pages(void * device,void * cookie,off_t offset,const iovec * vecs,size_t count,size_t * bytes,bool kernel)296 vfs_read_pages(void *device, void *cookie, off_t offset,
297 const iovec *vecs, size_t count, size_t *bytes, bool kernel)
298 {
299 printf("read offset %lld, length %lu\n", offset, *bytes);
300 for (uint32 i = 0; i < count; i++) {
301 printf(" [%lu] base %lu, length %lu\n",
302 i, (uint32)vecs[i].iov_base, vecs[i].iov_len);
303 }
304 return B_OK;
305 }
306
307
308 static status_t
vfs_write_pages(void * device,void * cookie,off_t offset,const iovec * vecs,size_t count,size_t * bytes,bool kernel)309 vfs_write_pages(void *device, void *cookie, off_t offset,
310 const iovec *vecs, size_t count, size_t *bytes, bool kernel)
311 {
312 printf("write offset %lld, length %lu\n", offset, *bytes);
313 for (uint32 i = 0; i < count; i++) {
314 printf(" [%lu] base %lu, length %lu\n",
315 i, (uint32)vecs[i].iov_base, vecs[i].iov_len);
316 }
317 return B_OK;
318 }
319
320
321 // #pragma mark - file_cache.cpp copies
322
323
324 static file_extent *
find_file_extent(file_cache_ref * ref,off_t offset,uint32 * _index)325 find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
326 {
327 // TODO: do binary search
328
329 for (uint32 index = 0; index < ref->map.count; index++) {
330 file_extent *extent = ref->map[index];
331
332 if (extent->offset <= offset
333 && extent->offset + extent->disk.length > offset) {
334 if (_index)
335 *_index = index;
336 return extent;
337 }
338 }
339
340 return NULL;
341 }
342
343
344 static status_t
get_file_map(file_cache_ref * ref,off_t offset,size_t size,file_io_vec * vecs,size_t * _count)345 get_file_map(file_cache_ref *ref, off_t offset, size_t size,
346 file_io_vec *vecs, size_t *_count)
347 {
348 size_t maxVecs = *_count;
349 status_t status = B_OK;
350
351 if (ref->map.count == 0) {
352 // we don't yet have the map of this file, so let's grab it
353 // (ordered by offset, so that we can do a binary search on them)
354
355 //mutex_lock(&ref->cache->lock);
356
357 // the file map could have been requested in the mean time
358 if (ref->map.count == 0) {
359 size_t vecCount = maxVecs;
360 off_t mapOffset = 0;
361
362 while (true) {
363 status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount);
364 if (status < B_OK && status != B_BUFFER_OVERFLOW) {
365 //mutex_unlock(&ref->cache->lock);
366 return status;
367 }
368
369 status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset);
370 if (addStatus != B_OK) {
371 // only clobber the status in case of failure
372 status = addStatus;
373 }
374
375 if (status != B_BUFFER_OVERFLOW)
376 break;
377
378 // when we are here, the map has been stored in the array, and
379 // the array size was still too small to cover the whole file
380 vecCount = maxVecs;
381 }
382 }
383
384 //mutex_unlock(&ref->cache->lock);
385 }
386
387 if (status != B_OK) {
388 // We must invalidate the (part of the) map we already
389 // have, as we cannot know if it's complete or not
390 ref->map.Free();
391 return status;
392 }
393
394 // We now have cached the map of this file, we now need to
395 // translate it for the requested access.
396
397 uint32 index;
398 file_extent *fileExtent = find_file_extent(ref, offset, &index);
399 if (fileExtent == NULL) {
400 // access outside file bounds? But that's not our problem
401 *_count = 0;
402 return B_OK;
403 }
404
405 offset -= fileExtent->offset;
406 vecs[0].offset = fileExtent->disk.offset + offset;
407 vecs[0].length = fileExtent->disk.length - offset;
408
409 if (vecs[0].length >= size || index >= ref->map.count - 1) {
410 *_count = 1;
411 return B_OK;
412 }
413
414 // copy the rest of the vecs
415
416 size -= vecs[0].length;
417
418 for (index = 1; index < ref->map.count;) {
419 fileExtent++;
420
421 vecs[index] = fileExtent->disk;
422 index++;
423
424 if (size <= fileExtent->disk.length)
425 break;
426
427 if (index >= maxVecs) {
428 *_count = index;
429 return B_BUFFER_OVERFLOW;
430 }
431
432 size -= fileExtent->disk.length;
433 }
434
435 *_count = index;
436 return B_OK;
437 }
438
439
440 /*!
441 Does the dirty work of translating the request into actual disk offsets
442 and reads to or writes from the supplied iovecs as specified by \a doWrite.
443 */
444 static status_t
pages_io(file_cache_ref * ref,off_t offset,const iovec * vecs,size_t count,size_t * _numBytes,bool doWrite)445 pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
446 size_t *_numBytes, bool doWrite)
447 {
448 TRACE(("pages_io: ref = %p, offset = %lld, size = %lu, vecCount = %lu, %s\n", ref, offset,
449 *_numBytes, count, doWrite ? "write" : "read"));
450
451 // translate the iovecs into direct device accesses
452 file_io_vec fileVecs[MAX_FILE_IO_VECS];
453 size_t fileVecCount = MAX_FILE_IO_VECS;
454 size_t numBytes = *_numBytes;
455
456 status_t status = get_file_map(ref, offset, numBytes, fileVecs,
457 &fileVecCount);
458 if (status < B_OK && status != B_BUFFER_OVERFLOW) {
459 TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", offset,
460 numBytes, strerror(status)));
461 return status;
462 }
463
464 bool bufferOverflow = status == B_BUFFER_OVERFLOW;
465
466 #ifdef TRACE_FILE_CACHE
467 dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, offset, numBytes,
468 bufferOverflow ? " (array too small)" : "");
469 for (size_t i = 0; i < fileVecCount; i++) {
470 dprintf(" [%lu] offset = %lld, size = %lld\n",
471 i, fileVecs[i].offset, fileVecs[i].length);
472 }
473 #endif
474
475 if (fileVecCount == 0) {
476 // There are no file vecs at this offset, so we're obviously trying
477 // to access the file outside of its bounds
478 TRACE(("pages_io: access outside of vnode %p at offset %lld\n",
479 ref->vnode, offset));
480 return B_BAD_VALUE;
481 }
482
483 uint32 fileVecIndex;
484 size_t size;
485
486 if (!doWrite) {
487 // now directly read the data from the device
488 // the first file_io_vec can be read directly
489
490 size = fileVecs[0].length;
491 if (size > numBytes)
492 size = numBytes;
493
494 status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs,
495 count, &size, false);
496 if (status < B_OK)
497 return status;
498
499 // TODO: this is a work-around for buggy device drivers!
500 // When our own drivers honour the length, we can:
501 // a) also use this direct I/O for writes (otherwise, it would
502 // overwrite precious data)
503 // b) panic if the term below is true (at least for writes)
504 if (size > fileVecs[0].length) {
505 //dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
506 size = fileVecs[0].length;
507 }
508
509 ASSERT(size <= fileVecs[0].length);
510
511 // If the file portion was contiguous, we're already done now
512 if (size == numBytes)
513 return B_OK;
514
515 // if we reached the end of the file, we can return as well
516 if (size != fileVecs[0].length) {
517 *_numBytes = size;
518 return B_OK;
519 }
520
521 fileVecIndex = 1;
522 } else {
523 fileVecIndex = 0;
524 size = 0;
525 }
526
527 // Too bad, let's process the rest of the file_io_vecs
528
529 size_t totalSize = size;
530
531 // first, find out where we have to continue in our iovecs
532 uint32 i = 0;
533 for (; i < count; i++) {
534 if (size < vecs[i].iov_len)
535 break;
536
537 size -= vecs[i].iov_len;
538 }
539
540 size_t vecOffset = size;
541 size_t bytesLeft = numBytes - size;
542
543 while (true) {
544 for (; fileVecIndex < fileVecCount; fileVecIndex++) {
545 file_io_vec &fileVec = fileVecs[fileVecIndex];
546 off_t fileOffset = fileVec.offset;
547 off_t fileLeft = min_c(fileVec.length, bytesLeft);
548
549 TRACE(("FILE VEC [%lu] length %lld\n", fileVecIndex, fileLeft));
550
551 // process the complete fileVec
552 while (fileLeft > 0) {
553 iovec tempVecs[MAX_TEMP_IO_VECS];
554 uint32 tempCount = 0;
555
556 // size tracks how much of what is left of the current fileVec
557 // (fileLeft) has been assigned to tempVecs
558 size = 0;
559
560 // assign what is left of the current fileVec to the tempVecs
561 for (size = 0; size < fileLeft && i < count
562 && tempCount < MAX_TEMP_IO_VECS;) {
563 // try to satisfy one iovec per iteration (or as much as
564 // possible)
565
566 // bytes left of the current iovec
567 size_t vecLeft = vecs[i].iov_len - vecOffset;
568 if (vecLeft == 0) {
569 vecOffset = 0;
570 i++;
571 continue;
572 }
573
574 TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
575 i, vecOffset, size));
576
577 // actually available bytes
578 size_t tempVecSize = min_c(vecLeft, fileLeft - size);
579
580 tempVecs[tempCount].iov_base
581 = (void *)((addr_t)vecs[i].iov_base + vecOffset);
582 tempVecs[tempCount].iov_len = tempVecSize;
583 tempCount++;
584
585 size += tempVecSize;
586 vecOffset += tempVecSize;
587 }
588
589 size_t bytes = size;
590 if (doWrite) {
591 status = vfs_write_pages(ref->device, ref->cookie,
592 fileOffset, tempVecs, tempCount, &bytes, false);
593 } else {
594 status = vfs_read_pages(ref->device, ref->cookie,
595 fileOffset, tempVecs, tempCount, &bytes, false);
596 }
597 if (status < B_OK)
598 return status;
599
600 totalSize += bytes;
601 bytesLeft -= size;
602 fileOffset += size;
603 fileLeft -= size;
604 //dprintf("-> file left = %Lu\n", fileLeft);
605
606 if (size != bytes || i >= count) {
607 // there are no more bytes or iovecs, let's bail out
608 *_numBytes = totalSize;
609 return B_OK;
610 }
611 }
612 }
613
614 if (bufferOverflow) {
615 status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs,
616 &fileVecCount);
617 if (status < B_OK && status != B_BUFFER_OVERFLOW) {
618 TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n",
619 offset, numBytes, strerror(status)));
620 return status;
621 }
622
623 bufferOverflow = status == B_BUFFER_OVERFLOW;
624 fileVecIndex = 0;
625
626 #ifdef TRACE_FILE_CACHE
627 dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount,
628 offset + totalSize, numBytes,
629 bufferOverflow ? " (array too small)" : "");
630 for (size_t i = 0; i < fileVecCount; i++) {
631 dprintf(" [%lu] offset = %lld, size = %lld\n",
632 i, fileVecs[i].offset, fileVecs[i].length);
633 }
634 #endif
635 } else
636 break;
637 }
638
639 *_numBytes = totalSize;
640 return B_OK;
641 }
642
643
644 // #pragma mark -
645
646
647 int
main(int argc,char ** argv)648 main(int argc, char **argv)
649 {
650 file_cache_ref ref;
651 iovec vecs[MAX_IO_VECS];
652 size_t count = 1;
653 size_t numBytes = 10000;
654 off_t offset = 4999;
655
656 set_vecs(vecs, &count, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
657 16, 4096, 8192, 16384, 4096, 4096, -1);
658 set_file_map(0, 2000, 5000, 3000, 10000, 800, 11000, 20, 12000, 30,
659 13000, 70, 14000, 100, 15000, 900, 20000, 30000, -1);
660
661 status_t status = pages_io(&ref, offset, vecs, count, &numBytes, false);
662 if (status < B_OK)
663 fprintf(stderr, "pages_io() returned: %s\n", strerror(status));
664
665 return 0;
666 }
667
668