xref: /haiku/src/tests/system/kernel/cache/pages_io_test.cpp (revision 425ac1b60a56f4df7a0e88bd784545c0ec4fa01f)
1 /*
2  * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <OS.h>
8 #include <fs_interface.h>
9 
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/uio.h>
15 
// Enables the extent dump at the end of file_map::Add() and the file vec
// dumps in pages_io().
#define TRACE_FILE_CACHE

// TRACE() expects a complete, parenthesized printf() argument list, e.g.
// TRACE(("value = %d\n", value));
#define TRACE(x) printf x

// dprintf() calls in this file are redirected to printf()
#define dprintf printf

// Only provide a (no-op) ASSERT() if none has been defined yet.
#ifndef ASSERT
#	define ASSERT(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			64	// 256 kB
// size of the file_io_vec array pages_io() translates a request into
#define MAX_FILE_IO_VECS	4
// size of the temporary iovec array pages_io() hands to a single read/write
#define MAX_TEMP_IO_VECS	8

// number of extents a file_map can hold without allocating memory
#define CACHED_FILE_EXTENTS	2
	// must be smaller than MAX_FILE_IO_VECS
	// TODO: find out how many of these are typically used
32 
33 struct vm_cache_ref;
34 
35 struct file_extent {
36 	off_t			offset;
37 	file_io_vec		disk;
38 };
39 
40 struct file_map {
41 	file_map();
42 	~file_map();
43 
44 	file_extent *operator[](uint32 index);
45 	file_extent *ExtentAt(uint32 index);
46 	status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset);
47 	void Free();
48 
49 	union {
50 		file_extent	direct[CACHED_FILE_EXTENTS];
51 		file_extent	*array;
52 	};
53 	size_t			count;
54 };
55 
56 struct file_cache_ref {
57 	vm_cache_ref	*cache;
58 	void			*vnode;
59 	void			*device;
60 	void			*cookie;
61 	file_map		map;
62 };
63 
64 
65 const uint32 kMaxFileVecs = 1024;
66 
67 file_io_vec gFileVecs[kMaxFileVecs];
68 size_t gFileVecCount;
69 off_t gFileSize;
70 
71 
file_map()72 file_map::file_map()
73 {
74 	array = NULL;
75 	count = 0;
76 }
77 
78 
~file_map()79 file_map::~file_map()
80 {
81 	Free();
82 }
83 
84 
85 file_extent *
operator [](uint32 index)86 file_map::operator[](uint32 index)
87 {
88 	return ExtentAt(index);
89 }
90 
91 
92 file_extent *
ExtentAt(uint32 index)93 file_map::ExtentAt(uint32 index)
94 {
95 	if (index >= count)
96 		return NULL;
97 
98 	if (count > CACHED_FILE_EXTENTS)
99 		return &array[index];
100 
101 	return &direct[index];
102 }
103 
104 
105 status_t
Add(file_io_vec * vecs,size_t vecCount,off_t & lastOffset)106 file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset)
107 {
108 	TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));
109 
110 	off_t offset = 0;
111 
112 	if (vecCount <= CACHED_FILE_EXTENTS && count == 0) {
113 		// just use the reserved area in the file_cache_ref structure
114 	} else {
115 		// TODO: once we can invalidate only parts of the file map,
116 		//	we might need to copy the previously cached file extends
117 		//	from the direct range
118 		file_extent *newMap = (file_extent *)realloc(array,
119 			(count + vecCount) * sizeof(file_extent));
120 		if (newMap == NULL)
121 			return B_NO_MEMORY;
122 
123 		array = newMap;
124 
125 		if (count != 0) {
126 			file_extent *extent = ExtentAt(count - 1);
127 			offset = extent->offset + extent->disk.length;
128 		}
129 	}
130 
131 	int32 start = count;
132 	count += vecCount;
133 
134 	for (uint32 i = 0; i < vecCount; i++) {
135 		file_extent *extent = ExtentAt(start + i);
136 
137 		extent->offset = offset;
138 		extent->disk = vecs[i];
139 
140 		offset += extent->disk.length;
141 	}
142 
143 #ifdef TRACE_FILE_CACHE
144 	for (uint32 i = 0; i < count; i++) {
145 		file_extent *extent = ExtentAt(i);
146 		dprintf("  [%ld] extend offset %lld, disk offset %lld, length %lld\n",
147 			i, extent->offset, extent->disk.offset, extent->disk.length);
148 	}
149 #endif
150 
151 	lastOffset = offset;
152 	return B_OK;
153 }
154 
155 
156 void
Free()157 file_map::Free()
158 {
159 	if (count > CACHED_FILE_EXTENTS)
160 		free(array);
161 
162 	array = NULL;
163 	count = 0;
164 }
165 
166 
167 //	#pragma mark -
168 
169 
170 void
set_vecs(iovec * vecs,size_t * _count,...)171 set_vecs(iovec *vecs, size_t *_count, ...)
172 {
173 	uint32 base = 0;
174 	size_t count = 0;
175 
176 	va_list args;
177 	va_start(args, _count);
178 
179 	while (count < MAX_IO_VECS) {
180 		int32 length = va_arg(args, int32);
181 		if (length < 0)
182 			break;
183 
184 		vecs[count].iov_base = (void *)base;
185 		vecs[count].iov_len = length;
186 
187 		base += length;
188 		count++;
189 	}
190 
191 	va_end(args);
192 	*_count = count;
193 }
194 
195 
196 void
set_file_map(int32 base,int32 length,...)197 set_file_map(int32 base, int32 length, ...)
198 {
199 	gFileVecs[0].offset = base;
200 	gFileVecs[0].length = length;
201 
202 	gFileSize = length;
203 	gFileVecCount = 1;
204 
205 	va_list args;
206 	va_start(args, length);
207 
208 	while (gFileVecCount < kMaxFileVecs) {
209 		off_t offset = va_arg(args, int32);
210 		if (offset < 0)
211 			break;
212 
213 		length = va_arg(args, int32);
214 
215 		gFileVecs[gFileVecCount].offset = offset;
216 		gFileVecs[gFileVecCount].length = length;
217 
218 		gFileSize += length;
219 		gFileVecCount++;
220 	}
221 
222 	va_end(args);
223 }
224 
225 
226 status_t
find_map_base(off_t offset,off_t & diskOffset,off_t & diskLength,off_t & fileOffset)227 find_map_base(off_t offset, off_t &diskOffset, off_t &diskLength,
228 	off_t &fileOffset)
229 {
230 	fileOffset = 0;
231 
232 	for (uint32 i = 0; i < gFileVecCount; i++) {
233 		if (offset < gFileVecs[i].length) {
234 			diskOffset = gFileVecs[i].offset;
235 			diskLength = gFileVecs[i].length;
236 			return B_OK;
237 		}
238 
239 		fileOffset += gFileVecs[i].length;
240 		offset -= gFileVecs[i].length;
241 	}
242 
243 	return B_ENTRY_NOT_FOUND;
244 }
245 
246 
247 //	#pragma mark - VFS functions
248 
249 
250 static status_t
vfs_get_file_map(void * vnode,off_t offset,size_t size,file_io_vec * vecs,size_t * _count)251 vfs_get_file_map(void *vnode, off_t offset, size_t size, file_io_vec *vecs,
252 	size_t *_count)
253 {
254 	off_t diskOffset, diskLength, fileOffset;
255 	size_t max = *_count;
256 	uint32 index = 0;
257 
258 	printf("vfs_get_file_map(offset = %lld, size = %lu, count = %lu)\n",
259 		offset, size, *_count);
260 
261 	while (true) {
262 		status_t status = find_map_base(offset, diskOffset, diskLength, fileOffset);
263 		//status_t status = inode->FindBlockRun(offset, run, fileOffset);
264 		if (status != B_OK)
265 			return status;
266 
267 		vecs[index].offset = diskOffset + offset - fileOffset;
268 		vecs[index].length = diskLength - offset + fileOffset;
269 		offset += vecs[index].length;
270 
271 		// are we already done?
272 		if (size <= vecs[index].length
273 			|| offset >= gFileSize) {
274 			if (offset > gFileSize) {
275 				// make sure the extent ends with the last official file
276 				// block (without taking any preallocations into account)
277 				vecs[index].length = gFileSize - fileOffset;
278 			}
279 			*_count = index + 1;
280 			return B_OK;
281 		}
282 
283 		size -= vecs[index].length;
284 		index++;
285 
286 		if (index >= max) {
287 			// we're out of file_io_vecs; let's bail out
288 			*_count = index;
289 			return B_BUFFER_OVERFLOW;
290 		}
291 	}
292 }
293 
294 
295 static status_t
vfs_read_pages(void * device,void * cookie,off_t offset,const iovec * vecs,size_t count,size_t * bytes,bool kernel)296 vfs_read_pages(void *device, void *cookie, off_t offset,
297 	const iovec *vecs, size_t count, size_t *bytes, bool kernel)
298 {
299 	printf("read offset %lld, length %lu\n", offset, *bytes);
300 	for (uint32 i = 0; i < count; i++) {
301 		printf("  [%lu] base %lu, length %lu\n",
302 			i, (uint32)vecs[i].iov_base, vecs[i].iov_len);
303 	}
304 	return B_OK;
305 }
306 
307 
308 static status_t
vfs_write_pages(void * device,void * cookie,off_t offset,const iovec * vecs,size_t count,size_t * bytes,bool kernel)309 vfs_write_pages(void *device, void *cookie, off_t offset,
310 	const iovec *vecs, size_t count, size_t *bytes, bool kernel)
311 {
312 	printf("write offset %lld, length %lu\n", offset, *bytes);
313 	for (uint32 i = 0; i < count; i++) {
314 		printf("  [%lu] base %lu, length %lu\n",
315 			i, (uint32)vecs[i].iov_base, vecs[i].iov_len);
316 	}
317 	return B_OK;
318 }
319 
320 
321 //	#pragma mark - file_cache.cpp copies
322 
323 
324 static file_extent *
find_file_extent(file_cache_ref * ref,off_t offset,uint32 * _index)325 find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
326 {
327 	// TODO: do binary search
328 
329 	for (uint32 index = 0; index < ref->map.count; index++) {
330 		file_extent *extent = ref->map[index];
331 
332 		if (extent->offset <= offset
333 			&& extent->offset + extent->disk.length > offset) {
334 			if (_index)
335 				*_index = index;
336 			return extent;
337 		}
338 	}
339 
340 	return NULL;
341 }
342 
343 
344 static status_t
get_file_map(file_cache_ref * ref,off_t offset,size_t size,file_io_vec * vecs,size_t * _count)345 get_file_map(file_cache_ref *ref, off_t offset, size_t size,
346 	file_io_vec *vecs, size_t *_count)
347 {
348 	size_t maxVecs = *_count;
349 	status_t status = B_OK;
350 
351 	if (ref->map.count == 0) {
352 		// we don't yet have the map of this file, so let's grab it
353 		// (ordered by offset, so that we can do a binary search on them)
354 
355 		//mutex_lock(&ref->cache->lock);
356 
357 		// the file map could have been requested in the mean time
358 		if (ref->map.count == 0) {
359 			size_t vecCount = maxVecs;
360 			off_t mapOffset = 0;
361 
362 			while (true) {
363 				status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount);
364 				if (status < B_OK && status != B_BUFFER_OVERFLOW) {
365 					//mutex_unlock(&ref->cache->lock);
366 					return status;
367 				}
368 
369 				status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset);
370 				if (addStatus != B_OK) {
371 					// only clobber the status in case of failure
372 					status = addStatus;
373 				}
374 
375 				if (status != B_BUFFER_OVERFLOW)
376 					break;
377 
378 				// when we are here, the map has been stored in the array, and
379 				// the array size was still too small to cover the whole file
380 				vecCount = maxVecs;
381 			}
382 		}
383 
384 		//mutex_unlock(&ref->cache->lock);
385 	}
386 
387 	if (status != B_OK) {
388 		// We must invalidate the (part of the) map we already
389 		// have, as we cannot know if it's complete or not
390 		ref->map.Free();
391 		return status;
392 	}
393 
394 	// We now have cached the map of this file, we now need to
395 	// translate it for the requested access.
396 
397 	uint32 index;
398 	file_extent *fileExtent = find_file_extent(ref, offset, &index);
399 	if (fileExtent == NULL) {
400 		// access outside file bounds? But that's not our problem
401 		*_count = 0;
402 		return B_OK;
403 	}
404 
405 	offset -= fileExtent->offset;
406 	vecs[0].offset = fileExtent->disk.offset + offset;
407 	vecs[0].length = fileExtent->disk.length - offset;
408 
409 	if (vecs[0].length >= size || index >= ref->map.count - 1) {
410 		*_count = 1;
411 		return B_OK;
412 	}
413 
414 	// copy the rest of the vecs
415 
416 	size -= vecs[0].length;
417 
418 	for (index = 1; index < ref->map.count;) {
419 		fileExtent++;
420 
421 		vecs[index] = fileExtent->disk;
422 		index++;
423 
424 		if (size <= fileExtent->disk.length)
425 			break;
426 
427 		if (index >= maxVecs) {
428 			*_count = index;
429 			return B_BUFFER_OVERFLOW;
430 		}
431 
432 		size -= fileExtent->disk.length;
433 	}
434 
435 	*_count = index;
436 	return B_OK;
437 }
438 
439 
440 /*!
441 	Does the dirty work of translating the request into actual disk offsets
442 	and reads to or writes from the supplied iovecs as specified by \a doWrite.
443 */
444 static status_t
pages_io(file_cache_ref * ref,off_t offset,const iovec * vecs,size_t count,size_t * _numBytes,bool doWrite)445 pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
446 	size_t *_numBytes, bool doWrite)
447 {
448 	TRACE(("pages_io: ref = %p, offset = %lld, size = %lu, vecCount = %lu, %s\n", ref, offset,
449 		*_numBytes, count, doWrite ? "write" : "read"));
450 
451 	// translate the iovecs into direct device accesses
452 	file_io_vec fileVecs[MAX_FILE_IO_VECS];
453 	size_t fileVecCount = MAX_FILE_IO_VECS;
454 	size_t numBytes = *_numBytes;
455 
456 	status_t status = get_file_map(ref, offset, numBytes, fileVecs,
457 		&fileVecCount);
458 	if (status < B_OK && status != B_BUFFER_OVERFLOW) {
459 		TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", offset,
460 			numBytes, strerror(status)));
461 		return status;
462 	}
463 
464 	bool bufferOverflow = status == B_BUFFER_OVERFLOW;
465 
466 #ifdef TRACE_FILE_CACHE
467 	dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, offset, numBytes,
468 		bufferOverflow ? " (array too small)" : "");
469 	for (size_t i = 0; i < fileVecCount; i++) {
470 		dprintf("  [%lu] offset = %lld, size = %lld\n",
471 			i, fileVecs[i].offset, fileVecs[i].length);
472 	}
473 #endif
474 
475 	if (fileVecCount == 0) {
476 		// There are no file vecs at this offset, so we're obviously trying
477 		// to access the file outside of its bounds
478 		TRACE(("pages_io: access outside of vnode %p at offset %lld\n",
479 			ref->vnode, offset));
480 		return B_BAD_VALUE;
481 	}
482 
483 	uint32 fileVecIndex;
484 	size_t size;
485 
486 	if (!doWrite) {
487 		// now directly read the data from the device
488 		// the first file_io_vec can be read directly
489 
490 		size = fileVecs[0].length;
491 		if (size > numBytes)
492 			size = numBytes;
493 
494 		status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs,
495 			count, &size, false);
496 		if (status < B_OK)
497 			return status;
498 
499 		// TODO: this is a work-around for buggy device drivers!
500 		//	When our own drivers honour the length, we can:
501 		//	a) also use this direct I/O for writes (otherwise, it would
502 		//	   overwrite precious data)
503 		//	b) panic if the term below is true (at least for writes)
504 		if (size > fileVecs[0].length) {
505 			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
506 			size = fileVecs[0].length;
507 		}
508 
509 		ASSERT(size <= fileVecs[0].length);
510 
511 		// If the file portion was contiguous, we're already done now
512 		if (size == numBytes)
513 			return B_OK;
514 
515 		// if we reached the end of the file, we can return as well
516 		if (size != fileVecs[0].length) {
517 			*_numBytes = size;
518 			return B_OK;
519 		}
520 
521 		fileVecIndex = 1;
522 	} else {
523 		fileVecIndex = 0;
524 		size = 0;
525 	}
526 
527 	// Too bad, let's process the rest of the file_io_vecs
528 
529 	size_t totalSize = size;
530 
531 	// first, find out where we have to continue in our iovecs
532 	uint32 i = 0;
533 	for (; i < count; i++) {
534 		if (size < vecs[i].iov_len)
535 			break;
536 
537 		size -= vecs[i].iov_len;
538 	}
539 
540 	size_t vecOffset = size;
541 	size_t bytesLeft = numBytes - size;
542 
543 	while (true) {
544 		for (; fileVecIndex < fileVecCount; fileVecIndex++) {
545 			file_io_vec &fileVec = fileVecs[fileVecIndex];
546 			off_t fileOffset = fileVec.offset;
547 			off_t fileLeft = min_c(fileVec.length, bytesLeft);
548 
549 			TRACE(("FILE VEC [%lu] length %lld\n", fileVecIndex, fileLeft));
550 
551 			// process the complete fileVec
552 			while (fileLeft > 0) {
553 				iovec tempVecs[MAX_TEMP_IO_VECS];
554 				uint32 tempCount = 0;
555 
556 				// size tracks how much of what is left of the current fileVec
557 				// (fileLeft) has been assigned to tempVecs
558 				size = 0;
559 
560 				// assign what is left of the current fileVec to the tempVecs
561 				for (size = 0; size < fileLeft && i < count
562 						&& tempCount < MAX_TEMP_IO_VECS;) {
563 					// try to satisfy one iovec per iteration (or as much as
564 					// possible)
565 
566 					// bytes left of the current iovec
567 					size_t vecLeft = vecs[i].iov_len - vecOffset;
568 					if (vecLeft == 0) {
569 						vecOffset = 0;
570 						i++;
571 						continue;
572 					}
573 
574 					TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
575 						i, vecOffset, size));
576 
577 					// actually available bytes
578 					size_t tempVecSize = min_c(vecLeft, fileLeft - size);
579 
580 					tempVecs[tempCount].iov_base
581 						= (void *)((addr_t)vecs[i].iov_base + vecOffset);
582 					tempVecs[tempCount].iov_len = tempVecSize;
583 					tempCount++;
584 
585 					size += tempVecSize;
586 					vecOffset += tempVecSize;
587 				}
588 
589 				size_t bytes = size;
590 				if (doWrite) {
591 					status = vfs_write_pages(ref->device, ref->cookie,
592 						fileOffset, tempVecs, tempCount, &bytes, false);
593 				} else {
594 					status = vfs_read_pages(ref->device, ref->cookie,
595 						fileOffset, tempVecs, tempCount, &bytes, false);
596 				}
597 				if (status < B_OK)
598 					return status;
599 
600 				totalSize += bytes;
601 				bytesLeft -= size;
602 				fileOffset += size;
603 				fileLeft -= size;
604 				//dprintf("-> file left = %Lu\n", fileLeft);
605 
606 				if (size != bytes || i >= count) {
607 					// there are no more bytes or iovecs, let's bail out
608 					*_numBytes = totalSize;
609 					return B_OK;
610 				}
611 			}
612 		}
613 
614 		if (bufferOverflow) {
615 			status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs,
616 				&fileVecCount);
617 			if (status < B_OK && status != B_BUFFER_OVERFLOW) {
618 				TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n",
619 					offset, numBytes, strerror(status)));
620 				return status;
621 			}
622 
623 			bufferOverflow = status == B_BUFFER_OVERFLOW;
624 			fileVecIndex = 0;
625 
626 #ifdef TRACE_FILE_CACHE
627 			dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount,
628 				offset + totalSize, numBytes,
629 				bufferOverflow ? " (array too small)" : "");
630 			for (size_t i = 0; i < fileVecCount; i++) {
631 				dprintf("  [%lu] offset = %lld, size = %lld\n",
632 					i, fileVecs[i].offset, fileVecs[i].length);
633 			}
634 #endif
635 		} else
636 			break;
637 	}
638 
639 	*_numBytes = totalSize;
640 	return B_OK;
641 }
642 
643 
644 //	#pragma mark -
645 
646 
647 int
main(int argc,char ** argv)648 main(int argc, char **argv)
649 {
650 	file_cache_ref ref;
651 	iovec vecs[MAX_IO_VECS];
652 	size_t count = 1;
653 	size_t numBytes = 10000;
654 	off_t offset = 4999;
655 
656 	set_vecs(vecs, &count, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
657 		16, 4096, 8192, 16384, 4096, 4096, -1);
658 	set_file_map(0, 2000, 5000, 3000, 10000, 800, 11000, 20, 12000, 30,
659 		13000, 70, 14000, 100, 15000, 900, 20000, 30000, -1);
660 
661 	status_t status = pages_io(&ref, offset, vecs, count, &numBytes, false);
662 	if (status < B_OK)
663 		fprintf(stderr, "pages_io() returned: %s\n", strerror(status));
664 
665 	return 0;
666 }
667 
668