xref: /haiku/src/system/kernel/cache/file_cache.cpp (revision 6aff37d1c79e20748c683ae224bd629f88a5b0be)
1 /*
2  * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "vnode_store.h"
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 #include <KernelExport.h>
14 #include <fs_cache.h>
15 
16 #include <condition_variable.h>
17 #include <file_cache.h>
18 #include <generic_syscall.h>
19 #include <low_resource_manager.h>
20 #include <thread.h>
21 #include <util/AutoLock.h>
22 #include <util/kernel_cpp.h>
23 #include <vfs.h>
24 #include <vm/vm.h>
25 #include <vm/vm_page.h>
26 #include <vm/VMCache.h>
27 
28 #include "IORequest.h"
29 
30 
31 //#define TRACE_FILE_CACHE
32 #ifdef TRACE_FILE_CACHE
33 #	define TRACE(x) dprintf x
34 #else
35 #	define TRACE(x) ;
36 #endif
37 
38 // maximum number of iovecs per request
39 #define MAX_IO_VECS			32	// 128 kB
40 #define MAX_FILE_IO_VECS	32
41 
42 #define BYPASS_IO_SIZE		65536
43 #define LAST_ACCESSES		3
44 
45 struct file_cache_ref {
46 	VMCache			*cache;
47 	struct vnode	*vnode;
48 	off_t			last_access[LAST_ACCESSES];
49 		// TODO: it would probably be enough to only store the least
50 		//	significant 31 bits, and make this uint32 (one bit for
51 		//	write vs. read)
52 	int32			last_access_index;
53 	uint16			disabled_count;
54 
55 	inline void SetLastAccess(int32 index, off_t access, bool isWrite)
56 	{
57 		// we remember writes as negative offsets
58 		last_access[index] = isWrite ? -access : access;
59 	}
60 
61 	inline off_t LastAccess(int32 index, bool isWrite) const
62 	{
63 		return isWrite ? -last_access[index] : last_access[index];
64 	}
65 
66 	inline uint32 LastAccessPageOffset(int32 index, bool isWrite)
67 	{
68 		return LastAccess(index, isWrite) >> PAGE_SHIFT;
69 	}
70 };
71 
72 class PrecacheIO : public AsyncIOCallback {
73 public:
74 								PrecacheIO(file_cache_ref* ref, off_t offset,
75 									generic_size_t size);
76 								~PrecacheIO();
77 
78 			status_t			Prepare(vm_page_reservation* reservation);
79 			void				ReadAsync();
80 
81 	virtual	void				IOFinished(status_t status,
82 									bool partialTransfer,
83 									generic_size_t bytesTransferred);
84 
85 private:
86 			file_cache_ref*		fRef;
87 			VMCache*			fCache;
88 			vm_page**			fPages;
89 			size_t				fPageCount;
90 			ConditionVariable*	fBusyConditions;
91 			generic_io_vec*		fVecs;
92 			off_t				fOffset;
93 			uint32				fVecCount;
94 			generic_size_t		fSize;
95 #if DEBUG_PAGE_ACCESS
96 			thread_id			fAllocatingThread;
97 #endif
98 };
99 
100 typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
101 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
102 	vm_page_reservation* reservation, size_t reservePages);
103 
104 static void add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
105 	generic_addr_t address, generic_size_t size);
106 
107 
108 static struct cache_module_info* sCacheModule;
109 
110 
111 static const uint32 kZeroVecCount = 32;
112 static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE;
113 static phys_addr_t sZeroPage;	// physical address
114 static generic_io_vec sZeroVecs[kZeroVecCount];
115 
116 
117 //	#pragma mark -
118 
119 
120 PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, generic_size_t size)
121 	:
122 	fRef(ref),
123 	fCache(ref->cache),
124 	fPages(NULL),
125 	fVecs(NULL),
126 	fOffset(offset),
127 	fVecCount(0),
128 	fSize(size)
129 {
130 	fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
131 	fCache->AcquireRefLocked();
132 }
133 
134 
135 PrecacheIO::~PrecacheIO()
136 {
137 	delete[] fPages;
138 	delete[] fVecs;
139 	fCache->ReleaseRefLocked();
140 }
141 
142 
143 status_t
144 PrecacheIO::Prepare(vm_page_reservation* reservation)
145 {
146 	if (fPageCount == 0)
147 		return B_BAD_VALUE;
148 
149 	fPages = new(std::nothrow) vm_page*[fPageCount];
150 	if (fPages == NULL)
151 		return B_NO_MEMORY;
152 
153 	fVecs = new(std::nothrow) generic_io_vec[fPageCount];
154 	if (fVecs == NULL)
155 		return B_NO_MEMORY;
156 
157 	// allocate pages for the cache and mark them busy
158 	uint32 i = 0;
159 	for (generic_size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) {
160 		vm_page* page = vm_page_allocate_page(reservation,
161 			PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);
162 
163 		fCache->InsertPage(page, fOffset + pos);
164 
165 		add_to_iovec(fVecs, fVecCount, fPageCount,
166 			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
167 		fPages[i++] = page;
168 	}
169 
170 #if DEBUG_PAGE_ACCESS
171 	fAllocatingThread = find_thread(NULL);
172 #endif
173 
174 	return B_OK;
175 }
176 
177 
178 void
179 PrecacheIO::ReadAsync()
180 {
181 	// This object is going to be deleted after the I/O request has been
182 	// fulfilled
183 	vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount,
184 		fSize, B_PHYSICAL_IO_REQUEST, this);
185 }
186 
187 
188 void
189 PrecacheIO::IOFinished(status_t status, bool partialTransfer,
190 	generic_size_t bytesTransferred)
191 {
192 	AutoLocker<VMCache> locker(fCache);
193 
194 	// Make successfully loaded pages accessible again (partially
195 	// transferred pages are considered failed)
196 	phys_size_t pagesTransferred
197 		= (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
198 
199 	if (fOffset + (off_t)bytesTransferred > fCache->virtual_end)
200 		bytesTransferred = fCache->virtual_end - fOffset;
201 
202 	for (uint32 i = 0; i < pagesTransferred; i++) {
203 		if (i == pagesTransferred - 1
204 			&& (bytesTransferred % B_PAGE_SIZE) != 0) {
205 			// clear partial page
206 			size_t bytesTouched = bytesTransferred % B_PAGE_SIZE;
207 			vm_memset_physical(
208 				((phys_addr_t)fPages[i]->physical_page_number << PAGE_SHIFT)
209 					+ bytesTouched,
210 				0, B_PAGE_SIZE - bytesTouched);
211 		}
212 
213 		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
214 
215 		fCache->MarkPageUnbusy(fPages[i]);
216 
217 		DEBUG_PAGE_ACCESS_END(fPages[i]);
218 	}
219 
220 	// Free pages after failed I/O
221 	for (uint32 i = pagesTransferred; i < fPageCount; i++) {
222 		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
223 		fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY);
224 		fCache->RemovePage(fPages[i]);
225 		vm_page_set_state(fPages[i], PAGE_STATE_FREE);
226 	}
227 
228 	delete this;
229 }
230 
231 
232 //	#pragma mark -
233 
234 
235 static void
236 add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
237 	generic_addr_t address, generic_size_t size)
238 {
239 	if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) {
240 		// the iovec can be combined with the previous one
241 		vecs[index - 1].length += size;
242 		return;
243 	}
244 
245 	if (index == max)
246 		panic("no more space for iovecs!");
247 
248 	// we need to start a new iovec
249 	vecs[index].base = address;
250 	vecs[index].length = size;
251 	index++;
252 }
253 
254 
255 static inline bool
256 access_is_sequential(file_cache_ref* ref)
257 {
258 	return ref->last_access[ref->last_access_index] != 0;
259 }
260 
261 
262 static inline void
263 push_access(file_cache_ref* ref, off_t offset, generic_size_t bytes,
264 	bool isWrite)
265 {
266 	TRACE(("%p: push %Ld, %ld, %s\n", ref, offset, bytes,
267 		isWrite ? "write" : "read"));
268 
269 	int32 index = ref->last_access_index;
270 	int32 previous = index - 1;
271 	if (previous < 0)
272 		previous = LAST_ACCESSES - 1;
273 
274 	if (offset != ref->LastAccess(previous, isWrite))
275 		ref->last_access[previous] = 0;
276 
277 	ref->SetLastAccess(index, offset + bytes, isWrite);
278 
279 	if (++index >= LAST_ACCESSES)
280 		index = 0;
281 	ref->last_access_index = index;
282 }
283 
284 
285 static void
286 reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation,
287 	size_t reservePages, bool isWrite)
288 {
289 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
290 		VMCache* cache = ref->cache;
291 		cache->Lock();
292 
293 		if (cache->consumers.IsEmpty() && cache->areas == NULL
294 			&& access_is_sequential(ref)) {
295 			// we are not mapped, and we're accessed sequentially
296 
297 			if (isWrite) {
298 				// Just write some pages back, and actually wait until they
299 				// have been written back in order to relieve the page pressure
300 				// a bit.
301 				int32 index = ref->last_access_index;
302 				int32 previous = index - 1;
303 				if (previous < 0)
304 					previous = LAST_ACCESSES - 1;
305 
306 				vm_page_write_modified_page_range(cache,
307 					ref->LastAccessPageOffset(previous, true),
308 					ref->LastAccessPageOffset(index, true));
309 			} else {
310 				// free some pages from our cache
311 				// TODO: start with oldest
312 				uint32 left = reservePages;
313 				vm_page* page;
314 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
315 						(page = it.Next()) != NULL && left > 0;) {
316 					if (page->State() == PAGE_STATE_CACHED && !page->busy) {
317 						DEBUG_PAGE_ACCESS_START(page);
318 						ASSERT(!page->IsMapped());
319 						ASSERT(!page->modified);
320 						cache->RemovePage(page);
321 						vm_page_set_state(page, PAGE_STATE_FREE);
322 						left--;
323 					}
324 				}
325 			}
326 		}
327 		cache->Unlock();
328 	}
329 
330 	vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER);
331 }
332 
333 
334 static inline status_t
335 read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
336 	const generic_io_vec* vecs, size_t count, uint32 flags,
337 	generic_size_t* _numBytes)
338 {
339 	generic_size_t bytesUntouched = *_numBytes;
340 
341 	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
342 		flags, _numBytes);
343 
344 	generic_size_t bytesEnd = *_numBytes;
345 
346 	if (offset + (off_t)bytesEnd > ref->cache->virtual_end)
347 		bytesEnd = ref->cache->virtual_end - offset;
348 
349 	if (status == B_OK && bytesEnd < bytesUntouched) {
350 		// Clear out any leftovers that were not touched by the above read.
351 		// We're doing this here so that not every file system/device has to
352 		// implement this.
353 		bytesUntouched -= bytesEnd;
354 
355 		for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) {
356 			generic_size_t length = min_c(bytesUntouched, vecs[i].length);
357 			vm_memset_physical(vecs[i].base + vecs[i].length - length, 0,
358 				length);
359 
360 			bytesUntouched -= length;
361 		}
362 	}
363 
364 	return status;
365 }
366 
367 
368 /*!	Reads the requested amount of data into the cache, and allocates
369 	pages needed to fulfill that request. This function is called by cache_io().
370 	It can only handle a certain amount of bytes, and the caller must make
371 	sure that it matches that criterion.
372 	The cache_ref lock must be held when calling this function; during
373 	operation it will unlock the cache, though.
374 */
375 static status_t
376 read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
377 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
378 	vm_page_reservation* reservation, size_t reservePages)
379 {
380 	TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
381 		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));
382 
383 	VMCache* cache = ref->cache;
384 
385 	// TODO: We're using way too much stack! Rather allocate a sufficiently
386 	// large chunk on the heap.
387 	generic_io_vec vecs[MAX_IO_VECS];
388 	uint32 vecCount = 0;
389 
390 	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
391 	vm_page* pages[MAX_IO_VECS];
392 	int32 pageIndex = 0;
393 
394 	// allocate pages for the cache and mark them busy
395 	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
396 		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
397 			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);
398 
399 		cache->InsertPage(page, offset + pos);
400 
401 		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
402 			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
403 			// TODO: check if the array is large enough (currently panics)!
404 	}
405 
406 	push_access(ref, offset, bufferSize, false);
407 	cache->Unlock();
408 	vm_page_unreserve_pages(reservation);
409 
410 	// read file into reserved pages
411 	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
412 		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
413 	if (status != B_OK) {
414 		// reading failed, free allocated pages
415 
416 		dprintf("file_cache: read pages failed: %s\n", strerror(status));
417 
418 		cache->Lock();
419 
420 		for (int32 i = 0; i < pageIndex; i++) {
421 			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
422 			cache->RemovePage(pages[i]);
423 			vm_page_set_state(pages[i], PAGE_STATE_FREE);
424 		}
425 
426 		return status;
427 	}
428 
429 	// copy the pages if needed and unmap them again
430 
431 	for (int32 i = 0; i < pageIndex; i++) {
432 		if (useBuffer && bufferSize != 0) {
433 			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);
434 
435 			vm_memcpy_from_physical((void*)buffer,
436 				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
437 				bytes, IS_USER_ADDRESS(buffer));
438 
439 			buffer += bytes;
440 			bufferSize -= bytes;
441 			pageOffset = 0;
442 		}
443 	}
444 
445 	reserve_pages(ref, reservation, reservePages, false);
446 	cache->Lock();
447 
448 	// make the pages accessible in the cache
449 	for (int32 i = pageIndex; i-- > 0;) {
450 		DEBUG_PAGE_ACCESS_END(pages[i]);
451 
452 		cache->MarkPageUnbusy(pages[i]);
453 	}
454 
455 	return B_OK;
456 }
457 
458 
459 static status_t
460 read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
461 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
462 	vm_page_reservation* reservation, size_t reservePages)
463 {
464 	TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
465 		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));
466 
467 	if (!useBuffer)
468 		return B_OK;
469 
470 	generic_io_vec vec;
471 	vec.base = buffer;
472 	vec.length = bufferSize;
473 
474 	push_access(ref, offset, bufferSize, false);
475 	ref->cache->Unlock();
476 	vm_page_unreserve_pages(reservation);
477 
478 	generic_size_t toRead = bufferSize;
479 	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
480 		&vec, 1, 0, &toRead);
481 
482 	if (status == B_OK)
483 		reserve_pages(ref, reservation, reservePages, false);
484 
485 	ref->cache->Lock();
486 
487 	return status;
488 }
489 
490 
491 /*!	Like read_into_cache() but writes data into the cache.
492 	To preserve data consistency, it might also read pages into the cache,
493 	though, if only a partial page gets written.
494 	The same restrictions apply.
495 */
496 static status_t
497 write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
498 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
499 	vm_page_reservation* reservation, size_t reservePages)
500 {
501 	// TODO: We're using way too much stack! Rather allocate a sufficiently
502 	// large chunk on the heap.
503 	generic_io_vec vecs[MAX_IO_VECS];
504 	uint32 vecCount = 0;
505 	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
506 	vm_page* pages[MAX_IO_VECS];
507 	int32 pageIndex = 0;
508 	status_t status = B_OK;
509 
510 	// ToDo: this should be settable somewhere
511 	bool writeThrough = false;
512 
513 	// allocate pages for the cache and mark them busy
514 	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
515 		// TODO: if space is becoming tight, and this cache is already grown
516 		//	big - shouldn't we better steal the pages directly in that case?
517 		//	(a working set like approach for the file cache)
518 		// TODO: the pages we allocate here should have been reserved upfront
519 		//	in cache_io()
520 		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
521 			reservation,
522 			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
523 				| VM_PAGE_ALLOC_BUSY);
524 
525 		page->modified = !writeThrough;
526 
527 		ref->cache->InsertPage(page, offset + pos);
528 
529 		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
530 			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
531 	}
532 
533 	push_access(ref, offset, bufferSize, true);
534 	ref->cache->Unlock();
535 	vm_page_unreserve_pages(reservation);
536 
537 	// copy contents (and read in partially written pages first)
538 
539 	if (pageOffset != 0) {
540 		// This is only a partial write, so we have to read the rest of the page
541 		// from the file to have consistent data in the cache
542 		generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE };
543 		generic_size_t bytesRead = B_PAGE_SIZE;
544 
545 		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
546 			B_PHYSICAL_IO_REQUEST, &bytesRead);
547 		// ToDo: handle errors for real!
548 		if (status < B_OK)
549 			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
550 	}
551 
552 	size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE;
553 	if (lastPageOffset != 0) {
554 		// get the last page in the I/O vectors
555 		generic_addr_t last = vecs[vecCount - 1].base
556 			+ vecs[vecCount - 1].length - B_PAGE_SIZE;
557 
558 		if ((off_t)(offset + pageOffset + bufferSize) == ref->cache->virtual_end) {
559 			// the space in the page after this write action needs to be cleaned
560 			vm_memset_physical(last + lastPageOffset, 0,
561 				B_PAGE_SIZE - lastPageOffset);
562 		} else {
563 			// the end of this write does not happen on a page boundary, so we
564 			// need to fetch the last page before we can update it
565 			generic_io_vec readVec = { last, B_PAGE_SIZE };
566 			generic_size_t bytesRead = B_PAGE_SIZE;
567 
568 			status = vfs_read_pages(ref->vnode, cookie,
569 				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
570 				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
571 			// ToDo: handle errors for real!
572 			if (status < B_OK)
573 				panic("vfs_read_pages() failed: %s!\n", strerror(status));
574 
575 			if (bytesRead < B_PAGE_SIZE) {
576 				// the space beyond the file size needs to be cleaned
577 				vm_memset_physical(last + bytesRead, 0,
578 					B_PAGE_SIZE - bytesRead);
579 			}
580 		}
581 	}
582 
583 	for (uint32 i = 0; i < vecCount; i++) {
584 		generic_addr_t base = vecs[i].base;
585 		generic_size_t bytes = min_c((generic_size_t)bufferSize,
586 			generic_size_t(vecs[i].length - pageOffset));
587 
588 		if (useBuffer) {
589 			// copy data from user buffer
590 			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
591 				IS_USER_ADDRESS(buffer));
592 		} else {
593 			// clear buffer instead
594 			vm_memset_physical(base + pageOffset, 0, bytes);
595 		}
596 
597 		bufferSize -= bytes;
598 		if (bufferSize == 0)
599 			break;
600 
601 		buffer += bytes;
602 		pageOffset = 0;
603 	}
604 
605 	if (writeThrough) {
606 		// write cached pages back to the file if we were asked to do that
607 		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
608 			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
609 		if (status < B_OK) {
610 			// ToDo: remove allocated pages, ...?
611 			panic("file_cache: remove allocated pages! write pages failed: %s\n",
612 				strerror(status));
613 		}
614 	}
615 
616 	if (status == B_OK)
617 		reserve_pages(ref, reservation, reservePages, true);
618 
619 	ref->cache->Lock();
620 
621 	// make the pages accessible in the cache
622 	for (int32 i = pageIndex; i-- > 0;) {
623 		ref->cache->MarkPageUnbusy(pages[i]);
624 
625 		DEBUG_PAGE_ACCESS_END(pages[i]);
626 	}
627 
628 	return status;
629 }
630 
631 
632 static status_t
633 write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
634 	addr_t buffer, size_t bufferSize, bool useBuffer,
635 	vm_page_reservation* reservation, size_t reservePages)
636 {
637 	push_access(ref, offset, bufferSize, true);
638 	ref->cache->Unlock();
639 	vm_page_unreserve_pages(reservation);
640 
641 	status_t status = B_OK;
642 
643 	if (!useBuffer) {
644 		while (bufferSize > 0) {
645 			generic_size_t written = min_c(bufferSize, kZeroVecSize);
646 			status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
647 				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
648 			if (status != B_OK)
649 				return status;
650 			if (written == 0)
651 				return B_ERROR;
652 
653 			bufferSize -= written;
654 			pageOffset += written;
655 		}
656 	} else {
657 		generic_io_vec vec;
658 		vec.base = buffer;
659 		vec.length = bufferSize;
660 		generic_size_t toWrite = bufferSize;
661 		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
662 			&vec, 1, 0, &toWrite);
663 	}
664 
665 	if (status == B_OK)
666 		reserve_pages(ref, reservation, reservePages, true);
667 
668 	ref->cache->Lock();
669 
670 	return status;
671 }
672 
673 
674 static inline status_t
675 satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function,
676 	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
677 	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
678 	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
679 	size_t &lastReservedPages, vm_page_reservation* reservation)
680 {
681 	if (lastBuffer == buffer)
682 		return B_OK;
683 
684 	size_t requestSize = buffer - lastBuffer;
685 	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
686 		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
687 
688 	status_t status = function(ref, cookie, lastOffset, lastPageOffset,
689 		lastBuffer, requestSize, useBuffer, reservation, reservePages);
690 	if (status == B_OK) {
691 		lastReservedPages = reservePages;
692 		lastBuffer = buffer;
693 		lastLeft = bytesLeft;
694 		lastOffset = offset;
695 		lastPageOffset = 0;
696 		pageOffset = 0;
697 	}
698 	return status;
699 }
700 
701 
702 static status_t
703 cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
704 	size_t* _size, bool doWrite)
705 {
706 	if (_cacheRef == NULL)
707 		panic("cache_io() called with NULL ref!\n");
708 
709 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
710 	VMCache* cache = ref->cache;
711 	off_t fileSize = cache->virtual_end;
712 	bool useBuffer = buffer != 0;
713 
714 	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
715 		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));
716 
717 	// out of bounds access?
718 	if (offset >= fileSize || offset < 0) {
719 		*_size = 0;
720 		return B_OK;
721 	}
722 
723 	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
724 	size_t size = *_size;
725 	offset -= pageOffset;
726 
727 	if ((off_t)(offset + pageOffset + size) > fileSize) {
728 		// adapt size to be within the file's offsets
729 		size = fileSize - pageOffset - offset;
730 		*_size = size;
731 	}
732 	if (size == 0)
733 		return B_OK;
734 
735 	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
736 	// the "last*" variables always point to the end of the last
737 	// satisfied request part
738 
739 	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
740 	size_t bytesLeft = size, lastLeft = size;
741 	int32 lastPageOffset = pageOffset;
742 	addr_t lastBuffer = buffer;
743 	off_t lastOffset = offset;
744 	size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft
745 		+ B_PAGE_SIZE - 1) >> PAGE_SHIFT);
746 	size_t reservePages = 0;
747 	size_t pagesProcessed = 0;
748 	cache_func function = NULL;
749 
750 	vm_page_reservation reservation;
751 	reserve_pages(ref, &reservation, lastReservedPages, doWrite);
752 
753 	AutoLocker<VMCache> locker(cache);
754 
755 	while (bytesLeft > 0) {
756 		// Periodically reevaluate the low memory situation and select the
757 		// read/write hook accordingly
758 		if (pagesProcessed % 32 == 0) {
759 			if (size >= BYPASS_IO_SIZE
760 				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
761 					!= B_NO_LOW_RESOURCE) {
762 				// In low memory situations we bypass the cache beyond a
763 				// certain I/O size.
764 				function = doWrite ? write_to_file : read_from_file;
765 			} else
766 				function = doWrite ? write_to_cache : read_into_cache;
767 		}
768 
769 		// check if this page is already in memory
770 		vm_page* page = cache->LookupPage(offset);
771 		if (page != NULL) {
772 			// The page may be busy - since we need to unlock the cache sometime
773 			// in the near future, we need to satisfy the request of the pages
774 			// we didn't get yet (to make sure no one else interferes in the
775 			// meantime).
776 			status_t status = satisfy_cache_io(ref, cookie, function, offset,
777 				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
778 				lastOffset, lastBuffer, lastPageOffset, lastLeft,
779 				lastReservedPages, &reservation);
780 			if (status != B_OK)
781 				return status;
782 
783 			// Since satisfy_cache_io() unlocks the cache, we need to look up
784 			// the page again.
785 			page = cache->LookupPage(offset);
786 			if (page != NULL && page->busy) {
787 				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
788 				continue;
789 			}
790 		}
791 
792 		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);
793 
794 		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset "
795 			"= %lu\n", offset, page, bytesLeft, pageOffset));
796 
797 		if (page != NULL) {
798 			if (doWrite || useBuffer) {
799 				// Since the following user_mem{cpy,set}() might cause a page
800 				// fault, which in turn might cause pages to be reserved, we
801 				// need to unlock the cache temporarily to avoid a potential
802 				// deadlock. To make sure that our page doesn't go away, we mark
803 				// it busy for the time.
804 				page->busy = true;
805 				locker.Unlock();
806 
807 				// copy the contents of the page already in memory
808 				phys_addr_t pageAddress
809 					= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
810 						+ pageOffset;
811 				bool userBuffer = IS_USER_ADDRESS(buffer);
812 				if (doWrite) {
813 					if (useBuffer) {
814 						vm_memcpy_to_physical(pageAddress, (void*)buffer,
815 							bytesInPage, userBuffer);
816 					} else {
817 						vm_memset_physical(pageAddress, 0, bytesInPage);
818 					}
819 				} else if (useBuffer) {
820 					vm_memcpy_from_physical((void*)buffer, pageAddress,
821 						bytesInPage, userBuffer);
822 				}
823 
824 				locker.Lock();
825 
826 				if (doWrite) {
827 					DEBUG_PAGE_ACCESS_START(page);
828 
829 					page->modified = true;
830 
831 					if (page->State() != PAGE_STATE_MODIFIED)
832 						vm_page_set_state(page, PAGE_STATE_MODIFIED);
833 
834 					DEBUG_PAGE_ACCESS_END(page);
835 				}
836 
837 				cache->MarkPageUnbusy(page);
838 			}
839 
840 			// If it is cached only, requeue the page, so the respective queue
841 			// roughly remains LRU first sorted.
842 			if (page->State() == PAGE_STATE_CACHED
843 					|| page->State() == PAGE_STATE_MODIFIED) {
844 				DEBUG_PAGE_ACCESS_START(page);
845 				vm_page_requeue(page, true);
846 				DEBUG_PAGE_ACCESS_END(page);
847 			}
848 
849 			if (bytesLeft <= bytesInPage) {
850 				// we've read the last page, so we're done!
851 				locker.Unlock();
852 				vm_page_unreserve_pages(&reservation);
853 				return B_OK;
854 			}
855 
856 			// prepare a potential gap request
857 			lastBuffer = buffer + bytesInPage;
858 			lastLeft = bytesLeft - bytesInPage;
859 			lastOffset = offset + B_PAGE_SIZE;
860 			lastPageOffset = 0;
861 		}
862 
863 		if (bytesLeft <= bytesInPage)
864 			break;
865 
866 		buffer += bytesInPage;
867 		bytesLeft -= bytesInPage;
868 		pageOffset = 0;
869 		offset += B_PAGE_SIZE;
870 		pagesProcessed++;
871 
872 		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
873 			status_t status = satisfy_cache_io(ref, cookie, function, offset,
874 				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
875 				lastOffset, lastBuffer, lastPageOffset, lastLeft,
876 				lastReservedPages, &reservation);
877 			if (status != B_OK)
878 				return status;
879 		}
880 	}
881 
882 	// fill the last remaining bytes of the request (either write or read)
883 
884 	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
885 		lastLeft, useBuffer, &reservation, 0);
886 }
887 
888 
889 static status_t
890 file_cache_control(const char* subsystem, uint32 function, void* buffer,
891 	size_t bufferSize)
892 {
893 	switch (function) {
894 		case CACHE_CLEAR:
895 			// ToDo: clear the cache
896 			dprintf("cache_control: clear cache!\n");
897 			return B_OK;
898 
899 		case CACHE_SET_MODULE:
900 		{
901 			cache_module_info* module = sCacheModule;
902 
903 			// unset previous module
904 
905 			if (sCacheModule != NULL) {
906 				sCacheModule = NULL;
907 				snooze(100000);	// 0.1 secs
908 				put_module(module->info.name);
909 			}
910 
911 			// get new module, if any
912 
913 			if (buffer == NULL)
914 				return B_OK;
915 
916 			char name[B_FILE_NAME_LENGTH];
917 			if (!IS_USER_ADDRESS(buffer)
918 				|| user_strlcpy(name, (char*)buffer,
919 						B_FILE_NAME_LENGTH) < B_OK)
920 				return B_BAD_ADDRESS;
921 
922 			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
923 				return B_BAD_VALUE;
924 
925 			dprintf("cache_control: set module %s!\n", name);
926 
927 			status_t status = get_module(name, (module_info**)&module);
928 			if (status == B_OK)
929 				sCacheModule = module;
930 
931 			return status;
932 		}
933 	}
934 
935 	return B_BAD_HANDLER;
936 }
937 
938 
939 //	#pragma mark - private kernel API
940 
941 
942 extern "C" void
943 cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size)
944 {
945 	if (size == 0)
946 		return;
947 
948 	VMCache* cache;
949 	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
950 		return;
951 
952 	file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
953 	off_t fileSize = cache->virtual_end;
954 
955 	if ((off_t)(offset + size) > fileSize)
956 		size = fileSize - offset;
957 
958 	// "offset" and "size" are always aligned to B_PAGE_SIZE,
959 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
960 	size = ROUNDUP(size, B_PAGE_SIZE);
961 
962 	size_t reservePages = size / B_PAGE_SIZE;
963 
964 	// Don't do anything if we don't have the resources left, or the cache
965 	// already contains more than 2/3 of its pages
966 	if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages
967 		|| 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) {
968 		cache->ReleaseRef();
969 		return;
970 	}
971 
972 	size_t bytesToRead = 0;
973 	off_t lastOffset = offset;
974 
975 	vm_page_reservation reservation;
976 	vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER);
977 
978 	cache->Lock();
979 
980 	while (true) {
981 		// check if this page is already in memory
982 		if (size > 0) {
983 			vm_page* page = cache->LookupPage(offset);
984 
985 			offset += B_PAGE_SIZE;
986 			size -= B_PAGE_SIZE;
987 
988 			if (page == NULL) {
989 				bytesToRead += B_PAGE_SIZE;
990 				continue;
991 			}
992 		}
993 		if (bytesToRead != 0) {
994 			// read the part before the current page (or the end of the request)
995 			PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset,
996 				bytesToRead);
997 			if (io == NULL || io->Prepare(&reservation) != B_OK) {
998 				delete io;
999 				break;
1000 			}
1001 
1002 			// we must not have the cache locked during I/O
1003 			cache->Unlock();
1004 			io->ReadAsync();
1005 			cache->Lock();
1006 
1007 			bytesToRead = 0;
1008 		}
1009 
1010 		if (size == 0) {
1011 			// we have reached the end of the request
1012 			break;
1013 		}
1014 
1015 		lastOffset = offset;
1016 	}
1017 
1018 	cache->ReleaseRefAndUnlock();
1019 	vm_page_unreserve_pages(&reservation);
1020 }
1021 
1022 
1023 extern "C" void
1024 cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
1025 {
1026 	// ToDo: schedule prefetch
1027 
1028 	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));
1029 
1030 	// get the vnode for the object, this also grabs a ref to it
1031 	struct vnode* vnode;
1032 	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
1033 		return;
1034 
1035 	cache_prefetch_vnode(vnode, offset, size);
1036 	vfs_put_vnode(vnode);
1037 }
1038 
1039 
1040 extern "C" void
1041 cache_node_opened(struct vnode* vnode, int32 fdType, VMCache* cache,
1042 	dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name)
1043 {
1044 	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
1045 		return;
1046 
1047 	off_t size = -1;
1048 	if (cache != NULL) {
1049 		file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
1050 		if (ref != NULL)
1051 			size = cache->virtual_end;
1052 	}
1053 
1054 	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name,
1055 		size);
1056 }
1057 
1058 
1059 extern "C" void
1060 cache_node_closed(struct vnode* vnode, int32 fdType, VMCache* cache,
1061 	dev_t mountID, ino_t vnodeID)
1062 {
1063 	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
1064 		return;
1065 
1066 	int32 accessType = 0;
1067 	if (cache != NULL) {
1068 		// ToDo: set accessType
1069 	}
1070 
1071 	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
1072 }
1073 
1074 
1075 extern "C" void
1076 cache_node_launched(size_t argCount, char*  const* args)
1077 {
1078 	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
1079 		return;
1080 
1081 	sCacheModule->node_launched(argCount, args);
1082 }
1083 
1084 
1085 extern "C" status_t
1086 file_cache_init_post_boot_device(void)
1087 {
1088 	// ToDo: get cache module out of driver settings
1089 
1090 	if (get_module("file_cache/launch_speedup/v1",
1091 			(module_info**)&sCacheModule) == B_OK) {
1092 		dprintf("** opened launch speedup: %" B_PRId64 "\n", system_time());
1093 	}
1094 	return B_OK;
1095 }
1096 
1097 
1098 extern "C" status_t
1099 file_cache_init(void)
1100 {
1101 	// allocate a clean page we can use for writing zeroes
1102 	vm_page_reservation reservation;
1103 	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
1104 	vm_page* page = vm_page_allocate_page(&reservation,
1105 		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
1106 	vm_page_unreserve_pages(&reservation);
1107 
1108 	sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1109 
1110 	for (uint32 i = 0; i < kZeroVecCount; i++) {
1111 		sZeroVecs[i].base = sZeroPage;
1112 		sZeroVecs[i].length = B_PAGE_SIZE;
1113 	}
1114 
1115 	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
1116 	return B_OK;
1117 }
1118 
1119 
1120 //	#pragma mark - public FS API
1121 
1122 
1123 extern "C" void*
1124 file_cache_create(dev_t mountID, ino_t vnodeID, off_t size)
1125 {
1126 	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld)\n",
1127 		mountID, vnodeID, size));
1128 
1129 	file_cache_ref* ref = new file_cache_ref;
1130 	if (ref == NULL)
1131 		return NULL;
1132 
1133 	memset(ref->last_access, 0, sizeof(ref->last_access));
1134 	ref->last_access_index = 0;
1135 	ref->disabled_count = 0;
1136 
1137 	// TODO: delay VMCache creation until data is
1138 	//	requested/written for the first time? Listing lots of
1139 	//	files in Tracker (and elsewhere) could be slowed down.
1140 	//	Since the file_cache_ref itself doesn't have a lock,
1141 	//	we would need to "rent" one during construction, possibly
1142 	//	the vnode lock, maybe a dedicated one.
1143 	//	As there shouldn't be too much contention, we could also
1144 	//	use atomic_test_and_set(), and free the resources again
1145 	//	when that fails...
1146 
1147 	// Get the vnode for the object
1148 	// (note, this does not grab a reference to the node)
1149 	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
1150 		goto err1;
1151 
1152 	// Gets (usually creates) the cache for the node
1153 	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
1154 		goto err1;
1155 
1156 	ref->cache->virtual_end = size;
1157 	((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref);
1158 	return ref;
1159 
1160 err1:
1161 	delete ref;
1162 	return NULL;
1163 }
1164 
1165 
1166 extern "C" void
1167 file_cache_delete(void* _cacheRef)
1168 {
1169 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1170 
1171 	if (ref == NULL)
1172 		return;
1173 
1174 	TRACE(("file_cache_delete(ref = %p)\n", ref));
1175 
1176 	ref->cache->ReleaseRef();
1177 	delete ref;
1178 }
1179 
1180 
1181 extern "C" void
1182 file_cache_enable(void* _cacheRef)
1183 {
1184 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1185 
1186 	AutoLocker<VMCache> _(ref->cache);
1187 
1188 	if (ref->disabled_count == 0) {
1189 		panic("Unbalanced file_cache_enable()!");
1190 		return;
1191 	}
1192 
1193 	ref->disabled_count--;
1194 }
1195 
1196 
1197 extern "C" status_t
1198 file_cache_disable(void* _cacheRef)
1199 {
1200 	// TODO: This function only removes all pages from the cache and prevents
1201 	// that the file cache functions add any new ones until re-enabled. The
1202 	// VM (on page fault) can still add pages, if the file is mmap()ed. We
1203 	// should mark the cache to prevent shared mappings of the file and fix
1204 	// the page fault code to deal correctly with private mappings (i.e. only
1205 	// insert pages in consumer caches).
1206 
1207 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1208 
1209 	AutoLocker<VMCache> _(ref->cache);
1210 
1211 	// If already disabled, there's nothing to do for us.
1212 	if (ref->disabled_count > 0) {
1213 		ref->disabled_count++;
1214 		return B_OK;
1215 	}
1216 
1217 	// The file cache is not yet disabled. We need to evict all cached pages.
1218 	status_t error = ref->cache->FlushAndRemoveAllPages();
1219 	if (error != B_OK)
1220 		return error;
1221 
1222 	ref->disabled_count++;
1223 	return B_OK;
1224 }
1225 
1226 
1227 extern "C" bool
1228 file_cache_is_enabled(void* _cacheRef)
1229 {
1230 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1231 	AutoLocker<VMCache> _(ref->cache);
1232 
1233 	return ref->disabled_count == 0;
1234 }
1235 
1236 
1237 extern "C" status_t
1238 file_cache_set_size(void* _cacheRef, off_t newSize)
1239 {
1240 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1241 
1242 	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, newSize));
1243 
1244 	if (ref == NULL)
1245 		return B_OK;
1246 
1247 	VMCache* cache = ref->cache;
1248 	AutoLocker<VMCache> _(cache);
1249 
1250 	off_t oldSize = cache->virtual_end;
1251 	status_t status = cache->Resize(newSize, VM_PRIORITY_USER);
1252 		// Note, the priority doesn't really matter, since this cache doesn't
1253 		// reserve any memory.
1254 	if (status == B_OK && newSize < oldSize) {
1255 		// We may have a new partial page at the end of the cache that must be
1256 		// cleared.
1257 		uint32 partialBytes = newSize % B_PAGE_SIZE;
1258 		if (partialBytes != 0) {
1259 			vm_page* page = cache->LookupPage(newSize - partialBytes);
1260 			if (page != NULL) {
1261 				vm_memset_physical(page->physical_page_number * B_PAGE_SIZE
1262 					+ partialBytes, 0, B_PAGE_SIZE - partialBytes);
1263 			}
1264 		}
1265 	}
1266 
1267 	return status;
1268 }
1269 
1270 
1271 extern "C" status_t
1272 file_cache_sync(void* _cacheRef)
1273 {
1274 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1275 	if (ref == NULL)
1276 		return B_BAD_VALUE;
1277 
1278 	return ref->cache->WriteModified();
1279 }
1280 
1281 
1282 extern "C" status_t
1283 file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
1284 	size_t* _size)
1285 {
1286 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1287 
1288 	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
1289 		ref, offset, buffer, *_size));
1290 
1291 	if (ref->disabled_count > 0) {
1292 		// Caching is disabled -- read directly from the file.
1293 		generic_io_vec vec;
1294 		vec.base = (addr_t)buffer;
1295 		generic_size_t size = vec.length = *_size;
1296 		status_t error = vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0,
1297 			&size);
1298 		*_size = size;
1299 		return error;
1300 	}
1301 
1302 	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
1303 }
1304 
1305 
1306 extern "C" status_t
1307 file_cache_write(void* _cacheRef, void* cookie, off_t offset,
1308 	const void* buffer, size_t* _size)
1309 {
1310 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1311 
1312 	if (ref->disabled_count > 0) {
1313 		// Caching is disabled -- write directly to the file.
1314 
1315 		if (buffer != NULL) {
1316 			generic_io_vec vec;
1317 			vec.base = (addr_t)buffer;
1318 			generic_size_t size = vec.length = *_size;
1319 
1320 			status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec,
1321 				1, 0, &size);
1322 			*_size = size;
1323 			return error;
1324 		}
1325 
1326 		// NULL buffer -- use a dummy buffer to write zeroes
1327 		size_t size = *_size;
1328 		while (size > 0) {
1329 			size_t toWrite = min_c(size, kZeroVecSize);
1330 			generic_size_t written = toWrite;
1331 			status_t error = vfs_write_pages(ref->vnode, cookie, offset,
1332 				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
1333 			if (error != B_OK)
1334 				return error;
1335 			if (written == 0)
1336 				break;
1337 
1338 			offset += written;
1339 			size -= written;
1340 		}
1341 
1342 		*_size -= size;
1343 		return B_OK;
1344 	}
1345 
1346 	status_t status = cache_io(ref, cookie, offset,
1347 		(addr_t)const_cast<void*>(buffer), _size, true);
1348 
1349 	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu)"
1350 		" = %ld\n", ref, offset, buffer, *_size, status));
1351 
1352 	return status;
1353 }
1354