xref: /haiku/src/system/kernel/cache/file_cache.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "vnode_store.h"
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 #include <KernelExport.h>
14 #include <fs_cache.h>
15 
16 #include <condition_variable.h>
17 #include <file_cache.h>
18 #include <generic_syscall.h>
19 #include <low_resource_manager.h>
20 #include <thread.h>
21 #include <util/AutoLock.h>
22 #include <util/kernel_cpp.h>
23 #include <vfs.h>
24 #include <vm/vm.h>
25 #include <vm/vm_page.h>
26 #include <vm/VMCache.h>
27 
28 #include "IORequest.h"
29 
30 
31 //#define TRACE_FILE_CACHE
32 #ifdef TRACE_FILE_CACHE
33 #	define TRACE(x) dprintf x
34 #else
35 #	define TRACE(x) ;
36 #endif
37 
38 // maximum number of iovecs per request
39 #define MAX_IO_VECS			32	// 128 kB
40 
41 #define BYPASS_IO_SIZE		65536
42 #define LAST_ACCESSES		3
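// Requests of at least BYPASS_IO_SIZE bytes go directly to or from the file
// when the kernel is low on pages (see cache_io()). LAST_ACCESSES is the
// number of recent request end offsets remembered per file_cache_ref; they
// feed the sequential-access detection in access_is_sequential().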
43 
44 struct file_cache_ref {
45 	VMCache			*cache;
46 	struct vnode	*vnode;
47 	off_t			last_access[LAST_ACCESSES];
48 		// TODO: it would probably be enough to only store the least
49 		//	significant 31 bits, and make this uint32 (one bit for
50 		//	write vs. read)
51 	int32			last_access_index;
52 	uint16			disabled_count;
53 
54 	inline void SetLastAccess(int32 index, off_t access, bool isWrite)
55 	{
56 		// we remember writes as negative offsets
57 		last_access[index] = isWrite ? -access : access;
58 	}
59 
60 	inline off_t LastAccess(int32 index, bool isWrite) const
61 	{
62 		return isWrite ? -last_access[index] : last_access[index];
63 	}
64 
65 	inline uint32 LastAccessPageOffset(int32 index, bool isWrite)
66 	{
67 		return LastAccess(index, isWrite) >> PAGE_SHIFT;
68 	}
69 };
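// Worked example (assuming 4 KB pages, i.e. B_PAGE_SIZE == 4096 and
// PAGE_SHIFT == 12): SetLastAccess(0, 8192, true) stores -8192 in
// last_access[0]; LastAccess(0, true) negates it back to 8192, and
// LastAccessPageOffset(0, true) yields page 2. Probing the same slot with
// isWrite == false returns -8192, so a read never matches a remembered write
// (and vice versa).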
70 
71 class PrecacheIO : public AsyncIOCallback {
72 public:
73 								PrecacheIO(file_cache_ref* ref, off_t offset,
74 									generic_size_t size);
75 								~PrecacheIO();
76 
77 			status_t			Prepare(vm_page_reservation* reservation);
78 			void				ReadAsync();
79 
80 	virtual	void				IOFinished(status_t status,
81 									bool partialTransfer,
82 									generic_size_t bytesTransferred);
83 
84 private:
85 			file_cache_ref*		fRef;
86 			VMCache*			fCache;
87 			vm_page**			fPages;
88 			size_t				fPageCount;
89 			ConditionVariable*	fBusyConditions;
90 			generic_io_vec*		fVecs;
91 			off_t				fOffset;
92 			uint32				fVecCount;
93 			generic_size_t		fSize;
94 #if DEBUG_PAGE_ACCESS
95 			thread_id			fAllocatingThread;
96 #endif
97 };
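// A PrecacheIO instance lives for exactly one asynchronous read-ahead:
// Prepare() allocates busy pages and inserts them into the cache, ReadAsync()
// hands the request to the VFS, and IOFinished() unbusies (or discards) the
// pages and then deletes the object.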
98 
99 typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
100 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
101 	vm_page_reservation* reservation, size_t reservePages);
102 
103 static void add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
104 	generic_addr_t address, generic_size_t size);
105 
106 
107 static struct cache_module_info* sCacheModule;
108 
109 
110 static const uint32 kZeroVecCount = 32;
111 static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE;
112 static phys_addr_t sZeroPage;
113 static generic_io_vec sZeroVecs[kZeroVecCount];
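// sZeroPage is a single zero-filled physical page (set up in
// file_cache_init()); sZeroVecs references it kZeroVecCount times, so that
// write_zeros_to_file() can emit up to kZeroVecSize bytes of zeroes with a
// single vfs_write_pages() call.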
114 
115 
116 //	#pragma mark -
117 
118 
119 PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, generic_size_t size)
120 	:
121 	fRef(ref),
122 	fCache(ref->cache),
123 	fPages(NULL),
124 	fVecs(NULL),
125 	fOffset(offset),
126 	fVecCount(0),
127 	fSize(size)
128 {
129 	fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
130 	fCache->AcquireRefLocked();
131 }
132 
133 
134 PrecacheIO::~PrecacheIO()
135 {
136 	delete[] fPages;
137 	delete[] fVecs;
138 	fCache->ReleaseRefLocked();
139 }
140 
141 
142 status_t
143 PrecacheIO::Prepare(vm_page_reservation* reservation)
144 {
145 	if (fPageCount == 0)
146 		return B_BAD_VALUE;
147 
148 	fPages = new(std::nothrow) vm_page*[fPageCount];
149 	if (fPages == NULL)
150 		return B_NO_MEMORY;
151 
152 	fVecs = new(std::nothrow) generic_io_vec[fPageCount];
153 	if (fVecs == NULL)
154 		return B_NO_MEMORY;
155 
156 	// allocate pages for the cache and mark them busy
157 	uint32 i = 0;
158 	for (generic_size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) {
159 		vm_page* page = vm_page_allocate_page(reservation,
160 			PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);
161 
162 		fCache->InsertPage(page, fOffset + pos);
163 
164 		add_to_iovec(fVecs, fVecCount, fPageCount,
165 			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
166 		fPages[i++] = page;
167 	}
168 
169 #if DEBUG_PAGE_ACCESS
170 	fAllocatingThread = find_thread(NULL);
171 #endif
172 
173 	return B_OK;
174 }
175 
176 
177 void
178 PrecacheIO::ReadAsync()
179 {
180 	// This object is going to be deleted after the I/O request has been
181 	// fulfilled
182 	vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount,
183 		fSize, B_PHYSICAL_IO_REQUEST, this);
184 }
185 
186 
187 void
188 PrecacheIO::IOFinished(status_t status, bool partialTransfer,
189 	generic_size_t bytesTransferred)
190 {
191 	AutoLocker<VMCache> locker(fCache);
192 
193 	// Make successfully loaded pages accessible again (partially
194 	// transferred pages are considered failed)
195 	phys_size_t pagesTransferred
196 		= (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
197 
198 	if (fOffset + (off_t)bytesTransferred > fCache->virtual_end)
199 		bytesTransferred = fCache->virtual_end - fOffset;
200 
201 	for (uint32 i = 0; i < pagesTransferred; i++) {
202 		if (i == pagesTransferred - 1
203 			&& (bytesTransferred % B_PAGE_SIZE) != 0) {
204 			// clear partial page
205 			size_t bytesTouched = bytesTransferred % B_PAGE_SIZE;
206 			vm_memset_physical(
207 				((phys_addr_t)fPages[i]->physical_page_number << PAGE_SHIFT)
208 					+ bytesTouched,
209 				0, B_PAGE_SIZE - bytesTouched);
210 		}
211 
212 		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
213 
214 		fCache->MarkPageUnbusy(fPages[i]);
215 
216 		DEBUG_PAGE_ACCESS_END(fPages[i]);
217 	}
218 
219 	// Free pages after failed I/O
220 	for (uint32 i = pagesTransferred; i < fPageCount; i++) {
221 		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
222 		fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY);
223 		fCache->RemovePage(fPages[i]);
224 		vm_page_set_state(fPages[i], PAGE_STATE_FREE);
225 	}
226 
227 	delete this;
228 }
229 
230 
231 //	#pragma mark -
232 
233 
234 static void
235 add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
236 	generic_addr_t address, generic_size_t size)
237 {
238 	if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) {
239 		// the iovec can be combined with the previous one
240 		vecs[index - 1].length += size;
241 		return;
242 	}
243 
244 	if (index == max)
245 		panic("no more space for iovecs!");
246 
247 	// we need to start a new iovec
248 	vecs[index].base = address;
249 	vecs[index].length = size;
250 	index++;
251 }
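// Example (with 4 KB pages): if vecs[index - 1] describes the physical range
// [0x1000, 0x2000) and the next page happens to start at 0x2000, the existing
// vec simply grows to cover [0x1000, 0x3000) instead of occupying a second
// slot.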
252 
253 
254 static inline bool
255 access_is_sequential(file_cache_ref* ref)
256 {
257 	return ref->last_access[ref->last_access_index] != 0;
258 }
259 
260 
261 static inline void
262 push_access(file_cache_ref* ref, off_t offset, generic_size_t bytes,
263 	bool isWrite)
264 {
265 	TRACE(("%p: push %lld, %ld, %s\n", ref, offset, bytes,
266 		isWrite ? "write" : "read"));
267 
268 	int32 index = ref->last_access_index;
269 	int32 previous = index - 1;
270 	if (previous < 0)
271 		previous = LAST_ACCESSES - 1;
272 
273 	if (offset != ref->LastAccess(previous, isWrite))
274 		ref->last_access[previous] = 0;
275 
276 	ref->SetLastAccess(index, offset + bytes, isWrite);
277 
278 	if (++index >= LAST_ACCESSES)
279 		index = 0;
280 	ref->last_access_index = index;
281 }
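// The last_access array acts as a small ring buffer of request end offsets:
// push_access() records where the current request ended and zeroes the
// previous slot whenever the request did not start where the previous one of
// the same kind (read vs. write) ended. access_is_sequential() checks the
// slot that is next in line to be overwritten, so it only stays set while
// recent requests keep continuing one another; this serves as a cheap
// sequential-access heuristic.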
282 
283 
284 static void
285 reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation,
286 	size_t reservePages, bool isWrite)
287 {
288 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
289 		VMCache* cache = ref->cache;
290 		cache->Lock();
291 
292 		if (cache->consumers.IsEmpty() && cache->areas == NULL
293 			&& access_is_sequential(ref)) {
294 			// the cache is not mapped and is accessed sequentially
295 
296 			if (isWrite) {
297 				// Just write some pages back, and actually wait until they
298 				// have been written back in order to relieve the page pressure
299 				// a bit.
300 				int32 index = ref->last_access_index;
301 				int32 previous = index - 1;
302 				if (previous < 0)
303 					previous = LAST_ACCESSES - 1;
304 
305 				vm_page_write_modified_page_range(cache,
306 					ref->LastAccessPageOffset(previous, true),
307 					ref->LastAccessPageOffset(index, true));
308 			} else {
309 				// free some pages from our cache
310 				// TODO: start with oldest
311 				uint32 left = reservePages;
312 				vm_page* page;
313 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
314 						(page = it.Next()) != NULL && left > 0;) {
315 					if (page->State() == PAGE_STATE_CACHED && !page->busy) {
316 						DEBUG_PAGE_ACCESS_START(page);
317 						ASSERT(!page->IsMapped());
318 						ASSERT(!page->modified);
319 						cache->RemovePage(page);
320 						vm_page_set_state(page, PAGE_STATE_FREE);
321 						left--;
322 					}
323 				}
324 			}
325 		}
326 		cache->Unlock();
327 	}
328 
329 	vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER);
330 }
331 
332 
333 static inline status_t
334 read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
335 	const generic_io_vec* vecs, size_t count, uint32 flags,
336 	generic_size_t* _numBytes)
337 {
338 	generic_size_t bytesUntouched = *_numBytes;
339 
340 	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
341 		flags, _numBytes);
342 
343 	generic_size_t bytesEnd = *_numBytes;
344 
345 	if (offset + (off_t)bytesEnd > ref->cache->virtual_end)
346 		bytesEnd = ref->cache->virtual_end - offset;
347 
348 	if (status == B_OK && bytesEnd < bytesUntouched) {
349 		// Clear out any leftovers that were not touched by the above read.
350 		// We're doing this here so that not every file system/device has to
351 		// implement this.
352 		bytesUntouched -= bytesEnd;
353 
354 		for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) {
355 			generic_size_t length = min_c(bytesUntouched, vecs[i].length);
356 			vm_memset_physical(vecs[i].base + vecs[i].length - length, 0,
357 				length);
358 
359 			bytesUntouched -= length;
360 		}
361 	}
362 
363 	return status;
364 }
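// Illustrative example (assuming 4 KB pages): if 16 KB were requested
// starting at offset 0 but the file is only 9 KB long, vfs_read_pages()
// delivers 9 KB and the remaining 7 KB (the tail of the third page plus the
// whole fourth page) are zeroed here, so the cache never exposes stale data
// past the end of the file.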
365 
366 
367 /*!	Reads the requested amount of data into the cache, and allocates the
368 	pages needed to fulfill that request. This function is called by cache_io().
369 	It can only handle a limited number of bytes per call (at most MAX_IO_VECS
370 	pages), and the caller must make sure the request stays within that limit.
371 	The cache_ref lock must be held when calling this function; it will be
372 	unlocked temporarily during the operation, though.
373 */
374 static status_t
375 read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
376 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
377 	vm_page_reservation* reservation, size_t reservePages)
378 {
379 	TRACE(("read_into_cache(offset = %lld, pageOffset = %ld, buffer = %#lx, "
380 		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));
381 
382 	VMCache* cache = ref->cache;
383 
384 	// TODO: We're using way too much stack! Rather allocate a sufficiently
385 	// large chunk on the heap.
386 	generic_io_vec vecs[MAX_IO_VECS];
387 	uint32 vecCount = 0;
388 
389 	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
390 	vm_page* pages[MAX_IO_VECS];
391 	int32 pageIndex = 0;
392 
393 	// allocate pages for the cache and mark them busy
394 	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
395 		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
396 			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);
397 
398 		cache->InsertPage(page, offset + pos);
399 
400 		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
401 			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
402 			// TODO: check if the array is large enough (currently panics)!
403 	}
404 
405 	push_access(ref, offset, bufferSize, false);
406 	cache->Unlock();
407 	vm_page_unreserve_pages(reservation);
408 
409 	// read file into reserved pages
410 	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
411 		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
412 	if (status != B_OK) {
413 		// reading failed, free allocated pages
414 
415 		dprintf("file_cache: read pages failed: %s\n", strerror(status));
416 
417 		cache->Lock();
418 
419 		for (int32 i = 0; i < pageIndex; i++) {
420 			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
421 			cache->RemovePage(pages[i]);
422 			vm_page_set_state(pages[i], PAGE_STATE_FREE);
423 		}
424 
425 		return status;
426 	}
427 
428 	// copy the page contents into the caller's buffer, if requested
429 
430 	for (int32 i = 0; i < pageIndex; i++) {
431 		if (useBuffer && bufferSize != 0) {
432 			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);
433 
434 			vm_memcpy_from_physical((void*)buffer,
435 				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
436 				bytes, IS_USER_ADDRESS(buffer));
437 
438 			buffer += bytes;
439 			bufferSize -= bytes;
440 			pageOffset = 0;
441 		}
442 	}
443 
444 	reserve_pages(ref, reservation, reservePages, false);
445 	cache->Lock();
446 
447 	// make the pages accessible in the cache
448 	for (int32 i = pageIndex; i-- > 0;) {
449 		DEBUG_PAGE_ACCESS_END(pages[i]);
450 
451 		cache->MarkPageUnbusy(pages[i]);
452 	}
453 
454 	return B_OK;
455 }
456 
457 
458 static status_t
459 read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
460 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
461 	vm_page_reservation* reservation, size_t reservePages)
462 {
463 	TRACE(("read_from_file(offset = %lld, pageOffset = %ld, buffer = %#lx, "
464 		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));
465 
466 	if (!useBuffer)
467 		return B_OK;
468 
469 	generic_io_vec vec;
470 	vec.base = buffer;
471 	vec.length = bufferSize;
472 
473 	push_access(ref, offset, bufferSize, false);
474 	ref->cache->Unlock();
475 	vm_page_unreserve_pages(reservation);
476 
477 	generic_size_t toRead = bufferSize;
478 	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
479 		&vec, 1, 0, &toRead);
480 
481 	if (status == B_OK)
482 		reserve_pages(ref, reservation, reservePages, false);
483 
484 	ref->cache->Lock();
485 
486 	return status;
487 }
488 
489 
490 /*!	Like read_into_cache(), but writes data into the cache.
491 	To preserve data consistency, it might also read pages into the cache
492 	if only a partial page gets written.
493 	The same restrictions as for read_into_cache() apply.
494 */
495 static status_t
496 write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
497 	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
498 	vm_page_reservation* reservation, size_t reservePages)
499 {
500 	// TODO: We're using way too much stack! Rather allocate a sufficiently
501 	// large chunk on the heap.
502 	generic_io_vec vecs[MAX_IO_VECS];
503 	uint32 vecCount = 0;
504 	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
505 	vm_page* pages[MAX_IO_VECS];
506 	int32 pageIndex = 0;
507 	status_t status = B_OK;
508 
509 	// ToDo: this should be settable somewhere
510 	bool writeThrough = false;
511 
512 	// allocate pages for the cache and mark them busy
513 	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
514 		// TODO: if space is becoming tight and this cache has already grown
515 		//	big, shouldn't we rather steal the pages directly in that case?
516 		//	(a working-set-like approach for the file cache)
517 		// TODO: the pages we allocate here should have been reserved upfront
518 		//	in cache_io()
519 		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
520 			reservation,
521 			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
522 				| VM_PAGE_ALLOC_BUSY);
523 
524 		page->modified = !writeThrough;
525 
526 		ref->cache->InsertPage(page, offset + pos);
527 
528 		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
529 			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
530 	}
531 
532 	push_access(ref, offset, bufferSize, true);
533 	ref->cache->Unlock();
534 	vm_page_unreserve_pages(reservation);
535 
536 	// copy contents (and read in partially written pages first)
537 
538 	if (pageOffset != 0) {
539 		// This is only a partial write, so we have to read the rest of the page
540 		// from the file to have consistent data in the cache
541 		generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE };
542 		generic_size_t bytesRead = B_PAGE_SIZE;
543 
544 		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
545 			B_PHYSICAL_IO_REQUEST, &bytesRead);
546 		// ToDo: handle errors for real!
547 		if (status < B_OK)
548 			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
549 	}
550 
551 	size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE;
552 	if (lastPageOffset != 0) {
553 		// get the last page in the I/O vectors
554 		generic_addr_t last = vecs[vecCount - 1].base
555 			+ vecs[vecCount - 1].length - B_PAGE_SIZE;
556 
557 		if ((off_t)(offset + pageOffset + bufferSize) == ref->cache->virtual_end) {
558 			// the space in the page after this write needs to be cleared
559 			vm_memset_physical(last + lastPageOffset, 0,
560 				B_PAGE_SIZE - lastPageOffset);
561 		} else {
562 			// the end of this write does not happen on a page boundary, so we
563 			// need to fetch the last page before we can update it
564 			generic_io_vec readVec = { last, B_PAGE_SIZE };
565 			generic_size_t bytesRead = B_PAGE_SIZE;
566 
567 			status = vfs_read_pages(ref->vnode, cookie,
568 				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
569 				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
570 			// ToDo: handle errors for real!
571 			if (status < B_OK)
572 				panic("vfs_read_pages() failed: %s!\n", strerror(status));
573 
574 			if (bytesRead < B_PAGE_SIZE) {
575 				// the space beyond the file size needs to be cleaned
576 				vm_memset_physical(last + bytesRead, 0,
577 					B_PAGE_SIZE - bytesRead);
578 			}
579 		}
580 	}
581 
582 	for (uint32 i = 0; i < vecCount; i++) {
583 		generic_addr_t base = vecs[i].base;
584 		generic_size_t bytes = min_c((generic_size_t)bufferSize,
585 			generic_size_t(vecs[i].length - pageOffset));
586 
587 		if (useBuffer) {
588 			// copy data from user buffer
589 			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
590 				IS_USER_ADDRESS(buffer));
591 		} else {
592 			// clear buffer instead
593 			vm_memset_physical(base + pageOffset, 0, bytes);
594 		}
595 
596 		bufferSize -= bytes;
597 		if (bufferSize == 0)
598 			break;
599 
600 		buffer += bytes;
601 		pageOffset = 0;
602 	}
603 
604 	if (writeThrough) {
605 		// write cached pages back to the file if we were asked to do that
606 		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
607 			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
608 		if (status < B_OK) {
609 			// ToDo: remove allocated pages, ...?
610 			panic("file_cache: remove allocated pages! write pages failed: %s\n",
611 				strerror(status));
612 		}
613 	}
614 
615 	if (status == B_OK)
616 		reserve_pages(ref, reservation, reservePages, true);
617 
618 	ref->cache->Lock();
619 
620 	// make the pages accessible in the cache
621 	for (int32 i = pageIndex; i-- > 0;) {
622 		ref->cache->MarkPageUnbusy(pages[i]);
623 
624 		DEBUG_PAGE_ACCESS_END(pages[i]);
625 	}
626 
627 	return status;
628 }
629 
630 
631 static status_t
632 write_zeros_to_file(struct vnode* vnode, void* cookie, off_t offset,
633 	size_t* _size)
634 {
635 	size_t size = *_size;
636 	status_t status = B_OK;
637 	while (size > 0) {
638 		generic_size_t length = min_c(size, kZeroVecSize);
639 		generic_io_vec* vecs = sZeroVecs;
640 		generic_io_vec vec;
641 		size_t count = kZeroVecCount;
642 		if (length != kZeroVecSize) {
643 			if (length > B_PAGE_SIZE) {
644 				length = ROUNDDOWN(length, B_PAGE_SIZE);
645 				count = length / B_PAGE_SIZE;
646 			} else {
647 				vec.base = sZeroPage;
648 				vec.length = length;
649 				vecs = &vec;
650 				count = 1;
651 			}
652 		}
653 
654 		status = vfs_write_pages(vnode, cookie, offset,
655 			vecs, count, B_PHYSICAL_IO_REQUEST, &length);
656 		if (status != B_OK || length == 0)
657 			break;
658 
659 		offset += length;
660 		size -= length;
661 	}
662 
663 	*_size = *_size - size;
664 	return status;
665 }
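// Illustrative example (assuming 4 KB pages): a 150 KB request is issued as
// three vfs_write_pages() calls of 128 KB (all 32 vecs of sZeroVecs), 20 KB
// (the first five vecs), and 2 KB (a single sub-page vec pointing at
// sZeroPage).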
666 
667 
668 static status_t
669 write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
670 	addr_t buffer, size_t bufferSize, bool useBuffer,
671 	vm_page_reservation* reservation, size_t reservePages)
672 {
673 	push_access(ref, offset, bufferSize, true);
674 	ref->cache->Unlock();
675 	vm_page_unreserve_pages(reservation);
676 
677 	status_t status = B_OK;
678 
679 	if (!useBuffer) {
680 		status = write_zeros_to_file(ref->vnode, cookie, offset + pageOffset,
681 			&bufferSize);
682 	} else {
683 		generic_io_vec vec;
684 		vec.base = buffer;
685 		vec.length = bufferSize;
686 		generic_size_t toWrite = bufferSize;
687 		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
688 			&vec, 1, 0, &toWrite);
689 	}
690 
691 	if (status == B_OK)
692 		reserve_pages(ref, reservation, reservePages, true);
693 
694 	ref->cache->Lock();
695 
696 	return status;
697 }
698 
699 
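/*!	Flushes the part of the request that has accumulated between "lastBuffer"
	and "buffer" through the given cache hook ("function"), so that cache_io()
	can safely unlock the cache (e.g. to wait for a busy page) without another
	thread interfering with the not-yet-handled range. On success, the "last*"
	bookkeeping variables are advanced to the current position.
*/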
700 static inline status_t
701 satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function,
702 	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
703 	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
704 	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
705 	size_t &lastReservedPages, vm_page_reservation* reservation)
706 {
707 	if (lastBuffer == buffer)
708 		return B_OK;
709 
710 	size_t requestSize = buffer - lastBuffer;
711 	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
712 		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
713 
714 	status_t status = function(ref, cookie, lastOffset, lastPageOffset,
715 		lastBuffer, requestSize, useBuffer, reservation, reservePages);
716 	if (status == B_OK) {
717 		lastReservedPages = reservePages;
718 		lastBuffer = buffer;
719 		lastLeft = bytesLeft;
720 		lastOffset = offset;
721 		lastPageOffset = 0;
722 		pageOffset = 0;
723 	}
724 	return status;
725 }
726 
727 
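/*!	Central read/write loop of the file cache. The request is walked page by
	page: ranges already present in the cache are copied directly, while gaps
	are handed to one of the hooks (read_into_cache(), write_to_cache(), or
	the cache-bypassing read_from_file()/write_to_file() in low memory
	situations) in chunks of at most MAX_IO_VECS pages. Pages for the next
	chunk are reserved before the cache is (re)locked.
*/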
728 static status_t
729 cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
730 	size_t* _size, bool doWrite)
731 {
732 	if (_cacheRef == NULL)
733 		panic("cache_io() called with NULL ref!\n");
734 
735 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
736 	VMCache* cache = ref->cache;
737 	bool useBuffer = buffer != 0;
738 
739 	TRACE(("cache_io(ref = %p, offset = %lld, buffer = %p, size = %lu, %s)\n",
740 		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));
741 
742 	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
743 	size_t size = *_size;
744 	offset -= pageOffset;
745 
746 	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE;
747 	// the "last*" variables always point to the end of the last
748 	// satisfied part of the request
749 
750 	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
751 	size_t bytesLeft = size, lastLeft = size;
752 	int32 lastPageOffset = pageOffset;
753 	addr_t lastBuffer = buffer;
754 	off_t lastOffset = offset;
755 	size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft
756 		+ B_PAGE_SIZE - 1) >> PAGE_SHIFT);
757 	size_t reservePages = 0;
758 	size_t pagesProcessed = 0;
759 	cache_func function = NULL;
760 
761 	vm_page_reservation reservation;
762 	reserve_pages(ref, &reservation, lastReservedPages, doWrite);
763 
764 	AutoLocker<VMCache> locker(cache);
765 
766 	while (bytesLeft > 0) {
767 		// Periodically reevaluate the low memory situation and select the
768 		// read/write hook accordingly
769 		if (pagesProcessed % 32 == 0) {
770 			if (size >= BYPASS_IO_SIZE
771 				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
772 					!= B_NO_LOW_RESOURCE) {
773 				// In low memory situations we bypass the cache beyond a
774 				// certain I/O size.
775 				function = doWrite ? write_to_file : read_from_file;
776 			} else
777 				function = doWrite ? write_to_cache : read_into_cache;
778 		}
779 
780 		// check if this page is already in memory
781 		vm_page* page = cache->LookupPage(offset);
782 		if (page != NULL) {
783 			// The page may be busy. Since we need to unlock the cache sometime
784 			// in the near future anyway, we first satisfy the part of the
785 			// request covering the pages we haven't handled yet (to make sure
786 			// no one else interferes in the meantime).
787 			status_t status = satisfy_cache_io(ref, cookie, function, offset,
788 				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
789 				lastOffset, lastBuffer, lastPageOffset, lastLeft,
790 				lastReservedPages, &reservation);
791 			if (status != B_OK)
792 				return status;
793 
794 			// Since satisfy_cache_io() unlocks the cache, we need to look up
795 			// the page again.
796 			page = cache->LookupPage(offset);
797 			if (page != NULL && page->busy) {
798 				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
799 				continue;
800 			}
801 		}
802 
803 		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);
804 
805 		TRACE(("lookup page from offset %lld: %p, size = %lu, pageOffset "
806 			"= %lu\n", offset, page, bytesLeft, pageOffset));
807 
808 		if (page != NULL) {
809 			if (doWrite || useBuffer) {
810 				// Since the following user_mem{cpy,set}() might cause a page
811 				// fault, which in turn might cause pages to be reserved, we
812 				// need to unlock the cache temporarily to avoid a potential
813 				// deadlock. To make sure that our page doesn't go away, we mark
814 				// it busy for the time being.
815 				page->busy = true;
816 				locker.Unlock();
817 
818 				// copy the contents of the page already in memory
819 				phys_addr_t pageAddress
820 					= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
821 						+ pageOffset;
822 				bool userBuffer = IS_USER_ADDRESS(buffer);
823 				if (doWrite) {
824 					if (useBuffer) {
825 						vm_memcpy_to_physical(pageAddress, (void*)buffer,
826 							bytesInPage, userBuffer);
827 					} else {
828 						vm_memset_physical(pageAddress, 0, bytesInPage);
829 					}
830 				} else if (useBuffer) {
831 					vm_memcpy_from_physical((void*)buffer, pageAddress,
832 						bytesInPage, userBuffer);
833 				}
834 
835 				locker.Lock();
836 
837 				if (doWrite) {
838 					DEBUG_PAGE_ACCESS_START(page);
839 
840 					page->modified = true;
841 
842 					if (page->State() != PAGE_STATE_MODIFIED)
843 						vm_page_set_state(page, PAGE_STATE_MODIFIED);
844 
845 					DEBUG_PAGE_ACCESS_END(page);
846 				}
847 
848 				cache->MarkPageUnbusy(page);
849 			}
850 
851 			// If the page is in the cached or modified queue, requeue it so
852 			// that the respective queue roughly stays sorted LRU-first.
853 			if (page->State() == PAGE_STATE_CACHED
854 					|| page->State() == PAGE_STATE_MODIFIED) {
855 				DEBUG_PAGE_ACCESS_START(page);
856 				vm_page_requeue(page, true);
857 				DEBUG_PAGE_ACCESS_END(page);
858 			}
859 
860 			if (bytesLeft <= bytesInPage) {
861 				// we've read the last page, so we're done!
862 				locker.Unlock();
863 				vm_page_unreserve_pages(&reservation);
864 				return B_OK;
865 			}
866 
867 			// prepare a potential gap request
868 			lastBuffer = buffer + bytesInPage;
869 			lastLeft = bytesLeft - bytesInPage;
870 			lastOffset = offset + B_PAGE_SIZE;
871 			lastPageOffset = 0;
872 		}
873 
874 		if (bytesLeft <= bytesInPage)
875 			break;
876 
877 		buffer += bytesInPage;
878 		bytesLeft -= bytesInPage;
879 		pageOffset = 0;
880 		offset += B_PAGE_SIZE;
881 		pagesProcessed++;
882 
883 		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
884 			status_t status = satisfy_cache_io(ref, cookie, function, offset,
885 				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
886 				lastOffset, lastBuffer, lastPageOffset, lastLeft,
887 				lastReservedPages, &reservation);
888 			if (status != B_OK)
889 				return status;
890 		}
891 	}
892 
893 	// fill the last remaining bytes of the request (either write or read)
894 
895 	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
896 		lastLeft, useBuffer, &reservation, 0);
897 }
898 
899 
900 static status_t
901 file_cache_control(const char* subsystem, uint32 function, void* buffer,
902 	size_t bufferSize)
903 {
904 	switch (function) {
905 		case CACHE_CLEAR:
906 			// ToDo: clear the cache
907 			dprintf("cache_control: clear cache!\n");
908 			return B_OK;
909 
910 		case CACHE_SET_MODULE:
911 		{
912 			cache_module_info* module = sCacheModule;
913 
914 			// unset previous module
915 
916 			if (sCacheModule != NULL) {
917 				sCacheModule = NULL;
918 				snooze(100000);	// 0.1 secs
919 				put_module(module->info.name);
920 			}
921 
922 			// get new module, if any
923 
924 			if (buffer == NULL)
925 				return B_OK;
926 
927 			char name[B_FILE_NAME_LENGTH];
928 			if (!IS_USER_ADDRESS(buffer)
929 				|| user_strlcpy(name, (char*)buffer,
930 						B_FILE_NAME_LENGTH) < B_OK)
931 				return B_BAD_ADDRESS;
932 
933 			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
934 				return B_BAD_VALUE;
935 
936 			dprintf("cache_control: set module %s!\n", name);
937 
938 			status_t status = get_module(name, (module_info**)&module);
939 			if (status == B_OK)
940 				sCacheModule = module;
941 
942 			return status;
943 		}
944 	}
945 
946 	return B_BAD_HANDLER;
947 }
948 
949 
950 //	#pragma mark - private kernel API
951 
952 
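/*!	Reads the given file range into the cache asynchronously, page by page.
	Runs of pages that are not yet cached are coalesced and submitted as
	PrecacheIO requests; pages already in the cache are simply skipped. The
	prefetch is silently skipped if memory is tight or the cache already
	holds most of the file.
*/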
953 extern "C" void
954 cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size)
955 {
956 	if (size == 0)
957 		return;
958 
959 	VMCache* cache;
960 	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
961 		return;
962 	if (cache->type != CACHE_TYPE_VNODE) {
963 		cache->ReleaseRef();
964 		return;
965 	}
966 
967 	file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
968 	off_t fileSize = cache->virtual_end;
969 
970 	if ((off_t)(offset + size) > fileSize)
971 		size = fileSize - offset;
972 
973 	// round "offset" down and "size" up to B_PAGE_SIZE boundaries
974 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
975 	size = ROUNDUP(size, B_PAGE_SIZE);
976 
977 	size_t reservePages = size / B_PAGE_SIZE;
978 
979 	// Don't do anything if we don't have enough free pages left, or if the
980 	// cache already contains more than 2/3 of the file's pages
981 	if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages
982 		|| 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) {
983 		cache->ReleaseRef();
984 		return;
985 	}
986 
987 	size_t bytesToRead = 0;
988 	off_t lastOffset = offset;
989 
990 	vm_page_reservation reservation;
991 	vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER);
992 
993 	cache->Lock();
994 
995 	while (true) {
996 		// check if this page is already in memory
997 		if (size > 0) {
998 			vm_page* page = cache->LookupPage(offset);
999 
1000 			offset += B_PAGE_SIZE;
1001 			size -= B_PAGE_SIZE;
1002 
1003 			if (page == NULL) {
1004 				bytesToRead += B_PAGE_SIZE;
1005 				continue;
1006 			}
1007 		}
1008 		if (bytesToRead != 0) {
1009 			// read the part before the current page (or the end of the request)
1010 			PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset,
1011 				bytesToRead);
1012 			if (io == NULL || io->Prepare(&reservation) != B_OK) {
1013 				delete io;
1014 				break;
1015 			}
1016 
1017 			// we must not have the cache locked during I/O
1018 			cache->Unlock();
1019 			io->ReadAsync();
1020 			cache->Lock();
1021 
1022 			bytesToRead = 0;
1023 		}
1024 
1025 		if (size == 0) {
1026 			// we have reached the end of the request
1027 			break;
1028 		}
1029 
1030 		lastOffset = offset;
1031 	}
1032 
1033 	cache->ReleaseRefAndUnlock();
1034 	vm_page_unreserve_pages(&reservation);
1035 }
1036 
1037 
1038 extern "C" void
1039 cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
1040 {
1041 	// ToDo: schedule prefetch
1042 
1043 	TRACE(("cache_prefetch(vnode %ld:%lld)\n", mountID, vnodeID));
1044 
1045 	// get the vnode for the object, this also grabs a ref to it
1046 	struct vnode* vnode;
1047 	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
1048 		return;
1049 
1050 	cache_prefetch_vnode(vnode, offset, size);
1051 	vfs_put_vnode(vnode);
1052 }
1053 
1054 
1055 extern "C" void
1056 cache_node_opened(struct vnode* vnode, int32 fdType, VMCache* cache,
1057 	dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name)
1058 {
1059 	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
1060 		return;
1061 
1062 	off_t size = -1;
1063 	if (cache != NULL && cache->type == CACHE_TYPE_VNODE) {
1064 		file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
1065 		if (ref != NULL)
1066 			size = cache->virtual_end;
1067 	}
1068 
1069 	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name,
1070 		size);
1071 }
1072 
1073 
1074 extern "C" void
1075 cache_node_closed(struct vnode* vnode, int32 fdType, VMCache* cache,
1076 	dev_t mountID, ino_t vnodeID)
1077 {
1078 	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
1079 		return;
1080 
1081 	int32 accessType = 0;
1082 	if (cache != NULL && cache->type == CACHE_TYPE_VNODE) {
1083 		// ToDo: set accessType
1084 	}
1085 
1086 	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
1087 }
1088 
1089 
1090 extern "C" void
1091 cache_node_launched(size_t argCount, char*  const* args)
1092 {
1093 	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
1094 		return;
1095 
1096 	sCacheModule->node_launched(argCount, args);
1097 }
1098 
1099 
1100 extern "C" status_t
1101 file_cache_init_post_boot_device(void)
1102 {
1103 	// ToDo: get cache module out of driver settings
1104 
1105 	if (get_module("file_cache/launch_speedup/v1",
1106 			(module_info**)&sCacheModule) == B_OK) {
1107 		dprintf("** opened launch speedup: %" B_PRId64 "\n", system_time());
1108 	}
1109 	return B_OK;
1110 }
1111 
1112 
1113 extern "C" status_t
1114 file_cache_init(void)
1115 {
1116 	// allocate a clean page we can use for writing zeroes
1117 	vm_page_reservation reservation;
1118 	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
1119 	vm_page* page = vm_page_allocate_page(&reservation,
1120 		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
1121 	vm_page_unreserve_pages(&reservation);
1122 
1123 	sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1124 
1125 	for (uint32 i = 0; i < kZeroVecCount; i++) {
1126 		sZeroVecs[i].base = sZeroPage;
1127 		sZeroVecs[i].length = B_PAGE_SIZE;
1128 	}
1129 
1130 	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
1131 	return B_OK;
1132 }
1133 
1134 
1135 //	#pragma mark - public FS API
1136 
1137 
1138 extern "C" void*
1139 file_cache_create(dev_t mountID, ino_t vnodeID, off_t size)
1140 {
1141 	TRACE(("file_cache_create(mountID = %ld, vnodeID = %lld, size = %lld)\n",
1142 		mountID, vnodeID, size));
1143 
1144 	file_cache_ref* ref = new file_cache_ref;
1145 	if (ref == NULL)
1146 		return NULL;
1147 
1148 	memset(ref->last_access, 0, sizeof(ref->last_access));
1149 	ref->last_access_index = 0;
1150 	ref->disabled_count = 0;
1151 
1152 	// TODO: delay VMCache creation until data is
1153 	//	requested/written for the first time? Listing lots of
1154 	//	files in Tracker (and elsewhere) could be slowed down.
1155 	//	Since the file_cache_ref itself doesn't have a lock,
1156 	//	we would need to "rent" one during construction, possibly
1157 	//	the vnode lock, maybe a dedicated one.
1158 	//	As there shouldn't be too much contention, we could also
1159 	//	use atomic_test_and_set(), and free the resources again
1160 	//	when that fails...
1161 
1162 	// Get the vnode for the object
1163 	// (note, this does not grab a reference to the node)
1164 	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
1165 		goto err1;
1166 
1167 	// Get (usually create) the cache for the node
1168 	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
1169 		goto err1;
1170 
1171 	ref->cache->virtual_end = size;
1172 	((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref);
1173 	return ref;
1174 
1175 err1:
1176 	delete ref;
1177 	return NULL;
1178 }
1179 
1180 
1181 extern "C" void
1182 file_cache_delete(void* _cacheRef)
1183 {
1184 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1185 
1186 	if (ref == NULL)
1187 		return;
1188 
1189 	TRACE(("file_cache_delete(ref = %p)\n", ref));
1190 
1191 	ref->cache->ReleaseRef();
1192 	delete ref;
1193 }
1194 
1195 
1196 extern "C" void
1197 file_cache_enable(void* _cacheRef)
1198 {
1199 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1200 
1201 	AutoLocker<VMCache> _(ref->cache);
1202 
1203 	if (ref->disabled_count == 0) {
1204 		panic("Unbalanced file_cache_enable()!");
1205 		return;
1206 	}
1207 
1208 	ref->disabled_count--;
1209 }
1210 
1211 
1212 extern "C" status_t
1213 file_cache_disable(void* _cacheRef)
1214 {
1215 	// TODO: This function only removes all pages from the cache and prevents
1216 	// the file cache functions from adding any new ones until re-enabled. The
1217 	// VM (on page fault) can still add pages if the file is mmap()ed. We
1218 	// should mark the cache to prevent shared mappings of the file and fix
1219 	// the page fault code to deal correctly with private mappings (i.e. only
1220 	// insert pages in consumer caches).
1221 
1222 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1223 
1224 	AutoLocker<VMCache> _(ref->cache);
1225 
1226 	// If already disabled, there's nothing to do for us.
1227 	if (ref->disabled_count > 0) {
1228 		ref->disabled_count++;
1229 		return B_OK;
1230 	}
1231 
1232 	// The file cache is not yet disabled. We need to evict all cached pages.
1233 	status_t error = ref->cache->FlushAndRemoveAllPages();
1234 	if (error != B_OK)
1235 		return error;
1236 
1237 	ref->disabled_count++;
1238 	return B_OK;
1239 }
1240 
1241 
1242 extern "C" bool
1243 file_cache_is_enabled(void* _cacheRef)
1244 {
1245 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1246 	AutoLocker<VMCache> _(ref->cache);
1247 
1248 	return ref->disabled_count == 0;
1249 }
1250 
1251 
1252 extern "C" status_t
1253 file_cache_set_size(void* _cacheRef, off_t newSize)
1254 {
1255 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1256 
1257 	TRACE(("file_cache_set_size(ref = %p, size = %lld)\n", ref, newSize));
1258 
1259 	if (ref == NULL)
1260 		return B_OK;
1261 
1262 	VMCache* cache = ref->cache;
1263 	AutoLocker<VMCache> _(cache);
1264 
1265 	off_t oldSize = cache->virtual_end;
1266 	status_t status = cache->Resize(newSize, VM_PRIORITY_USER);
1267 		// Note, the priority doesn't really matter, since this cache doesn't
1268 		// reserve any memory.
1269 	if (status == B_OK && newSize < oldSize) {
1270 		// We may have a new partial page at the end of the cache that must be
1271 		// cleared.
1272 		uint32 partialBytes = newSize % B_PAGE_SIZE;
1273 		if (partialBytes != 0) {
1274 			vm_page* page = cache->LookupPage(newSize - partialBytes);
1275 			if (page != NULL) {
1276 				vm_memset_physical(page->physical_page_number * B_PAGE_SIZE
1277 					+ partialBytes, 0, B_PAGE_SIZE - partialBytes);
1278 			}
1279 		}
1280 	}
1281 
1282 	return status;
1283 }
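// Illustrative example (assuming 4 KB pages): shrinking a file to 5000 bytes
// leaves partialBytes == 904, so bytes 904 through 4095 of the cached page at
// cache offset 4096 are zeroed, keeping the region beyond the new end of file
// free of stale data.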
1284 
1285 
1286 extern "C" status_t
1287 file_cache_sync(void* _cacheRef)
1288 {
1289 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1290 	if (ref == NULL)
1291 		return B_BAD_VALUE;
1292 
1293 	return ref->cache->WriteModified();
1294 }
1295 
1296 
1297 extern "C" status_t
1298 file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
1299 	size_t* _size)
1300 {
1301 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1302 
1303 	TRACE(("file_cache_read(ref = %p, offset = %lld, buffer = %p, size = %lu)\n",
1304 		ref, offset, buffer, *_size));
1305 
1306 	// Bounds checking. We do this here so it applies to uncached I/O.
1307 	if (offset < 0)
1308 		return B_BAD_VALUE;
1309 	const off_t fileSize = ref->cache->virtual_end;
1310 	if (offset >= fileSize || *_size == 0) {
1311 		*_size = 0;
1312 		return B_OK;
1313 	}
1314 	if ((off_t)(offset + *_size) > fileSize)
1315 		*_size = fileSize - offset;
1316 
1317 	if (ref->disabled_count > 0) {
1318 		// Caching is disabled -- read directly from the file.
1319 		generic_io_vec vec;
1320 		vec.base = (addr_t)buffer;
1321 		generic_size_t size = vec.length = *_size;
1322 		status_t error = vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0,
1323 			&size);
1324 		*_size = size;
1325 		return error;
1326 	}
1327 
1328 	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
1329 }
1330 
1331 
1332 extern "C" status_t
1333 file_cache_write(void* _cacheRef, void* cookie, off_t offset,
1334 	const void* buffer, size_t* _size)
1335 {
1336 	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
1337 
1338 	// Unlike in file_cache_read(), we don't do bounds checking here; we rely
1339 	// on the file system that called us to have already done that and made
1340 	// adjustments as necessary.
1341 
1342 	if (ref->disabled_count > 0) {
1343 		// Caching is disabled -- write directly to the file.
1344 		if (buffer != NULL) {
1345 			generic_io_vec vec;
1346 			vec.base = (addr_t)buffer;
1347 			generic_size_t size = vec.length = *_size;
1348 
1349 			status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec,
1350 				1, 0, &size);
1351 			*_size = size;
1352 			return error;
1353 		}
1354 		return write_zeros_to_file(ref->vnode, cookie, offset, _size);
1355 	}
1356 
1357 	status_t status = cache_io(ref, cookie, offset,
1358 		(addr_t)const_cast<void*>(buffer), _size, true);
1359 
1360 	TRACE(("file_cache_write(ref = %p, offset = %lld, buffer = %p, size = %lu)"
1361 		" = %ld\n", ref, offset, buffer, *_size, status));
1362 
1363 	return status;
1364 }
1365