xref: /haiku/src/system/kernel/vm/VMAnonymousCache.cpp (revision 125183f9e5c136781f71c879faaeab43fdc3ea7b)
1 /*
2  * Copyright 2008, Zhao Shuai, upczhsh@163.com.
3  * Copyright 2008-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
4  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
5  * Distributed under the terms of the MIT License.
6  *
7  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
8  * Distributed under the terms of the NewOS License.
9  */
10 
11 
12 #include "VMAnonymousCache.h"
13 
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <unistd.h>
19 
20 #include <KernelExport.h>
21 #include <NodeMonitor.h>
22 
23 #include <arch_config.h>
24 #include <boot_device.h>
25 #include <driver_settings.h>
26 #include <fs/fd.h>
27 #include <fs_interface.h>
28 #include <heap.h>
29 #include <kernel_daemon.h>
30 #include <slab/Slab.h>
31 #include <syscalls.h>
32 #include <system_info.h>
33 #include <tracing.h>
34 #include <util/AutoLock.h>
35 #include <util/DoublyLinkedList.h>
36 #include <util/OpenHashTable.h>
37 #include <util/RadixBitmap.h>
38 #include <vfs.h>
39 #include <vm/vm.h>
40 #include <vm/vm_page.h>
41 #include <vm/vm_priv.h>
42 #include <vm/VMAddressSpace.h>
43 
44 #include "IORequest.h"
45 
46 
47 #if	ENABLE_SWAP_SUPPORT
48 
49 //#define TRACE_VM_ANONYMOUS_CACHE
50 #ifdef TRACE_VM_ANONYMOUS_CACHE
51 #	define TRACE(x...) dprintf(x)
52 #else
53 #	define TRACE(x...) do { } while (false)
54 #endif
55 
56 
57 // minimum number of free swap blocks the object cache shall have
58 #define MIN_SWAP_BLOCK_RESERVE	4096
59 
60 // interval at which the hash resizer is triggered (in 0.1s)
61 #define SWAP_HASH_RESIZE_INTERVAL	5
62 
63 #define INITIAL_SWAP_HASH_SIZE		1024
64 
65 #define SWAP_BLOCK_PAGES 32
66 #define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
67 #define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)
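// For example, with SWAP_BLOCK_PAGES == 32, cache page index 70 belongs to
// the swap block keyed with page index 64 (70 & ~SWAP_BLOCK_MASK), and its
// slot is stored at index 6 (70 & SWAP_BLOCK_MASK) within that block.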
68 
69 
70 struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
71 	int				fd;
72 	struct vnode*	vnode;
73 	void*			cookie;
74 	swap_addr_t		first_slot;
75 	swap_addr_t		last_slot;
76 	radix_bitmap*	bmp;
77 };
78 
79 struct swap_hash_key {
80 	VMAnonymousCache	*cache;
81 	off_t				page_index;  // page index in the cache
82 };
83 
84 // Each swap block contains swap address information for
85 // SWAP_BLOCK_PAGES consecutive pages from the same cache
86 struct swap_block {
87 	swap_block*		hash_link;
88 	swap_hash_key	key;
89 	uint32			used;
90 	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
91 };
92 
93 struct SwapHashTableDefinition {
94 	typedef swap_hash_key KeyType;
95 	typedef swap_block ValueType;
96 
97 	SwapHashTableDefinition() {}
98 
99 	size_t HashKey(const swap_hash_key& key) const
100 	{
101 		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
102 		VMAnonymousCache* cache = key.cache;
103 		return blockIndex ^ (size_t)(int*)cache;
104 	}
105 
106 	size_t Hash(const swap_block* value) const
107 	{
108 		return HashKey(value->key);
109 	}
110 
111 	bool Compare(const swap_hash_key& key, const swap_block* value) const
112 	{
113 		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
114 				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
115 			&& key.cache == value->key.cache;
116 	}
117 
118 	swap_block*& GetLink(swap_block* value) const
119 	{
120 		return value->hash_link;
121 	}
122 };
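// HashKey() and Compare() both work at swap block granularity, so a lookup
// with any page index inside the same 32-page run resolves to the same
// swap_block. A minimal lookup sketch (the caller must hold sSwapHashLock,
// declared below; "cache" and "pageIndex" stand for the values of interest):
//
//	swap_hash_key key = { cache, pageIndex };
//	swap_block* block = sSwapHashTable.Lookup(key);
//	swap_addr_t slot = block != NULL
//		? block->swap_slots[pageIndex & SWAP_BLOCK_MASK] : SWAP_SLOT_NONE;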
123 
124 typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
125 typedef DoublyLinkedList<swap_file> SwapFileList;
126 
127 static SwapHashTable sSwapHashTable;
128 static rw_lock sSwapHashLock;
129 
130 static SwapFileList sSwapFileList;
131 static mutex sSwapFileListLock;
132 static swap_file* sSwapFileAlloc = NULL; // allocate from here
133 static uint32 sSwapFileCount = 0;
134 
135 static off_t sAvailSwapSpace = 0;
136 static mutex sAvailSwapSpaceLock;
137 
138 static object_cache* sSwapBlockCache;
139 
140 
141 #if SWAP_TRACING
142 namespace SwapTracing {
143 
144 class SwapTraceEntry : public AbstractTraceEntry {
145 public:
146 	SwapTraceEntry(VMAnonymousCache* cache)
147 		:
148 		fCache(cache)
149 	{
150 	}
151 
152 protected:
153 	VMAnonymousCache*	fCache;
154 };
155 
156 
157 class ReadPage : public SwapTraceEntry {
158 public:
159 	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
160 			swap_addr_t swapSlotIndex)
161 		:
162 		SwapTraceEntry(cache),
163 		fPageIndex(pageIndex),
164 		fSwapSlotIndex(swapSlotIndex)
165 	{
166 		Initialized();
167 	}
168 
169 	virtual void AddDump(TraceOutput& out)
170 	{
171 		out.Print("swap read:  cache %p, page index: %lu <- swap slot: %lu",
172 			fCache, fPageIndex, fSwapSlotIndex);
173 	}
174 
175 private:
176 	page_num_t		fPageIndex;
177 	swap_addr_t		fSwapSlotIndex;
178 };
179 
180 
181 class WritePage : public SwapTraceEntry {
182 public:
183 	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
184 			swap_addr_t swapSlotIndex)
185 		:
186 		SwapTraceEntry(cache),
187 		fPageIndex(pageIndex),
188 		fSwapSlotIndex(swapSlotIndex)
189 	{
190 		Initialized();
191 	}
192 
193 	virtual void AddDump(TraceOutput& out)
194 	{
195 		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
196 			fCache, fPageIndex, fSwapSlotIndex);
197 	}
198 
199 private:
200 	page_num_t		fPageIndex;
201 	swap_addr_t		fSwapSlotIndex;
202 };
203 
204 }	// namespace SwapTracing
205 
206 #	define T(x) new(std::nothrow) SwapTracing::x;
207 #else
208 #	define T(x) ;
209 #endif
210 
211 
212 static int
213 dump_swap_info(int argc, char** argv)
214 {
215 	swap_addr_t totalSwapPages = 0;
216 	swap_addr_t freeSwapPages = 0;
217 
218 	kprintf("swap files:\n");
219 
220 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
221 			swap_file* file = it.Next();) {
222 		swap_addr_t total = file->last_slot - file->first_slot;
223 		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
224 			file->vnode, total, file->bmp->free_slots);
225 
226 		totalSwapPages += total;
227 		freeSwapPages += file->bmp->free_slots;
228 	}
229 
230 	kprintf("\n");
231 	kprintf("swap space in pages:\n");
232 	kprintf("total:     %9lu\n", totalSwapPages);
233 	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
234 	kprintf("reserved:  %9llu\n",
235 		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
236 	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
237 	kprintf("free:      %9lu\n", freeSwapPages);
238 
239 	return 0;
240 }
241 
242 
243 static swap_addr_t
244 swap_slot_alloc(uint32 count)
245 {
246 	mutex_lock(&sSwapFileListLock);
247 
248 	if (sSwapFileList.IsEmpty()) {
249 		mutex_unlock(&sSwapFileListLock);
250 		panic("swap_slot_alloc(): no swap file in the system\n");
251 		return SWAP_SLOT_NONE;
252 	}
253 
254 	// Since the radix bitmap cannot handle more than 32 pages, we return
255 	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
256 	if (count > BITMAP_RADIX) {
257 		mutex_unlock(&sSwapFileListLock);
258 		return SWAP_SLOT_NONE;
259 	}
260 
261 	swap_addr_t j, addr = SWAP_SLOT_NONE;
262 	for (j = 0; j < sSwapFileCount; j++) {
263 		if (sSwapFileAlloc == NULL)
264 			sSwapFileAlloc = sSwapFileList.First();
265 
266 		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
267 		if (addr != SWAP_SLOT_NONE) {
268 			addr += sSwapFileAlloc->first_slot;
269 			break;
270 		}
271 
272 		// this swap_file is full, find another
273 		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
274 	}
275 
276 	if (j == sSwapFileCount) {
277 		mutex_unlock(&sSwapFileListLock);
278 		panic("swap_slot_alloc: swap space exhausted!\n");
279 		return SWAP_SLOT_NONE;
280 	}
281 
282 	// if this swap file has used more than 90% of its space,
283 	// switch to another
284 	if (sSwapFileAlloc->bmp->free_slots
285 			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
286 		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
287 
288 	mutex_unlock(&sSwapFileListLock);
289 
290 	return addr;
291 }
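// Note that the returned value is a global slot index: radix_bitmap_alloc()
// returns an offset relative to the chosen file, to which first_slot is
// added. If, say, the second swap file covers slots [1000, 3000) and its
// bitmap hands out offset 25, the caller gets 1025, and find_swap_file()
// later maps that back to the same file.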
292 
293 
294 static swap_file*
295 find_swap_file(swap_addr_t slotIndex)
296 {
297 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
298 			swap_file* swapFile = it.Next();) {
299 		if (slotIndex >= swapFile->first_slot
300 				&& slotIndex < swapFile->last_slot)
301 			return swapFile;
302 	}
303 
304 	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
305 	return NULL;
306 }
307 
308 
309 static void
310 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
311 {
312 	if (slotIndex == SWAP_SLOT_NONE)
313 		return;
314 
315 	mutex_lock(&sSwapFileListLock);
316 	swap_file* swapFile = find_swap_file(slotIndex);
317 	slotIndex -= swapFile->first_slot;
318 	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
319 	mutex_unlock(&sSwapFileListLock);
320 }
321 
322 
323 static off_t
324 swap_space_reserve(off_t amount)
325 {
326 	mutex_lock(&sAvailSwapSpaceLock);
327 	if (sAvailSwapSpace >= amount)
328 		sAvailSwapSpace -= amount;
329 	else {
330 		amount = sAvailSwapSpace;
331 		sAvailSwapSpace = 0;
332 	}
333 	mutex_unlock(&sAvailSwapSpaceLock);
334 
335 	return amount;
336 }
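// Example: with sAvailSwapSpace at 5 * B_PAGE_SIZE, reserving 8 * B_PAGE_SIZE
// returns only 5 * B_PAGE_SIZE and leaves nothing available; a caller like
// _Commit() then has to cover the rest with reserved real memory.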
337 
338 
339 static void
340 swap_space_unreserve(off_t amount)
341 {
342 	mutex_lock(&sAvailSwapSpaceLock);
343 	sAvailSwapSpace += amount;
344 	mutex_unlock(&sAvailSwapSpaceLock);
345 }
346 
347 
348 static void
349 swap_hash_resizer(void*, int)
350 {
351 	WriteLocker locker(sSwapHashLock);
352 
353 	size_t size;
354 	void* allocation;
355 
356 	do {
357 		size = sSwapHashTable.ResizeNeeded();
358 		if (size == 0)
359 			return;
360 
361 		locker.Unlock();
362 
363 		allocation = malloc(size);
364 		if (allocation == NULL)
365 			return;
366 
367 		locker.Lock();
368 
369 	} while (!sSwapHashTable.Resize(allocation, size));
370 }
371 
372 
373 // #pragma mark -
374 
375 
376 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
377 public:
378 	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
379 		:
380 		StackableAsyncIOCallback(callback),
381 		fCache(cache)
382 	{
383 	}
384 
385 	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
386 	{
387 		fPageIndex = pageIndex;
388 		fSlotIndex = slotIndex;
389 		fNewSlot = newSlot;
390 	}
391 
392 	virtual void IOFinished(status_t status, bool partialTransfer,
393 		size_t bytesTransferred)
394 	{
395 		if (fNewSlot) {
396 			if (status == B_OK) {
397 				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
398 			} else {
399 				AutoLocker<VMCache> locker(fCache);
400 				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
401 				locker.Unlock();
402 
403 				swap_slot_dealloc(fSlotIndex, 1);
404 			}
405 		}
406 
407 		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);
408 
409 		delete this;
410 	}
411 
412 private:
413 	VMAnonymousCache*	fCache;
414 	page_num_t			fPageIndex;
415 	swap_addr_t			fSlotIndex;
416 	bool				fNewSlot;
417 };
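// Lifecycle of the callback above: WriteAsync() allocates a WriteCallback,
// vfs_asynchronous_write_pages() invokes IOFinished() when the I/O completes,
// which records the new swap slot (or rolls the reservation back on error),
// forwards the notification to the wrapped callback, and finally deletes
// itself.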
418 
419 
420 // #pragma mark -
421 
422 
423 VMAnonymousCache::~VMAnonymousCache()
424 {
425 	// free the allocated swap space and swap blocks
426 	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
427 			offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
428 		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
429 		if (slotIndex == SWAP_SLOT_NONE)
430 			continue;
431 
432 		swap_slot_dealloc(slotIndex, 1);
433 		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
434 		toFree -= B_PAGE_SIZE;
435 	}
436 
437 	swap_space_unreserve(fCommittedSwapSize);
438 	if (committed_size > fCommittedSwapSize)
439 		vm_unreserve_memory(committed_size - fCommittedSwapSize);
440 }
441 
442 
443 status_t
444 VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
445 	int32 numGuardPages, uint32 allocationFlags)
446 {
447 	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
448 		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
449 		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);
450 
451 	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
452 	if (error != B_OK)
453 		return error;
454 
455 	fCanOvercommit = canOvercommit;
456 	fHasPrecommitted = false;
457 	fPrecommittedPages = min_c(numPrecommittedPages, 255);
458 	fGuardedSize = numGuardPages * B_PAGE_SIZE;
459 	fCommittedSwapSize = 0;
460 	fAllocatedSwapSize = 0;
461 
462 	return B_OK;
463 }
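// A hypothetical call for a user stack cache that may overcommit and keeps a
// single guard page (the real call sites are in the VM area creation code,
// not in this file; the names below are illustrative only):
//
//	VMAnonymousCache* cache = ...;
//	status_t error = cache->Init(true /* canOvercommit */,
//		4 /* numPrecommittedPages */, 1 /* numGuardPages */,
//		0 /* allocationFlags */);
//	if (error != B_OK)
//		return error;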
464 
465 
466 status_t
467 VMAnonymousCache::Resize(off_t newSize, int priority)
468 {
469 	// If the cache size shrinks, drop all swap pages beyond the new size.
470 	if (fAllocatedSwapSize > 0) {
471 		page_num_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
472 		swap_block* swapBlock = NULL;
473 
474 		for (page_num_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
475 				pageIndex < oldPageCount && fAllocatedSwapSize > 0;
476 				pageIndex++) {
477 			WriteLocker locker(sSwapHashLock);
478 
479 			// Get the swap slot index for the page.
480 			swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
481 			if (swapBlock == NULL || blockIndex == 0) {
482 				swap_hash_key key = { this, pageIndex };
483 				swapBlock = sSwapHashTable.Lookup(key);
484 
485 				if (swapBlock == NULL) {
486 					pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES);
487 					continue;
488 				}
489 			}
490 
491 			swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex];
492 			vm_page* page;
493 			if (slotIndex != SWAP_SLOT_NONE
494 				&& ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL
495 					|| !page->busy)) {
496 					// TODO: We skip (i.e. leak) swap space of busy pages, since
497 					// there could be I/O going on (paging in/out). Waiting is
498 					// not an option as 1. unlocking the cache means that new
499 					// swap pages could be added in a range we've already
500 					// cleared (since the cache still has the old size) and 2.
501 					// we'd risk a deadlock in case we come from the file cache
502 					// and the FS holds the node's write-lock. We should mark
503 					// the page invalid and let the one responsible clean up.
504 					// There's just no such mechanism yet.
505 				swap_slot_dealloc(slotIndex, 1);
506 				fAllocatedSwapSize -= B_PAGE_SIZE;
507 
508 				swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE;
509 				if (--swapBlock->used == 0) {
510 					// All swap pages have been freed -- we can discard the swap
511 					// block.
512 					sSwapHashTable.RemoveUnchecked(swapBlock);
513 					object_cache_free(sSwapBlockCache, swapBlock,
514 						CACHE_DONT_WAIT_FOR_MEMORY
515 							| CACHE_DONT_LOCK_KERNEL_SPACE);
516 				}
517 			}
518 		}
519 	}
520 
521 	return VMCache::Resize(newSize, priority);
522 }
523 
524 
525 status_t
526 VMAnonymousCache::Commit(off_t size, int priority)
527 {
528 	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);
529 
530 	// If we can overcommit, we don't commit here, but in Fault(). We do
531 	// unreserve memory if we're asked to shrink our commitment, though.
532 	if (fCanOvercommit && size > committed_size) {
533 		if (fHasPrecommitted)
534 			return B_OK;
535 
536 		// pre-commit some pages to make a later failure less probable
537 		fHasPrecommitted = true;
538 		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
539 		if (size > precommitted)
540 			size = precommitted;
541 	}
542 
543 	return _Commit(size, priority);
544 }
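// Overcommit example: for a cache with fPrecommittedPages == 4 that has not
// pre-committed yet, a Commit(64 * B_PAGE_SIZE, ...) request is clamped to
// 4 * B_PAGE_SIZE here; the remaining pages are committed one at a time from
// Fault() when they are actually touched.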
545 
546 
547 bool
548 VMAnonymousCache::HasPage(off_t offset)
549 {
550 	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
551 		return true;
552 
553 	return false;
554 }
555 
556 
557 bool
558 VMAnonymousCache::DebugHasPage(off_t offset)
559 {
560 	page_num_t pageIndex = offset >> PAGE_SHIFT;
561 	swap_hash_key key = { this, pageIndex };
562 	swap_block* swap = sSwapHashTable.Lookup(key);
563 	if (swap == NULL)
564 		return false;
565 
566 	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
567 }
568 
569 
570 status_t
571 VMAnonymousCache::Read(off_t offset, const iovec* vecs, size_t count,
572 	uint32 flags, size_t* _numBytes)
573 {
574 	off_t pageIndex = offset >> PAGE_SHIFT;
575 
576 	for (uint32 i = 0, j = 0; i < count; i = j) {
577 		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
578 		for (j = i + 1; j < count; j++) {
579 			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
580 			if (slotIndex != startSlotIndex + j - i)
581 				break;
582 		}
583 
584 		T(ReadPage(this, pageIndex, startSlotIndex));
585 			// TODO: Assumes that only one page is read.
586 
587 		swap_file* swapFile = find_swap_file(startSlotIndex);
588 
589 		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
590 			* B_PAGE_SIZE;
591 
592 		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
593 			vecs + i, j - i, flags, _numBytes);
594 		if (status != B_OK)
595 			return status;
596 	}
597 
598 	return B_OK;
599 }
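// The loop above coalesces runs of contiguous swap slots: if the pages at
// offsets i, i + 1 and i + 2 were written to slots 205, 206 and 207, all
// three vecs are read with a single vfs_read_pages() call; a gap in the slot
// numbers starts a new request.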
600 
601 
602 status_t
603 VMAnonymousCache::Write(off_t offset, const iovec* vecs, size_t count,
604 	uint32 flags, size_t* _numBytes)
605 {
606 	off_t pageIndex = offset >> PAGE_SHIFT;
607 
608 	AutoLocker<VMCache> locker(this);
609 
610 	uint32 totalPages = 0;
611 	for (uint32 i = 0; i < count; i++) {
612 		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
613 		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
614 		if (slotIndex != SWAP_SLOT_NONE) {
615 			swap_slot_dealloc(slotIndex, pageCount);
616 			_SwapBlockFree(pageIndex + totalPages, pageCount);
617 			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
618 		}
619 
620 		totalPages += pageCount;
621 	}
622 
623 	off_t totalSize = totalPages * B_PAGE_SIZE;
624 	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
625 		return B_ERROR;
626 
627 	fAllocatedSwapSize += totalSize;
628 	locker.Unlock();
629 
630 	uint32 pagesLeft = totalPages;
631 	totalPages = 0;
632 
633 	for (uint32 i = 0; i < count; i++) {
634 		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
635 
636 		void* vectorBase = vecs[i].iov_base;
637 		size_t vectorLength = vecs[i].iov_len;
638 		uint32 n = pageCount;
639 
640 		for (uint32 j = 0; j < pageCount; j += n) {
641 			swap_addr_t slotIndex;
642 			// try to allocate n slots; if that fails, try n/2, and so on
643 			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
644 				n >>= 1;
645 
646 			if (slotIndex == SWAP_SLOT_NONE)
647 				panic("VMAnonymousCache::Write(): can't allocate swap space\n");
648 
649 			T(WritePage(this, pageIndex, slotIndex));
650 				// TODO: Assumes that only one page is written.
651 
652 			swap_file* swapFile = find_swap_file(slotIndex);
653 
654 			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;
655 
656 			size_t length = n * B_PAGE_SIZE;
657 			iovec vector[1];
658 			vector->iov_base = vectorBase;
659 			vector->iov_len = length;
660 
661 			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
662 				pos, vector, 1, flags, &length);
663 			if (status != B_OK) {
664 				locker.Lock();
665 				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
666 				locker.Unlock();
667 
668 				swap_slot_dealloc(slotIndex, n);
669 				return status;
670 			}
671 
672 			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
673 			pagesLeft -= n;
674 
675 			if (n != pageCount) {
676 				vectorBase = (void*)((addr_t)vectorBase + n * B_PAGE_SIZE);
677 				vectorLength -= n * B_PAGE_SIZE;
678 			}
679 		}
680 
681 		totalPages += pageCount;
682 	}
683 
684 	ASSERT(pagesLeft == 0);
685 	return B_OK;
686 }
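// The allocation loop above degrades gracefully when swap is fragmented: for
// an 8-page vec it first asks swap_slot_alloc() for 8 contiguous slots, then
// for 4, 2 and finally 1, writing and recording (via _SwapBlockBuild()) each
// successfully allocated run before continuing with the rest of the vec.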
687 
688 
689 status_t
690 VMAnonymousCache::WriteAsync(off_t offset, const iovec* vecs, size_t count,
691 	size_t numBytes, uint32 flags, AsyncIOCallback* _callback)
692 {
693 	// TODO: Currently this method is only used for single pages. Either make
694 	// more flexible use of it or change the interface!
695 	// This implementation relies on the current usage!
696 	ASSERT(count == 1);
697 	ASSERT(numBytes <= B_PAGE_SIZE);
698 
699 	page_num_t pageIndex = offset >> PAGE_SHIFT;
700 	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
701 	bool newSlot = slotIndex == SWAP_SLOT_NONE;
702 
703 	// If the page doesn't have any swap space yet, allocate it.
704 	if (newSlot) {
705 		AutoLocker<VMCache> locker(this);
706 		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
707 			_callback->IOFinished(B_ERROR, true, 0);
708 			return B_ERROR;
709 		}
710 
711 		fAllocatedSwapSize += B_PAGE_SIZE;
712 
713 		slotIndex = swap_slot_alloc(1);
714 	}
715 
716 	// create our callback
717 	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
718  		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
719 		: new(std::nothrow) WriteCallback(this, _callback);
720 	if (callback == NULL) {
721 		if (newSlot) {
722 			AutoLocker<VMCache> locker(this);
723 			fAllocatedSwapSize -= B_PAGE_SIZE;
724 			locker.Unlock();
725 
726 			swap_slot_dealloc(slotIndex, 1);
727 		}
728 		_callback->IOFinished(B_NO_MEMORY, true, 0);
729 		return B_NO_MEMORY;
730 	}
731 // TODO: If the page already had swap space assigned, we don't need a
732 // callback of our own.
733 
734 	callback->SetTo(pageIndex, slotIndex, newSlot);
735 
736 	T(WritePage(this, pageIndex, slotIndex));
737 
738 	// write the page asynchronously
739 	swap_file* swapFile = find_swap_file(slotIndex);
740 	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;
741 
742 	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
743 		vecs, 1, numBytes, flags, callback);
744 }
745 
746 
747 bool
748 VMAnonymousCache::CanWritePage(off_t offset)
749 {
750 	// We can write the page if we have not used all of our committed swap
751 	// space, or if the page already has a swap slot assigned.
752 	return fAllocatedSwapSize < fCommittedSwapSize
753 		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
754 }
755 
756 
757 int32
758 VMAnonymousCache::MaxPagesPerAsyncWrite() const
759 {
760 	return 1;
761 }
762 
763 
764 status_t
765 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
766 {
767 	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
768 		if (fGuardedSize > 0) {
769 			uint32 guardOffset;
770 
771 #ifdef STACK_GROWS_DOWNWARDS
772 			guardOffset = 0;
773 #elif defined(STACK_GROWS_UPWARDS)
774 			guardOffset = virtual_size - fGuardedSize;
775 #else
776 #	error Stack direction has not been defined in arch_config.h
777 #endif
778 
779 			// report stack fault, guard page hit!
780 			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
781 				TRACE(("stack overflow!\n"));
782 				return B_BAD_ADDRESS;
783 			}
784 		}
785 
786 		if (fPrecommittedPages == 0) {
787 			// never commit more than needed
788 			if (committed_size / B_PAGE_SIZE > page_count)
789 				return B_BAD_HANDLER;
790 
791 			// try to commit additional swap space/memory
792 			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
793 				fCommittedSwapSize += B_PAGE_SIZE;
794 			} else {
795 				int priority = aspace == VMAddressSpace::Kernel()
796 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
797 				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
798 					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
799 						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
800 					return B_NO_MEMORY;
801 				}
802 			}
803 
804 			committed_size += B_PAGE_SIZE;
805 		} else
806 			fPrecommittedPages--;
807 	}
808 
809 	// This will cause vm_soft_fault() to handle the fault
810 	return B_BAD_HANDLER;
811 }
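// Guard page example: for a stack cache with fGuardedSize == B_PAGE_SIZE, the
// guarded range is [0, B_PAGE_SIZE) when the stack grows downwards and
// [virtual_size - B_PAGE_SIZE, virtual_size) when it grows upwards; a fault
// at an offset inside that range returns B_BAD_ADDRESS instead of committing
// memory for the page.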
812 
813 
814 void
815 VMAnonymousCache::Merge(VMCache* _source)
816 {
817 	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
818 	if (source == NULL) {
819 		panic("VMAnonymousCache::MergeStore(): merge with incompatible cache "
820 			"%p requested", _source);
821 		return;
822 	}
823 
824 	// take over the source's committed size
825 	fCommittedSwapSize += source->fCommittedSwapSize;
826 	source->fCommittedSwapSize = 0;
827 	committed_size += source->committed_size;
828 	source->committed_size = 0;
829 
830 	off_t actualSize = virtual_end - virtual_base;
831 	if (committed_size > actualSize)
832 		_Commit(actualSize, VM_PRIORITY_USER);
833 
834 	// Move all not shadowed swap pages from the source to the consumer cache.
835 	// Also remove all source pages that are shadowed by consumer swap pages.
836 	_MergeSwapPages(source);
837 
838 	// Move all not shadowed pages from the source to the consumer cache.
839 	if (source->page_count < page_count)
840 		_MergePagesSmallerSource(source);
841 	else
842 		_MergePagesSmallerConsumer(source);
843 }
844 
845 
846 void
847 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
848 	swap_addr_t startSlotIndex, uint32 count)
849 {
850 	WriteLocker locker(sSwapHashLock);
851 
852 	uint32 left = count;
853 	for (uint32 i = 0, j = 0; i < count; i += j) {
854 		off_t pageIndex = startPageIndex + i;
855 		swap_addr_t slotIndex = startSlotIndex + i;
856 
857 		swap_hash_key key = { this, pageIndex };
858 
859 		swap_block* swap = sSwapHashTable.Lookup(key);
860 		while (swap == NULL) {
861 			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
862 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
863 			if (swap == NULL) {
864 				// Wait a short time until memory is available again.
865 				locker.Unlock();
866 				snooze(10000);
867 				locker.Lock();
868 				swap = sSwapHashTable.Lookup(key);
869 				continue;
870 			}
871 
872 			swap->key.cache = this;
873 			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
874 			swap->used = 0;
875 			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
876 				swap->swap_slots[i] = SWAP_SLOT_NONE;
877 
878 			sSwapHashTable.InsertUnchecked(swap);
879 		}
880 
881 		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
882 		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
883 			swap->swap_slots[blockIndex++] = slotIndex + j;
884 			left--;
885 		}
886 
887 		swap->used += j;
888 	}
889 }
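// A run may span swap blocks: building 8 slots starting at page index 28
// fills swap_slots[28..31] of the first block and swap_slots[0..3] of the
// next one, allocating (or looking up) each block on demand.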
890 
891 
892 void
893 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
894 {
895 	WriteLocker locker(sSwapHashLock);
896 
897 	uint32 left = count;
898 	for (uint32 i = 0, j = 0; i < count; i += j) {
899 		off_t pageIndex = startPageIndex + i;
900 		swap_hash_key key = { this, pageIndex };
901 		swap_block* swap = sSwapHashTable.Lookup(key);
902 
903 		ASSERT(swap != NULL);
904 
905 		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
906 		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
907 			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
908 			left--;
909 		}
910 
911 		swap->used -= j;
912 		if (swap->used == 0) {
913 			sSwapHashTable.RemoveUnchecked(swap);
914 			object_cache_free(sSwapBlockCache, swap,
915 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
916 		}
917 	}
918 }
919 
920 
921 swap_addr_t
922 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
923 {
924 	ReadLocker locker(sSwapHashLock);
925 
926 	swap_hash_key key = { this, pageIndex };
927 	swap_block* swap = sSwapHashTable.Lookup(key);
928 	swap_addr_t slotIndex = SWAP_SLOT_NONE;
929 
930 	if (swap != NULL) {
931 		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
932 		slotIndex = swap->swap_slots[blockIndex];
933 	}
934 
935 	return slotIndex;
936 }
937 
938 
939 status_t
940 VMAnonymousCache::_Commit(off_t size, int priority)
941 {
942 	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
943 		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);
944 
945 	// Basic strategy: reserve swap space first; only when running out of swap
946 	// space, reserve real memory.
947 
948 	off_t committedMemory = committed_size - fCommittedSwapSize;
949 
950 	// Regardless of whether we're asked to grow or shrink the commitment,
951 	// we always try to reserve as much as possible of the final commitment
952 	// in the swap space.
953 	if (size > fCommittedSwapSize) {
954 		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
955 		committed_size = fCommittedSwapSize + committedMemory;
956 		if (size > fCommittedSwapSize) {
957 			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
958 				"swap\n", this, size, fCommittedSwapSize);
959 		}
960 	}
961 
962 	if (committed_size == size)
963 		return B_OK;
964 
965 	if (committed_size > size) {
966 		// The commitment shrinks -- unreserve real memory first.
967 		off_t toUnreserve = committed_size - size;
968 		if (committedMemory > 0) {
969 			off_t unreserved = min_c(toUnreserve, committedMemory);
970 			vm_unreserve_memory(unreserved);
971 			committedMemory -= unreserved;
972 			committed_size -= unreserved;
973 			toUnreserve -= unreserved;
974 		}
975 
976 		// Unreserve swap space.
977 		if (toUnreserve > 0) {
978 			swap_space_unreserve(toUnreserve);
979 			fCommittedSwapSize -= toUnreserve;
980 			committed_size -= toUnreserve;
981 		}
982 
983 		return B_OK;
984 	}
985 
986 	// The commitment grows -- we have already tried to reserve swap space at
987 	// the start of the method, so we try to reserve real memory now.
988 
989 	off_t toReserve = size - committed_size;
990 	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
991 		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
992 			"bytes of RAM\n", this, size, toReserve);
993 		return B_NO_MEMORY;
994 	}
995 
996 	committed_size = size;
997 	return B_OK;
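// Worked example, assuming no further swap space can be reserved: with
// committed_size == 10 pages, of which fCommittedSwapSize == 6 are backed by
// swap (so 4 by RAM), _Commit(8 * B_PAGE_SIZE, ...) cannot grow the swap
// reservation and therefore unreserves 2 pages of real memory, ending up with
// 6 pages of swap and 2 pages of RAM committed.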
998 }
999 
1000 
1001 void
1002 VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
1003 {
1004 	// The source cache has fewer pages than the consumer (this cache), so we
1005 	// iterate through the source's pages and move the ones that are not
1006 	// shadowed up to the consumer.
1007 
1008 	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
1009 			vm_page* page = it.Next();) {
1010 		// Note: Removing the current node while iterating through an
1011 		// IteratableSplayTree is safe.
1012 		vm_page* consumerPage = LookupPage(
1013 			(off_t)page->cache_offset << PAGE_SHIFT);
1014 		if (consumerPage == NULL) {
1015 			// the page is not yet in the consumer cache - move it upwards
1016 			ASSERT_PRINT(!page->busy, "page: %p", page);
1017 			source->RemovePage(page);
1018 			InsertPage(page, (off_t)page->cache_offset << PAGE_SHIFT);
1019 		}
1020 	}
1021 }
1022 
1023 
1024 void
1025 VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
1026 {
1027 	// The consumer (this cache) has fewer pages than the source, so we move the
1028 	// consumer's pages to the source (freeing shadowed ones) and finally just
1029 	// all pages of the source back to the consumer.
1030 
1031 	for (VMCachePagesTree::Iterator it = pages.GetIterator();
1032 			vm_page* page = it.Next();) {
1033 		// If a source page is in the way, remove and free it.
1034 		vm_page* sourcePage = source->LookupPage(
1035 			(off_t)page->cache_offset << PAGE_SHIFT);
1036 		if (sourcePage != NULL) {
1037 			DEBUG_PAGE_ACCESS_START(sourcePage);
1038 			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
1039 			source->RemovePage(sourcePage);
1040 			vm_page_free(source, sourcePage);
1041 		}
1042 
1043 		// Note: Removing the current node while iterating through an
1044 		// IteratableSplayTree is safe.
1045 		source->MovePage(page);
1046 	}
1047 
1048 	MoveAllPages(source);
1049 }
1050 
1051 
1052 void
1053 VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
1054 {
1055 	// If neither source nor consumer has swap pages, we don't have to do
1056 	// anything.
1057 	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
1058 		return;
1059 
1060 	for (off_t offset = source->virtual_base
1061 				& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
1062 			offset < source->virtual_end;
1063 			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {
1064 
1065 		WriteLocker locker(sSwapHashLock);
1066 
1067 		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
1068 		swap_hash_key key = { source, swapBlockPageIndex };
1069 		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);
1070 
1071 		// remove the source swap block -- we will either take over the swap
1072 		// space (and the block) or free it
1073 		if (sourceSwapBlock != NULL)
1074 			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);
1075 
1076 		key.cache = this;
1077 		swap_block* swapBlock = sSwapHashTable.Lookup(key);
1078 
1079 		locker.Unlock();
1080 
1081 		// remove all source pages that are shadowed by consumer swap pages
1082 		if (swapBlock != NULL) {
1083 			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
1084 				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
1085 					vm_page* page = source->LookupPage(
1086 						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
1087 					if (page != NULL) {
1088 						DEBUG_PAGE_ACCESS_START(page);
1089 						ASSERT_PRINT(!page->busy, "page: %p", page);
1090 						source->RemovePage(page);
1091 						vm_page_free(source, page);
1092 					}
1093 				}
1094 			}
1095 		}
1096 
1097 		if (sourceSwapBlock == NULL)
1098 			continue;
1099 
1100 		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
1101 			off_t pageIndex = swapBlockPageIndex + i;
1102 			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];
1103 
1104 			if (sourceSlotIndex == SWAP_SLOT_NONE)
1105 				continue;
1106 
1107 			if ((swapBlock != NULL
1108 					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
1109 				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
1110 				// The consumer already has a page or a swapped out page
1111 				// at this index. So we can free the source swap space.
1112 				swap_slot_dealloc(sourceSlotIndex, 1);
1113 				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
1114 				sourceSwapBlock->used--;
1115 			}
1116 
1117 			// We've either freed the source swap page or are going to move it
1118 			// to the consumer. At any rate, the source cache doesn't own it
1119 			// anymore.
1120 			source->fAllocatedSwapSize -= B_PAGE_SIZE;
1121 		}
1122 
1123 		// All source swap pages that have not been freed yet are taken over by
1124 		// the consumer.
1125 		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;
1126 
1127 		if (sourceSwapBlock->used == 0) {
1128 			// All swap pages have been freed -- we can discard the source swap
1129 			// block.
1130 			object_cache_free(sSwapBlockCache, sourceSwapBlock,
1131 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
1132 		} else if (swapBlock == NULL) {
1133 			// We need to take over some of the source's swap pages and there's
1134 			// no swap block in the consumer cache. Just take over the source
1135 			// swap block.
1136 			sourceSwapBlock->key.cache = this;
1137 			locker.Lock();
1138 			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
1139 			locker.Unlock();
1140 		} else {
1141 			// We need to take over some of the source's swap pages and there's
1142 			// already a swap block in the consumer cache. Copy the respective
1143 			// swap addresses and discard the source swap block.
1144 			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
1145 				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
1146 					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
1147 			}
1148 
1149 			object_cache_free(sSwapBlockCache, sourceSwapBlock,
1150 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
1151 		}
1152 	}
1153 }
1154 
1155 
1156 // #pragma mark -
1157 
1158 
1159 status_t
1160 swap_file_add(const char* path)
1161 {
1162 	// open the file
1163 	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
1164 	if (fd < 0)
1165 		return errno;
1166 
1167 	// fstat() it and check whether we can use it
1168 	struct stat st;
1169 	if (fstat(fd, &st) < 0) {
1170 		close(fd);
1171 		return errno;
1172 	}
1173 
1174 	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
1175 		close(fd);
1176 		return B_BAD_VALUE;
1177 	}
1178 
1179 	if (st.st_size < B_PAGE_SIZE) {
1180 		close(fd);
1181 		return B_BAD_VALUE;
1182 	}
1183 
1184 	// get file descriptor, vnode, and cookie
1185 	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
1186 	put_fd(descriptor);
1187 
1188 	vnode* node = fd_vnode(descriptor);
1189 	if (node == NULL) {
1190 		close(fd);
1191 		return B_BAD_VALUE;
1192 	}
1193 
1194 	// do the allocations and prepare the swap_file structure
1195 	swap_file* swap = (swap_file*)malloc(sizeof(swap_file));
1196 	if (swap == NULL) {
1197 		close(fd);
1198 		return B_NO_MEMORY;
1199 	}
1200 
1201 	swap->fd = fd;
1202 	swap->vnode = node;
1203 	swap->cookie = descriptor->cookie;
1204 
1205 	uint32 pageCount = st.st_size >> PAGE_SHIFT;
1206 	swap->bmp = radix_bitmap_create(pageCount);
1207 	if (swap->bmp == NULL) {
1208 		free(swap);
1209 		close(fd);
1210 		return B_NO_MEMORY;
1211 	}
1212 
1213 	// set slot index and add this file to swap file list
1214 	mutex_lock(&sSwapFileListLock);
1215 	// TODO: Also check whether the swap file is already registered!
1216 	if (sSwapFileList.IsEmpty()) {
1217 		swap->first_slot = 0;
1218 		swap->last_slot = pageCount;
1219 	} else {
1220 		// leave a one-page gap between two swap files
1221 		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
1222 		swap->last_slot = swap->first_slot + pageCount;
1223 	}
1224 	sSwapFileList.Add(swap);
1225 	sSwapFileCount++;
1226 	mutex_unlock(&sSwapFileListLock);
1227 
1228 	mutex_lock(&sAvailSwapSpaceLock);
1229 	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
1230 	mutex_unlock(&sAvailSwapSpaceLock);
1231 
1232 	return B_OK;
1233 }
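// Slot ranges are handed out sequentially: a first swap file covering 1000
// pages gets slots [0, 1000); a second file of 500 pages then gets
// [1001, 1501), since a one-page gap is kept between files (see above).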
1234 
1235 
1236 status_t
1237 swap_file_delete(const char* path)
1238 {
1239 	vnode* node = NULL;
1240 	status_t status = vfs_get_vnode_from_path(path, true, &node);
1241 	if (status != B_OK)
1242 		return status;
1243 
1244 	MutexLocker locker(sSwapFileListLock);
1245 
1246 	swap_file* swapFile = NULL;
1247 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
1248 			(swapFile = it.Next()) != NULL;) {
1249 		if (swapFile->vnode == node)
1250 			break;
1251 	}
1252 
1253 	vfs_put_vnode(node);
1254 
1255 	if (swapFile == NULL)
1256 		return B_ERROR;
1257 
1258 	// if this file is currently in use, we can't delete it
1259 	// TODO: mark this swap file as being deleted, and remove it after
1260 	// releasing all of its swap space
1261 	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
1262 		return B_ERROR;
1263 
1264 	sSwapFileList.Remove(swapFile);
1265 	sSwapFileCount--;
1266 	locker.Unlock();
1267 
1268 	mutex_lock(&sAvailSwapSpaceLock);
1269 	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
1270 		* B_PAGE_SIZE;
1271 	mutex_unlock(&sAvailSwapSpaceLock);
1272 
1273 	close(swapFile->fd);
1274 	radix_bitmap_destroy(swapFile->bmp);
1275 	free(swapFile);
1276 
1277 	return B_OK;
1278 }
1279 
1280 
1281 void
1282 swap_init(void)
1283 {
1284 	// create swap block cache
1285 	sSwapBlockCache = create_object_cache("swapblock",
1286 			sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
1287 	if (sSwapBlockCache == NULL)
1288 		panic("swap_init(): can't create object cache for swap blocks\n");
1289 
1290 	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
1291 		MIN_SWAP_BLOCK_RESERVE);
1292 	if (error != B_OK) {
1293 		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
1294 			strerror(error));
1295 	}
1296 
1297 	// init swap hash table
1298 	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
1299 	rw_lock_init(&sSwapHashLock, "swaphash");
1300 
1301 	error = register_resource_resizer(swap_hash_resizer, NULL,
1302 		SWAP_HASH_RESIZE_INTERVAL);
1303 	if (error != B_OK) {
1304 		panic("swap_init(): Failed to register swap hash resizer: %s",
1305 			strerror(error));
1306 	}
1307 
1308 	// init swap file list
1309 	mutex_init(&sSwapFileListLock, "swaplist");
1310 	sSwapFileAlloc = NULL;
1311 	sSwapFileCount = 0;
1312 
1313 	// init available swap space
1314 	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
1315 	sAvailSwapSpace = 0;
1316 
1317 	add_debugger_command_etc("swap", &dump_swap_info,
1318 		"Print info about the swap usage",
1319 		"\n"
1320 		"Print info about the swap usage.\n", 0);
1321 }
1322 
1323 
1324 void
1325 swap_init_post_modules()
1326 {
1327 	// Never try to create a swap file on a read-only device - when booting
1328 	// from CD, the write overlay is used.
1329 	if (gReadOnlyBootDevice)
1330 		return;
1331 
1332 	off_t size = 0;
1333 
1334 	void* settings = load_driver_settings("virtual_memory");
1335 	if (settings != NULL) {
1336 		if (!get_driver_boolean_parameter(settings, "vm", false, false))
1337 			return;
1338 
1339 		const char* string = get_driver_parameter(settings, "swap_size", NULL,
1340 			NULL);
1341 		size = string ? atoll(string) : 0;
1342 
1343 		unload_driver_settings(settings);
1344 	} else
1345 		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;
1346 
1347 	if (size < B_PAGE_SIZE)
1348 		return;
1349 
1350 	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
1351 	if (fd < 0) {
1352 		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
1353 		return;
1354 	}
1355 
1356 	struct stat stat;
1357 	stat.st_size = size;
1358 	status_t error = _kern_write_stat(fd, NULL, false, &stat,
1359 		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
1360 	if (error != B_OK) {
1361 		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
1362 			strerror(error));
1363 	}
1364 
1365 	close(fd);
1366 
1367 	error = swap_file_add("/var/swap");
1368 	if (error != B_OK)
1369 		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
1370 }
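// The settings read above come from a driver settings file named
// "virtual_memory" (on a default install presumably under
// /boot/home/config/settings/kernel/drivers/). A minimal example that enables
// swap with a fixed 2 GiB swap file; both keys match the parameters queried
// above, the values are only an illustration:
//
//	vm on
//	swap_size 2147483648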
1371 
1372 
1373 //! Used by page daemon to free swap space.
1374 bool
1375 swap_free_page_swap_space(vm_page* page)
1376 {
1377 	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
1378 	if (cache == NULL)
1379 		return false;
1380 
1381 	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
1382 	if (slotIndex == SWAP_SLOT_NONE)
1383 		return false;
1384 
1385 	swap_slot_dealloc(slotIndex, 1);
1386 	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
1387 	cache->_SwapBlockFree(page->cache_offset, 1);
1388 
1389   	return true;
1390 }
1391 
1392 
1393 uint32
1394 swap_available_pages()
1395 {
1396 	mutex_lock(&sAvailSwapSpaceLock);
1397 	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
1398 	mutex_unlock(&sAvailSwapSpaceLock);
1399 
1400 	return avail;
1401 }
1402 
1403 
1404 uint32
1405 swap_total_swap_pages()
1406 {
1407 	mutex_lock(&sSwapFileListLock);
1408 
1409 	uint32 totalSwapSlots = 0;
1410 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
1411 			swap_file* swapFile = it.Next();)
1412 		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;
1413 
1414 	mutex_unlock(&sSwapFileListLock);
1415 
1416 	return totalSwapSlots;
1417 }
1418 
1419 #endif	// ENABLE_SWAP_SUPPORT
1420 
1421 void
1422 swap_get_info(struct system_memory_info* info)
1423 {
1424 #if ENABLE_SWAP_SUPPORT
1425 	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
1426 	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
1427 #else
1428 	info->max_swap_space = 0;
1429 	info->free_swap_space = 0;
1430 #endif
1431 }
1432 
1433