xref: /haiku/src/system/kernel/vm/VMAnonymousCache.cpp (revision e0ef64750f3169cd634bb2f7a001e22488b05231)
/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>

#include "IORequest.h"


#if	ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// minimum number of free swap blocks the object cache shall keep in reserve
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval at which the hash resizer is triggered (in 0.1s units)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE		1024

#define SWAP_SLOT_NONE	RADIX_SLOT_NONE

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)
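// Thus the low SWAP_BLOCK_SHIFT bits of a page index select the slot within
// a swap block, while the remaining bits identify the block itself; e.g.
// page index 37 maps to swap block 1, slot 5.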


struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode*	vnode;
	void*			cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap*	bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;  // page index in the cache
};

// Each swap block contains the swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache.
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

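	// Mix the swap block index with the cache pointer, so that blocks with
	// the same index but belonging to different caches hash differently.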
	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache* cache = key.cache;
		return blockIndex ^ (size_t)(int*)cache;
	}

	size_t Hash(const swap_block* value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block* value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block* value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL; // allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache* sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
			swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read:  cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
			swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
			file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9lu\n", totalSwapPages);
	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9llu\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9lu\n", freeSwapPages);

	return 0;
}


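// Allocates "count" contiguous swap slots, trying the registered swap files
// in round-robin fashion starting at sSwapFileAlloc. Returns the global index
// of the first slot, or SWAP_SLOT_NONE if the request exceeds BITMAP_RADIX
// pages, which tells the caller to retry with a smaller run.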
static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than BITMAP_RADIX pages at
	// once, we return SWAP_SLOT_NONE; this forces Write() to adjust the
	// allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% of its space, switch
	// to another
	if (sSwapFileAlloc->bmp->free_slots
			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file*
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
				&& slotIndex < swapFile->last_slot)
			return swapFile;
	}

	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file* swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


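// Reserves up to "amount" bytes of swap space and returns how much was
// actually reserved, which may be less when the remaining swap space is
// smaller than the request.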
static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


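// Grows the swap hash table when needed; registered via
// register_resource_resizer(). The allocation is made with the hash lock
// dropped, so the required size is re-checked (and the resize retried) after
// re-acquiring the lock.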
static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


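// Async I/O callback wrapper used by WriteAsync(). If a new swap slot was
// allocated for the write, IOFinished() registers it in the swap block table
// on success, or releases the slot and its reservation again on failure.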
class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	// free the allocated swap space and swap blocks
	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
			offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::Resize(off_t newSize, int priority)
{
	// If the cache size shrinks, drop all swap pages beyond the new size.
	if (fAllocatedSwapSize > 0) {
		page_num_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_block* swapBlock = NULL;

		for (page_num_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
				pageIndex < oldPageCount && fAllocatedSwapSize > 0;
				pageIndex++) {
			WriteLocker locker(sSwapHashLock);

			// Get the swap slot index for the page.
			swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
			if (swapBlock == NULL || blockIndex == 0) {
				swap_hash_key key = { this, pageIndex };
				swapBlock = sSwapHashTable.Lookup(key);

				if (swapBlock == NULL) {
					pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES);
					continue;
				}
			}

			swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex];
			vm_page* page;
			if (slotIndex != SWAP_SLOT_NONE
				&& ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL
					|| !page->busy)) {
					// TODO: We skip (i.e. leak) swap space of busy pages, since
					// there could be I/O going on (paging in/out). Waiting is
					// not an option as 1. unlocking the cache means that new
					// swap pages could be added in a range we've already
					// cleared (since the cache still has the old size) and 2.
					// we'd risk a deadlock in case we come from the file cache
					// and the FS holds the node's write-lock. We should mark
					// the page invalid and let the one responsible clean up.
					// There's just no such mechanism yet.
				swap_slot_dealloc(slotIndex, 1);
				fAllocatedSwapSize -= B_PAGE_SIZE;

				swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE;
				if (--swapBlock->used == 0) {
					// All swap pages have been freed -- we can discard the swap
					// block.
					sSwapHashTable.RemoveUnchecked(swapBlock);
					object_cache_free(sSwapBlockCache, swapBlock,
						CACHE_DONT_WAIT_FOR_MEMORY
							| CACHE_DONT_LOCK_KERNEL_SPACE);
				}
			}
		}
	}

	return VMCache::Resize(newSize, priority);
}


status_t
VMAnonymousCache::Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);

	// If we can overcommit, we don't commit here, but in Fault(). We do,
	// however, always unreserve memory when asked to shrink our commitment.
	if (fCanOvercommit && size > committed_size) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size, priority);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


bool
VMAnonymousCache::DebugHasPage(off_t offset)
{
	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	if (swap == NULL)
		return false;

	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

		swap_file* swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


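// Writes the given page range to swap in two phases: under the cache lock,
// any swap slots already assigned to the range are freed and the required
// space is charged against the committed swap size; then new slots are
// allocated (halving the requested run length on failure) and the pages are
// written out via the VFS.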
605 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count,
606 	uint32 flags, generic_size_t* _numBytes)
607 {
608 	off_t pageIndex = offset >> PAGE_SHIFT;
609 
610 	AutoLocker<VMCache> locker(this);
611 
612 	page_num_t totalPages = 0;
613 	for (uint32 i = 0; i < count; i++) {
614 		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
615 		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
616 		if (slotIndex != SWAP_SLOT_NONE) {
617 			swap_slot_dealloc(slotIndex, pageCount);
618 			_SwapBlockFree(pageIndex + totalPages, pageCount);
619 			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
620 		}
621 
622 		totalPages += pageCount;
623 	}
624 
625 	off_t totalSize = totalPages * B_PAGE_SIZE;
626 	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
627 		return B_ERROR;
628 
629 	fAllocatedSwapSize += totalSize;
630 	locker.Unlock();
631 
632 	page_num_t pagesLeft = totalPages;
633 	totalPages = 0;
634 
635 	for (uint32 i = 0; i < count; i++) {
636 		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
637 
638 		generic_addr_t vectorBase = vecs[i].base;
639 		generic_size_t vectorLength = vecs[i].length;
640 		page_num_t n = pageCount;
641 
642 		for (page_num_t j = 0; j < pageCount; j += n) {
643 			swap_addr_t slotIndex;
644 			// try to allocate n slots, if fail, try to allocate n/2
645 			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
646 				n >>= 1;
647 
648 			if (slotIndex == SWAP_SLOT_NONE)
649 				panic("VMAnonymousCache::Write(): can't allocate swap space\n");
650 
651 			T(WritePage(this, pageIndex, slotIndex));
652 				// TODO: Assumes that only one page is written.
653 
654 			swap_file* swapFile = find_swap_file(slotIndex);
655 
656 			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;
657 
658 			generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE;
659 			generic_io_vec vector[1];
660 			vector->base = vectorBase;
661 			vector->length = length;
662 
663 			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
664 				pos, vector, 1, flags, &length);
665 			if (status != B_OK) {
666 				locker.Lock();
667 				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
668 				locker.Unlock();
669 
670 				swap_slot_dealloc(slotIndex, n);
671 				return status;
672 			}
673 
674 			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
675 			pagesLeft -= n;
676 
677 			if (n != pageCount) {
678 				vectorBase = vectorBase + n * B_PAGE_SIZE;
679 				vectorLength -= n * B_PAGE_SIZE;
680 			}
681 		}
682 
683 		totalPages += pageCount;
684 	}
685 
686 	ASSERT(pagesLeft == 0);
687 	return B_OK;
688 }
689 
690 
691 status_t
692 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs,
693 	size_t count, generic_size_t numBytes, uint32 flags,
694 	AsyncIOCallback* _callback)
695 {
696 	// TODO: Currently this method is only used for single pages. Either make
697 	// more flexible use of it or change the interface!
698 	// This implementation relies on the current usage!
699 	ASSERT(count == 1);
700 	ASSERT(numBytes <= B_PAGE_SIZE);
701 
702 	page_num_t pageIndex = offset >> PAGE_SHIFT;
703 	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
704 	bool newSlot = slotIndex == SWAP_SLOT_NONE;
705 
706 	// If the page doesn't have any swap space yet, allocate it.
707 	if (newSlot) {
708 		AutoLocker<VMCache> locker(this);
709 		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
710 			_callback->IOFinished(B_ERROR, true, 0);
711 			return B_ERROR;
712 		}
713 
714 		fAllocatedSwapSize += B_PAGE_SIZE;
715 
716 		slotIndex = swap_slot_alloc(1);
717 	}
718 
719 	// create our callback
720 	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
721  		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
722 		: new(std::nothrow) WriteCallback(this, _callback);
723 	if (callback == NULL) {
724 		if (newSlot) {
725 			AutoLocker<VMCache> locker(this);
726 			fAllocatedSwapSize -= B_PAGE_SIZE;
727 			locker.Unlock();
728 
729 			swap_slot_dealloc(slotIndex, 1);
730 		}
731 		_callback->IOFinished(B_NO_MEMORY, true, 0);
732 		return B_NO_MEMORY;
733 	}
734 // TODO: If the page already had swap space assigned, we don't need an own
735 // callback.
736 
737 	callback->SetTo(pageIndex, slotIndex, newSlot);
738 
739 	T(WritePage(this, pageIndex, slotIndex));
740 
741 	// write the page asynchrounously
742 	swap_file* swapFile = find_swap_file(slotIndex);
743 	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;
744 
745 	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
746 		vecs, 1, numBytes, flags, callback);
747 }
748 
749 
750 bool
751 VMAnonymousCache::CanWritePage(off_t offset)
752 {
753 	// We can write the page, if we have not used all of our committed swap
754 	// space or the page already has a swap slot assigned.
755 	return fAllocatedSwapSize < fCommittedSwapSize
756 		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
757 }
758 
759 
760 int32
761 VMAnonymousCache::MaxPagesPerAsyncWrite() const
762 {
763 	return 1;
764 }
765 
766 
767 status_t
768 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
769 {
770 	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
771 		if (fGuardedSize > 0) {
772 			uint32 guardOffset;
773 
774 #ifdef STACK_GROWS_DOWNWARDS
775 			guardOffset = 0;
776 #elif defined(STACK_GROWS_UPWARDS)
777 			guardOffset = virtual_size - fGuardedSize;
778 #else
779 #	error Stack direction has not been defined in arch_config.h
780 #endif
781 
782 			// report stack fault, guard page hit!
783 			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
784 				TRACE(("stack overflow!\n"));
785 				return B_BAD_ADDRESS;
786 			}
787 		}
788 
789 		if (fPrecommittedPages == 0) {
790 			// never commit more than needed
791 			if (committed_size / B_PAGE_SIZE > page_count)
792 				return B_BAD_HANDLER;
793 
794 			// try to commit additional swap space/memory
795 			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
796 				fCommittedSwapSize += B_PAGE_SIZE;
797 			} else {
798 				int priority = aspace == VMAddressSpace::Kernel()
799 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
800 				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
801 					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
802 						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
803 					return B_NO_MEMORY;
804 				}
805 			}
806 
807 			committed_size += B_PAGE_SIZE;
808 		} else
809 			fPrecommittedPages--;
810 	}
811 
812 	// This will cause vm_soft_fault() to handle the fault
813 	return B_BAD_HANDLER;
814 }
815 
816 
817 void
818 VMAnonymousCache::Merge(VMCache* _source)
819 {
820 	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
821 	if (source == NULL) {
822 		panic("VMAnonymousCache::MergeStore(): merge with incompatible cache "
823 			"%p requested", _source);
824 		return;
825 	}
826 
827 	// take over the source' committed size
828 	fCommittedSwapSize += source->fCommittedSwapSize;
829 	source->fCommittedSwapSize = 0;
830 	committed_size += source->committed_size;
831 	source->committed_size = 0;
832 
833 	off_t actualSize = virtual_end - virtual_base;
834 	if (committed_size > actualSize)
835 		_Commit(actualSize, VM_PRIORITY_USER);
836 
837 	// Move all not shadowed swap pages from the source to the consumer cache.
838 	// Also remove all source pages that are shadowed by consumer swap pages.
839 	_MergeSwapPages(source);
840 
841 	// Move all not shadowed pages from the source to the consumer cache.
842 	if (source->page_count < page_count)
843 		_MergePagesSmallerSource(source);
844 	else
845 		_MergePagesSmallerConsumer(source);
846 }
847 
848 
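// Records that the "count" pages starting at "startPageIndex" are stored in
// the swap slots starting at "startSlotIndex". Missing swap blocks are
// allocated on demand; if the object cache is out of memory, we snooze and
// retry until the allocation succeeds.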
void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block* swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


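// Removes the swap slot assignments of the "count" pages starting at
// "startPageIndex", discarding swap blocks that no longer contain any used
// slots.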
void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block* swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


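// Returns the swap slot assigned to the page at "pageIndex", or
// SWAP_SLOT_NONE if the page has no swap space assigned.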
swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
				"swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
			"bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has fewer pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			MovePage(page);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has fewer pages than the source, so we move
	// the consumer's pages to the source (freeing shadowed ones) and finally
	// move all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither the source nor the consumer have swap pages, we don't have
	// to do anything.
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

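	// Iterate over the source's range with swap block granularity: the start
	// offset is rounded down to a swap block boundary, so each iteration
	// covers exactly one potential swap block.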
	for (off_t offset = source->virtual_base
				& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over by
		// the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


// #pragma mark -


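// Registers the file at "path" as a swap file. Every swap file occupies a
// disjoint range [first_slot, last_slot) in the global slot space (with a
// one-slot gap between consecutive files), so find_swap_file() can map any
// allocated slot index back to its file.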
status_t
swap_file_add(const char* path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode* node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file* swap = (swap_file*)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave a one-page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char* path)
{
	vnode* node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file* swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently used, we can't delete it
	// TODO: Mark this swap file as deleting, and remove it after releasing
	// all of its swap space.
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock",
			sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	off_t size = 0;

	void* settings = load_driver_settings("virtual_memory");
	if (settings != NULL) {
		if (!get_driver_boolean_parameter(settings, "vm", false, false))
			return;

		const char* string = get_driver_parameter(settings, "swap_size", NULL,
			NULL);
		size = string ? atoll(string) : 0;

		unload_driver_settings(settings);
	} else
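		// default: use twice the amount of physical memory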
		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;

	if (size < B_PAGE_SIZE)
		return;

	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = size;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
			strerror(error));
	}

	close(fd);

	error = swap_file_add("/var/swap");
	if (error != B_OK)
		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
}


//! Used by the page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page* page)
{
	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();)
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}

#endif	// ENABLE_SWAP_SUPPORT

void
swap_get_info(struct system_memory_info* info)
{
#if ENABLE_SWAP_SUPPORT
	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
#else
	info->max_swap_space = 0;
	info->free_swap_space = 0;
#endif
}
1435