xref: /haiku/src/system/kernel/vm/VMAnonymousCache.cpp (revision 865a0be1c022cdd7d155ecb3a44ca2769bce4f60)
1 /*
2  * Copyright 2008, Zhao Shuai, upczhsh@163.com.
3  * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
4  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
5  * Distributed under the terms of the MIT License.
6  *
7  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
8  * Distributed under the terms of the NewOS License.
9  */
10 
11 
12 #include "VMAnonymousCache.h"
13 
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <unistd.h>
19 
20 #include <KernelExport.h>
21 #include <NodeMonitor.h>
22 
23 #include <arch_config.h>
24 #include <boot_device.h>
25 #include <driver_settings.h>
26 #include <fs/fd.h>
27 #include <fs_interface.h>
28 #include <heap.h>
29 #include <kernel_daemon.h>
30 #include <slab/Slab.h>
31 #include <syscalls.h>
32 #include <system_info.h>
33 #include <tracing.h>
34 #include <util/AutoLock.h>
35 #include <util/DoublyLinkedList.h>
36 #include <util/OpenHashTable.h>
37 #include <util/RadixBitmap.h>
38 #include <vfs.h>
39 #include <vm/vm.h>
40 #include <vm/vm_page.h>
41 #include <vm/vm_priv.h>
42 #include <vm/VMAddressSpace.h>
43 
44 #include "IORequest.h"
45 
46 
47 #if	ENABLE_SWAP_SUPPORT
48 
49 //#define TRACE_VM_ANONYMOUS_CACHE
50 #ifdef TRACE_VM_ANONYMOUS_CACHE
51 #	define TRACE(x...) dprintf(x)
52 #else
53 #	define TRACE(x...) do { } while (false)
54 #endif
55 
56 
57 // minimum number of free swap blocks the object cache shall keep in reserve
58 #define MIN_SWAP_BLOCK_RESERVE	4096
59 
60 // interval at which the hash resizer is triggered (in 0.1s)
61 #define SWAP_HASH_RESIZE_INTERVAL	5
62 
63 #define INITIAL_SWAP_HASH_SIZE		1024
64 
65 #define SWAP_SLOT_NONE	RADIX_SLOT_NONE
66 
67 #define SWAP_BLOCK_PAGES 32
68 #define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
69 #define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)
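// For example (illustrative numbers): page index 70 belongs to the swap block
// whose key page_index is 70 & ~SWAP_BLOCK_MASK == 64, at slot array position
// 70 & SWAP_BLOCK_MASK == 6; with SWAP_BLOCK_SHIFT == 5 the block index used
// for hashing is 70 >> 5 == 2.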
70 
71 
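// One registered swap file. first_slot and last_slot are indices into the
// global swap slot space shared by all swap files; the per-file radix bitmap
// tracks which of the file's own slots are still free.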
72 struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
73 	int				fd;
74 	struct vnode*	vnode;
75 	void*			cookie;
76 	swap_addr_t		first_slot;
77 	swap_addr_t		last_slot;
78 	radix_bitmap*	bmp;
79 };
80 
81 struct swap_hash_key {
82 	VMAnonymousCache	*cache;
83 	off_t				page_index;  // page index in the cache
84 };
85 
86 // Each swap block contains swap address information for
87 // SWAP_BLOCK_PAGES contiguous pages from the same cache
88 struct swap_block {
89 	swap_block*		hash_link;
90 	swap_hash_key	key;
91 	uint32			used;
92 	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
93 };
94 
95 struct SwapHashTableDefinition {
96 	typedef swap_hash_key KeyType;
97 	typedef swap_block ValueType;
98 
99 	SwapHashTableDefinition() {}
100 
101 	size_t HashKey(const swap_hash_key& key) const
102 	{
103 		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
104 		VMAnonymousCache* cache = key.cache;
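		// XORing in the cache pointer spreads equal block indices of
		// different caches across different buckets.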
105 		return blockIndex ^ (size_t)(int*)cache;
106 	}
107 
108 	size_t Hash(const swap_block* value) const
109 	{
110 		return HashKey(value->key);
111 	}
112 
113 	bool Compare(const swap_hash_key& key, const swap_block* value) const
114 	{
115 		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
116 				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
117 			&& key.cache == value->key.cache;
118 	}
119 
120 	swap_block*& GetLink(swap_block* value) const
121 	{
122 		return value->hash_link;
123 	}
124 };
125 
126 typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
127 typedef DoublyLinkedList<swap_file> SwapFileList;
128 
129 static SwapHashTable sSwapHashTable;
130 static rw_lock sSwapHashLock;
131 
132 static SwapFileList sSwapFileList;
133 static mutex sSwapFileListLock;
134 static swap_file* sSwapFileAlloc = NULL; // allocate from here
135 static uint32 sSwapFileCount = 0;
136 
137 static off_t sAvailSwapSpace = 0;
138 static mutex sAvailSwapSpaceLock;
139 
140 static object_cache* sSwapBlockCache;
141 
142 
143 #if SWAP_TRACING
144 namespace SwapTracing {
145 
146 class SwapTraceEntry : public AbstractTraceEntry {
147 public:
148 	SwapTraceEntry(VMAnonymousCache* cache)
149 		:
150 		fCache(cache)
151 	{
152 	}
153 
154 protected:
155 	VMAnonymousCache*	fCache;
156 };
157 
158 
159 class ReadPage : public SwapTraceEntry {
160 public:
161 	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
162 			swap_addr_t swapSlotIndex)
163 		:
164 		SwapTraceEntry(cache),
165 		fPageIndex(pageIndex),
166 		fSwapSlotIndex(swapSlotIndex)
167 	{
168 		Initialized();
169 	}
170 
171 	virtual void AddDump(TraceOutput& out)
172 	{
173 		out.Print("swap read:  cache %p, page index: %lu <- swap slot: %lu",
174 			fCache, fPageIndex, fSwapSlotIndex);
175 	}
176 
177 private:
178 	page_num_t		fPageIndex;
179 	swap_addr_t		fSwapSlotIndex;
180 };
181 
182 
183 class WritePage : public SwapTraceEntry {
184 public:
185 	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
186 			swap_addr_t swapSlotIndex)
187 		:
188 		SwapTraceEntry(cache),
189 		fPageIndex(pageIndex),
190 		fSwapSlotIndex(swapSlotIndex)
191 	{
192 		Initialized();
193 	}
194 
195 	virtual void AddDump(TraceOutput& out)
196 	{
197 		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
198 			fCache, fPageIndex, fSwapSlotIndex);
199 	}
200 
201 private:
202 	page_num_t		fPageIndex;
203 	swap_addr_t		fSwapSlotIndex;
204 };
205 
206 }	// namespace SwapTracing
207 
208 #	define T(x) new(std::nothrow) SwapTracing::x;
209 #else
210 #	define T(x) ;
211 #endif
212 
213 
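// Illustrative output of the "swap" KDL command (all values made up):
//
//   swap files:
//     vnode: 0x82f5c3e0, pages: total: 524288, free: 517342
//
//   swap space in pages:
//   total:        524288
//   available:    510000
//   reserved:      14288
//   used:           6946
//   free:         517342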
214 static int
215 dump_swap_info(int argc, char** argv)
216 {
217 	swap_addr_t totalSwapPages = 0;
218 	swap_addr_t freeSwapPages = 0;
219 
220 	kprintf("swap files:\n");
221 
222 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
223 			swap_file* file = it.Next();) {
224 		swap_addr_t total = file->last_slot - file->first_slot;
225 		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
226 			file->vnode, total, file->bmp->free_slots);
227 
228 		totalSwapPages += total;
229 		freeSwapPages += file->bmp->free_slots;
230 	}
231 
232 	kprintf("\n");
233 	kprintf("swap space in pages:\n");
234 	kprintf("total:     %9lu\n", totalSwapPages);
235 	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
236 	kprintf("reserved:  %9llu\n",
237 		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
238 	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
239 	kprintf("free:      %9lu\n", freeSwapPages);
240 
241 	return 0;
242 }
243 
244 
245 static swap_addr_t
246 swap_slot_alloc(uint32 count)
247 {
248 	mutex_lock(&sSwapFileListLock);
249 
250 	if (sSwapFileList.IsEmpty()) {
251 		mutex_unlock(&sSwapFileListLock);
252 		panic("swap_slot_alloc(): no swap file in the system\n");
253 		return SWAP_SLOT_NONE;
254 	}
255 
256 	// Since the radix bitmap cannot handle more than 32 pages, we return
257 	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
258 	if (count > BITMAP_RADIX) {
259 		mutex_unlock(&sSwapFileListLock);
260 		return SWAP_SLOT_NONE;
261 	}
262 
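	// Round-robin over the registered swap files, starting with the one we
	// last allocated from (sSwapFileAlloc); move on to the next file whenever
	// the current one has no free run of 'count' slots.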
263 	swap_addr_t j, addr = SWAP_SLOT_NONE;
264 	for (j = 0; j < sSwapFileCount; j++) {
265 		if (sSwapFileAlloc == NULL)
266 			sSwapFileAlloc = sSwapFileList.First();
267 
268 		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
269 		if (addr != SWAP_SLOT_NONE) {
270 			addr += sSwapFileAlloc->first_slot;
271 			break;
272 		}
273 
274 		// this swap_file is full, find another
275 		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
276 	}
277 
278 	if (j == sSwapFileCount) {
279 		mutex_unlock(&sSwapFileListLock);
280 		panic("swap_slot_alloc: swap space exhausted!\n");
281 		return SWAP_SLOT_NONE;
282 	}
283 
284 	// If this swap file has used more than 90% of its space,
285 	// switch to another.
286 	if (sSwapFileAlloc->bmp->free_slots
287 			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
288 		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
289 
290 	mutex_unlock(&sSwapFileListLock);
291 
292 	return addr;
293 }
294 
295 
296 static swap_file*
297 find_swap_file(swap_addr_t slotIndex)
298 {
299 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
300 			swap_file* swapFile = it.Next();) {
301 		if (slotIndex >= swapFile->first_slot
302 				&& slotIndex < swapFile->last_slot)
303 			return swapFile;
304 	}
305 
306 	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
307 	return NULL;
308 }
309 
310 
311 static void
312 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
313 {
314 	if (slotIndex == SWAP_SLOT_NONE)
315 		return;
316 
317 	mutex_lock(&sSwapFileListLock);
318 	swap_file* swapFile = find_swap_file(slotIndex);
319 	slotIndex -= swapFile->first_slot;
320 	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
321 	mutex_unlock(&sSwapFileListLock);
322 }
323 
324 
325 static off_t
326 swap_space_reserve(off_t amount)
327 {
328 	mutex_lock(&sAvailSwapSpaceLock);
329 	if (sAvailSwapSpace >= amount)
330 		sAvailSwapSpace -= amount;
331 	else {
332 		amount = sAvailSwapSpace;
333 		sAvailSwapSpace = 0;
334 	}
335 	mutex_unlock(&sAvailSwapSpaceLock);
336 
337 	return amount;
338 }
339 
340 
341 static void
342 swap_space_unreserve(off_t amount)
343 {
344 	mutex_lock(&sAvailSwapSpaceLock);
345 	sAvailSwapSpace += amount;
346 	mutex_unlock(&sAvailSwapSpaceLock);
347 }
348 
349 
350 static void
351 swap_hash_resizer(void*, int)
352 {
353 	WriteLocker locker(sSwapHashLock);
354 
355 	size_t size;
356 	void* allocation;
357 
358 	do {
359 		size = sSwapHashTable.ResizeNeeded();
360 		if (size == 0)
361 			return;
362 
363 		locker.Unlock();
364 
365 		allocation = malloc(size);
366 		if (allocation == NULL)
367 			return;
368 
369 		locker.Lock();
370 
371 	} while (!sSwapHashTable.Resize(allocation, size));
372 }
373 
374 
375 // #pragma mark -
376 
377 
378 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
379 public:
380 	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
381 		:
382 		StackableAsyncIOCallback(callback),
383 		fCache(cache)
384 	{
385 	}
386 
387 	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
388 	{
389 		fPageIndex = pageIndex;
390 		fSlotIndex = slotIndex;
391 		fNewSlot = newSlot;
392 	}
393 
394 	virtual void IOFinished(status_t status, bool partialTransfer,
395 		generic_size_t bytesTransferred)
396 	{
397 		if (fNewSlot) {
398 			if (status == B_OK) {
399 				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
400 			} else {
401 				AutoLocker<VMCache> locker(fCache);
402 				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
403 				locker.Unlock();
404 
405 				swap_slot_dealloc(fSlotIndex, 1);
406 			}
407 		}
408 
409 		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);
410 
411 		delete this;
412 	}
413 
414 private:
415 	VMAnonymousCache*	fCache;
416 	page_num_t			fPageIndex;
417 	swap_addr_t			fSlotIndex;
418 	bool				fNewSlot;
419 };
420 
421 
422 // #pragma mark -
423 
424 
425 VMAnonymousCache::~VMAnonymousCache()
426 {
427 	// free allocated swap space and swap blocks
428 	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
429 			offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
430 		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
431 		if (slotIndex == SWAP_SLOT_NONE)
432 			continue;
433 
434 		swap_slot_dealloc(slotIndex, 1);
435 		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
436 		toFree -= B_PAGE_SIZE;
437 	}
438 
439 	swap_space_unreserve(fCommittedSwapSize);
440 	if (committed_size > fCommittedSwapSize)
441 		vm_unreserve_memory(committed_size - fCommittedSwapSize);
442 }
443 
444 
445 status_t
446 VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
447 	int32 numGuardPages, uint32 allocationFlags)
448 {
449 	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
450 		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
451 		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);
452 
453 	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
454 	if (error != B_OK)
455 		return error;
456 
457 	fCanOvercommit = canOvercommit;
458 	fHasPrecommitted = false;
459 	fPrecommittedPages = min_c(numPrecommittedPages, 255);
460 	fGuardedSize = numGuardPages * B_PAGE_SIZE;
461 	fCommittedSwapSize = 0;
462 	fAllocatedSwapSize = 0;
463 
464 	return B_OK;
465 }
466 
467 
468 status_t
469 VMAnonymousCache::Resize(off_t newSize, int priority)
470 {
471 	// If the cache size shrinks, drop all swap pages beyond the new size.
472 	if (fAllocatedSwapSize > 0) {
473 		page_num_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
474 		swap_block* swapBlock = NULL;
475 
476 		for (page_num_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
477 				pageIndex < oldPageCount && fAllocatedSwapSize > 0;
478 				pageIndex++) {
479 			WriteLocker locker(sSwapHashLock);
480 
481 			// Get the swap slot index for the page.
482 			swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
483 			if (swapBlock == NULL || blockIndex == 0) {
484 				swap_hash_key key = { this, pageIndex };
485 				swapBlock = sSwapHashTable.Lookup(key);
486 
487 				if (swapBlock == NULL) {
488 					pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES);
489 					continue;
490 				}
491 			}
492 
493 			swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex];
494 			vm_page* page;
495 			if (slotIndex != SWAP_SLOT_NONE
496 				&& ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL
497 					|| !page->busy)) {
498 					// TODO: We skip (i.e. leak) swap space of busy pages, since
499 					// there could be I/O going on (paging in/out). Waiting is
500 					// not an option as 1. unlocking the cache means that new
501 					// swap pages could be added in a range we've already
502 					// cleared (since the cache still has the old size) and 2.
503 					// we'd risk a deadlock in case we come from the file cache
504 					// and the FS holds the node's write-lock. We should mark
505 					// the page invalid and let the one responsible clean up.
506 					// There's just no such mechanism yet.
507 				swap_slot_dealloc(slotIndex, 1);
508 				fAllocatedSwapSize -= B_PAGE_SIZE;
509 
510 				swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE;
511 				if (--swapBlock->used == 0) {
512 					// All swap pages have been freed -- we can discard the swap
513 					// block.
514 					sSwapHashTable.RemoveUnchecked(swapBlock);
515 					object_cache_free(sSwapBlockCache, swapBlock,
516 						CACHE_DONT_WAIT_FOR_MEMORY
517 							| CACHE_DONT_LOCK_KERNEL_SPACE);
518 				}
519 			}
520 		}
521 	}
522 
523 	return VMCache::Resize(newSize, priority);
524 }
525 
526 
527 status_t
528 VMAnonymousCache::Commit(off_t size, int priority)
529 {
530 	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);
531 
532 	// If we can overcommit, we don't commit here, but in Fault(). We always
533 	// unreserve memory if we're asked to shrink our commitment, though.
534 	if (fCanOvercommit && size > committed_size) {
535 		if (fHasPrecommitted)
536 			return B_OK;
537 
538 		// pre-commit some pages to make a later failure less probable
539 		fHasPrecommitted = true;
540 		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
541 		if (size > precommitted)
542 			size = precommitted;
543 	}
544 
545 	return _Commit(size, priority);
546 }
547 
548 
549 bool
550 VMAnonymousCache::HasPage(off_t offset)
551 {
552 	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
553 		return true;
554 
555 	return false;
556 }
557 
558 
559 bool
560 VMAnonymousCache::DebugHasPage(off_t offset)
561 {
562 	page_num_t pageIndex = offset >> PAGE_SHIFT;
563 	swap_hash_key key = { this, pageIndex };
564 	swap_block* swap = sSwapHashTable.Lookup(key);
565 	if (swap == NULL)
566 		return false;
567 
568 	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
569 }
570 
571 
572 status_t
573 VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count,
574 	uint32 flags, generic_size_t* _numBytes)
575 {
576 	off_t pageIndex = offset >> PAGE_SHIFT;
577 
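	// Group consecutive vecs whose swap slots are contiguous on disk, so each
	// group can be read with a single vfs_read_pages() call.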
578 	for (uint32 i = 0, j = 0; i < count; i = j) {
579 		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
580 		for (j = i + 1; j < count; j++) {
581 			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
582 			if (slotIndex != startSlotIndex + j - i)
583 				break;
584 		}
585 
586 		T(ReadPage(this, pageIndex, startSlotIndex));
587 			// TODO: Assumes that only one page is read.
588 
589 		swap_file* swapFile = find_swap_file(startSlotIndex);
590 
591 		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
592 			* B_PAGE_SIZE;
593 
594 		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
595 			vecs + i, j - i, flags, _numBytes);
596 		if (status != B_OK)
597 			return status;
598 	}
599 
600 	return B_OK;
601 }
602 
603 
604 status_t
605 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count,
606 	uint32 flags, generic_size_t* _numBytes)
607 {
608 	off_t pageIndex = offset >> PAGE_SHIFT;
609 
610 	AutoLocker<VMCache> locker(this);
611 
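	// First pass: release any swap slots already assigned to the pages we are
	// about to write and account for the space we will need; the second pass
	// below allocates fresh slots and performs the actual writes.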
612 	page_num_t totalPages = 0;
613 	for (uint32 i = 0; i < count; i++) {
614 		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
615 		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
616 		if (slotIndex != SWAP_SLOT_NONE) {
617 			swap_slot_dealloc(slotIndex, pageCount);
618 			_SwapBlockFree(pageIndex + totalPages, pageCount);
619 			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
620 		}
621 
622 		totalPages += pageCount;
623 	}
624 
625 	off_t totalSize = totalPages * B_PAGE_SIZE;
626 	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
627 		return B_ERROR;
628 
629 	fAllocatedSwapSize += totalSize;
630 	locker.Unlock();
631 
632 	page_num_t pagesLeft = totalPages;
633 	totalPages = 0;
634 
635 	for (uint32 i = 0; i < count; i++) {
636 		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
637 
638 		generic_addr_t vectorBase = vecs[i].base;
639 		generic_size_t vectorLength = vecs[i].length;
640 		page_num_t n = pageCount;
641 
642 		for (page_num_t j = 0; j < pageCount; j += n) {
643 			swap_addr_t slotIndex;
644 			// Try to allocate n slots; if that fails, halve n and retry.
645 			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
646 				n >>= 1;
647 
648 			if (slotIndex == SWAP_SLOT_NONE)
649 				panic("VMAnonymousCache::Write(): can't allocate swap space\n");
650 
651 			T(WritePage(this, pageIndex, slotIndex));
652 				// TODO: Assumes that only one page is written.
653 
654 			swap_file* swapFile = find_swap_file(slotIndex);
655 
656 			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;
657 
658 			generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE;
659 			generic_io_vec vector[1];
660 			vector->base = vectorBase;
661 			vector->length = length;
662 
663 			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
664 				pos, vector, 1, flags, &length);
665 			if (status != B_OK) {
666 				locker.Lock();
667 				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
668 				locker.Unlock();
669 
670 				swap_slot_dealloc(slotIndex, n);
671 				return status;
672 			}
673 
674 			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
675 			pagesLeft -= n;
676 
677 			if (n != pageCount) {
678 				vectorBase = vectorBase + n * B_PAGE_SIZE;
679 				vectorLength -= n * B_PAGE_SIZE;
680 			}
681 		}
682 
683 		totalPages += pageCount;
684 	}
685 
686 	ASSERT(pagesLeft == 0);
687 	return B_OK;
688 }
689 
690 
691 status_t
692 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs,
693 	size_t count, generic_size_t numBytes, uint32 flags,
694 	AsyncIOCallback* _callback)
695 {
696 	// TODO: Currently this method is only used for single pages. Either make
697 	// more flexible use of it or change the interface!
698 	// This implementation relies on the current usage!
699 	ASSERT(count == 1);
700 	ASSERT(numBytes <= B_PAGE_SIZE);
701 
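	// For a newly allocated slot, the WriteCallback defined above enters the
	// slot into the swap block only once the asynchronous write has succeeded,
	// and releases both the slot and the accounted swap size again on failure.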
702 	page_num_t pageIndex = offset >> PAGE_SHIFT;
703 	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
704 	bool newSlot = slotIndex == SWAP_SLOT_NONE;
705 
706 	// If the page doesn't have any swap space yet, allocate it.
707 	if (newSlot) {
708 		AutoLocker<VMCache> locker(this);
709 		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
710 			_callback->IOFinished(B_ERROR, true, 0);
711 			return B_ERROR;
712 		}
713 
714 		fAllocatedSwapSize += B_PAGE_SIZE;
715 
716 		slotIndex = swap_slot_alloc(1);
717 	}
718 
719 	// create our callback
720 	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
721  		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
722 		: new(std::nothrow) WriteCallback(this, _callback);
723 	if (callback == NULL) {
724 		if (newSlot) {
725 			AutoLocker<VMCache> locker(this);
726 			fAllocatedSwapSize -= B_PAGE_SIZE;
727 			locker.Unlock();
728 
729 			swap_slot_dealloc(slotIndex, 1);
730 		}
731 		_callback->IOFinished(B_NO_MEMORY, true, 0);
732 		return B_NO_MEMORY;
733 	}
734 // TODO: If the page already had swap space assigned, we don't need a
735 // callback of our own.
736 
737 	callback->SetTo(pageIndex, slotIndex, newSlot);
738 
739 	T(WritePage(this, pageIndex, slotIndex));
740 
741 	// write the page asynchronously
742 	swap_file* swapFile = find_swap_file(slotIndex);
743 	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;
744 
745 	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
746 		vecs, 1, numBytes, flags, callback);
747 }
748 
749 
750 bool
751 VMAnonymousCache::CanWritePage(off_t offset)
752 {
753 	// We can write the page, if we have not used all of our committed swap
754 	// space or the page already has a swap slot assigned.
755 	return fAllocatedSwapSize < fCommittedSwapSize
756 		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
757 }
758 
759 
760 int32
761 VMAnonymousCache::MaxPagesPerAsyncWrite() const
762 {
763 	return 1;
764 }
765 
766 
767 status_t
768 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
769 {
770 	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
771 		if (fGuardedSize > 0) {
772 			uint32 guardOffset;
773 
774 #ifdef STACK_GROWS_DOWNWARDS
775 			guardOffset = 0;
776 #elif defined(STACK_GROWS_UPWARDS)
777 			guardOffset = virtual_size - fGuardedSize;
778 #else
779 #	error Stack direction has not been defined in arch_config.h
780 #endif
781 
782 			// report stack fault, guard page hit!
783 			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
784 				TRACE(("stack overflow!\n"));
785 				return B_BAD_ADDRESS;
786 			}
787 		}
788 
789 		if (fPrecommittedPages == 0) {
790 			// never commit more than needed
791 			if (committed_size / B_PAGE_SIZE > page_count)
792 				return B_BAD_HANDLER;
793 
794 			// try to commit additional swap space/memory
795 			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
796 				fCommittedSwapSize += B_PAGE_SIZE;
797 			} else {
798 				int priority = aspace == VMAddressSpace::Kernel()
799 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
800 				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
801 					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
802 						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
803 					return B_NO_MEMORY;
804 				}
805 			}
806 
807 			committed_size += B_PAGE_SIZE;
808 		} else
809 			fPrecommittedPages--;
810 	}
811 
812 	// This will cause vm_soft_fault() to handle the fault
813 	return B_BAD_HANDLER;
814 }
815 
816 
817 void
818 VMAnonymousCache::Merge(VMCache* _source)
819 {
820 	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
821 	if (source == NULL) {
822 		panic("VMAnonymousCache::MergeStore(): merge with incompatible cache "
823 			"%p requested", _source);
824 		return;
825 	}
826 
827 	// take over the source's committed size
828 	fCommittedSwapSize += source->fCommittedSwapSize;
829 	source->fCommittedSwapSize = 0;
830 	committed_size += source->committed_size;
831 	source->committed_size = 0;
832 
833 	off_t actualSize = virtual_end - virtual_base;
834 	if (committed_size > actualSize)
835 		_Commit(actualSize, VM_PRIORITY_USER);
836 
837 	// Move all non-shadowed swap pages from the source to the consumer cache.
838 	// Also remove all source pages that are shadowed by consumer swap pages.
839 	_MergeSwapPages(source);
840 
841 	// Move all non-shadowed pages from the source to the consumer cache.
842 	if (source->page_count < page_count)
843 		_MergePagesSmallerSource(source);
844 	else
845 		_MergePagesSmallerConsumer(source);
846 }
847 
848 
849 void
850 VMAnonymousCache::DeleteObject()
851 {
852 	object_cache_delete(gAnonymousCacheObjectCache, this);
853 }
854 
855 
856 void
857 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
858 	swap_addr_t startSlotIndex, uint32 count)
859 {
860 	WriteLocker locker(sSwapHashLock);
861 
862 	uint32 left = count;
863 	for (uint32 i = 0, j = 0; i < count; i += j) {
864 		off_t pageIndex = startPageIndex + i;
865 		swap_addr_t slotIndex = startSlotIndex + i;
866 
867 		swap_hash_key key = { this, pageIndex };
868 
869 		swap_block* swap = sSwapHashTable.Lookup(key);
870 		while (swap == NULL) {
871 			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
872 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
873 			if (swap == NULL) {
874 				// Wait a short time until memory is available again.
875 				locker.Unlock();
876 				snooze(10000);
877 				locker.Lock();
878 				swap = sSwapHashTable.Lookup(key);
879 				continue;
880 			}
881 
882 			swap->key.cache = this;
883 			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
884 			swap->used = 0;
885 			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
886 				swap->swap_slots[i] = SWAP_SLOT_NONE;
887 
888 			sSwapHashTable.InsertUnchecked(swap);
889 		}
890 
891 		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
892 		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
893 			swap->swap_slots[blockIndex++] = slotIndex + j;
894 			left--;
895 		}
896 
897 		swap->used += j;
898 	}
899 }
900 
901 
902 void
903 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
904 {
905 	WriteLocker locker(sSwapHashLock);
906 
907 	uint32 left = count;
908 	for (uint32 i = 0, j = 0; i < count; i += j) {
909 		off_t pageIndex = startPageIndex + i;
910 		swap_hash_key key = { this, pageIndex };
911 		swap_block* swap = sSwapHashTable.Lookup(key);
912 
913 		ASSERT(swap != NULL);
914 
915 		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
916 		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
917 			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
918 			left--;
919 		}
920 
921 		swap->used -= j;
922 		if (swap->used == 0) {
923 			sSwapHashTable.RemoveUnchecked(swap);
924 			object_cache_free(sSwapBlockCache, swap,
925 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
926 		}
927 	}
928 }
929 
930 
931 swap_addr_t
932 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
933 {
934 	ReadLocker locker(sSwapHashLock);
935 
936 	swap_hash_key key = { this, pageIndex };
937 	swap_block* swap = sSwapHashTable.Lookup(key);
938 	swap_addr_t slotIndex = SWAP_SLOT_NONE;
939 
940 	if (swap != NULL) {
941 		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
942 		slotIndex = swap->swap_slots[blockIndex];
943 	}
944 
945 	return slotIndex;
946 }
947 
948 
949 status_t
950 VMAnonymousCache::_Commit(off_t size, int priority)
951 {
952 	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
953 		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);
954 
955 	// Basic strategy: reserve swap space first; only when we run out of swap
956 	// space do we reserve real memory.
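	// Illustrative example (numbers assumed): if committed_size is 16 pages of
	// which fCommittedSwapSize covers 6, and we are asked to commit 24 pages,
	// we first try to raise the swap commitment toward 24 pages via
	// swap_space_reserve(); whatever swap cannot cover is then reserved as
	// real memory further below.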
957 
958 	off_t committedMemory = committed_size - fCommittedSwapSize;
959 
960 	// Regardless of whether we're asked to grow or shrink the commitment,
961 	// we always try to reserve as much as possible of the final commitment
962 	// in the swap space.
963 	if (size > fCommittedSwapSize) {
964 		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
965 		committed_size = fCommittedSwapSize + committedMemory;
966 		if (size > fCommittedSwapSize) {
967 			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
968 				"swap\n", this, size, fCommittedSwapSize);
969 		}
970 	}
971 
972 	if (committed_size == size)
973 		return B_OK;
974 
975 	if (committed_size > size) {
976 		// The commitment shrinks -- unreserve real memory first.
977 		off_t toUnreserve = committed_size - size;
978 		if (committedMemory > 0) {
979 			off_t unreserved = min_c(toUnreserve, committedMemory);
980 			vm_unreserve_memory(unreserved);
981 			committedMemory -= unreserved;
982 			committed_size -= unreserved;
983 			toUnreserve -= unreserved;
984 		}
985 
986 		// Unreserve swap space.
987 		if (toUnreserve > 0) {
988 			swap_space_unreserve(toUnreserve);
989 			fCommittedSwapSize -= toUnreserve;
990 			committed_size -= toUnreserve;
991 		}
992 
993 		return B_OK;
994 	}
995 
996 	// The commitment grows -- we have already tried to reserve swap space at
997 	// the start of the method, so we try to reserve real memory, now.
998 
999 	off_t toReserve = size - committed_size;
1000 	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
1001 		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
1002 			"bytes of RAM\n", this, size, toReserve);
1003 		return B_NO_MEMORY;
1004 	}
1005 
1006 	committed_size = size;
1007 	return B_OK;
1008 }
1009 
1010 
1011 void
1012 VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
1013 {
1014 	// The source cache has fewer pages than the consumer (this cache), so we
1015 	// iterate through the source's pages and move the ones that are not
1016 	// shadowed up to the consumer.
1017 
1018 	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
1019 			vm_page* page = it.Next();) {
1020 		// Note: Removing the current node while iterating through an
1021 		// IteratableSplayTree is safe.
1022 		vm_page* consumerPage = LookupPage(
1023 			(off_t)page->cache_offset << PAGE_SHIFT);
1024 		if (consumerPage == NULL) {
1025 			// the page is not yet in the consumer cache - move it upwards
1026 			ASSERT_PRINT(!page->busy, "page: %p", page);
1027 			MovePage(page);
1028 		}
1029 	}
1030 }
1031 
1032 
1033 void
1034 VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
1035 {
1036 	// The consumer (this cache) has fewer pages than the source, so we move the
1037 	// consumer's pages to the source (freeing shadowed ones) and finally just
1038 	// all pages of the source back to the consumer.
1039 
1040 	for (VMCachePagesTree::Iterator it = pages.GetIterator();
1041 			vm_page* page = it.Next();) {
1042 		// If a source page is in the way, remove and free it.
1043 		vm_page* sourcePage = source->LookupPage(
1044 			(off_t)page->cache_offset << PAGE_SHIFT);
1045 		if (sourcePage != NULL) {
1046 			DEBUG_PAGE_ACCESS_START(sourcePage);
1047 			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
1048 			source->RemovePage(sourcePage);
1049 			vm_page_free(source, sourcePage);
1050 		}
1051 
1052 		// Note: Removing the current node while iterating through an
1053 		// IteratableSplayTree is safe.
1054 		source->MovePage(page);
1055 	}
1056 
1057 	MoveAllPages(source);
1058 }
1059 
1060 
1061 void
1062 VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
1063 {
1064 	// If neither source nor consumer have swap pages, we don't have to do
1065 	// anything.
1066 	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
1067 		return;
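	// Walk the source's range one block-aligned chunk of SWAP_BLOCK_PAGES
	// pages at a time, i.e. one potential swap_block per iteration.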
1068 
1069 	for (off_t offset = source->virtual_base
1070 				& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
1071 			offset < source->virtual_end;
1072 			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {
1073 
1074 		WriteLocker locker(sSwapHashLock);
1075 
1076 		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
1077 		swap_hash_key key = { source, swapBlockPageIndex };
1078 		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);
1079 
1080 		// remove the source swap block -- we will either take over the swap
1081 		// space (and the block) or free it
1082 		if (sourceSwapBlock != NULL)
1083 			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);
1084 
1085 		key.cache = this;
1086 		swap_block* swapBlock = sSwapHashTable.Lookup(key);
1087 
1088 		locker.Unlock();
1089 
1090 		// remove all source pages that are shadowed by consumer swap pages
1091 		if (swapBlock != NULL) {
1092 			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
1093 				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
1094 					vm_page* page = source->LookupPage(
1095 						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
1096 					if (page != NULL) {
1097 						DEBUG_PAGE_ACCESS_START(page);
1098 						ASSERT_PRINT(!page->busy, "page: %p", page);
1099 						source->RemovePage(page);
1100 						vm_page_free(source, page);
1101 					}
1102 				}
1103 			}
1104 		}
1105 
1106 		if (sourceSwapBlock == NULL)
1107 			continue;
1108 
1109 		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
1110 			off_t pageIndex = swapBlockPageIndex + i;
1111 			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];
1112 
1113 			if (sourceSlotIndex == SWAP_SLOT_NONE)
1114 				continue;
1115 
1116 			if ((swapBlock != NULL
1117 					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
1118 				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
1119 				// The consumer already has a page or a swapped out page
1120 				// at this index. So we can free the source swap space.
1121 				swap_slot_dealloc(sourceSlotIndex, 1);
1122 				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
1123 				sourceSwapBlock->used--;
1124 			}
1125 
1126 			// We've either freed the source swap page or are going to move it
1127 			// to the consumer. At any rate, the source cache doesn't own it
1128 			// anymore.
1129 			source->fAllocatedSwapSize -= B_PAGE_SIZE;
1130 		}
1131 
1132 		// All source swap pages that have not been freed yet are taken over by
1133 		// the consumer.
1134 		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;
1135 
1136 		if (sourceSwapBlock->used == 0) {
1137 			// All swap pages have been freed -- we can discard the source swap
1138 			// block.
1139 			object_cache_free(sSwapBlockCache, sourceSwapBlock,
1140 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
1141 		} else if (swapBlock == NULL) {
1142 			// We need to take over some of the source's swap pages and there's
1143 			// no swap block in the consumer cache. Just take over the source
1144 			// swap block.
1145 			sourceSwapBlock->key.cache = this;
1146 			locker.Lock();
1147 			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
1148 			locker.Unlock();
1149 		} else {
1150 			// We need to take over some of the source's swap pages and there's
1151 			// already a swap block in the consumer cache. Copy the respective
1152 			// swap addresses and discard the source swap block.
1153 			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
1154 				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
1155 					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
1156 			}
1157 
1158 			object_cache_free(sSwapBlockCache, sourceSwapBlock,
1159 				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
1160 		}
1161 	}
1162 }
1163 
1164 
1165 // #pragma mark -
1166 
1167 
1168 status_t
1169 swap_file_add(const char* path)
1170 {
1171 	// open the file
1172 	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
1173 	if (fd < 0)
1174 		return errno;
1175 
1176 	// fstat() it and check whether we can use it
1177 	struct stat st;
1178 	if (fstat(fd, &st) < 0) {
1179 		close(fd);
1180 		return errno;
1181 	}
1182 
1183 	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
1184 		close(fd);
1185 		return B_BAD_VALUE;
1186 	}
1187 
1188 	if (st.st_size < B_PAGE_SIZE) {
1189 		close(fd);
1190 		return B_BAD_VALUE;
1191 	}
1192 
1193 	// get file descriptor, vnode, and cookie
1194 	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
1195 	put_fd(descriptor);
1196 
1197 	vnode* node = fd_vnode(descriptor);
1198 	if (node == NULL) {
1199 		close(fd);
1200 		return B_BAD_VALUE;
1201 	}
1202 
1203 	// do the allocations and prepare the swap_file structure
1204 	swap_file* swap = (swap_file*)malloc(sizeof(swap_file));
1205 	if (swap == NULL) {
1206 		close(fd);
1207 		return B_NO_MEMORY;
1208 	}
1209 
1210 	swap->fd = fd;
1211 	swap->vnode = node;
1212 	swap->cookie = descriptor->cookie;
1213 
1214 	uint32 pageCount = st.st_size >> PAGE_SHIFT;
1215 	swap->bmp = radix_bitmap_create(pageCount);
1216 	if (swap->bmp == NULL) {
1217 		free(swap);
1218 		close(fd);
1219 		return B_NO_MEMORY;
1220 	}
1221 
1222 	// set slot index and add this file to swap file list
1223 	mutex_lock(&sSwapFileListLock);
1224 	// TODO: Also check whether the swap file is already registered!
1225 	if (sSwapFileList.IsEmpty()) {
1226 		swap->first_slot = 0;
1227 		swap->last_slot = pageCount;
1228 	} else {
1229 		// leave a one-page gap between two swap files
1230 		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
1231 		swap->last_slot = swap->first_slot + pageCount;
1232 	}
1233 	sSwapFileList.Add(swap);
1234 	sSwapFileCount++;
1235 	mutex_unlock(&sSwapFileListLock);
1236 
1237 	mutex_lock(&sAvailSwapSpaceLock);
1238 	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
1239 	mutex_unlock(&sAvailSwapSpaceLock);
1240 
1241 	return B_OK;
1242 }
1243 
1244 
1245 status_t
1246 swap_file_delete(const char* path)
1247 {
1248 	vnode* node = NULL;
1249 	status_t status = vfs_get_vnode_from_path(path, true, &node);
1250 	if (status != B_OK)
1251 		return status;
1252 
1253 	MutexLocker locker(sSwapFileListLock);
1254 
1255 	swap_file* swapFile = NULL;
1256 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
1257 			(swapFile = it.Next()) != NULL;) {
1258 		if (swapFile->vnode == node)
1259 			break;
1260 	}
1261 
1262 	vfs_put_vnode(node);
1263 
1264 	if (swapFile == NULL)
1265 		return B_ERROR;
1266 
1267 	// If this file is currently in use, we can't delete it.
1268 	// TODO: Mark this swap file as being deleted, and remove it once all of
1269 	// its swap space has been released.
1270 	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
1271 		return B_ERROR;
1272 
1273 	sSwapFileList.Remove(swapFile);
1274 	sSwapFileCount--;
1275 	locker.Unlock();
1276 
1277 	mutex_lock(&sAvailSwapSpaceLock);
1278 	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
1279 		* PAGE_SIZE;
1280 	mutex_unlock(&sAvailSwapSpaceLock);
1281 
1282 	close(swapFile->fd);
1283 	radix_bitmap_destroy(swapFile->bmp);
1284 	free(swapFile);
1285 
1286 	return B_OK;
1287 }
1288 
1289 
1290 void
1291 swap_init(void)
1292 {
1293 	// create swap block cache
1294 	sSwapBlockCache = create_object_cache("swapblock",
1295 			sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
1296 	if (sSwapBlockCache == NULL)
1297 		panic("swap_init(): can't create object cache for swap blocks\n");
1298 
1299 	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
1300 		MIN_SWAP_BLOCK_RESERVE);
1301 	if (error != B_OK) {
1302 		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
1303 			strerror(error));
1304 	}
1305 
1306 	// init swap hash table
1307 	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
1308 	rw_lock_init(&sSwapHashLock, "swaphash");
1309 
1310 	error = register_resource_resizer(swap_hash_resizer, NULL,
1311 		SWAP_HASH_RESIZE_INTERVAL);
1312 	if (error != B_OK) {
1313 		panic("swap_init(): Failed to register swap hash resizer: %s",
1314 			strerror(error));
1315 	}
1316 
1317 	// init swap file list
1318 	mutex_init(&sSwapFileListLock, "swaplist");
1319 	sSwapFileAlloc = NULL;
1320 	sSwapFileCount = 0;
1321 
1322 	// init available swap space
1323 	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
1324 	sAvailSwapSpace = 0;
1325 
1326 	add_debugger_command_etc("swap", &dump_swap_info,
1327 		"Print info about the swap usage",
1328 		"\n"
1329 		"Print info about the swap usage.\n", 0);
1330 }
1331 
1332 
1333 void
1334 swap_init_post_modules()
1335 {
1336 	// Never try to create a swap file on a read-only device - when booting
1337 	// from CD, the write overlay is used.
1338 	if (gReadOnlyBootDevice)
1339 		return;
1340 
1341 	off_t size = 0;
1342 
1343 	void* settings = load_driver_settings("virtual_memory");
1344 	if (settings != NULL) {
1345 		if (!get_driver_boolean_parameter(settings, "vm", false, false))
1346 			return;
1347 
1348 		const char* string = get_driver_parameter(settings, "swap_size", NULL,
1349 			NULL);
1350 		size = string ? atoll(string) : 0;
1351 
1352 		unload_driver_settings(settings);
1353 	} else
1354 		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;
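	// The default is roughly twice the physical RAM, e.g. a 2 GiB /var/swap on
	// a machine with 1 GiB of RAM (numbers for illustration only).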
1355 
1356 	if (size < B_PAGE_SIZE)
1357 		return;
1358 
1359 	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
1360 	if (fd < 0) {
1361 		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
1362 		return;
1363 	}
1364 
1365 	struct stat stat;
1366 	stat.st_size = size;
1367 	status_t error = _kern_write_stat(fd, NULL, false, &stat,
1368 		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
1369 	if (error != B_OK) {
1370 		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
1371 			strerror(error));
1372 	}
1373 
1374 	close(fd);
1375 
1376 	error = swap_file_add("/var/swap");
1377 	if (error != B_OK)
1378 		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
1379 }
1380 
1381 
1382 //! Used by page daemon to free swap space.
1383 bool
1384 swap_free_page_swap_space(vm_page* page)
1385 {
1386 	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
1387 	if (cache == NULL)
1388 		return false;
1389 
1390 	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
1391 	if (slotIndex == SWAP_SLOT_NONE)
1392 		return false;
1393 
1394 	swap_slot_dealloc(slotIndex, 1);
1395 	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
1396 	cache->_SwapBlockFree(page->cache_offset, 1);
1397 
1398 	return true;
1399 }
1400 
1401 
1402 uint32
1403 swap_available_pages()
1404 {
1405 	mutex_lock(&sAvailSwapSpaceLock);
1406 	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
1407 	mutex_unlock(&sAvailSwapSpaceLock);
1408 
1409 	return avail;
1410 }
1411 
1412 
1413 uint32
1414 swap_total_swap_pages()
1415 {
1416 	mutex_lock(&sSwapFileListLock);
1417 
1418 	uint32 totalSwapSlots = 0;
1419 	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
1420 			swap_file* swapFile = it.Next();)
1421 		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;
1422 
1423 	mutex_unlock(&sSwapFileListLock);
1424 
1425 	return totalSwapSlots;
1426 }
1427 
1428 #endif	// ENABLE_SWAP_SUPPORT
1429 
1430 void
1431 swap_get_info(struct system_memory_info* info)
1432 {
1433 #if ENABLE_SWAP_SUPPORT
1434 	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
1435 	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
1436 #else
1437 	info->max_swap_space = 0;
1438 	info->free_swap_space = 0;
1439 #endif
1440 }
1441 
1442