/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_priv.h>

#include "IORequest.h"


#if	ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval at which the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE		1024

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)
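
// Worked example of the block arithmetic: cache page index 37 belongs to
// swap block 37 >> SWAP_BLOCK_SHIFT == 1, its swap_block key stores
// page_index 37 & ~SWAP_BLOCK_MASK == 32, and its slot is found at
// swap_slots[37 & SWAP_BLOCK_MASK] == swap_slots[5].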

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode	*vnode;
	void			*cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap	*bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;  // page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache *cache = key.cache;
		return blockIndex ^ (size_t)(int *)cache;
	}

	size_t Hash(const swap_block *value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block *value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block *value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;
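
// Illustrative lookup sketch (mirrors _SwapBlockGetAddress() below; "cache"
// stands for some VMAnonymousCache*): with sSwapHashLock held for reading,
// the swap slot of page index 37 would be found as
//
//	swap_hash_key key = { cache, 37 };
//	swap_block* block = sSwapHashTable.Lookup(key);
//	swap_addr_t slot = block != NULL
//		? block->swap_slots[37 & SWAP_BLOCK_MASK] : SWAP_SLOT_NONE;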

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file *sSwapFileAlloc = NULL; // allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache *sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
			swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read:  cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
			swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
			file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9lu\n", totalSwapPages);
	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9llu\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9lu\n", freeSwapPages);

	return 0;
}
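
// Note: dump_swap_info() runs in the kernel debugger; swap_init() below
// registers it as the "swap" command.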


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90 percent of its space,
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file *
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file *swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
				&& slotIndex < swapFile->last_slot)
			return swapFile;
	}

	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file *swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}
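
// Example of the reservation contract: with 100 pages of swap available,
// swap_space_reserve(150 * B_PAGE_SIZE) returns 100 * B_PAGE_SIZE and leaves
// sAvailSwapSpace at 0. The caller (see _Commit()) must then back the
// remaining 50 pages with real memory instead.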


static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

	void operator delete(void* address, size_t size)
	{
		io_request_free(address);
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};
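
// Note on memory management: a WriteCallback deletes itself in IOFinished().
// Its custom operator delete above releases the memory via io_request_free(),
// since WriteAsync() below may allocate the callback with new(vip_io_alloc)
// for B_VIP_IO_REQUEST writes (and with new(std::nothrow) otherwise).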


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	// free allocated swap space and swap block
	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
			offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);

	// if we can overcommit, we don't commit here, but in anonymous_fault()
	if (fCanOvercommit) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


status_t
VMAnonymousCache::Read(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

		swap_file *swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	uint32 totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	uint32 pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		void *vectorBase = vecs[i].iov_base;
		size_t vectorLength = vecs[i].iov_len;
		uint32 n = pageCount;

		for (uint32 j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// try to allocate n slots; if that fails, try n/2
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.

			swap_file *swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

			size_t length = n * B_PAGE_SIZE;
			iovec vector[1];
			vector->iov_base = vectorBase;
			vector->iov_len = length;

			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
				pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = (void *)((addr_t)vectorBase + n * B_PAGE_SIZE);
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}
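
// Note on the allocation strategy above: for a 32-page vector, Write() first
// asks swap_slot_alloc() for 32 contiguous slots; if no such run exists, it
// halves the request (16, 8, ... down to 1) and issues one vfs_write_pages()
// call per successfully allocated run. swap_slot_alloc() itself rejects
// requests larger than BITMAP_RADIX, which likewise forces smaller runs.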


status_t
VMAnonymousCache::WriteAsync(off_t offset, const iovec* vecs, size_t count,
	size_t numBytes, uint32 flags, AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(vip_io_alloc) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
// TODO: If the page already had swap space assigned, we don't need our own
// callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	// We can write the page if we have not used all of our committed swap
	// space, or if the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Fault(struct vm_address_space *aspace, off_t offset)
{
	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fGuardedSize > 0) {
			uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
			guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
			guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif

			// report stack fault, guard page hit!
			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
				TRACE("stack overflow!\n");
				return B_BAD_ADDRESS;
			}
		}

		if (fPrecommittedPages == 0) {
			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE)
				fCommittedSwapSize += B_PAGE_SIZE;
			else if (vm_try_reserve_memory(B_PAGE_SIZE, 0) != B_OK)
				return B_NO_MEMORY;

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize);

	// Move all non-shadowed pages from the source to the consumer cache.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		swap_addr_t consumerSwapSlot = _SwapBlockGetAddress(page->cache_offset);
		if (consumerPage == NULL && consumerSwapSlot == SWAP_SLOT_NONE) {
			// the page is not yet in the consumer cache - move it upwards
			source->RemovePage(page);
			InsertPage(page, (off_t)page->cache_offset << PAGE_SHIFT);

			// If the moved-up page has a swap page associated, we mark it, so
			// that the swap page is moved upwards, too. We would lose data if
			// the page had been modified and written to swap, but were now no
			// longer marked modified.
			if (source->_SwapBlockGetAddress(page->cache_offset)
					!= SWAP_SLOT_NONE) {
				page->merge_swap = true;
			}
#if DEBUG_PAGE_CACHE_TRANSITIONS
		} else {
			page->debug_flags = 0;
			if (consumerPage->state == PAGE_STATE_BUSY)
				page->debug_flags |= 0x1;
			if (consumerPage->type == PAGE_TYPE_DUMMY)
				page->debug_flags |= 0x2;
			page->collided_page = consumerPage;
			consumerPage->collided_page = page;
#endif	// DEBUG_PAGE_CACHE_TRANSITIONS
		}
	}

	// Move all non-shadowed swap pages from the source to the consumer cache.

	for (off_t offset = source->virtual_base
				& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		if (sourceSwapBlock == NULL)
			continue;

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				// this page is not swapped out
				continue;

			vm_page* page = LookupPage((off_t)pageIndex << PAGE_SHIFT);

			bool keepSwapPage = true;
			if (page != NULL && !page->merge_swap) {
				// The consumer already has a page at this index and it wasn't
				// one taken over from the source. So we can simply free the
				// swap space.
				keepSwapPage = false;
			} else {
				if (page != NULL) {
					// The page was taken over from the source cache. Clear the
					// indicator flag. We'll take over the swap page too.
					page->merge_swap = false;
				} else if (swapBlock != NULL
						&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					// There's no page in the consumer cache, but a swap page
					// exists. Free the source swap page.
					keepSwapPage = false;
				}
			}

			if (!keepSwapPage) {
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over
		// by the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		}
	}
}


void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block *swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block *)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_SLEEP);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block *swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap);
		}
	}
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block *swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when we are running out
	// of swap space do we reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
				"swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
			"bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}
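
// Worked example: suppose committed_size is 10 pages, all of it swap
// (fCommittedSwapSize == 10 pages), and _Commit(15 pages) is called while
// only 3 pages of swap remain available. swap_space_reserve() then grows
// fCommittedSwapSize to 13 pages, and the missing 2 pages are reserved as
// real memory via vm_try_reserve_memory().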


// #pragma mark -


status_t
swap_file_add(const char *path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode *node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file *swap = (swap_file *)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char *path)
{
	vnode *node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file *swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently used, we can't delete it
	// TODO: mark this swap file as being deleted, and remove it once all of
	// its swap space has been released
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock",
			sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	off_t size = 0;

	void *settings = load_driver_settings("virtual_memory");
	if (settings != NULL) {
		if (!get_driver_boolean_parameter(settings, "vm", false, false)) {
			unload_driver_settings(settings);
			return;
		}

		const char *string = get_driver_parameter(settings, "swap_size", NULL,
			NULL);
		size = string ? atoll(string) : 0;

		unload_driver_settings(settings);
	} else
		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;

	if (size < B_PAGE_SIZE)
		return;

	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = size;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
			strerror(error));
	}

	close(fd);

	error = swap_file_add("/var/swap");
	if (error != B_OK)
		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
}
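
// An illustrative "virtual_memory" driver settings file for the code above
// (the keys "vm" and "swap_size" are the ones parsed; the file's location
// follows the usual driver settings conventions):
//
//	vm on
//	swap_size 1073741824
//		# swap file size in bytes (here: 1 GB)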


//! Used by page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page *page)
{
	VMAnonymousCache *cache = dynamic_cast<VMAnonymousCache *>(page->cache);
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file *swapFile = it.Next();)
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}

#endif	// ENABLE_SWAP_SUPPORT

void
swap_get_info(struct system_memory_info *info)
{
#if ENABLE_SWAP_SUPPORT
	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
#else
	info->max_swap_space = 0;
	info->free_swap_space = 0;
#endif
}