xref: /haiku/src/system/kernel/vm/VMAnonymousCache.cpp (revision b6b0567fbd186f8ce8a0c90bdc7a7b5b4c649678)
/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_priv.h>

#include "IORequest.h"


#if	ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval at which the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE		1024

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)
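// Example: page index 37 lives in swap block 37 >> SWAP_BLOCK_SHIFT == 1,
// at offset 37 & SWAP_BLOCK_MASK == 5 within that block.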

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode	*vnode;
	void			*cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap	*bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;  // page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache *cache = key.cache;
		return blockIndex ^ (size_t)cache;
	}

	size_t Hash(const swap_block *value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block *value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block *value) const
	{
		return value->hash_link;
	}
};

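// Hash table mapping (cache, page index) keys to swap blocks. Compare()
// masks off the low SWAP_BLOCK_MASK bits, so a lookup with any page index
// inside a block finds that block.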
typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static mutex sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file *sSwapFileAlloc = NULL; // allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache *sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
			swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read:  cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
			swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


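// Debugger command "swap": prints per-file and global swap usage statistics.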
static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
			file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9lu\n", totalSwapPages);
	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9llu\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9lu\n", freeSwapPages);

	return 0;
}


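// Allocates a run of count contiguous swap slots, starting the search at
// sSwapFileAlloc and moving on to the next file when one fills up. Returns
// SWAP_SLOT_NONE when count exceeds BITMAP_RADIX, which makes the caller
// retry with a smaller run; panics when all swap space is exhausted.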
static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% of its space,
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file *
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file *swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
				&& slotIndex < swapFile->last_slot)
			return swapFile;
	}

	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file *swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


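// Reserves up to amount bytes of swap space. May reserve less when the pool
// runs low; returns the amount actually reserved.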
static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	MutexLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

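	// Note: the allocation must not happen while the hash lock is held, so
	// the lock is dropped around malloc() and the required size re-checked,
	// since it may have changed in the meantime.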
	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


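// Callback for asynchronous swap writes: when a write to a freshly
// allocated slot succeeds, the slot is recorded in the swap block table; on
// failure the slot and the accounted swap size are released again.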
class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

	void operator delete(void* address, size_t size)
	{
		io_request_free(address);
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	// free allocated swap space and swap blocks
	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
			offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);

	// if we can overcommit, we don't commit here, but in anonymous_fault()
	if (fCanOvercommit) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


status_t
VMAnonymousCache::Read(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

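	// Coalesce runs of pages whose swap slots are contiguous, so that each
	// run can be read with a single vfs_read_pages() call.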
	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

		swap_file *swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	for (uint32 i = 0; i < count; i++) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + i);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, 1);
			_SwapBlockFree(pageIndex + i, 1);
			fAllocatedSwapSize -= B_PAGE_SIZE;
		}
	}

	if (fAllocatedSwapSize + (off_t)count * B_PAGE_SIZE > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += (off_t)count * B_PAGE_SIZE;
	locker.Unlock();

	uint32 n = count;
	for (uint32 i = 0; i < count; i += n) {
		swap_addr_t slotIndex;
		// try to allocate n slots; if that fails, try to allocate n/2
		while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
			n >>= 1;
		if (slotIndex == SWAP_SLOT_NONE)
			panic("VMAnonymousCache::Write(): can't allocate swap space\n");

		T(WritePage(this, pageIndex, slotIndex));
			// TODO: Assumes that only one page is written.

		swap_file *swapFile = find_swap_file(slotIndex);

		off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

		status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
			pos, vecs + i, n, flags, _numBytes);
		if (status != B_OK) {
			locker.Lock();
			fAllocatedSwapSize -= (off_t)n * B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, n);
			return status;
		}

		_SwapBlockBuild(pageIndex + i, slotIndex, n);
	}

	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const iovec* vecs, size_t count,
	size_t numBytes, uint32 flags, AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(vip_io_alloc) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our
	// own callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	// We can write the page if we have not used all of our committed swap
	// space, or the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
}


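// Called while a fault in this cache is handled. For over-committing caches
// this is where the actual commitment happens: one page of swap space or
// memory is reserved per faulted page, unless the page hits the guard area.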
status_t
VMAnonymousCache::Fault(struct vm_address_space *aspace, off_t offset)
{
	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fGuardedSize > 0) {
			uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
			guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
			guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif

			// report stack fault, guard page hit!
			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
				TRACE("stack overflow!\n");
				return B_BAD_ADDRESS;
			}
		}

		if (fPrecommittedPages == 0) {
			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE)
				fCommittedSwapSize += B_PAGE_SIZE;
			else if (vm_try_reserve_memory(B_PAGE_SIZE, 0) != B_OK)
				return B_NO_MEMORY;

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


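// Merges the source cache into this one: commitments, pages, and swap slots
// that are not shadowed by the consumer are all taken over.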
void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize);

	// Move all non-shadowed pages from the source to the consumer cache.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		swap_addr_t consumerSwapSlot = _SwapBlockGetAddress(page->cache_offset);
		if (consumerPage == NULL && consumerSwapSlot == SWAP_SLOT_NONE) {
			// the page is not yet in the consumer cache - move it upwards
			source->RemovePage(page);
			InsertPage(page, (off_t)page->cache_offset << PAGE_SHIFT);

			// If the moved-up page has a swap page associated, we mark it,
			// so that the swap page is moved upwards, too. Otherwise we would
			// lose data if the page had been modified and written to swap,
			// but is no longer marked modified.
			if (source->_SwapBlockGetAddress(page->cache_offset)
					!= SWAP_SLOT_NONE) {
				page->merge_swap = true;
			}
#if DEBUG_PAGE_CACHE_TRANSITIONS
		} else {
			page->debug_flags = 0;
			if (consumerPage->state == PAGE_STATE_BUSY)
				page->debug_flags |= 0x1;
			if (consumerPage->type == PAGE_TYPE_DUMMY)
				page->debug_flags |= 0x2;
			page->collided_page = consumerPage;
			consumerPage->collided_page = page;
#endif	// DEBUG_PAGE_CACHE_TRANSITIONS
		}
	}

	// Move all non-shadowed swap pages from the source to the consumer cache.

	for (off_t offset = source->virtual_base
				& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		MutexLocker locker(sSwapHashLock);

		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		if (sourceSwapBlock == NULL)
			continue;

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				// this page is not swapped out
				continue;

			vm_page* page = LookupPage((off_t)pageIndex << PAGE_SHIFT);

			bool keepSwapPage = true;
			if (page != NULL && !page->merge_swap) {
				// The consumer already has a page at this index and it wasn't
				// one taken over from the source. So we can simply free the
				// swap space.
				keepSwapPage = false;
			} else {
				if (page != NULL) {
					// The page was taken over from the source cache. Clear the
					// indicator flag. We'll take over the swap page too.
					page->merge_swap = false;
				} else if (swapBlock != NULL
						&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					// There's no page in the consumer cache, but a swap page.
					// Free the source swap page.
					keepSwapPage = false;
				}
			}

			if (!keepSwapPage) {
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over
		// by the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		}
	}
}


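// Records count swap slots starting at startSlotIndex for the pages starting
// at startPageIndex, creating swap blocks on demand. For example, building
// (startPageIndex 10, startSlotIndex 200, count 3) stores slots 200-202 at
// offsets 10-12 of the block covering page indices 0-31.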
void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	mutex_lock(&sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block *swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block *)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_SLEEP);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
				mutex_unlock(&sSwapHashLock);
				snooze(10000);
				mutex_lock(&sSwapHashLock);
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 k = 0; k < SWAP_BLOCK_PAGES; k++)
				swap->swap_slots[k] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}

	mutex_unlock(&sSwapHashLock);
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	mutex_lock(&sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block *swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap);
		}
	}

	mutex_unlock(&sSwapHashLock);
}


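// Returns the swap slot assigned to the given page index, or SWAP_SLOT_NONE
// if the page has no swap space assigned.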
swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	mutex_lock(&sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block *swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	mutex_unlock(&sSwapHashLock);

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of
	// swap space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
				"swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
			"bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


// #pragma mark -


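// Registers the file at the given path as an additional swap file. The file
// is kept open, sliced into page-sized slots, and appended to the global
// slot range.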
status_t
swap_file_add(const char *path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode *node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file *swap = (swap_file *)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave a one-page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char *path)
{
	vnode *node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file *swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently being used, we can't delete it
	// TODO: mark this swap file as deleting, and remove it after releasing
	// all the swap space
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock",
			sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	mutex_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about swap usage",
		"\n"
		"Print info about swap usage.\n", 0);
}


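// Creates and registers the default swap file /var/swap. Its size comes from
// the "virtual_memory" driver settings, or defaults to twice the amount of
// physical memory.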
void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	off_t size = 0;

	void *settings = load_driver_settings("virtual_memory");
	if (settings != NULL) {
		if (!get_driver_boolean_parameter(settings, "vm", false, false))
			return;

		const char *string = get_driver_parameter(settings, "swap_size", NULL,
			NULL);
		size = string ? atoll(string) : 0;

		unload_driver_settings(settings);
	} else
		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;

	if (size < B_PAGE_SIZE)
		return;

	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = size;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
			strerror(error));
	}

	close(fd);

	error = swap_file_add("/var/swap");
	if (error != B_OK)
		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
}


//! Used by the page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page *page)
{
	VMAnonymousCache *cache = dynamic_cast<VMAnonymousCache *>(page->cache);
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file *swapFile = it.Next();)
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}

#endif	// ENABLE_SWAP_SUPPORT

void
swap_get_info(struct system_memory_info *info)
{
#if ENABLE_SWAP_SUPPORT
	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
#else
	info->max_swap_space = 0;
	info->free_swap_space = 0;
#endif
}
