/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 *
 * Copyright 2011-2012 Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Hamish Morrison, hamish@lavabit.com
 *		Alexander von Gluck IV, kallisti5@unixzen.com
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <FindDirectory.h>
#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskSystem.h>
#include <disk_device_manager/KPartitionVisitor.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs/KPath.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/Bitmap.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>

#include "IORequest.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval at which the swap hash resizer is triggered (in 0.1s units)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE	1024

#define SWAP_SLOT_NONE	RADIX_SLOT_NONE

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1)
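
// A page index maps into the swap hash as follows (see _SwapBlockBuild() and
// _SwapBlockGetAddress()): the swap block is keyed by
// pageIndex & ~(off_t)SWAP_BLOCK_MASK, and the page's slot within the block
// is pageIndex & SWAP_BLOCK_MASK. For example, page index 37 lands in the
// block keyed 32, at swap_slots[5].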


static const char* const kDefaultSwapPath = "/var/swap";

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode*	vnode;
	void*			cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap*	bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;  // page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache* cache = key.cache;
		return blockIndex ^ (size_t)(int*)cache;
	}

	size_t Hash(const swap_block* value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block* value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block* value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL;	// allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache* sSwapBlockCache;

#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32
			"\n", file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9" B_PRIu32 "\n", totalSwapPages);
	kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9" B_PRIdOFF "\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9" B_PRIu32 "\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle allocations of more than
	// BITMAP_RADIX pages, we return SWAP_SLOT_NONE; this forces Write() to
	// adjust the allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% of its space,
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) {
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


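// Returns the swap file whose slot range contains the given global slot
// index. Panics (and returns NULL) if the slot belongs to no registered
// swap file.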
static swap_file*
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
				&& slotIndex < swapFile->last_slot) {
			return swapFile;
		}
	}

	panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n",
		slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file* swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


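// Reserves up to `amount` bytes of the available swap space and returns the
// amount actually reserved, which may be less than requested (down to 0)
// when swap space is nearly exhausted.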
static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);
		delete this;
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	delete fNoSwapPages;
	fNoSwapPages = NULL;

	_FreeSwapPageRange(virtual_base, virtual_end, false);
	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32
		")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages,
		numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fNoSwapPages = NULL;
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


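/*!	Marks the pages in the given range as swappable or non-swappable. The
	non-swappable pages are tracked in the fNoSwapPages bitmap, which is
	allocated lazily and deleted again once no page is marked non-swappable
	anymore.
*/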
status_t
VMAnonymousCache::SetCanSwapPages(off_t base, size_t size, bool canSwap)
{
	const page_num_t first = base >> PAGE_SHIFT;
	const size_t count = PAGE_ALIGN(size + ((first << PAGE_SHIFT) - base))
		>> PAGE_SHIFT;

	if (count == 0)
		return B_OK;
	if (canSwap && fNoSwapPages == NULL)
		return B_OK;

	if (fNoSwapPages == NULL)
		fNoSwapPages = new(std::nothrow) Bitmap(0);
	if (fNoSwapPages == NULL)
		return B_NO_MEMORY;

	const page_num_t pageCount = PAGE_ALIGN(virtual_end) >> PAGE_SHIFT;

	if (fNoSwapPages->Resize(pageCount) != B_OK)
		return B_NO_MEMORY;

	for (size_t i = 0; i < count; i++) {
		if (canSwap)
			fNoSwapPages->Clear(first + i);
		else
			fNoSwapPages->Set(first + i);
	}

	if (fNoSwapPages->GetHighestSet() < 0) {
		delete fNoSwapPages;
		fNoSwapPages = NULL;
	}
	return B_OK;
}


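/*!	Frees the swap space backing the pages in the range [fromOffset,
	toOffset). If skipBusyPages is true, slots belonging to busy pages are
	left alone (and thereby leaked; see the TODO in the body).
*/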
void
VMAnonymousCache::_FreeSwapPageRange(off_t fromOffset, off_t toOffset,
	bool skipBusyPages)
{
	swap_block* swapBlock = NULL;
	off_t toIndex = toOffset >> PAGE_SHIFT;
	for (off_t pageIndex = fromOffset >> PAGE_SHIFT;
			pageIndex < toIndex && fAllocatedSwapSize > 0; pageIndex++) {

		WriteLocker locker(sSwapHashLock);

		// Get the swap slot index for the page.
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		if (swapBlock == NULL || blockIndex == 0) {
			swap_hash_key key = { this, pageIndex };
			swapBlock = sSwapHashTable.Lookup(key);

			if (swapBlock == NULL) {
				pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
				continue;
			}
		}

		swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex];
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		if (skipBusyPages) {
			vm_page* page = LookupPage(pageIndex * B_PAGE_SIZE);
			if (page != NULL && page->busy) {
				// TODO: We skip (i.e. leak) swap space of busy pages, since
				// there could be I/O going on (paging in/out). Waiting is
				// not an option as 1. unlocking the cache means that new
				// swap pages could be added in a range we've already
				// cleared (since the cache still has the old size) and 2.
				// we'd risk a deadlock in case we come from the file cache
				// and the FS holds the node's write-lock. We should mark
				// the page invalid and let the one responsible clean up.
				// There's just no such mechanism yet.
				continue;
			}
		}

		swap_slot_dealloc(slotIndex, 1);
		fAllocatedSwapSize -= B_PAGE_SIZE;

		swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE;
		if (--swapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the swap block.
			sSwapHashTable.RemoveUnchecked(swapBlock);
			object_cache_free(sSwapBlockCache, swapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);

			// There are no swap pages for possibly remaining pages, skip to
			// the next block.
			pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
			swapBlock = NULL;
		}
	}
}


status_t
VMAnonymousCache::Resize(off_t newSize, int priority)
{
	if (fNoSwapPages != NULL) {
		if (fNoSwapPages->Resize(PAGE_ALIGN(newSize) >> PAGE_SHIFT) != B_OK)
			return B_NO_MEMORY;
	}

	_FreeSwapPageRange(newSize + B_PAGE_SIZE - 1,
		virtual_end + B_PAGE_SIZE - 1);
	return VMCache::Resize(newSize, priority);
}


status_t
VMAnonymousCache::Rebase(off_t newBase, int priority)
{
	if (fNoSwapPages != NULL) {
		const ssize_t sizeDifference = (newBase >> PAGE_SHIFT)
			- (virtual_base >> PAGE_SHIFT);
		fNoSwapPages->Shift(sizeDifference);
	}

	_FreeSwapPageRange(virtual_base, newBase);
	return VMCache::Rebase(newBase, priority);
}


status_t
VMAnonymousCache::Discard(off_t offset, off_t size)
{
	_FreeSwapPageRange(offset, offset + size);
	return VMCache::Discard(offset, size);
}


/*!	Moves the swap pages for the given range from the source cache into this
	cache. Both caches must be locked.
*/
status_t
VMAnonymousCache::Adopt(VMCache* _source, off_t offset, off_t size,
	off_t newOffset)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Adopt(): adopt from incompatible cache %p "
			"requested", _source);
		return B_ERROR;
	}

	off_t pageIndex = newOffset >> PAGE_SHIFT;
	off_t sourcePageIndex = offset >> PAGE_SHIFT;
	off_t sourceEndPageIndex = (offset + size + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
	swap_block* swapBlock = NULL;

	WriteLocker locker(sSwapHashLock);

	while (sourcePageIndex < sourceEndPageIndex
			&& source->fAllocatedSwapSize > 0) {
		swap_addr_t left
			= SWAP_BLOCK_PAGES - (sourcePageIndex & SWAP_BLOCK_MASK);

		swap_hash_key sourceKey = { source, sourcePageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(sourceKey);
		if (sourceSwapBlock == NULL || sourceSwapBlock->used == 0) {
			sourcePageIndex += left;
			pageIndex += left;
			swapBlock = NULL;
			continue;
		}

		for (; left > 0 && sourceSwapBlock->used > 0;
				left--, sourcePageIndex++, pageIndex++) {

			swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
			if (swapBlock == NULL || blockIndex == 0) {
				swap_hash_key key = { this, pageIndex };
				swapBlock = sSwapHashTable.Lookup(key);

				if (swapBlock == NULL) {
					swapBlock = (swap_block*)object_cache_alloc(sSwapBlockCache,
						CACHE_DONT_WAIT_FOR_MEMORY
							| CACHE_DONT_LOCK_KERNEL_SPACE);
					if (swapBlock == NULL)
						return B_NO_MEMORY;

					swapBlock->key.cache = this;
					swapBlock->key.page_index
						= pageIndex & ~(off_t)SWAP_BLOCK_MASK;
					swapBlock->used = 0;
					for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
						swapBlock->swap_slots[i] = SWAP_SLOT_NONE;

					sSwapHashTable.InsertUnchecked(swapBlock);
				}
			}

			swap_addr_t sourceBlockIndex = sourcePageIndex & SWAP_BLOCK_MASK;
			swap_addr_t slotIndex
				= sourceSwapBlock->swap_slots[sourceBlockIndex];
			if (slotIndex == SWAP_SLOT_NONE)
				continue;

			ASSERT(swapBlock->swap_slots[blockIndex] == SWAP_SLOT_NONE);

			swapBlock->swap_slots[blockIndex] = slotIndex;
			swapBlock->used++;
			fAllocatedSwapSize += B_PAGE_SIZE;

			sourceSwapBlock->swap_slots[sourceBlockIndex] = SWAP_SLOT_NONE;
			sourceSwapBlock->used--;
			source->fAllocatedSwapSize -= B_PAGE_SIZE;

			TRACE("adopted slot %#" B_PRIx32 " from %p at page %" B_PRIdOFF
				" to %p at page %" B_PRIdOFF "\n", slotIndex, source,
				sourcePageIndex, this, pageIndex);
		}

		if (left > 0) {
			sourcePageIndex += left;
			pageIndex += left;
			swapBlock = NULL;
		}

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been adopted, we can discard the swap block.
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}

	locker.Unlock();

	return VMCache::Adopt(source, offset, size, newOffset);
}


status_t
VMAnonymousCache::Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size);

	AssertLocked();

	// If we can overcommit, we don't commit here but in Fault(). We do,
	// however, always unreserve memory if we're asked to shrink our
	// commitment.
	if (fCanOvercommit && size > committed_size) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size, priority);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


bool
VMAnonymousCache::DebugHasPage(off_t offset)
{
	off_t pageIndex = offset >> PAGE_SHIFT;
	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	if (swap == NULL)
		return false;

	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
}


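/*!	Reads the given page range back from swap. Consecutive pages whose swap
	slots are contiguous are coalesced, so that each vfs_read_pages() call
	covers the longest possible run of slots.
*/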
status_t
VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

		swap_file* swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


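/*!	Writes the given page range to swap. Swap slots are allocated in runs:
	if no run of pageCount contiguous slots is free, the request size is
	halved (pageCount, pageCount / 2, ..., 1) until an allocation succeeds,
	so on a fragmented swap file the data is written in several smaller
	chunks.
*/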
status_t
VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	page_num_t totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	page_num_t pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		generic_addr_t vectorBase = vecs[i].base;
		generic_size_t vectorLength = vecs[i].length;
		page_num_t n = pageCount;

		for (page_num_t j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// try to allocate n slots; if that fails, try to allocate n/2
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.

			swap_file* swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

			generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE;
			generic_io_vec vector[1];
			vector->base = vectorBase;
			vector->length = length;

			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
				pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = vectorBase + n * B_PAGE_SIZE;
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs,
	size_t count, generic_size_t numBytes, uint32 flags,
	AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our
	// own callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	const off_t pageIndex = offset >> PAGE_SHIFT;
	if (fNoSwapPages != NULL && fNoSwapPages->Get(pageIndex))
		return false;

	// We can write the page if we have not used up all of our committed swap
	// space, or if the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(pageIndex) != SWAP_SLOT_NONE;
}


int32
VMAnonymousCache::MaxPagesPerAsyncWrite() const
{
	return 1;
}


status_t
VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
{
	if (fGuardedSize > 0) {
		uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
		guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
		guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif
		// report stack fault, guard page hit!
		if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
			TRACE(("stack overflow!\n"));
			return B_BAD_ADDRESS;
		}
	}

	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fPrecommittedPages == 0) {
			// never commit more than needed
			if (committed_size / B_PAGE_SIZE > page_count)
				return B_BAD_HANDLER;

			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
				fCommittedSwapSize += B_PAGE_SIZE;
			} else {
				int priority = aspace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
					return B_NO_MEMORY;
				}
			}

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize, VM_PRIORITY_USER);

	// Move all not shadowed swap pages from the source to the consumer cache.
	// Also remove all source pages that are shadowed by consumer swap pages.
	_MergeSwapPages(source);

	// Move all not shadowed pages from the source to the consumer cache.
	if (source->page_count < page_count)
		_MergePagesSmallerSource(source);
	else
		_MergePagesSmallerConsumer(source);
}


void
VMAnonymousCache::DeleteObject()
{
	object_cache_delete(gAnonymousCacheObjectCache, this);
}


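/*!	Records that the count pages starting at startPageIndex are backed by
	the contiguous swap slots starting at startSlotIndex. Missing swap
	blocks are allocated on the fly; if the object cache is out of memory,
	we sleep briefly and retry.
*/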
void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block* swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


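/*!	Clears the swap slot entries for the count pages starting at
	startPageIndex and discards swap blocks that become empty as a result.
	The slots themselves must be deallocated separately via
	swap_slot_dealloc().
*/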
void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block* swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


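/*!	Returns the swap slot backing the page at pageIndex, or SWAP_SLOT_NONE
	if the page has no swap space assigned.
*/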
swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: "
		"%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size,
		fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved "
				"only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to "
			"reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has fewer pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			MovePage(page);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has fewer pages than the source, so we move
	// the consumer's pages to the source (freeing shadowed ones) and finally
	// move all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			ASSERT_PRINT(sourcePage->WiredCount() == 0
					&& sourcePage->mappings.IsEmpty(),
				"sourcePage: %p, page: %p", sourcePage, page);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither source nor consumer have swap pages, we don't have to do
	// anything.
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

	for (off_t offset = source->virtual_base
			& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		off_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over by
		// the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


// #pragma mark -


// TODO: This can be removed if we get BFS uuids.
struct VolumeInfo {
	char		name[B_FILE_NAME_LENGTH];
	char		device[B_FILE_NAME_LENGTH];
	char		filesystem[B_OS_NAME_LENGTH];
	off_t		capacity;
};


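// Scores partitions against the configured swap volume: +4 for a matching
// content name, +3 for a matching device path, +2 for a matching capacity,
// and +1 for a matching file system. Only partitions scoring at least 4 are
// considered, so a name match alone suffices.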
class PartitionScorer : public KPartitionVisitor {
public:
	PartitionScorer(VolumeInfo& volumeInfo)
		:
		fBestPartition(NULL),
		fBestScore(-1),
		fVolumeInfo(volumeInfo)
	{
	}

	virtual bool VisitPre(KPartition* partition)
	{
		if (!partition->ContainsFileSystem())
			return false;

		KPath path;
		partition->GetPath(&path);

		int score = 0;
		if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0)
			score += 4;
		if (strcmp(fVolumeInfo.device, path.Path()) == 0)
			score += 3;
		if (fVolumeInfo.capacity == partition->Size())
			score += 2;
		if (strcmp(fVolumeInfo.filesystem,
				partition->DiskSystem()->ShortName()) == 0) {
			score += 1;
		}
		if (score >= 4 && score > fBestScore) {
			fBestPartition = partition;
			fBestScore = score;
		}

		return false;
	}

	KPartition*	fBestPartition;

private:
	int32		fBestScore;
	VolumeInfo&	fVolumeInfo;
};


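/*!	Registers the file at the given path as a swap file. The file is kept
	open (with O_NOCACHE) as long as it is registered, and is assigned a
	global slot range directly after the previous swap file's range, with a
	one-page gap in between.
*/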
status_t
swap_file_add(const char* path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode* node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file* swap = new(std::nothrow) swap_file;
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		delete swap;
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char* path)
{
	vnode* node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file* swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently in use, we can't delete it
	// TODO: mark this swap file as deleting, and remove it after releasing
	// all the swap space
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	truncate(path, 0);
	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	delete swapFile;

	return B_OK;
}


void
swap_init(void)
{
	// create the swap block object cache
	sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block),
		sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init the swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init the swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init the available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about swap usage",
		"\n"
		"Print info about swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	bool swapEnabled = true;
	bool swapAutomatic = true;
	off_t swapSize = 0;

	dev_t swapDeviceID = -1;
	VolumeInfo selectedVolume = {};

	void* settings = load_driver_settings("virtual_memory");

	if (settings != NULL) {
		// We pass a lot of information about the swap device; this is mostly
		// to ensure that we are dealing with the same device that was
		// configured.

		// TODO: Some kind of BFS uuid would be great here :)
		const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL);

		if (enabled != NULL) {
			swapEnabled = get_driver_boolean_parameter(settings, "vm",
				true, false);
			swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto",
				true, false);

			if (swapEnabled && !swapAutomatic) {
				const char* size = get_driver_parameter(settings, "swap_size",
					NULL, NULL);
				const char* volume = get_driver_parameter(settings,
					"swap_volume_name", NULL, NULL);
				const char* device = get_driver_parameter(settings,
					"swap_volume_device", NULL, NULL);
				const char* filesystem = get_driver_parameter(settings,
					"swap_volume_filesystem", NULL, NULL);
				const char* capacity = get_driver_parameter(settings,
					"swap_volume_capacity", NULL, NULL);

				if (size != NULL && device != NULL && volume != NULL
					&& filesystem != NULL && capacity != NULL) {
					// User specified a size / volume that seems valid
					swapAutomatic = false;
					swapSize = atoll(size);
					strlcpy(selectedVolume.name, volume,
						sizeof(selectedVolume.name));
					strlcpy(selectedVolume.device, device,
						sizeof(selectedVolume.device));
					strlcpy(selectedVolume.filesystem, filesystem,
						sizeof(selectedVolume.filesystem));
					selectedVolume.capacity = atoll(capacity);
				} else {
					// Something isn't right with the swap config, go automatic
					swapAutomatic = true;
					dprintf("%s: virtual_memory configuration is invalid, "
						"using automatic swap\n", __func__);
				}
			}
		}
		unload_driver_settings(settings);
	}

	if (swapAutomatic) {
		swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE;
		if (swapSize <= (1024 * 1024 * 1024)) {
			// Memory under 1GB? Double the swap.
			swapSize *= 2;
		}
		// Automatic swap defaults to the boot device
		swapDeviceID = gBootDevice;
	}

	if (!swapEnabled || swapSize < B_PAGE_SIZE) {
		dprintf("%s: virtual_memory is disabled\n", __func__);
		truncate(kDefaultSwapPath, 0);
		return;
	}

	if (!swapAutomatic && swapDeviceID < 0) {
		// The user specified a swap volume, and no swap device has been
		// chosen yet...
		KDiskDeviceManager::CreateDefault();
		KDiskDeviceManager* manager = KDiskDeviceManager::Default();
		PartitionScorer visitor(selectedVolume);

		KDiskDevice* device;
		int32 cookie = 0;
		while ((device = manager->NextDevice(&cookie)) != NULL) {
			if (device->IsReadOnlyMedia() || device->IsWriteOnce()
				|| device->IsRemovable()) {
				continue;
			}
			device->VisitEachDescendant(&visitor);
		}

		if (!visitor.fBestPartition) {
			dprintf("%s: Can't find configured swap partition '%s'\n",
				__func__, selectedVolume.name);
		} else {
			if (visitor.fBestPartition->IsMounted())
				swapDeviceID = visitor.fBestPartition->VolumeID();
			else {
				KPath devPath, mountPoint;
				visitor.fBestPartition->GetPath(&devPath);
				visitor.fBestPartition->GetMountPoint(&mountPoint);
				const char* mountPath = mountPoint.Path();
				mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO);
				swapDeviceID = _kern_mount(mountPath, devPath.Path(),
					NULL, 0, NULL, 0);
				if (swapDeviceID < 0) {
					dprintf("%s: Can't mount configured swap partition '%s'\n",
						__func__, selectedVolume.name);
				}
			}
		}
	}

	if (swapDeviceID < 0)
		swapDeviceID = gBootDevice;

	// We now have a swapDeviceID which is used for the swap file

	KPath path;
	struct fs_info info;
	_kern_read_fs_info(swapDeviceID, &info);
	if (swapDeviceID == gBootDevice)
		path = kDefaultSwapPath;
	else {
		vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(),
			path.BufferSize());
		path.UnlockBuffer();
		path.Append("swap");
	}

	const char* swapPath = path.Path();

	// Swap size limits prevent oversized swap files
	if (swapAutomatic) {
		off_t existingSwapSize = 0;
		struct stat existingSwapStat;
		if (stat(swapPath, &existingSwapStat) == 0)
			existingSwapSize = existingSwapStat.st_size;

		off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize;

		// Adjust automatic swap to a maximum of 25% of the free space
		if (swapSize > (freeSpace / 4))
			swapSize = (freeSpace / 4);
	}

	// Create the swap file
	int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("%s: Can't open/create %s: %s\n", __func__,
			swapPath, strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = swapSize;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n",
			__func__, swapPath, swapSize, strerror(error));
	}

	close(fd);

	error = swap_file_add(swapPath);
	if (error != B_OK) {
		dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath,
			strerror(error));
	}
}


//! Used by the page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page* page)
{
	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;
	}

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}


#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(system_info* info)
{
#if ENABLE_SWAP_SUPPORT
	MutexLocker locker(sSwapFileListLock);
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		info->max_swap_pages += swapFile->last_slot - swapFile->first_slot;
		info->free_swap_pages += swapFile->bmp->free_slots;
	}
#else
	info->max_swap_pages = 0;
	info->free_swap_pages = 0;
#endif
}