/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 *
 * Copyright 2011-2012 Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Hamish Morrison, hamish@lavabit.com
 *		Alexander von Gluck IV, kallisti5@unixzen.com
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <FindDirectory.h>
#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskSystem.h>
#include <disk_device_manager/KPartitionVisitor.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs/KPath.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/Bitmap.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>

#include "IORequest.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE	1024

#define SWAP_SLOT_NONE	RADIX_SLOT_NONE

#define SWAP_BLOCK_PAGES	32
#define SWAP_BLOCK_SHIFT	5	/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK		(SWAP_BLOCK_PAGES - 1)
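// Example: with SWAP_BLOCK_PAGES == 32, cache page index 70 belongs to the
// swap_block whose key.page_index is 64 (70 & ~SWAP_BLOCK_MASK) and occupies
// swap_slots[6] (70 & SWAP_BLOCK_MASK) within that block.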


static const char* const kDefaultSwapPath = "/var/swap";

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode*	vnode;
	void*			cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap*	bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;	// page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache* cache = key.cache;
		return blockIndex ^ (size_t)(int*)cache;
	}

	size_t Hash(const swap_block* value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block* value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block* value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL;	// allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache* sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf(" vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32
			"\n", file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total: %9" B_PRIu32 "\n", totalSwapPages);
	kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved: %9" B_PRIdOFF "\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used: %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages);
	kprintf("free: %9" B_PRIu32 "\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% of its space,
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
		< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) {
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file*
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
			&& slotIndex < swapFile->last_slot) {
			return swapFile;
		}
	}

	panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n",
		slotIndex);
	return NULL;
}


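// Swap slot indices form a single global space across all registered swap
// files: each file owns the range [first_slot, last_slot). find_swap_file()
// maps a global slot back to its file; the file-local bitmap offset is then
// obtained by subtracting first_slot, as done below.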
static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file* swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);
		delete this;
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	delete fNoSwapPages;
	fNoSwapPages = NULL;

	_FreeSwapPageRange(virtual_base, virtual_end, false);
	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32
		")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages,
		numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fNoSwapPages = NULL;
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::SetCanSwapPages(off_t base, size_t size, bool canSwap)
{
	const page_num_t first = base >> PAGE_SHIFT;
	const size_t count = PAGE_ALIGN(size + ((first << PAGE_SHIFT) - base))
		>> PAGE_SHIFT;

	if (count == 0)
		return B_OK;
	if (canSwap && fNoSwapPages == NULL)
		return B_OK;

	if (fNoSwapPages == NULL)
		fNoSwapPages = new(std::nothrow) Bitmap(0);
	if (fNoSwapPages == NULL)
		return B_NO_MEMORY;

	const page_num_t pageCount = PAGE_ALIGN(virtual_end) >> PAGE_SHIFT;

	if (fNoSwapPages->Resize(pageCount) != B_OK)
		return B_NO_MEMORY;

	for (size_t i = 0; i < count; i++) {
		if (canSwap)
			fNoSwapPages->Clear(first + i);
		else
			fNoSwapPages->Set(first + i);
	}

	if (fNoSwapPages->GetHighestSet() < 0) {
		delete fNoSwapPages;
		fNoSwapPages = NULL;
	}
	return B_OK;
}


void
VMAnonymousCache::_FreeSwapPageRange(off_t fromOffset, off_t toOffset,
	bool skipBusyPages)
{
	swap_block* swapBlock = NULL;
	off_t toIndex = toOffset >> PAGE_SHIFT;
	for (off_t pageIndex = fromOffset >> PAGE_SHIFT;
		pageIndex < toIndex && fAllocatedSwapSize > 0; pageIndex++) {

		WriteLocker locker(sSwapHashLock);

		// Get the swap slot index for the page.
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		if (swapBlock == NULL || blockIndex == 0) {
			swap_hash_key key = { this, pageIndex };
			swapBlock = sSwapHashTable.Lookup(key);

			if (swapBlock == NULL) {
				pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
				continue;
			}
		}

		swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex];
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		if (skipBusyPages) {
			vm_page* page = LookupPage(pageIndex * B_PAGE_SIZE);
			if (page != NULL && page->busy) {
				// TODO: We skip (i.e. leak) swap space of busy pages, since
				// there could be I/O going on (paging in/out). Waiting is
				// not an option as 1. unlocking the cache means that new
				// swap pages could be added in a range we've already
				// cleared (since the cache still has the old size) and 2.
				// we'd risk a deadlock in case we come from the file cache
				// and the FS holds the node's write-lock. We should mark
				// the page invalid and let the one responsible clean up.
				// There's just no such mechanism yet.
				continue;
			}
		}

		swap_slot_dealloc(slotIndex, 1);
		fAllocatedSwapSize -= B_PAGE_SIZE;

		swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE;
		if (--swapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the swap block.
			sSwapHashTable.RemoveUnchecked(swapBlock);
			object_cache_free(sSwapBlockCache, swapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);

			// There are no swap pages for possibly remaining pages, skip to the
			// next block.
			pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
			swapBlock = NULL;
		}
	}
}


status_t
VMAnonymousCache::Resize(off_t newSize, int priority)
{
	if (fNoSwapPages != NULL) {
		if (fNoSwapPages->Resize(PAGE_ALIGN(newSize) >> PAGE_SHIFT) != B_OK)
			return B_NO_MEMORY;
	}

	_FreeSwapPageRange(newSize + B_PAGE_SIZE - 1,
		virtual_end + B_PAGE_SIZE - 1);
	return VMCache::Resize(newSize, priority);
}


status_t
VMAnonymousCache::Rebase(off_t newBase, int priority)
{
	if (fNoSwapPages != NULL) {
		const ssize_t sizeDifference = (newBase >> PAGE_SHIFT)
			- (virtual_base >> PAGE_SHIFT);
		fNoSwapPages->Shift(sizeDifference);
	}

	_FreeSwapPageRange(virtual_base, newBase);
	return VMCache::Rebase(newBase, priority);
}


status_t
VMAnonymousCache::Discard(off_t offset, off_t size)
{
	_FreeSwapPageRange(offset, offset + size);
	return VMCache::Discard(offset, size);
}


/*!	Moves the swap pages for the given range from the source cache into this
	cache. Both caches must be locked.
*/
status_t
VMAnonymousCache::Adopt(VMCache* _source, off_t offset, off_t size,
	off_t newOffset)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Adopt(): adopt from incompatible cache %p "
			"requested", _source);
		return B_ERROR;
	}

	off_t pageIndex = newOffset >> PAGE_SHIFT;
	off_t sourcePageIndex = offset >> PAGE_SHIFT;
	off_t sourceEndPageIndex = (offset + size + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
	swap_block* swapBlock = NULL;

	WriteLocker locker(sSwapHashLock);

	while (sourcePageIndex < sourceEndPageIndex
		&& source->fAllocatedSwapSize > 0) {
		swap_addr_t left
			= SWAP_BLOCK_PAGES - (sourcePageIndex & SWAP_BLOCK_MASK);

		swap_hash_key sourceKey = { source, sourcePageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(sourceKey);
		if (sourceSwapBlock == NULL || sourceSwapBlock->used == 0) {
			sourcePageIndex += left;
			pageIndex += left;
			swapBlock = NULL;
			continue;
		}

		for (; left > 0 && sourceSwapBlock->used > 0;
			left--, sourcePageIndex++, pageIndex++) {

			swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
			if (swapBlock == NULL || blockIndex == 0) {
				swap_hash_key key = { this, pageIndex };
				swapBlock = sSwapHashTable.Lookup(key);

				if (swapBlock == NULL) {
					swapBlock = (swap_block*)object_cache_alloc(sSwapBlockCache,
						CACHE_DONT_WAIT_FOR_MEMORY
							| CACHE_DONT_LOCK_KERNEL_SPACE);
					if (swapBlock == NULL)
						return B_NO_MEMORY;

					swapBlock->key.cache = this;
					swapBlock->key.page_index
						= pageIndex & ~(off_t)SWAP_BLOCK_MASK;
					swapBlock->used = 0;
					for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
						swapBlock->swap_slots[i] = SWAP_SLOT_NONE;

					sSwapHashTable.InsertUnchecked(swapBlock);
				}
			}

			swap_addr_t sourceBlockIndex = sourcePageIndex & SWAP_BLOCK_MASK;
			swap_addr_t slotIndex
				= sourceSwapBlock->swap_slots[sourceBlockIndex];
			if (slotIndex == SWAP_SLOT_NONE)
				continue;

			ASSERT(swapBlock->swap_slots[blockIndex] == SWAP_SLOT_NONE);

			swapBlock->swap_slots[blockIndex] = slotIndex;
			swapBlock->used++;
			fAllocatedSwapSize += B_PAGE_SIZE;

			sourceSwapBlock->swap_slots[sourceBlockIndex] = SWAP_SLOT_NONE;
			sourceSwapBlock->used--;
			source->fAllocatedSwapSize -= B_PAGE_SIZE;

			TRACE("adopted slot %#" B_PRIx32 " from %p at page %" B_PRIdOFF
				" to %p at page %" B_PRIdOFF "\n", slotIndex, source,
				sourcePageIndex, this, pageIndex);
		}

		if (left > 0) {
			sourcePageIndex += left;
			pageIndex += left;
			swapBlock = NULL;
		}

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been adopted, we can discard the swap block.
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}

	locker.Unlock();

	return VMCache::Adopt(source, offset, size, newOffset);
}


status_t
VMAnonymousCache::Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size);

	AssertLocked();

	// If we can overcommit, we don't commit here, but in Fault(). We always
	// unreserve memory, if we're asked to shrink our commitment, though.
	if (fCanOvercommit && size > committed_size) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size, priority);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


bool
VMAnonymousCache::DebugHasPage(off_t offset)
{
	off_t pageIndex = offset >> PAGE_SHIFT;
	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	if (swap == NULL)
		return false;

	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

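		// The vectors in [i, j) map to physically contiguous swap slots
		// starting at startSlotIndex, so they can be read with a single
		// vfs_read_pages() call.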
		swap_file* swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	page_num_t totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	page_num_t pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		generic_addr_t vectorBase = vecs[i].base;
		generic_size_t vectorLength = vecs[i].length;
		page_num_t n = pageCount;

		for (page_num_t j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// try to allocate n slots; if that fails, retry with n/2
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.

			swap_file* swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

			generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE;
			generic_io_vec vector[1];
			vector->base = vectorBase;
			vector->length = length;

			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
				pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = vectorBase + n * B_PAGE_SIZE;
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs,
	size_t count, generic_size_t numBytes, uint32 flags,
	AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our
	// own callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	const off_t pageIndex = offset >> PAGE_SHIFT;
	if (fNoSwapPages != NULL && fNoSwapPages->Get(pageIndex))
		return false;

	// We can write the page, if we have not used all of our committed swap
	// space or the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(pageIndex) != SWAP_SLOT_NONE;
}


int32
VMAnonymousCache::MaxPagesPerAsyncWrite() const
{
	return 1;
}


status_t
VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
{
	if (fGuardedSize > 0) {
		uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
		guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
		guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif
		// report stack fault, guard page hit!
		if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
			TRACE(("stack overflow!\n"));
			return B_BAD_ADDRESS;
		}
	}

	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fPrecommittedPages == 0) {
			// never commit more than needed
			if (committed_size / B_PAGE_SIZE > page_count)
				return B_BAD_HANDLER;

			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
				fCommittedSwapSize += B_PAGE_SIZE;
			} else {
				int priority = aspace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
					return B_NO_MEMORY;
				}
			}

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize, VM_PRIORITY_USER);

	// Move all not shadowed swap pages from the source to the consumer cache.
	// Also remove all source pages that are shadowed by consumer swap pages.
	_MergeSwapPages(source);

	// Move all not shadowed pages from the source to the consumer cache.
	if (source->page_count < page_count)
		_MergePagesSmallerSource(source);
	else
		_MergePagesSmallerConsumer(source);
}


void
VMAnonymousCache::DeleteObject()
{
	object_cache_delete(gAnonymousCacheObjectCache, this);
}


void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block* swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
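				// While the hash lock is dropped another thread may insert
				// the block we need, so it is looked up again before the
				// allocation is retried.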
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block* swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: "
		"%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size,
		fCommittedSwapSize);

	// Basic strategy: reserve swap space first, only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved "
				"only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
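		// Any remainder that could not be covered by unreserving RAM is
		// necessarily part of the swap commitment (committed_size ==
		// fCommittedSwapSize + committedMemory holds here), so it can be
		// returned to the swap pool.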
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory, now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to "
			"reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has fewer pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through a
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			MovePage(page);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has fewer pages than the source, so we move
	// the consumer's pages to the source (freeing shadowed ones) and finally
	// just move all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			ASSERT_PRINT(sourcePage->WiredCount() == 0
					&& sourcePage->mappings.IsEmpty(),
				"sourcePage: %p, page: %p", sourcePage, page);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through a
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither source nor consumer have swap pages, we don't have to do
	// anything.
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

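	// Walk the source's range one swap block at a time. The start offset is
	// rounded down to a swap block boundary so that whole swap_blocks can be
	// looked up and, where possible, taken over as-is.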
	for (off_t offset = source->virtual_base
			& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		off_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over by
		// the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
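			// (Every source slot still set at this point targets a consumer
			// slot that is SWAP_SLOT_NONE; otherwise it would have been freed
			// in the loop above.)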
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


// #pragma mark -


// TODO: This can be removed if we get BFS uuid's
struct VolumeInfo {
	char name[B_FILE_NAME_LENGTH];
	char device[B_FILE_NAME_LENGTH];
	char filesystem[B_OS_NAME_LENGTH];
	off_t capacity;
};


class PartitionScorer : public KPartitionVisitor {
public:
	PartitionScorer(VolumeInfo& volumeInfo)
		:
		fBestPartition(NULL),
		fBestScore(-1),
		fVolumeInfo(volumeInfo)
	{
	}

	virtual bool VisitPre(KPartition* partition)
	{
		if (!partition->ContainsFileSystem())
			return false;

		KPath path;
		partition->GetPath(&path);

		int score = 0;
		if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0)
			score += 4;
		if (strcmp(fVolumeInfo.device, path.Path()) == 0)
			score += 3;
		if (fVolumeInfo.capacity == partition->Size())
			score += 2;
		if (strcmp(fVolumeInfo.filesystem,
				partition->DiskSystem()->ShortName()) == 0) {
			score += 1;
		}
		if (score >= 4 && score > fBestScore) {
			fBestPartition = partition;
			fBestScore = score;
		}

		return false;
	}

	KPartition* fBestPartition;

private:
	int32		fBestScore;
	VolumeInfo&	fVolumeInfo;
};


status_t
swap_file_add(const char* path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode* node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file* swap = new(std::nothrow) swap_file;
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		delete swap;
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
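	// The new file is appended to the global slot space: e.g. a first
	// 1024-page swap file occupies slots [0, 1024), and a second file would
	// then start at slot 1025 (note the one-slot gap, see below).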
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char* path)
{
	vnode* node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file* swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently used, we can't delete
	// TODO: mark this swap file deleting, and remove it after releasing
	// all the swap space
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	truncate(path, 0);
	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	delete swapFile;

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block),
		sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print infos about the swap usage",
		"\n"
		"Print infos about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	bool swapEnabled = true;
	bool swapAutomatic = true;
	off_t swapSize = 0;

	dev_t swapDeviceID = -1;
	VolumeInfo selectedVolume = {};

	void* settings = load_driver_settings("virtual_memory");

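	// The parameters read below correspond to a "virtual_memory" driver
	// settings file of roughly this shape (values are illustrative only):
	//   vm on
	//   swap_auto no
	//   swap_size 2147483648
	//   swap_volume_name Data
	//   swap_volume_device /dev/disk/ata/0/master/raw
	//   swap_volume_filesystem bfs
	//   swap_volume_capacity 64424509440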
	if (settings != NULL) {
		// We pass a lot of information on the swap device, this is mostly to
		// ensure that we are dealing with the same device that was configured.

		// TODO: Some kind of BFS uuid would be great here :)
		const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL);

		if (enabled != NULL) {
			swapEnabled = get_driver_boolean_parameter(settings, "vm",
				true, false);
			swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto",
				true, false);

			if (swapEnabled && !swapAutomatic) {
				const char* size = get_driver_parameter(settings, "swap_size",
					NULL, NULL);
				const char* volume = get_driver_parameter(settings,
					"swap_volume_name", NULL, NULL);
				const char* device = get_driver_parameter(settings,
					"swap_volume_device", NULL, NULL);
				const char* filesystem = get_driver_parameter(settings,
					"swap_volume_filesystem", NULL, NULL);
				const char* capacity = get_driver_parameter(settings,
					"swap_volume_capacity", NULL, NULL);

				if (size != NULL && device != NULL && volume != NULL
					&& filesystem != NULL && capacity != NULL) {
					// User specified a size / volume that seems valid
					swapAutomatic = false;
					swapSize = atoll(size);
					strlcpy(selectedVolume.name, volume,
						sizeof(selectedVolume.name));
					strlcpy(selectedVolume.device, device,
						sizeof(selectedVolume.device));
					strlcpy(selectedVolume.filesystem, filesystem,
						sizeof(selectedVolume.filesystem));
					selectedVolume.capacity = atoll(capacity);
				} else {
					// Something isn't right with swap config, go auto
					swapAutomatic = true;
					dprintf("%s: virtual_memory configuration is invalid, "
						"using automatic swap\n", __func__);
				}
			}
		}
		unload_driver_settings(settings);
	}

	if (swapAutomatic) {
		swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE;
		if (swapSize <= (1024 * 1024 * 1024)) {
			// Memory under 1GB? double the swap
			swapSize *= 2;
		}
		// Automatic swap defaults to the boot device
		swapDeviceID = gBootDevice;
	}

	if (!swapEnabled || swapSize < B_PAGE_SIZE) {
		dprintf("%s: virtual_memory is disabled\n", __func__);
		truncate(kDefaultSwapPath, 0);
		return;
	}

	if (!swapAutomatic && swapDeviceID < 0) {
		// If user-specified swap, and no swap device has been chosen yet...
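		// PartitionScorer (above) accepts a partition only if it scores at
		// least 4 of the 10 possible points (volume name 4, device path 3,
		// capacity 2, file system 1) and keeps the highest-scoring match.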
		KDiskDeviceManager::CreateDefault();
		KDiskDeviceManager* manager = KDiskDeviceManager::Default();
		PartitionScorer visitor(selectedVolume);

		KDiskDevice* device;
		int32 cookie = 0;
		while ((device = manager->NextDevice(&cookie)) != NULL) {
			if (device->IsReadOnlyMedia() || device->IsWriteOnce()
				|| device->IsRemovable()) {
				continue;
			}
			device->VisitEachDescendant(&visitor);
		}

		if (!visitor.fBestPartition) {
			dprintf("%s: Can't find configured swap partition '%s'\n",
				__func__, selectedVolume.name);
		} else {
			if (visitor.fBestPartition->IsMounted())
				swapDeviceID = visitor.fBestPartition->VolumeID();
			else {
				KPath devPath, mountPoint;
				visitor.fBestPartition->GetPath(&devPath);
				visitor.fBestPartition->GetMountPoint(&mountPoint);
				const char* mountPath = mountPoint.Path();
				mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO);
				swapDeviceID = _kern_mount(mountPath, devPath.Path(),
					NULL, 0, NULL, 0);
				if (swapDeviceID < 0) {
					dprintf("%s: Can't mount configured swap partition '%s'\n",
						__func__, selectedVolume.name);
				}
			}
		}
	}

	if (swapDeviceID < 0)
		swapDeviceID = gBootDevice;

	// We now have a swapDeviceID which is used for the swap file

	KPath path;
	struct fs_info info;
	_kern_read_fs_info(swapDeviceID, &info);
	if (swapDeviceID == gBootDevice)
		path = kDefaultSwapPath;
	else {
		vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(),
			path.BufferSize());
		path.UnlockBuffer();
		path.Append("swap");
	}

	const char* swapPath = path.Path();

	// Swap size limits prevent oversized swap files
	if (swapAutomatic) {
		off_t existingSwapSize = 0;
		struct stat existingSwapStat;
		if (stat(swapPath, &existingSwapStat) == 0)
			existingSwapSize = existingSwapStat.st_size;

		off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize;

		// Adjust automatic swap to a maximum of 25% of the free space
		if (swapSize > (freeSpace / 4))
			swapSize = (freeSpace / 4);
	}

	// Create swap file
	int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("%s: Can't open/create %s: %s\n", __func__,
			swapPath, strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = swapSize;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n",
			__func__, swapPath, swapSize, strerror(error));
	}

	close(fd);

	error = swap_file_add(swapPath);
	if (error != B_OK) {
		dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath,
			strerror(error));
	}
}


//! Used by page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page* page)
{
	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;
	}

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}


#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(system_info* info)
{
#if ENABLE_SWAP_SUPPORT
	MutexLocker locker(sSwapFileListLock);
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		info->max_swap_pages += swapFile->last_slot - swapFile->first_slot;
		info->free_swap_pages += swapFile->bmp->free_slots;
	}
#else
	info->max_swap_pages = 0;
	info->free_swap_pages = 0;
#endif
}