/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 *
 * Copyright 2011-2012 Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Hamish Morrison, hamish@lavabit.com
 *		Alexander von Gluck IV, kallisti5@unixzen.com
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <FindDirectory.h>
#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskSystem.h>
#include <disk_device_manager/KPartitionVisitor.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs/KPath.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/Bitmap.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>

#include "IORequest.h"
#include "VMUtils.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE	1024

#define SWAP_SLOT_NONE	RADIX_SLOT_NONE

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)


static const char* const kDefaultSwapPath = "/var/swap";

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode*	vnode;
	void*			cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap*	bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;  // page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache* cache = key.cache;
		return blockIndex ^ (size_t)(int*)cache;
	}

	size_t Hash(const swap_block* value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block* value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block* value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL;	// allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache* sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32
			"\n", file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9" B_PRIu32 "\n", totalSwapPages);
	kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9" B_PRIdOFF "\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9" B_PRIu32 "\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE here; this forces Write() to adjust the allocation amount
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% of its space,
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
		< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) {
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file*
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
			&& slotIndex < swapFile->last_slot) {
			return swapFile;
		}
	}

	panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n",
		slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file* swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);
		delete this;
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	delete fNoSwapPages;
	fNoSwapPages = NULL;

	_FreeSwapPageRange(virtual_base, virtual_end, false);
	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32
		")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages,
"yes" : "no", numPrecommittedPages, 465 numGuardPages); 466 467 status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags); 468 if (error != B_OK) 469 return error; 470 471 fCanOvercommit = canOvercommit; 472 fHasPrecommitted = false; 473 fPrecommittedPages = min_c(numPrecommittedPages, 255); 474 fNoSwapPages = NULL; 475 fGuardedSize = numGuardPages * B_PAGE_SIZE; 476 fCommittedSwapSize = 0; 477 fAllocatedSwapSize = 0; 478 479 return B_OK; 480 } 481 482 483 status_t 484 VMAnonymousCache::SetCanSwapPages(off_t base, size_t size, bool canSwap) 485 { 486 const page_num_t first = base >> PAGE_SHIFT; 487 const size_t count = PAGE_ALIGN(size + ((first << PAGE_SHIFT) - base)) >> PAGE_SHIFT; 488 489 if (count == 0) 490 return B_OK; 491 if (canSwap && fNoSwapPages == NULL) 492 return B_OK; 493 494 if (fNoSwapPages == NULL) 495 fNoSwapPages = new(std::nothrow) Bitmap(0); 496 if (fNoSwapPages == NULL) 497 return B_NO_MEMORY; 498 499 const page_num_t pageCount = PAGE_ALIGN(virtual_end) >> PAGE_SHIFT; 500 501 if (fNoSwapPages->Resize(pageCount) != B_OK) 502 return B_NO_MEMORY; 503 504 for (size_t i = 0; i < count; i++) { 505 if (canSwap) 506 fNoSwapPages->Clear(first + i); 507 else 508 fNoSwapPages->Set(first + i); 509 } 510 511 if (fNoSwapPages->GetHighestSet() < 0) { 512 delete fNoSwapPages; 513 fNoSwapPages = NULL; 514 } 515 return B_OK; 516 } 517 518 519 void 520 VMAnonymousCache::_FreeSwapPageRange(off_t fromOffset, off_t toOffset, 521 bool skipBusyPages) 522 { 523 swap_block* swapBlock = NULL; 524 off_t toIndex = toOffset >> PAGE_SHIFT; 525 for (off_t pageIndex = fromOffset >> PAGE_SHIFT; 526 pageIndex < toIndex && fAllocatedSwapSize > 0; pageIndex++) { 527 528 WriteLocker locker(sSwapHashLock); 529 530 // Get the swap slot index for the page. 531 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 532 if (swapBlock == NULL || blockIndex == 0) { 533 swap_hash_key key = { this, pageIndex }; 534 swapBlock = sSwapHashTable.Lookup(key); 535 536 if (swapBlock == NULL) { 537 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1; 538 continue; 539 } 540 } 541 542 swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex]; 543 if (slotIndex == SWAP_SLOT_NONE) 544 continue; 545 546 if (skipBusyPages) { 547 vm_page* page = LookupPage(pageIndex * B_PAGE_SIZE); 548 if (page != NULL && page->busy) { 549 // TODO: We skip (i.e. leak) swap space of busy pages, since 550 // there could be I/O going on (paging in/out). Waiting is 551 // not an option as 1. unlocking the cache means that new 552 // swap pages could be added in a range we've already 553 // cleared (since the cache still has the old size) and 2. 554 // we'd risk a deadlock in case we come from the file cache 555 // and the FS holds the node's write-lock. We should mark 556 // the page invalid and let the one responsible clean up. 557 // There's just no such mechanism yet. 558 continue; 559 } 560 } 561 562 swap_slot_dealloc(slotIndex, 1); 563 fAllocatedSwapSize -= B_PAGE_SIZE; 564 565 swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE; 566 if (--swapBlock->used == 0) { 567 // All swap pages have been freed -- we can discard the swap block. 568 sSwapHashTable.RemoveUnchecked(swapBlock); 569 object_cache_free(sSwapBlockCache, swapBlock, 570 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 571 572 // There are no swap pages for possibly remaining pages, skip to the 573 // next block. 
			pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
			swapBlock = NULL;
		}
	}
}


status_t
VMAnonymousCache::Resize(off_t newSize, int priority)
{
	if (fNoSwapPages != NULL) {
		if (fNoSwapPages->Resize(PAGE_ALIGN(newSize) >> PAGE_SHIFT) != B_OK)
			return B_NO_MEMORY;
	}

	_FreeSwapPageRange(newSize + B_PAGE_SIZE - 1,
		virtual_end + B_PAGE_SIZE - 1);
	return VMCache::Resize(newSize, priority);
}


status_t
VMAnonymousCache::Rebase(off_t newBase, int priority)
{
	if (fNoSwapPages != NULL) {
		const ssize_t sizeDifference = (newBase >> PAGE_SHIFT) - (virtual_base >> PAGE_SHIFT);
		fNoSwapPages->Shift(sizeDifference);
	}

	_FreeSwapPageRange(virtual_base, newBase);
	return VMCache::Rebase(newBase, priority);
}


status_t
VMAnonymousCache::Discard(off_t offset, off_t size)
{
	_FreeSwapPageRange(offset, offset + size);
	return VMCache::Discard(offset, size);
}


/*!	Moves the swap pages for the given range from the source cache into this
	cache. Both caches must be locked.
*/
status_t
VMAnonymousCache::Adopt(VMCache* _source, off_t offset, off_t size,
	off_t newOffset)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Adopt(): adopt from incompatible cache %p "
			"requested", _source);
		return B_ERROR;
	}

	off_t pageIndex = newOffset >> PAGE_SHIFT;
	off_t sourcePageIndex = offset >> PAGE_SHIFT;
	off_t sourceEndPageIndex = (offset + size + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
	swap_block* swapBlock = NULL;

	WriteLocker locker(sSwapHashLock);

	while (sourcePageIndex < sourceEndPageIndex
			&& source->fAllocatedSwapSize > 0) {
		swap_addr_t left
			= SWAP_BLOCK_PAGES - (sourcePageIndex & SWAP_BLOCK_MASK);

		swap_hash_key sourceKey = { source, sourcePageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(sourceKey);
		if (sourceSwapBlock == NULL || sourceSwapBlock->used == 0) {
			sourcePageIndex += left;
			pageIndex += left;
			swapBlock = NULL;
			continue;
		}

		for (; left > 0 && sourceSwapBlock->used > 0;
				left--, sourcePageIndex++, pageIndex++) {

			swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
			if (swapBlock == NULL || blockIndex == 0) {
				swap_hash_key key = { this, pageIndex };
				swapBlock = sSwapHashTable.Lookup(key);

				if (swapBlock == NULL) {
					swapBlock = (swap_block*)object_cache_alloc(sSwapBlockCache,
						CACHE_DONT_WAIT_FOR_MEMORY
							| CACHE_DONT_LOCK_KERNEL_SPACE);
					if (swapBlock == NULL)
						return B_NO_MEMORY;

					swapBlock->key.cache = this;
					swapBlock->key.page_index
						= pageIndex & ~(off_t)SWAP_BLOCK_MASK;
					swapBlock->used = 0;
					for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
						swapBlock->swap_slots[i] = SWAP_SLOT_NONE;

					sSwapHashTable.InsertUnchecked(swapBlock);
				}
			}

			swap_addr_t sourceBlockIndex = sourcePageIndex & SWAP_BLOCK_MASK;
			swap_addr_t slotIndex
				= sourceSwapBlock->swap_slots[sourceBlockIndex];
			if (slotIndex == SWAP_SLOT_NONE)
				continue;

			ASSERT(swapBlock->swap_slots[blockIndex] == SWAP_SLOT_NONE);

			swapBlock->swap_slots[blockIndex] = slotIndex;
			swapBlock->used++;
			fAllocatedSwapSize += B_PAGE_SIZE;

			sourceSwapBlock->swap_slots[sourceBlockIndex] = SWAP_SLOT_NONE;
			sourceSwapBlock->used--;
			source->fAllocatedSwapSize -= B_PAGE_SIZE;

			TRACE("adopted slot %#" B_PRIx32 " from %p at page %" B_PRIdOFF
				" to %p at page %" B_PRIdOFF "\n", slotIndex, source,
				sourcePageIndex, this, pageIndex);
		}

		if (left > 0) {
			sourcePageIndex += left;
			pageIndex += left;
			swapBlock = NULL;
		}

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been adopted, we can discard the swap block.
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}

	locker.Unlock();

	return VMCache::Adopt(source, offset, size, newOffset);
}


status_t
VMAnonymousCache::Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size);

	// If we can overcommit, we don't commit here, but in Fault(). We always
	// unreserve memory, if we're asked to shrink our commitment, though.
	if (fCanOvercommit && size > committed_size) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size, priority);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


bool
VMAnonymousCache::DebugHasPage(off_t offset)
{
	off_t pageIndex = offset >> PAGE_SHIFT;
	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	if (swap == NULL)
		return false;

	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.
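		// The inner loop above coalesced pages whose swap slots are
		// contiguous, so the whole run can be handed to a single
		// vfs_read_pages() call below.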

		swap_file* swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	page_num_t totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	page_num_t pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		generic_addr_t vectorBase = vecs[i].base;
		generic_size_t vectorLength = vecs[i].length;
		page_num_t n = pageCount;

		for (page_num_t j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// try to allocate n slots; if that fails, try to allocate n/2
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.

			swap_file* swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

			generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE;
			generic_io_vec vector[1];
			vector->base = vectorBase;
			vector->length = length;

			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
				pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = vectorBase + n * B_PAGE_SIZE;
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs,
	size_t count, generic_size_t numBytes, uint32 flags,
	AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
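	// Overview: allocate a swap slot if the page doesn't have one yet, wrap
	// the caller's callback in a WriteCallback that records the new slot in
	// the swap hash (or rolls it back on failure) once the I/O finishes, and
	// then issue the asynchronous write.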
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our own
	// callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	const off_t pageIndex = offset >> PAGE_SHIFT;
	if (fNoSwapPages != NULL && fNoSwapPages->Get(pageIndex))
		return false;

	// We can write the page, if we have not used all of our committed swap
	// space or the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(pageIndex) != SWAP_SLOT_NONE;
}


int32
VMAnonymousCache::MaxPagesPerAsyncWrite() const
{
	return 1;
}


status_t
VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
{
	if (fGuardedSize > 0) {
		uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
		guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
		guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif
		// report stack fault, guard page hit!
		if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
			TRACE(("stack overflow!\n"));
			return B_BAD_ADDRESS;
		}
	}

	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fPrecommittedPages == 0) {
			// never commit more than needed
			if (committed_size / B_PAGE_SIZE > page_count)
				return B_BAD_HANDLER;

			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
				fCommittedSwapSize += B_PAGE_SIZE;
			} else {
				int priority = aspace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
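				// No swap space could be reserved -- fall back to reserving
				// real memory, at a higher priority for the kernel address
				// space.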
				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
					return B_NO_MEMORY;
				}
			}

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize, VM_PRIORITY_USER);

	// Move all not shadowed swap pages from the source to the consumer cache.
	// Also remove all source pages that are shadowed by consumer swap pages.
	_MergeSwapPages(source);

	// Move all not shadowed pages from the source to the consumer cache.
	if (source->page_count < page_count)
		_MergePagesSmallerSource(source);
	else
		_MergePagesSmallerConsumer(source);
}


void
VMAnonymousCache::DeleteObject()
{
	object_cache_delete(gAnonymousCacheObjectCache, this);
}


void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block* swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
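				// Drop the hash lock while sleeping so others can make
				// progress; another thread may insert the block for this key
				// in the meantime, which the Lookup() below will pick up.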
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block* swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: "
		"%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size,
		fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved "
				"only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to "
			"reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has fewer pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			MovePage(page);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has fewer pages than the source, so we move
	// the consumer's pages to the source (freeing shadowed ones) and finally
	// move all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			ASSERT_PRINT(sourcePage->WiredCount() == 0
					&& sourcePage->mappings.IsEmpty(),
				"sourcePage: %p, page: %p", sourcePage, page);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither source nor consumer have swap pages, we don't have to do
	// anything.
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

	for (off_t offset = source->virtual_base
			& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
		offset < source->virtual_end;
		offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		off_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over by
		// the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


// #pragma mark -


// TODO: This can be removed if we get BFS uuid's
struct VolumeInfo {
	char name[B_FILE_NAME_LENGTH];
	char device[B_FILE_NAME_LENGTH];
	char filesystem[B_OS_NAME_LENGTH];
	off_t capacity;
};


class PartitionScorer : public KPartitionVisitor {
public:
	PartitionScorer(VolumeInfo& volumeInfo)
		:
		fBestPartition(NULL),
		fBestScore(-1),
		fVolumeInfo(volumeInfo)
	{
	}

	virtual bool VisitPre(KPartition* partition)
	{
		if (!partition->ContainsFileSystem())
			return false;

		KPath path;
		partition->GetPath(&path);

		int score = 0;
		if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0)
			score += 4;
		if (strcmp(fVolumeInfo.device, path.Path()) == 0)
			score += 3;
		if (fVolumeInfo.capacity == partition->Size())
			score += 2;
		if (strcmp(fVolumeInfo.filesystem,
				partition->DiskSystem()->ShortName()) == 0) {
			score += 1;
		}
		if (score >= 4 && score > fBestScore) {
			fBestPartition = partition;
			fBestScore = score;
		}

		return false;
	}

	KPartition* fBestPartition;

private:
	int32		fBestScore;
	VolumeInfo&	fVolumeInfo;
};


status_t
swap_file_add(const char* path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode* node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file* swap = new(std::nothrow) swap_file;
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		delete swap;
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
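	// Slot ranges are handed out back-to-back (with a one page gap), so slot
	// indices stay unique across all registered swap files and
	// find_swap_file() can map a slot back to its file.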
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char* path)
{
	vnode* node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file* swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently used, we can't delete it
	// TODO: mark this swap file as deleting, and remove it after releasing
	// all the swap space
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	delete swapFile;

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block),
		sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	bool swapEnabled = true;
	bool swapAutomatic = true;
	off_t swapSize = 0;

	dev_t swapDeviceID = -1;
	VolumeInfo selectedVolume = {};

	void* settings = load_driver_settings("virtual_memory");

	if (settings != NULL) {
		// We pass a lot of information on the swap device; this is mostly to
		// ensure that we are dealing with the same device that was configured.

		// TODO: Some kind of BFS uuid would be great here :)
		const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL);

		if (enabled != NULL) {
			swapEnabled = get_driver_boolean_parameter(settings, "vm",
				true, false);
			swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto",
				true, false);

			if (swapEnabled && !swapAutomatic) {
				const char* size = get_driver_parameter(settings, "swap_size",
					NULL, NULL);
				const char* volume = get_driver_parameter(settings,
					"swap_volume_name", NULL, NULL);
				const char* device = get_driver_parameter(settings,
					"swap_volume_device", NULL, NULL);
				const char* filesystem = get_driver_parameter(settings,
					"swap_volume_filesystem", NULL, NULL);
				const char* capacity = get_driver_parameter(settings,
					"swap_volume_capacity", NULL, NULL);

				if (size != NULL && device != NULL && volume != NULL
					&& filesystem != NULL && capacity != NULL) {
					// User specified a size / volume that seems valid
					swapAutomatic = false;
					swapSize = atoll(size);
					strlcpy(selectedVolume.name, volume,
						sizeof(selectedVolume.name));
					strlcpy(selectedVolume.device, device,
						sizeof(selectedVolume.device));
					strlcpy(selectedVolume.filesystem, filesystem,
						sizeof(selectedVolume.filesystem));
					selectedVolume.capacity = atoll(capacity);
				} else {
					// Something isn't right with swap config, go auto
					swapAutomatic = true;
					dprintf("%s: virtual_memory configuration is invalid, "
						"using automatic swap\n", __func__);
				}
			}
		}
		unload_driver_settings(settings);
	}

	if (swapAutomatic) {
		swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE;
		if (swapSize <= (1024 * 1024 * 1024)) {
			// Memory under 1GB? Double the swap.
			swapSize *= 2;
		}
		// Automatic swap defaults to the boot device
		swapDeviceID = gBootDevice;
	}

	if (!swapEnabled || swapSize < B_PAGE_SIZE) {
		dprintf("%s: virtual_memory is disabled\n", __func__);
		return;
	}

	if (!swapAutomatic && swapDeviceID < 0) {
		// If user-specified swap, and no swap device has been chosen yet...
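		// Score every candidate partition against the configured volume info
		// (name, device path, capacity, file system); PartitionScorer only
		// accepts partitions scoring at least 4 points, and the best match is
		// mounted below if necessary.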
		KDiskDeviceManager::CreateDefault();
		KDiskDeviceManager* manager = KDiskDeviceManager::Default();
		PartitionScorer visitor(selectedVolume);

		KDiskDevice* device;
		int32 cookie = 0;
		while ((device = manager->NextDevice(&cookie)) != NULL) {
			if (device->IsReadOnlyMedia() || device->IsWriteOnce()
				|| device->IsRemovable()) {
				continue;
			}
			device->VisitEachDescendant(&visitor);
		}

		if (!visitor.fBestPartition) {
			dprintf("%s: Can't find configured swap partition '%s'\n",
				__func__, selectedVolume.name);
		} else {
			if (visitor.fBestPartition->IsMounted())
				swapDeviceID = visitor.fBestPartition->VolumeID();
			else {
				KPath devPath, mountPoint;
				visitor.fBestPartition->GetPath(&devPath);
				get_mount_point(visitor.fBestPartition, &mountPoint);
				const char* mountPath = mountPoint.Path();
				mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO);
				swapDeviceID = _kern_mount(mountPath, devPath.Path(),
					NULL, 0, NULL, 0);
				if (swapDeviceID < 0) {
					dprintf("%s: Can't mount configured swap partition '%s'\n",
						__func__, selectedVolume.name);
				}
			}
		}
	}

	if (swapDeviceID < 0)
		swapDeviceID = gBootDevice;

	// We now have a swapDeviceID which is used for the swap file

	KPath path;
	struct fs_info info;
	_kern_read_fs_info(swapDeviceID, &info);
	if (swapDeviceID == gBootDevice)
		path = kDefaultSwapPath;
	else {
		vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(),
			path.BufferSize());
		path.UnlockBuffer();
		path.Append("swap");
	}

	const char* swapPath = path.Path();

	// Swap size limits prevent oversized swap files
	if (swapAutomatic) {
		off_t existingSwapSize = 0;
		struct stat existingSwapStat;
		if (stat(swapPath, &existingSwapStat) == 0)
			existingSwapSize = existingSwapStat.st_size;

		off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize;

		// Adjust automatic swap to a maximum of 25% of the free space
		if (swapSize > (freeSpace / 4))
			swapSize = (freeSpace / 4);
	}

	// Create swap file
	int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("%s: Can't open/create %s: %s\n", __func__,
			swapPath, strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = swapSize;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n",
			__func__, swapPath, swapSize, strerror(error));
	}

	close(fd);

	error = swap_file_add(swapPath);
	if (error != B_OK) {
		dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath,
			strerror(error));
	}
}


//!	Used by page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page* page)
{
	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;
	}

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}


#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(system_info* info)
{
#if ENABLE_SWAP_SUPPORT
	MutexLocker locker(sSwapFileListLock);
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();) {
		info->max_swap_pages += swapFile->last_slot - swapFile->first_slot;
		info->free_swap_pages += swapFile->bmp->free_slots;
	}
#else
	info->max_swap_pages = 0;
	info->free_swap_pages = 0;
#endif
}