1 /* 2 * Copyright 2008, Zhao Shuai, upczhsh@163.com. 3 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 4 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 5 * Distributed under the terms of the MIT License. 6 * 7 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 8 * Distributed under the terms of the NewOS License. 9 * 10 * Copyright 2011-2012 Haiku, Inc. All rights reserved. 11 * Distributed under the terms of the MIT License. 12 * 13 * Authors: 14 * Hamish Morrison, hamish@lavabit.com 15 * Alexander von Gluck IV, kallisti5@unixzen.com 16 */ 17 18 19 #include "VMAnonymousCache.h" 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include <FindDirectory.h> 28 #include <KernelExport.h> 29 #include <NodeMonitor.h> 30 31 #include <arch_config.h> 32 #include <boot_device.h> 33 #include <disk_device_manager/KDiskDevice.h> 34 #include <disk_device_manager/KDiskDeviceManager.h> 35 #include <disk_device_manager/KDiskSystem.h> 36 #include <disk_device_manager/KPartitionVisitor.h> 37 #include <driver_settings.h> 38 #include <fs/fd.h> 39 #include <fs/KPath.h> 40 #include <fs_info.h> 41 #include <fs_interface.h> 42 #include <heap.h> 43 #include <kernel_daemon.h> 44 #include <slab/Slab.h> 45 #include <syscalls.h> 46 #include <system_info.h> 47 #include <thread.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/Bitmap.h> 51 #include <util/DoublyLinkedList.h> 52 #include <util/OpenHashTable.h> 53 #include <util/RadixBitmap.h> 54 #include <vfs.h> 55 #include <vm/vm.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_priv.h> 58 #include <vm/VMAddressSpace.h> 59 60 #include "IORequest.h" 61 #include "VMUtils.h" 62 63 64 #if ENABLE_SWAP_SUPPORT 65 66 //#define TRACE_VM_ANONYMOUS_CACHE 67 #ifdef TRACE_VM_ANONYMOUS_CACHE 68 # define TRACE(x...) dprintf(x) 69 #else 70 # define TRACE(x...) do { } while (false) 71 #endif 72 73 74 // number of free swap blocks the object cache shall minimally have 75 #define MIN_SWAP_BLOCK_RESERVE 4096 76 77 // interval the has resizer is triggered (in 0.1s) 78 #define SWAP_HASH_RESIZE_INTERVAL 5 79 80 #define INITIAL_SWAP_HASH_SIZE 1024 81 82 #define SWAP_SLOT_NONE RADIX_SLOT_NONE 83 84 #define SWAP_BLOCK_PAGES 32 85 #define SWAP_BLOCK_SHIFT 5 /* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */ 86 #define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1) 87 88 89 static const char* const kDefaultSwapPath = "/var/swap"; 90 91 struct swap_file : DoublyLinkedListLinkImpl<swap_file> { 92 int fd; 93 struct vnode* vnode; 94 void* cookie; 95 swap_addr_t first_slot; 96 swap_addr_t last_slot; 97 radix_bitmap* bmp; 98 }; 99 100 struct swap_hash_key { 101 VMAnonymousCache *cache; 102 off_t page_index; // page index in the cache 103 }; 104 105 // Each swap block contains swap address information for 106 // SWAP_BLOCK_PAGES continuous pages from the same cache 107 struct swap_block { 108 swap_block* hash_link; 109 swap_hash_key key; 110 uint32 used; 111 swap_addr_t swap_slots[SWAP_BLOCK_PAGES]; 112 }; 113 114 struct SwapHashTableDefinition { 115 typedef swap_hash_key KeyType; 116 typedef swap_block ValueType; 117 118 SwapHashTableDefinition() {} 119 120 size_t HashKey(const swap_hash_key& key) const 121 { 122 off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT; 123 VMAnonymousCache* cache = key.cache; 124 return blockIndex ^ (size_t)(int*)cache; 125 } 126 127 size_t Hash(const swap_block* value) const 128 { 129 return HashKey(value->key); 130 } 131 132 bool Compare(const swap_hash_key& key, const swap_block* value) const 133 { 134 return (key.page_index & ~(off_t)SWAP_BLOCK_MASK) 135 == (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK) 136 && key.cache == value->key.cache; 137 } 138 139 swap_block*& GetLink(swap_block* value) const 140 { 141 return value->hash_link; 142 } 143 }; 144 145 typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable; 146 typedef DoublyLinkedList<swap_file> SwapFileList; 147 148 static SwapHashTable sSwapHashTable; 149 static rw_lock sSwapHashLock; 150 151 static SwapFileList sSwapFileList; 152 static mutex sSwapFileListLock; 153 static swap_file* sSwapFileAlloc = NULL; // allocate from here 154 static uint32 sSwapFileCount = 0; 155 156 static off_t sAvailSwapSpace = 0; 157 static mutex sAvailSwapSpaceLock; 158 159 static object_cache* sSwapBlockCache; 160 161 162 #if SWAP_TRACING 163 namespace SwapTracing { 164 165 class SwapTraceEntry : public AbstractTraceEntry { 166 public: 167 SwapTraceEntry(VMAnonymousCache* cache) 168 : 169 fCache(cache) 170 { 171 } 172 173 protected: 174 VMAnonymousCache* fCache; 175 }; 176 177 178 class ReadPage : public SwapTraceEntry { 179 public: 180 ReadPage(VMAnonymousCache* cache, page_num_t pageIndex, 181 swap_addr_t swapSlotIndex) 182 : 183 SwapTraceEntry(cache), 184 fPageIndex(pageIndex), 185 fSwapSlotIndex(swapSlotIndex) 186 { 187 Initialized(); 188 } 189 190 virtual void AddDump(TraceOutput& out) 191 { 192 out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu", 193 fCache, fPageIndex, fSwapSlotIndex); 194 } 195 196 private: 197 page_num_t fPageIndex; 198 swap_addr_t fSwapSlotIndex; 199 }; 200 201 202 class WritePage : public SwapTraceEntry { 203 public: 204 WritePage(VMAnonymousCache* cache, page_num_t pageIndex, 205 swap_addr_t swapSlotIndex) 206 : 207 SwapTraceEntry(cache), 208 fPageIndex(pageIndex), 209 fSwapSlotIndex(swapSlotIndex) 210 { 211 Initialized(); 212 } 213 214 virtual void AddDump(TraceOutput& out) 215 { 216 out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu", 217 fCache, fPageIndex, fSwapSlotIndex); 218 } 219 220 private: 221 page_num_t fPageIndex; 222 swap_addr_t fSwapSlotIndex; 223 }; 224 225 } // namespace SwapTracing 226 227 # define T(x) new(std::nothrow) SwapTracing::x; 228 #else 229 # define T(x) ; 230 #endif 231 232 233 static int 234 dump_swap_info(int argc, char** argv) 235 { 236 swap_addr_t totalSwapPages = 0; 237 swap_addr_t freeSwapPages = 0; 238 239 kprintf("swap files:\n"); 240 241 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 242 swap_file* file = it.Next();) { 243 swap_addr_t total = file->last_slot - file->first_slot; 244 kprintf(" vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32 245 "\n", file->vnode, total, file->bmp->free_slots); 246 247 totalSwapPages += total; 248 freeSwapPages += file->bmp->free_slots; 249 } 250 251 kprintf("\n"); 252 kprintf("swap space in pages:\n"); 253 kprintf("total: %9" B_PRIu32 "\n", totalSwapPages); 254 kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE); 255 kprintf("reserved: %9" B_PRIdOFF "\n", 256 totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE); 257 kprintf("used: %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages); 258 kprintf("free: %9" B_PRIu32 "\n", freeSwapPages); 259 260 return 0; 261 } 262 263 264 static swap_addr_t 265 swap_slot_alloc(uint32 count) 266 { 267 mutex_lock(&sSwapFileListLock); 268 269 if (sSwapFileList.IsEmpty()) { 270 mutex_unlock(&sSwapFileListLock); 271 panic("swap_slot_alloc(): no swap file in the system\n"); 272 return SWAP_SLOT_NONE; 273 } 274 275 // since radix bitmap could not handle more than 32 pages, we return 276 // SWAP_SLOT_NONE, this forces Write() adjust allocation amount 277 if (count > BITMAP_RADIX) { 278 mutex_unlock(&sSwapFileListLock); 279 return SWAP_SLOT_NONE; 280 } 281 282 swap_addr_t j, addr = SWAP_SLOT_NONE; 283 for (j = 0; j < sSwapFileCount; j++) { 284 if (sSwapFileAlloc == NULL) 285 sSwapFileAlloc = sSwapFileList.First(); 286 287 addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count); 288 if (addr != SWAP_SLOT_NONE) { 289 addr += sSwapFileAlloc->first_slot; 290 break; 291 } 292 293 // this swap_file is full, find another 294 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 295 } 296 297 if (j == sSwapFileCount) { 298 mutex_unlock(&sSwapFileListLock); 299 panic("swap_slot_alloc: swap space exhausted!\n"); 300 return SWAP_SLOT_NONE; 301 } 302 303 // if this swap file has used more than 90% percent of its space 304 // switch to another 305 if (sSwapFileAlloc->bmp->free_slots 306 < (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) { 307 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 308 } 309 310 mutex_unlock(&sSwapFileListLock); 311 312 return addr; 313 } 314 315 316 static swap_file* 317 find_swap_file(swap_addr_t slotIndex) 318 { 319 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 320 swap_file* swapFile = it.Next();) { 321 if (slotIndex >= swapFile->first_slot 322 && slotIndex < swapFile->last_slot) { 323 return swapFile; 324 } 325 } 326 327 panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n", 328 slotIndex); 329 return NULL; 330 } 331 332 333 static void 334 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count) 335 { 336 if (slotIndex == SWAP_SLOT_NONE) 337 return; 338 339 mutex_lock(&sSwapFileListLock); 340 swap_file* swapFile = find_swap_file(slotIndex); 341 slotIndex -= swapFile->first_slot; 342 radix_bitmap_dealloc(swapFile->bmp, slotIndex, count); 343 mutex_unlock(&sSwapFileListLock); 344 } 345 346 347 static off_t 348 swap_space_reserve(off_t amount) 349 { 350 mutex_lock(&sAvailSwapSpaceLock); 351 if (sAvailSwapSpace >= amount) 352 sAvailSwapSpace -= amount; 353 else { 354 amount = sAvailSwapSpace; 355 sAvailSwapSpace = 0; 356 } 357 mutex_unlock(&sAvailSwapSpaceLock); 358 359 return amount; 360 } 361 362 363 static void 364 swap_space_unreserve(off_t amount) 365 { 366 mutex_lock(&sAvailSwapSpaceLock); 367 sAvailSwapSpace += amount; 368 mutex_unlock(&sAvailSwapSpaceLock); 369 } 370 371 372 static void 373 swap_hash_resizer(void*, int) 374 { 375 WriteLocker locker(sSwapHashLock); 376 377 size_t size; 378 void* allocation; 379 380 do { 381 size = sSwapHashTable.ResizeNeeded(); 382 if (size == 0) 383 return; 384 385 locker.Unlock(); 386 387 allocation = malloc(size); 388 if (allocation == NULL) 389 return; 390 391 locker.Lock(); 392 393 } while (!sSwapHashTable.Resize(allocation, size)); 394 } 395 396 397 // #pragma mark - 398 399 400 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback { 401 public: 402 WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback) 403 : 404 StackableAsyncIOCallback(callback), 405 fCache(cache) 406 { 407 } 408 409 void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot) 410 { 411 fPageIndex = pageIndex; 412 fSlotIndex = slotIndex; 413 fNewSlot = newSlot; 414 } 415 416 virtual void IOFinished(status_t status, bool partialTransfer, 417 generic_size_t bytesTransferred) 418 { 419 if (fNewSlot) { 420 if (status == B_OK) { 421 fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1); 422 } else { 423 AutoLocker<VMCache> locker(fCache); 424 fCache->fAllocatedSwapSize -= B_PAGE_SIZE; 425 locker.Unlock(); 426 427 swap_slot_dealloc(fSlotIndex, 1); 428 } 429 } 430 431 fNextCallback->IOFinished(status, partialTransfer, bytesTransferred); 432 433 delete this; 434 } 435 436 private: 437 VMAnonymousCache* fCache; 438 page_num_t fPageIndex; 439 swap_addr_t fSlotIndex; 440 bool fNewSlot; 441 }; 442 443 444 // #pragma mark - 445 446 447 VMAnonymousCache::~VMAnonymousCache() 448 { 449 delete fNoSwapPages; 450 fNoSwapPages = NULL; 451 452 _FreeSwapPageRange(virtual_base, virtual_end, false); 453 swap_space_unreserve(fCommittedSwapSize); 454 if (committed_size > fCommittedSwapSize) 455 vm_unreserve_memory(committed_size - fCommittedSwapSize); 456 } 457 458 459 status_t 460 VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages, 461 int32 numGuardPages, uint32 allocationFlags) 462 { 463 TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, " 464 "numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32 465 ")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages, 466 numGuardPages); 467 468 status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags); 469 if (error != B_OK) 470 return error; 471 472 fCanOvercommit = canOvercommit; 473 fHasPrecommitted = false; 474 fPrecommittedPages = min_c(numPrecommittedPages, 255); 475 fNoSwapPages = NULL; 476 fGuardedSize = numGuardPages * B_PAGE_SIZE; 477 fCommittedSwapSize = 0; 478 fAllocatedSwapSize = 0; 479 480 return B_OK; 481 } 482 483 484 status_t 485 VMAnonymousCache::SetCanSwapPages(off_t base, size_t size, bool canSwap) 486 { 487 const page_num_t first = base >> PAGE_SHIFT; 488 const size_t count = PAGE_ALIGN(size + ((first << PAGE_SHIFT) - base)) >> PAGE_SHIFT; 489 490 if (count == 0) 491 return B_OK; 492 if (canSwap && fNoSwapPages == NULL) 493 return B_OK; 494 495 if (fNoSwapPages == NULL) 496 fNoSwapPages = new(std::nothrow) Bitmap(0); 497 if (fNoSwapPages == NULL) 498 return B_NO_MEMORY; 499 500 const page_num_t pageCount = PAGE_ALIGN(virtual_end) >> PAGE_SHIFT; 501 502 if (fNoSwapPages->Resize(pageCount) != B_OK) 503 return B_NO_MEMORY; 504 505 for (size_t i = 0; i < count; i++) { 506 if (canSwap) 507 fNoSwapPages->Clear(first + i); 508 else 509 fNoSwapPages->Set(first + i); 510 } 511 512 if (fNoSwapPages->GetHighestSet() < 0) { 513 delete fNoSwapPages; 514 fNoSwapPages = NULL; 515 } 516 return B_OK; 517 } 518 519 520 void 521 VMAnonymousCache::_FreeSwapPageRange(off_t fromOffset, off_t toOffset, 522 bool skipBusyPages) 523 { 524 swap_block* swapBlock = NULL; 525 off_t toIndex = toOffset >> PAGE_SHIFT; 526 for (off_t pageIndex = fromOffset >> PAGE_SHIFT; 527 pageIndex < toIndex && fAllocatedSwapSize > 0; pageIndex++) { 528 529 WriteLocker locker(sSwapHashLock); 530 531 // Get the swap slot index for the page. 532 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 533 if (swapBlock == NULL || blockIndex == 0) { 534 swap_hash_key key = { this, pageIndex }; 535 swapBlock = sSwapHashTable.Lookup(key); 536 537 if (swapBlock == NULL) { 538 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1; 539 continue; 540 } 541 } 542 543 swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex]; 544 if (slotIndex == SWAP_SLOT_NONE) 545 continue; 546 547 if (skipBusyPages) { 548 vm_page* page = LookupPage(pageIndex * B_PAGE_SIZE); 549 if (page != NULL && page->busy) { 550 // TODO: We skip (i.e. leak) swap space of busy pages, since 551 // there could be I/O going on (paging in/out). Waiting is 552 // not an option as 1. unlocking the cache means that new 553 // swap pages could be added in a range we've already 554 // cleared (since the cache still has the old size) and 2. 555 // we'd risk a deadlock in case we come from the file cache 556 // and the FS holds the node's write-lock. We should mark 557 // the page invalid and let the one responsible clean up. 558 // There's just no such mechanism yet. 559 continue; 560 } 561 } 562 563 swap_slot_dealloc(slotIndex, 1); 564 fAllocatedSwapSize -= B_PAGE_SIZE; 565 566 swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE; 567 if (--swapBlock->used == 0) { 568 // All swap pages have been freed -- we can discard the swap block. 569 sSwapHashTable.RemoveUnchecked(swapBlock); 570 object_cache_free(sSwapBlockCache, swapBlock, 571 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 572 573 // There are no swap pages for possibly remaining pages, skip to the 574 // next block. 575 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1; 576 swapBlock = NULL; 577 } 578 } 579 } 580 581 582 status_t 583 VMAnonymousCache::Resize(off_t newSize, int priority) 584 { 585 if (fNoSwapPages != NULL) { 586 if (fNoSwapPages->Resize(PAGE_ALIGN(newSize) >> PAGE_SHIFT) != B_OK) 587 return B_NO_MEMORY; 588 } 589 590 _FreeSwapPageRange(newSize + B_PAGE_SIZE - 1, 591 virtual_end + B_PAGE_SIZE - 1); 592 return VMCache::Resize(newSize, priority); 593 } 594 595 596 status_t 597 VMAnonymousCache::Rebase(off_t newBase, int priority) 598 { 599 if (fNoSwapPages != NULL) { 600 const ssize_t sizeDifference = (newBase >> PAGE_SHIFT) - (virtual_base >> PAGE_SHIFT); 601 fNoSwapPages->Shift(sizeDifference); 602 } 603 604 _FreeSwapPageRange(virtual_base, newBase); 605 return VMCache::Rebase(newBase, priority); 606 } 607 608 609 status_t 610 VMAnonymousCache::Discard(off_t offset, off_t size) 611 { 612 _FreeSwapPageRange(offset, offset + size); 613 return VMCache::Discard(offset, size); 614 } 615 616 617 /*! Moves the swap pages for the given range from the source cache into this 618 cache. Both caches must be locked. 619 */ 620 status_t 621 VMAnonymousCache::Adopt(VMCache* _source, off_t offset, off_t size, 622 off_t newOffset) 623 { 624 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 625 if (source == NULL) { 626 panic("VMAnonymousCache::Adopt(): adopt from incompatible cache %p " 627 "requested", _source); 628 return B_ERROR; 629 } 630 631 off_t pageIndex = newOffset >> PAGE_SHIFT; 632 off_t sourcePageIndex = offset >> PAGE_SHIFT; 633 off_t sourceEndPageIndex = (offset + size + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 634 swap_block* swapBlock = NULL; 635 636 WriteLocker locker(sSwapHashLock); 637 638 while (sourcePageIndex < sourceEndPageIndex 639 && source->fAllocatedSwapSize > 0) { 640 swap_addr_t left 641 = SWAP_BLOCK_PAGES - (sourcePageIndex & SWAP_BLOCK_MASK); 642 643 swap_hash_key sourceKey = { source, sourcePageIndex }; 644 swap_block* sourceSwapBlock = sSwapHashTable.Lookup(sourceKey); 645 if (sourceSwapBlock == NULL || sourceSwapBlock->used == 0) { 646 sourcePageIndex += left; 647 pageIndex += left; 648 swapBlock = NULL; 649 continue; 650 } 651 652 for (; left > 0 && sourceSwapBlock->used > 0; 653 left--, sourcePageIndex++, pageIndex++) { 654 655 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 656 if (swapBlock == NULL || blockIndex == 0) { 657 swap_hash_key key = { this, pageIndex }; 658 swapBlock = sSwapHashTable.Lookup(key); 659 660 if (swapBlock == NULL) { 661 swapBlock = (swap_block*)object_cache_alloc(sSwapBlockCache, 662 CACHE_DONT_WAIT_FOR_MEMORY 663 | CACHE_DONT_LOCK_KERNEL_SPACE); 664 if (swapBlock == NULL) 665 return B_NO_MEMORY; 666 667 swapBlock->key.cache = this; 668 swapBlock->key.page_index 669 = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 670 swapBlock->used = 0; 671 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 672 swapBlock->swap_slots[i] = SWAP_SLOT_NONE; 673 674 sSwapHashTable.InsertUnchecked(swapBlock); 675 } 676 } 677 678 swap_addr_t sourceBlockIndex = sourcePageIndex & SWAP_BLOCK_MASK; 679 swap_addr_t slotIndex 680 = sourceSwapBlock->swap_slots[sourceBlockIndex]; 681 if (slotIndex == SWAP_SLOT_NONE) 682 continue; 683 684 ASSERT(swapBlock->swap_slots[blockIndex] == SWAP_SLOT_NONE); 685 686 swapBlock->swap_slots[blockIndex] = slotIndex; 687 swapBlock->used++; 688 fAllocatedSwapSize += B_PAGE_SIZE; 689 690 sourceSwapBlock->swap_slots[sourceBlockIndex] = SWAP_SLOT_NONE; 691 sourceSwapBlock->used--; 692 source->fAllocatedSwapSize -= B_PAGE_SIZE; 693 694 TRACE("adopted slot %#" B_PRIx32 " from %p at page %" B_PRIdOFF 695 " to %p at page %" B_PRIdOFF "\n", slotIndex, source, 696 sourcePageIndex, this, pageIndex); 697 } 698 699 if (left > 0) { 700 sourcePageIndex += left; 701 pageIndex += left; 702 swapBlock = NULL; 703 } 704 705 if (sourceSwapBlock->used == 0) { 706 // All swap pages have been adopted, we can discard the swap block. 707 sSwapHashTable.RemoveUnchecked(sourceSwapBlock); 708 object_cache_free(sSwapBlockCache, sourceSwapBlock, 709 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 710 } 711 } 712 713 locker.Unlock(); 714 715 return VMCache::Adopt(source, offset, size, newOffset); 716 } 717 718 719 status_t 720 VMAnonymousCache::Commit(off_t size, int priority) 721 { 722 TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size); 723 724 // If we can overcommit, we don't commit here, but in Fault(). We always 725 // unreserve memory, if we're asked to shrink our commitment, though. 726 if (fCanOvercommit && size > committed_size) { 727 if (fHasPrecommitted) 728 return B_OK; 729 730 // pre-commit some pages to make a later failure less probable 731 fHasPrecommitted = true; 732 uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE; 733 if (size > precommitted) 734 size = precommitted; 735 } 736 737 return _Commit(size, priority); 738 } 739 740 741 bool 742 VMAnonymousCache::HasPage(off_t offset) 743 { 744 if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE) 745 return true; 746 747 return false; 748 } 749 750 751 bool 752 VMAnonymousCache::DebugHasPage(off_t offset) 753 { 754 off_t pageIndex = offset >> PAGE_SHIFT; 755 swap_hash_key key = { this, pageIndex }; 756 swap_block* swap = sSwapHashTable.Lookup(key); 757 if (swap == NULL) 758 return false; 759 760 return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE; 761 } 762 763 764 status_t 765 VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count, 766 uint32 flags, generic_size_t* _numBytes) 767 { 768 off_t pageIndex = offset >> PAGE_SHIFT; 769 770 for (uint32 i = 0, j = 0; i < count; i = j) { 771 swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i); 772 for (j = i + 1; j < count; j++) { 773 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j); 774 if (slotIndex != startSlotIndex + j - i) 775 break; 776 } 777 778 T(ReadPage(this, pageIndex, startSlotIndex)); 779 // TODO: Assumes that only one page is read. 780 781 swap_file* swapFile = find_swap_file(startSlotIndex); 782 783 off_t pos = (off_t)(startSlotIndex - swapFile->first_slot) 784 * B_PAGE_SIZE; 785 786 status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos, 787 vecs + i, j - i, flags, _numBytes); 788 if (status != B_OK) 789 return status; 790 } 791 792 return B_OK; 793 } 794 795 796 status_t 797 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count, 798 uint32 flags, generic_size_t* _numBytes) 799 { 800 off_t pageIndex = offset >> PAGE_SHIFT; 801 802 AutoLocker<VMCache> locker(this); 803 804 page_num_t totalPages = 0; 805 for (uint32 i = 0; i < count; i++) { 806 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 807 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages); 808 if (slotIndex != SWAP_SLOT_NONE) { 809 swap_slot_dealloc(slotIndex, pageCount); 810 _SwapBlockFree(pageIndex + totalPages, pageCount); 811 fAllocatedSwapSize -= pageCount * B_PAGE_SIZE; 812 } 813 814 totalPages += pageCount; 815 } 816 817 off_t totalSize = totalPages * B_PAGE_SIZE; 818 if (fAllocatedSwapSize + totalSize > fCommittedSwapSize) 819 return B_ERROR; 820 821 fAllocatedSwapSize += totalSize; 822 locker.Unlock(); 823 824 page_num_t pagesLeft = totalPages; 825 totalPages = 0; 826 827 for (uint32 i = 0; i < count; i++) { 828 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 829 830 generic_addr_t vectorBase = vecs[i].base; 831 generic_size_t vectorLength = vecs[i].length; 832 page_num_t n = pageCount; 833 834 for (page_num_t j = 0; j < pageCount; j += n) { 835 swap_addr_t slotIndex; 836 // try to allocate n slots, if fail, try to allocate n/2 837 while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2) 838 n >>= 1; 839 840 if (slotIndex == SWAP_SLOT_NONE) 841 panic("VMAnonymousCache::Write(): can't allocate swap space\n"); 842 843 T(WritePage(this, pageIndex, slotIndex)); 844 // TODO: Assumes that only one page is written. 845 846 swap_file* swapFile = find_swap_file(slotIndex); 847 848 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 849 850 generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE; 851 generic_io_vec vector[1]; 852 vector->base = vectorBase; 853 vector->length = length; 854 855 status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie, 856 pos, vector, 1, flags, &length); 857 if (status != B_OK) { 858 locker.Lock(); 859 fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE; 860 locker.Unlock(); 861 862 swap_slot_dealloc(slotIndex, n); 863 return status; 864 } 865 866 _SwapBlockBuild(pageIndex + totalPages, slotIndex, n); 867 pagesLeft -= n; 868 869 if (n != pageCount) { 870 vectorBase = vectorBase + n * B_PAGE_SIZE; 871 vectorLength -= n * B_PAGE_SIZE; 872 } 873 } 874 875 totalPages += pageCount; 876 } 877 878 ASSERT(pagesLeft == 0); 879 return B_OK; 880 } 881 882 883 status_t 884 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs, 885 size_t count, generic_size_t numBytes, uint32 flags, 886 AsyncIOCallback* _callback) 887 { 888 // TODO: Currently this method is only used for single pages. Either make 889 // more flexible use of it or change the interface! 890 // This implementation relies on the current usage! 891 ASSERT(count == 1); 892 ASSERT(numBytes <= B_PAGE_SIZE); 893 894 page_num_t pageIndex = offset >> PAGE_SHIFT; 895 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex); 896 bool newSlot = slotIndex == SWAP_SLOT_NONE; 897 898 // If the page doesn't have any swap space yet, allocate it. 899 if (newSlot) { 900 AutoLocker<VMCache> locker(this); 901 if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) { 902 _callback->IOFinished(B_ERROR, true, 0); 903 return B_ERROR; 904 } 905 906 fAllocatedSwapSize += B_PAGE_SIZE; 907 908 slotIndex = swap_slot_alloc(1); 909 } 910 911 // create our callback 912 WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0 913 ? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback) 914 : new(std::nothrow) WriteCallback(this, _callback); 915 if (callback == NULL) { 916 if (newSlot) { 917 AutoLocker<VMCache> locker(this); 918 fAllocatedSwapSize -= B_PAGE_SIZE; 919 locker.Unlock(); 920 921 swap_slot_dealloc(slotIndex, 1); 922 } 923 _callback->IOFinished(B_NO_MEMORY, true, 0); 924 return B_NO_MEMORY; 925 } 926 // TODO: If the page already had swap space assigned, we don't need an own 927 // callback. 928 929 callback->SetTo(pageIndex, slotIndex, newSlot); 930 931 T(WritePage(this, pageIndex, slotIndex)); 932 933 // write the page asynchrounously 934 swap_file* swapFile = find_swap_file(slotIndex); 935 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 936 937 return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos, 938 vecs, 1, numBytes, flags, callback); 939 } 940 941 942 bool 943 VMAnonymousCache::CanWritePage(off_t offset) 944 { 945 const off_t pageIndex = offset >> PAGE_SHIFT; 946 if (fNoSwapPages != NULL && fNoSwapPages->Get(pageIndex)) 947 return false; 948 949 // We can write the page, if we have not used all of our committed swap 950 // space or the page already has a swap slot assigned. 951 return fAllocatedSwapSize < fCommittedSwapSize 952 || _SwapBlockGetAddress(pageIndex) != SWAP_SLOT_NONE; 953 } 954 955 956 int32 957 VMAnonymousCache::MaxPagesPerAsyncWrite() const 958 { 959 return 1; 960 } 961 962 963 status_t 964 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset) 965 { 966 if (fGuardedSize > 0) { 967 uint32 guardOffset; 968 969 #ifdef STACK_GROWS_DOWNWARDS 970 guardOffset = 0; 971 #elif defined(STACK_GROWS_UPWARDS) 972 guardOffset = virtual_size - fGuardedSize; 973 #else 974 # error Stack direction has not been defined in arch_config.h 975 #endif 976 // report stack fault, guard page hit! 977 if (offset >= guardOffset && offset < guardOffset + fGuardedSize) { 978 TRACE(("stack overflow!\n")); 979 return B_BAD_ADDRESS; 980 } 981 } 982 983 if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) { 984 if (fPrecommittedPages == 0) { 985 // never commit more than needed 986 if (committed_size / B_PAGE_SIZE > page_count) 987 return B_BAD_HANDLER; 988 989 // try to commit additional swap space/memory 990 if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) { 991 fCommittedSwapSize += B_PAGE_SIZE; 992 } else { 993 int priority = aspace == VMAddressSpace::Kernel() 994 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 995 if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) { 996 dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve " 997 "%d bytes of RAM.\n", this, (int)B_PAGE_SIZE); 998 return B_NO_MEMORY; 999 } 1000 } 1001 1002 committed_size += B_PAGE_SIZE; 1003 } else 1004 fPrecommittedPages--; 1005 } 1006 1007 // This will cause vm_soft_fault() to handle the fault 1008 return B_BAD_HANDLER; 1009 } 1010 1011 1012 void 1013 VMAnonymousCache::Merge(VMCache* _source) 1014 { 1015 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 1016 if (source == NULL) { 1017 panic("VMAnonymousCache::Merge(): merge with incompatible cache " 1018 "%p requested", _source); 1019 return; 1020 } 1021 1022 // take over the source' committed size 1023 fCommittedSwapSize += source->fCommittedSwapSize; 1024 source->fCommittedSwapSize = 0; 1025 committed_size += source->committed_size; 1026 source->committed_size = 0; 1027 1028 off_t actualSize = virtual_end - virtual_base; 1029 if (committed_size > actualSize) 1030 _Commit(actualSize, VM_PRIORITY_USER); 1031 1032 // Move all not shadowed swap pages from the source to the consumer cache. 1033 // Also remove all source pages that are shadowed by consumer swap pages. 1034 _MergeSwapPages(source); 1035 1036 // Move all not shadowed pages from the source to the consumer cache. 1037 if (source->page_count < page_count) 1038 _MergePagesSmallerSource(source); 1039 else 1040 _MergePagesSmallerConsumer(source); 1041 } 1042 1043 1044 void 1045 VMAnonymousCache::DeleteObject() 1046 { 1047 object_cache_delete(gAnonymousCacheObjectCache, this); 1048 } 1049 1050 1051 void 1052 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex, 1053 swap_addr_t startSlotIndex, uint32 count) 1054 { 1055 WriteLocker locker(sSwapHashLock); 1056 1057 uint32 left = count; 1058 for (uint32 i = 0, j = 0; i < count; i += j) { 1059 off_t pageIndex = startPageIndex + i; 1060 swap_addr_t slotIndex = startSlotIndex + i; 1061 1062 swap_hash_key key = { this, pageIndex }; 1063 1064 swap_block* swap = sSwapHashTable.Lookup(key); 1065 while (swap == NULL) { 1066 swap = (swap_block*)object_cache_alloc(sSwapBlockCache, 1067 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1068 if (swap == NULL) { 1069 // Wait a short time until memory is available again. 1070 locker.Unlock(); 1071 snooze(10000); 1072 locker.Lock(); 1073 swap = sSwapHashTable.Lookup(key); 1074 continue; 1075 } 1076 1077 swap->key.cache = this; 1078 swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 1079 swap->used = 0; 1080 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 1081 swap->swap_slots[i] = SWAP_SLOT_NONE; 1082 1083 sSwapHashTable.InsertUnchecked(swap); 1084 } 1085 1086 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 1087 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 1088 swap->swap_slots[blockIndex++] = slotIndex + j; 1089 left--; 1090 } 1091 1092 swap->used += j; 1093 } 1094 } 1095 1096 1097 void 1098 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count) 1099 { 1100 WriteLocker locker(sSwapHashLock); 1101 1102 uint32 left = count; 1103 for (uint32 i = 0, j = 0; i < count; i += j) { 1104 off_t pageIndex = startPageIndex + i; 1105 swap_hash_key key = { this, pageIndex }; 1106 swap_block* swap = sSwapHashTable.Lookup(key); 1107 1108 ASSERT(swap != NULL); 1109 1110 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 1111 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 1112 swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE; 1113 left--; 1114 } 1115 1116 swap->used -= j; 1117 if (swap->used == 0) { 1118 sSwapHashTable.RemoveUnchecked(swap); 1119 object_cache_free(sSwapBlockCache, swap, 1120 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1121 } 1122 } 1123 } 1124 1125 1126 swap_addr_t 1127 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex) 1128 { 1129 ReadLocker locker(sSwapHashLock); 1130 1131 swap_hash_key key = { this, pageIndex }; 1132 swap_block* swap = sSwapHashTable.Lookup(key); 1133 swap_addr_t slotIndex = SWAP_SLOT_NONE; 1134 1135 if (swap != NULL) { 1136 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 1137 slotIndex = swap->swap_slots[blockIndex]; 1138 } 1139 1140 return slotIndex; 1141 } 1142 1143 1144 status_t 1145 VMAnonymousCache::_Commit(off_t size, int priority) 1146 { 1147 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: " 1148 "%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size, 1149 fCommittedSwapSize); 1150 1151 // Basic strategy: reserve swap space first, only when running out of swap 1152 // space, reserve real memory. 1153 1154 off_t committedMemory = committed_size - fCommittedSwapSize; 1155 1156 // Regardless of whether we're asked to grow or shrink the commitment, 1157 // we always try to reserve as much as possible of the final commitment 1158 // in the swap space. 1159 if (size > fCommittedSwapSize) { 1160 fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize); 1161 committed_size = fCommittedSwapSize + committedMemory; 1162 if (size > fCommittedSwapSize) { 1163 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved " 1164 "only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize); 1165 } 1166 } 1167 1168 if (committed_size == size) 1169 return B_OK; 1170 1171 if (committed_size > size) { 1172 // The commitment shrinks -- unreserve real memory first. 1173 off_t toUnreserve = committed_size - size; 1174 if (committedMemory > 0) { 1175 off_t unreserved = min_c(toUnreserve, committedMemory); 1176 vm_unreserve_memory(unreserved); 1177 committedMemory -= unreserved; 1178 committed_size -= unreserved; 1179 toUnreserve -= unreserved; 1180 } 1181 1182 // Unreserve swap space. 1183 if (toUnreserve > 0) { 1184 swap_space_unreserve(toUnreserve); 1185 fCommittedSwapSize -= toUnreserve; 1186 committed_size -= toUnreserve; 1187 } 1188 1189 return B_OK; 1190 } 1191 1192 // The commitment grows -- we have already tried to reserve swap space at 1193 // the start of the method, so we try to reserve real memory, now. 1194 1195 off_t toReserve = size - committed_size; 1196 if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) { 1197 dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to " 1198 "reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve); 1199 return B_NO_MEMORY; 1200 } 1201 1202 committed_size = size; 1203 return B_OK; 1204 } 1205 1206 1207 void 1208 VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source) 1209 { 1210 // The source cache has less pages than the consumer (this cache), so we 1211 // iterate through the source's pages and move the ones that are not 1212 // shadowed up to the consumer. 1213 1214 for (VMCachePagesTree::Iterator it = source->pages.GetIterator(); 1215 vm_page* page = it.Next();) { 1216 // Note: Removing the current node while iterating through a 1217 // IteratableSplayTree is safe. 1218 vm_page* consumerPage = LookupPage( 1219 (off_t)page->cache_offset << PAGE_SHIFT); 1220 if (consumerPage == NULL) { 1221 // the page is not yet in the consumer cache - move it upwards 1222 ASSERT_PRINT(!page->busy, "page: %p", page); 1223 MovePage(page); 1224 } 1225 } 1226 } 1227 1228 1229 void 1230 VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source) 1231 { 1232 // The consumer (this cache) has less pages than the source, so we move the 1233 // consumer's pages to the source (freeing shadowed ones) and finally just 1234 // all pages of the source back to the consumer. 1235 1236 for (VMCachePagesTree::Iterator it = pages.GetIterator(); 1237 vm_page* page = it.Next();) { 1238 // If a source page is in the way, remove and free it. 1239 vm_page* sourcePage = source->LookupPage( 1240 (off_t)page->cache_offset << PAGE_SHIFT); 1241 if (sourcePage != NULL) { 1242 DEBUG_PAGE_ACCESS_START(sourcePage); 1243 ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage); 1244 ASSERT_PRINT(sourcePage->WiredCount() == 0 1245 && sourcePage->mappings.IsEmpty(), 1246 "sourcePage: %p, page: %p", sourcePage, page); 1247 source->RemovePage(sourcePage); 1248 vm_page_free(source, sourcePage); 1249 } 1250 1251 // Note: Removing the current node while iterating through a 1252 // IteratableSplayTree is safe. 1253 source->MovePage(page); 1254 } 1255 1256 MoveAllPages(source); 1257 } 1258 1259 1260 void 1261 VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source) 1262 { 1263 // If neither source nor consumer have swap pages, we don't have to do 1264 // anything. 1265 if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0) 1266 return; 1267 1268 for (off_t offset = source->virtual_base 1269 & ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1); 1270 offset < source->virtual_end; 1271 offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) { 1272 1273 WriteLocker locker(sSwapHashLock); 1274 1275 off_t swapBlockPageIndex = offset >> PAGE_SHIFT; 1276 swap_hash_key key = { source, swapBlockPageIndex }; 1277 swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key); 1278 1279 // remove the source swap block -- we will either take over the swap 1280 // space (and the block) or free it 1281 if (sourceSwapBlock != NULL) 1282 sSwapHashTable.RemoveUnchecked(sourceSwapBlock); 1283 1284 key.cache = this; 1285 swap_block* swapBlock = sSwapHashTable.Lookup(key); 1286 1287 locker.Unlock(); 1288 1289 // remove all source pages that are shadowed by consumer swap pages 1290 if (swapBlock != NULL) { 1291 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1292 if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) { 1293 vm_page* page = source->LookupPage( 1294 (off_t)(swapBlockPageIndex + i) << PAGE_SHIFT); 1295 if (page != NULL) { 1296 DEBUG_PAGE_ACCESS_START(page); 1297 ASSERT_PRINT(!page->busy, "page: %p", page); 1298 source->RemovePage(page); 1299 vm_page_free(source, page); 1300 } 1301 } 1302 } 1303 } 1304 1305 if (sourceSwapBlock == NULL) 1306 continue; 1307 1308 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1309 off_t pageIndex = swapBlockPageIndex + i; 1310 swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i]; 1311 1312 if (sourceSlotIndex == SWAP_SLOT_NONE) 1313 continue; 1314 1315 if ((swapBlock != NULL 1316 && swapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1317 || LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) { 1318 // The consumer already has a page or a swapped out page 1319 // at this index. So we can free the source swap space. 1320 swap_slot_dealloc(sourceSlotIndex, 1); 1321 sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE; 1322 sourceSwapBlock->used--; 1323 } 1324 1325 // We've either freed the source swap page or are going to move it 1326 // to the consumer. At any rate, the source cache doesn't own it 1327 // anymore. 1328 source->fAllocatedSwapSize -= B_PAGE_SIZE; 1329 } 1330 1331 // All source swap pages that have not been freed yet are taken over by 1332 // the consumer. 1333 fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used; 1334 1335 if (sourceSwapBlock->used == 0) { 1336 // All swap pages have been freed -- we can discard the source swap 1337 // block. 1338 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1339 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1340 } else if (swapBlock == NULL) { 1341 // We need to take over some of the source's swap pages and there's 1342 // no swap block in the consumer cache. Just take over the source 1343 // swap block. 1344 sourceSwapBlock->key.cache = this; 1345 locker.Lock(); 1346 sSwapHashTable.InsertUnchecked(sourceSwapBlock); 1347 locker.Unlock(); 1348 } else { 1349 // We need to take over some of the source's swap pages and there's 1350 // already a swap block in the consumer cache. Copy the respective 1351 // swap addresses and discard the source swap block. 1352 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1353 if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1354 swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i]; 1355 } 1356 1357 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1358 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1359 } 1360 } 1361 } 1362 1363 1364 // #pragma mark - 1365 1366 1367 // TODO: This can be removed if we get BFS uuid's 1368 struct VolumeInfo { 1369 char name[B_FILE_NAME_LENGTH]; 1370 char device[B_FILE_NAME_LENGTH]; 1371 char filesystem[B_OS_NAME_LENGTH]; 1372 off_t capacity; 1373 }; 1374 1375 1376 class PartitionScorer : public KPartitionVisitor { 1377 public: 1378 PartitionScorer(VolumeInfo& volumeInfo) 1379 : 1380 fBestPartition(NULL), 1381 fBestScore(-1), 1382 fVolumeInfo(volumeInfo) 1383 { 1384 } 1385 1386 virtual bool VisitPre(KPartition* partition) 1387 { 1388 if (!partition->ContainsFileSystem()) 1389 return false; 1390 1391 KPath path; 1392 partition->GetPath(&path); 1393 1394 int score = 0; 1395 if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0) 1396 score += 4; 1397 if (strcmp(fVolumeInfo.device, path.Path()) == 0) 1398 score += 3; 1399 if (fVolumeInfo.capacity == partition->Size()) 1400 score += 2; 1401 if (strcmp(fVolumeInfo.filesystem, 1402 partition->DiskSystem()->ShortName()) == 0) { 1403 score += 1; 1404 } 1405 if (score >= 4 && score > fBestScore) { 1406 fBestPartition = partition; 1407 fBestScore = score; 1408 } 1409 1410 return false; 1411 } 1412 1413 KPartition* fBestPartition; 1414 1415 private: 1416 int32 fBestScore; 1417 VolumeInfo& fVolumeInfo; 1418 }; 1419 1420 1421 status_t 1422 swap_file_add(const char* path) 1423 { 1424 // open the file 1425 int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR); 1426 if (fd < 0) 1427 return errno; 1428 1429 // fstat() it and check whether we can use it 1430 struct stat st; 1431 if (fstat(fd, &st) < 0) { 1432 close(fd); 1433 return errno; 1434 } 1435 1436 if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { 1437 close(fd); 1438 return B_BAD_VALUE; 1439 } 1440 1441 if (st.st_size < B_PAGE_SIZE) { 1442 close(fd); 1443 return B_BAD_VALUE; 1444 } 1445 1446 // get file descriptor, vnode, and cookie 1447 file_descriptor* descriptor = get_fd(get_current_io_context(true), fd); 1448 put_fd(descriptor); 1449 1450 vnode* node = fd_vnode(descriptor); 1451 if (node == NULL) { 1452 close(fd); 1453 return B_BAD_VALUE; 1454 } 1455 1456 // do the allocations and prepare the swap_file structure 1457 swap_file* swap = new(std::nothrow) swap_file; 1458 if (swap == NULL) { 1459 close(fd); 1460 return B_NO_MEMORY; 1461 } 1462 1463 swap->fd = fd; 1464 swap->vnode = node; 1465 swap->cookie = descriptor->cookie; 1466 1467 uint32 pageCount = st.st_size >> PAGE_SHIFT; 1468 swap->bmp = radix_bitmap_create(pageCount); 1469 if (swap->bmp == NULL) { 1470 delete swap; 1471 close(fd); 1472 return B_NO_MEMORY; 1473 } 1474 1475 // set slot index and add this file to swap file list 1476 mutex_lock(&sSwapFileListLock); 1477 // TODO: Also check whether the swap file is already registered! 1478 if (sSwapFileList.IsEmpty()) { 1479 swap->first_slot = 0; 1480 swap->last_slot = pageCount; 1481 } else { 1482 // leave one page gap between two swap files 1483 swap->first_slot = sSwapFileList.Last()->last_slot + 1; 1484 swap->last_slot = swap->first_slot + pageCount; 1485 } 1486 sSwapFileList.Add(swap); 1487 sSwapFileCount++; 1488 mutex_unlock(&sSwapFileListLock); 1489 1490 mutex_lock(&sAvailSwapSpaceLock); 1491 sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE; 1492 mutex_unlock(&sAvailSwapSpaceLock); 1493 1494 return B_OK; 1495 } 1496 1497 1498 status_t 1499 swap_file_delete(const char* path) 1500 { 1501 vnode* node = NULL; 1502 status_t status = vfs_get_vnode_from_path(path, true, &node); 1503 if (status != B_OK) 1504 return status; 1505 1506 MutexLocker locker(sSwapFileListLock); 1507 1508 swap_file* swapFile = NULL; 1509 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1510 (swapFile = it.Next()) != NULL;) { 1511 if (swapFile->vnode == node) 1512 break; 1513 } 1514 1515 vfs_put_vnode(node); 1516 1517 if (swapFile == NULL) 1518 return B_ERROR; 1519 1520 // if this file is currently used, we can't delete 1521 // TODO: mark this swap file deleting, and remove it after releasing 1522 // all the swap space 1523 if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot) 1524 return B_ERROR; 1525 1526 sSwapFileList.Remove(swapFile); 1527 sSwapFileCount--; 1528 locker.Unlock(); 1529 1530 mutex_lock(&sAvailSwapSpaceLock); 1531 sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot) 1532 * B_PAGE_SIZE; 1533 mutex_unlock(&sAvailSwapSpaceLock); 1534 1535 close(swapFile->fd); 1536 radix_bitmap_destroy(swapFile->bmp); 1537 delete swapFile; 1538 1539 return B_OK; 1540 } 1541 1542 1543 void 1544 swap_init(void) 1545 { 1546 // create swap block cache 1547 sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block), 1548 sizeof(void*), NULL, NULL, NULL); 1549 if (sSwapBlockCache == NULL) 1550 panic("swap_init(): can't create object cache for swap blocks\n"); 1551 1552 status_t error = object_cache_set_minimum_reserve(sSwapBlockCache, 1553 MIN_SWAP_BLOCK_RESERVE); 1554 if (error != B_OK) { 1555 panic("swap_init(): object_cache_set_minimum_reserve() failed: %s", 1556 strerror(error)); 1557 } 1558 1559 // init swap hash table 1560 sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE); 1561 rw_lock_init(&sSwapHashLock, "swaphash"); 1562 1563 error = register_resource_resizer(swap_hash_resizer, NULL, 1564 SWAP_HASH_RESIZE_INTERVAL); 1565 if (error != B_OK) { 1566 panic("swap_init(): Failed to register swap hash resizer: %s", 1567 strerror(error)); 1568 } 1569 1570 // init swap file list 1571 mutex_init(&sSwapFileListLock, "swaplist"); 1572 sSwapFileAlloc = NULL; 1573 sSwapFileCount = 0; 1574 1575 // init available swap space 1576 mutex_init(&sAvailSwapSpaceLock, "avail swap space"); 1577 sAvailSwapSpace = 0; 1578 1579 add_debugger_command_etc("swap", &dump_swap_info, 1580 "Print infos about the swap usage", 1581 "\n" 1582 "Print infos about the swap usage.\n", 0); 1583 } 1584 1585 1586 void 1587 swap_init_post_modules() 1588 { 1589 // Never try to create a swap file on a read-only device - when booting 1590 // from CD, the write overlay is used. 1591 if (gReadOnlyBootDevice) 1592 return; 1593 1594 bool swapEnabled = true; 1595 bool swapAutomatic = true; 1596 off_t swapSize = 0; 1597 1598 dev_t swapDeviceID = -1; 1599 VolumeInfo selectedVolume = {}; 1600 1601 void* settings = load_driver_settings("virtual_memory"); 1602 1603 if (settings != NULL) { 1604 // We pass a lot of information on the swap device, this is mostly to 1605 // ensure that we are dealing with the same device that was configured. 1606 1607 // TODO: Some kind of BFS uuid would be great here :) 1608 const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL); 1609 1610 if (enabled != NULL) { 1611 swapEnabled = get_driver_boolean_parameter(settings, "vm", 1612 true, false); 1613 swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto", 1614 true, false); 1615 1616 if (swapEnabled && !swapAutomatic) { 1617 const char* size = get_driver_parameter(settings, "swap_size", 1618 NULL, NULL); 1619 const char* volume = get_driver_parameter(settings, 1620 "swap_volume_name", NULL, NULL); 1621 const char* device = get_driver_parameter(settings, 1622 "swap_volume_device", NULL, NULL); 1623 const char* filesystem = get_driver_parameter(settings, 1624 "swap_volume_filesystem", NULL, NULL); 1625 const char* capacity = get_driver_parameter(settings, 1626 "swap_volume_capacity", NULL, NULL); 1627 1628 if (size != NULL && device != NULL && volume != NULL 1629 && filesystem != NULL && capacity != NULL) { 1630 // User specified a size / volume that seems valid 1631 swapAutomatic = false; 1632 swapSize = atoll(size); 1633 strlcpy(selectedVolume.name, volume, 1634 sizeof(selectedVolume.name)); 1635 strlcpy(selectedVolume.device, device, 1636 sizeof(selectedVolume.device)); 1637 strlcpy(selectedVolume.filesystem, filesystem, 1638 sizeof(selectedVolume.filesystem)); 1639 selectedVolume.capacity = atoll(capacity); 1640 } else { 1641 // Something isn't right with swap config, go auto 1642 swapAutomatic = true; 1643 dprintf("%s: virtual_memory configuration is invalid, " 1644 "using automatic swap\n", __func__); 1645 } 1646 } 1647 } 1648 unload_driver_settings(settings); 1649 } 1650 1651 if (swapAutomatic) { 1652 swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE; 1653 if (swapSize <= (1024 * 1024 * 1024)) { 1654 // Memory under 1GB? double the swap 1655 swapSize *= 2; 1656 } 1657 // Automatic swap defaults to the boot device 1658 swapDeviceID = gBootDevice; 1659 } 1660 1661 if (!swapEnabled || swapSize < B_PAGE_SIZE) { 1662 dprintf("%s: virtual_memory is disabled\n", __func__); 1663 return; 1664 } 1665 1666 if (!swapAutomatic && swapDeviceID < 0) { 1667 // If user-specified swap, and no swap device has been chosen yet... 1668 KDiskDeviceManager::CreateDefault(); 1669 KDiskDeviceManager* manager = KDiskDeviceManager::Default(); 1670 PartitionScorer visitor(selectedVolume); 1671 1672 KDiskDevice* device; 1673 int32 cookie = 0; 1674 while ((device = manager->NextDevice(&cookie)) != NULL) { 1675 if (device->IsReadOnlyMedia() || device->IsWriteOnce() 1676 || device->IsRemovable()) { 1677 continue; 1678 } 1679 device->VisitEachDescendant(&visitor); 1680 } 1681 1682 if (!visitor.fBestPartition) { 1683 dprintf("%s: Can't find configured swap partition '%s'\n", 1684 __func__, selectedVolume.name); 1685 } else { 1686 if (visitor.fBestPartition->IsMounted()) 1687 swapDeviceID = visitor.fBestPartition->VolumeID(); 1688 else { 1689 KPath devPath, mountPoint; 1690 visitor.fBestPartition->GetPath(&devPath); 1691 get_mount_point(visitor.fBestPartition, &mountPoint); 1692 const char* mountPath = mountPoint.Path(); 1693 mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO); 1694 swapDeviceID = _kern_mount(mountPath, devPath.Path(), 1695 NULL, 0, NULL, 0); 1696 if (swapDeviceID < 0) { 1697 dprintf("%s: Can't mount configured swap partition '%s'\n", 1698 __func__, selectedVolume.name); 1699 } 1700 } 1701 } 1702 } 1703 1704 if (swapDeviceID < 0) 1705 swapDeviceID = gBootDevice; 1706 1707 // We now have a swapDeviceID which is used for the swap file 1708 1709 KPath path; 1710 struct fs_info info; 1711 _kern_read_fs_info(swapDeviceID, &info); 1712 if (swapDeviceID == gBootDevice) 1713 path = kDefaultSwapPath; 1714 else { 1715 vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(), 1716 path.BufferSize()); 1717 path.UnlockBuffer(); 1718 path.Append("swap"); 1719 } 1720 1721 const char* swapPath = path.Path(); 1722 1723 // Swap size limits prevent oversized swap files 1724 if (swapAutomatic) { 1725 off_t existingSwapSize = 0; 1726 struct stat existingSwapStat; 1727 if (stat(swapPath, &existingSwapStat) == 0) 1728 existingSwapSize = existingSwapStat.st_size; 1729 1730 off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize; 1731 1732 // Adjust automatic swap to a maximum of 25% of the free space 1733 if (swapSize > (freeSpace / 4)) 1734 swapSize = (freeSpace / 4); 1735 } 1736 1737 // Create swap file 1738 int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR); 1739 if (fd < 0) { 1740 dprintf("%s: Can't open/create %s: %s\n", __func__, 1741 swapPath, strerror(errno)); 1742 return; 1743 } 1744 1745 struct stat stat; 1746 stat.st_size = swapSize; 1747 status_t error = _kern_write_stat(fd, NULL, false, &stat, 1748 sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE); 1749 if (error != B_OK) { 1750 dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n", 1751 __func__, swapPath, swapSize, strerror(error)); 1752 } 1753 1754 close(fd); 1755 1756 error = swap_file_add(swapPath); 1757 if (error != B_OK) { 1758 dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath, 1759 strerror(error)); 1760 } 1761 } 1762 1763 1764 //! Used by page daemon to free swap space. 1765 bool 1766 swap_free_page_swap_space(vm_page* page) 1767 { 1768 VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache()); 1769 if (cache == NULL) 1770 return false; 1771 1772 swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset); 1773 if (slotIndex == SWAP_SLOT_NONE) 1774 return false; 1775 1776 swap_slot_dealloc(slotIndex, 1); 1777 cache->fAllocatedSwapSize -= B_PAGE_SIZE; 1778 cache->_SwapBlockFree(page->cache_offset, 1); 1779 1780 return true; 1781 } 1782 1783 1784 uint32 1785 swap_available_pages() 1786 { 1787 mutex_lock(&sAvailSwapSpaceLock); 1788 uint32 avail = sAvailSwapSpace >> PAGE_SHIFT; 1789 mutex_unlock(&sAvailSwapSpaceLock); 1790 1791 return avail; 1792 } 1793 1794 1795 uint32 1796 swap_total_swap_pages() 1797 { 1798 mutex_lock(&sSwapFileListLock); 1799 1800 uint32 totalSwapSlots = 0; 1801 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1802 swap_file* swapFile = it.Next();) { 1803 totalSwapSlots += swapFile->last_slot - swapFile->first_slot; 1804 } 1805 1806 mutex_unlock(&sSwapFileListLock); 1807 1808 return totalSwapSlots; 1809 } 1810 1811 1812 #endif // ENABLE_SWAP_SUPPORT 1813 1814 1815 void 1816 swap_get_info(system_info* info) 1817 { 1818 #if ENABLE_SWAP_SUPPORT 1819 MutexLocker locker(sSwapFileListLock); 1820 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1821 swap_file* swapFile = it.Next();) { 1822 info->max_swap_pages += swapFile->last_slot - swapFile->first_slot; 1823 info->free_swap_pages += swapFile->bmp->free_slots; 1824 } 1825 #else 1826 info->max_swap_pages = 0; 1827 info->free_swap_pages = 0; 1828 #endif 1829 } 1830 1831