/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 *
 * Copyright 2011-2012 Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Hamish Morrison, hamish@lavabit.com
 *		Alexander von Gluck IV, kallisti5@unixzen.com
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <FindDirectory.h>
#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskSystem.h>
#include <disk_device_manager/KPartitionVisitor.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs/KPath.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>

#include "IORequest.h"
#include "VMUtils.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval at which the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE	1024

#define SWAP_SLOT_NONE	RADIX_SLOT_NONE

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5		/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)


static const char* const kDefaultSwapPath = "/var/swap";

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode*	vnode;
	void*			cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap*	bmp;
};

struct swap_hash_key {
	VMAnonymousCache*	cache;
	off_t				page_index;  // page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache.
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache* cache = key.cache;
		return blockIndex ^ (size_t)(int*)cache;
	}

	size_t Hash(const swap_block* value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block* value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block* value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL; // allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache* sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32
			"\n", file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9" B_PRIu32 "\n", totalSwapPages);
	kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9" B_PRIdOFF "\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9" B_PRIu32 "\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE here; this forces Write() to adjust the allocation
	// amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// If this swap file has used more than 90% of its space, switch to
	// another one.
	if (sSwapFileAlloc->bmp->free_slots
		< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) {
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file*
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
			&& slotIndex < swapFile->last_slot) {
			return swapFile;
		}
	}

	panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n",
		slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file* swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	_FreeSwapPageRange(virtual_base, virtual_end, false);
	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32
		")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages,
		numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


void
VMAnonymousCache::_FreeSwapPageRange(off_t fromOffset, off_t toOffset,
	bool skipBusyPages)
{
	swap_block* swapBlock = NULL;
	off_t toIndex = toOffset >> PAGE_SHIFT;
	for (off_t pageIndex = fromOffset >> PAGE_SHIFT;
		pageIndex < toIndex && fAllocatedSwapSize > 0; pageIndex++) {

		WriteLocker locker(sSwapHashLock);

		// Get the swap slot index for the page.
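		// Slots are recorded in blocks of SWAP_BLOCK_PAGES pages each, so a
		// new hash lookup is only needed when we cross into the next block.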
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		if (swapBlock == NULL || blockIndex == 0) {
			swap_hash_key key = { this, pageIndex };
			swapBlock = sSwapHashTable.Lookup(key);

			if (swapBlock == NULL) {
				pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
				continue;
			}
		}

		swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex];
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		if (skipBusyPages) {
			vm_page* page = LookupPage(pageIndex * B_PAGE_SIZE);
			if (page != NULL && page->busy) {
				// TODO: We skip (i.e. leak) swap space of busy pages, since
				// there could be I/O going on (paging in/out). Waiting is
				// not an option as 1. unlocking the cache means that new
				// swap pages could be added in a range we've already
				// cleared (since the cache still has the old size) and 2.
				// we'd risk a deadlock in case we come from the file cache
				// and the FS holds the node's write-lock. We should mark
				// the page invalid and let the one responsible clean up.
				// There's just no such mechanism yet.
				continue;
			}
		}

		swap_slot_dealloc(slotIndex, 1);
		fAllocatedSwapSize -= B_PAGE_SIZE;

		swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE;
		if (--swapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the swap
			// block.
			sSwapHashTable.RemoveUnchecked(swapBlock);
			object_cache_free(sSwapBlockCache, swapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);

			// There are no swap pages for the possibly remaining pages of
			// this block, so skip to the next one.
			pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES) - 1;
			swapBlock = NULL;
		}
	}
}


status_t
VMAnonymousCache::Resize(off_t newSize, int priority)
{
	_FreeSwapPageRange(newSize + B_PAGE_SIZE - 1,
		virtual_end + B_PAGE_SIZE - 1);
	return VMCache::Resize(newSize, priority);
}


status_t
VMAnonymousCache::Rebase(off_t newBase, int priority)
{
	_FreeSwapPageRange(virtual_base, newBase);
	return VMCache::Rebase(newBase, priority);
}


status_t
VMAnonymousCache::Discard(off_t offset, off_t size)
{
	_FreeSwapPageRange(offset, offset + size);
	return VMCache::Discard(offset, size);
}


/*!	Moves the swap pages for the given range from the source cache into this
	cache. Both caches must be locked.
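	Swap blocks are created in this cache on demand; source swap blocks that
	become empty are freed.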
567 */ 568 status_t 569 VMAnonymousCache::Adopt(VMCache* _source, off_t offset, off_t size, 570 off_t newOffset) 571 { 572 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 573 if (source == NULL) { 574 panic("VMAnonymousCache::Adopt(): adopt from incompatible cache %p " 575 "requested", _source); 576 return B_ERROR; 577 } 578 579 off_t pageIndex = newOffset >> PAGE_SHIFT; 580 off_t sourcePageIndex = offset >> PAGE_SHIFT; 581 off_t sourceEndPageIndex = (offset + size + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 582 swap_block* swapBlock = NULL; 583 584 WriteLocker locker(sSwapHashLock); 585 586 while (sourcePageIndex < sourceEndPageIndex 587 && source->fAllocatedSwapSize > 0) { 588 swap_addr_t left 589 = SWAP_BLOCK_PAGES - (sourcePageIndex & SWAP_BLOCK_MASK); 590 591 swap_hash_key sourceKey = { source, sourcePageIndex }; 592 swap_block* sourceSwapBlock = sSwapHashTable.Lookup(sourceKey); 593 if (sourceSwapBlock == NULL || sourceSwapBlock->used == 0) { 594 sourcePageIndex += left; 595 pageIndex += left; 596 swapBlock = NULL; 597 continue; 598 } 599 600 for (; left > 0 && sourceSwapBlock->used > 0; 601 left--, sourcePageIndex++, pageIndex++) { 602 603 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 604 if (swapBlock == NULL || blockIndex == 0) { 605 swap_hash_key key = { this, pageIndex }; 606 swapBlock = sSwapHashTable.Lookup(key); 607 608 if (swapBlock == NULL) { 609 swapBlock = (swap_block*)object_cache_alloc(sSwapBlockCache, 610 CACHE_DONT_WAIT_FOR_MEMORY 611 | CACHE_DONT_LOCK_KERNEL_SPACE); 612 if (swapBlock == NULL) 613 return B_NO_MEMORY; 614 615 swapBlock->key.cache = this; 616 swapBlock->key.page_index 617 = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 618 swapBlock->used = 0; 619 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 620 swapBlock->swap_slots[i] = SWAP_SLOT_NONE; 621 622 sSwapHashTable.InsertUnchecked(swapBlock); 623 } 624 } 625 626 swap_addr_t sourceBlockIndex = sourcePageIndex & SWAP_BLOCK_MASK; 627 swap_addr_t slotIndex 628 = sourceSwapBlock->swap_slots[sourceBlockIndex]; 629 if (slotIndex == SWAP_SLOT_NONE) 630 continue; 631 632 ASSERT(swapBlock->swap_slots[blockIndex] == SWAP_SLOT_NONE); 633 634 swapBlock->swap_slots[blockIndex] = slotIndex; 635 swapBlock->used++; 636 fAllocatedSwapSize += B_PAGE_SIZE; 637 638 sourceSwapBlock->swap_slots[sourceBlockIndex] = SWAP_SLOT_NONE; 639 sourceSwapBlock->used--; 640 source->fAllocatedSwapSize -= B_PAGE_SIZE; 641 642 TRACE("adopted slot %#" B_PRIx32 " from %p at page %" B_PRIdOFF 643 " to %p at page %" B_PRIdOFF "\n", slotIndex, source, 644 sourcePageIndex, this, pageIndex); 645 } 646 647 if (left > 0) { 648 sourcePageIndex += left; 649 pageIndex += left; 650 swapBlock = NULL; 651 } 652 653 if (sourceSwapBlock->used == 0) { 654 // All swap pages have been adopted, we can discard the swap block. 655 sSwapHashTable.RemoveUnchecked(sourceSwapBlock); 656 object_cache_free(sSwapBlockCache, sourceSwapBlock, 657 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 658 } 659 } 660 661 locker.Unlock(); 662 663 return VMCache::Adopt(source, offset, size, newOffset); 664 } 665 666 667 status_t 668 VMAnonymousCache::Commit(off_t size, int priority) 669 { 670 TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size); 671 672 // If we can overcommit, we don't commit here, but in Fault(). We always 673 // unreserve memory, if we're asked to shrink our commitment, though. 
	if (fCanOvercommit && size > committed_size) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size, priority);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


bool
VMAnonymousCache::DebugHasPage(off_t offset)
{
	off_t pageIndex = offset >> PAGE_SHIFT;
	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	if (swap == NULL)
		return false;

	return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

		swap_file* swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie,
			pos, vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count,
	uint32 flags, generic_size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	page_num_t totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	page_num_t pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		generic_addr_t vectorBase = vecs[i].base;
		generic_size_t vectorLength = vecs[i].length;
		page_num_t n = pageCount;

		for (page_num_t j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// Try to allocate n slots; if that fails, try n/2, n/4, etc.
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.
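			// Translate the global slot index into a byte offset within the
			// swap file that owns the slot.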
			swap_file* swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot)
				* B_PAGE_SIZE;

			generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE;
			generic_io_vec vector[1];
			vector->base = vectorBase;
			vector->length = length;

			status_t status = vfs_write_pages(swapFile->vnode,
				swapFile->cookie, pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = vectorBase + n * B_PAGE_SIZE;
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs,
	size_t count, generic_size_t numBytes, uint32 flags,
	AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our
	// own callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	// We can write the page if we have not used all of our committed swap
	// space, or if the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
}


int32
VMAnonymousCache::MaxPagesPerAsyncWrite() const
{
	return 1;
}


status_t
VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
{
	if (fGuardedSize > 0) {
		uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
		guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
		guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif
		// report stack fault, guard page hit!
		if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
			TRACE(("stack overflow!\n"));
			return B_BAD_ADDRESS;
		}
	}

	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fPrecommittedPages == 0) {
			// never commit more than needed
			if (committed_size / B_PAGE_SIZE > page_count)
				return B_BAD_HANDLER;

			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
				fCommittedSwapSize += B_PAGE_SIZE;
			} else {
				int priority = aspace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) {
					dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve "
						"%d bytes of RAM.\n", this, (int)B_PAGE_SIZE);
					return B_NO_MEMORY;
				}
			}

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault.
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize, VM_PRIORITY_USER);

	// Move all non-shadowed swap pages from the source to the consumer cache.
	// Also remove all source pages that are shadowed by consumer swap pages.
	_MergeSwapPages(source);

	// Move all non-shadowed pages from the source to the consumer cache.
	if (source->page_count < page_count)
		_MergePagesSmallerSource(source);
	else
		_MergePagesSmallerConsumer(source);
}


void
VMAnonymousCache::DeleteObject()
{
	object_cache_delete(gAnonymousCacheObjectCache, this);
}


void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block* swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
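				// The hash lock is dropped while sleeping; the lookup is
				// redone afterwards, since another thread may have inserted
				// the block in the meantime.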
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block* swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: "
		"%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size,
		fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of
	// swap space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved "
				"only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to "
			"reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has fewer pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			MovePage(page);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has fewer pages than the source, so we move
	// the consumer's pages to the source (freeing shadowed ones), and
	// finally move all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			ASSERT_PRINT(sourcePage->WiredCount() == 0
					&& sourcePage->mappings.IsEmpty(),
				"sourcePage: %p, page: %p", sourcePage, page);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither the source nor the consumer have swap pages, we don't have
	// to do anything.
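	// Otherwise the source's range is walked in swap-block-sized,
	// block-aligned steps, so that each iteration deals with at most one
	// swap block per cache.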
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

	for (off_t offset = source->virtual_base
			& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
		offset < source->virtual_end;
		offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		off_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move
			// it to the consumer. At any rate, the source cache doesn't own
			// it anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over
		// by the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source
			// swap block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and
			// there's no swap block in the consumer cache. Just take over
			// the source swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and
			// there's already a swap block in the consumer cache. Copy the
			// respective swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


// #pragma mark -


// TODO: This can be removed if we get BFS uuids.
struct VolumeInfo {
	char name[B_FILE_NAME_LENGTH];
	char device[B_FILE_NAME_LENGTH];
	char filesystem[B_OS_NAME_LENGTH];
	off_t capacity;
};


class PartitionScorer : public KPartitionVisitor {
public:
	PartitionScorer(VolumeInfo& volumeInfo)
		:
		fBestPartition(NULL),
		fBestScore(-1),
		fVolumeInfo(volumeInfo)
	{
	}

	virtual bool VisitPre(KPartition* partition)
	{
		if (!partition->ContainsFileSystem())
			return false;

		KPath path;
		partition->GetPath(&path);

		int score = 0;
		if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0)
			score += 4;
		if (strcmp(fVolumeInfo.device, path.Path()) == 0)
			score += 3;
		if (fVolumeInfo.capacity == partition->Size())
			score += 2;
		if (strcmp(fVolumeInfo.filesystem,
				partition->DiskSystem()->ShortName()) == 0) {
			score += 1;
		}
		if (score >= 4 && score > fBestScore) {
			fBestPartition = partition;
			fBestScore = score;
		}

		return false;
	}

	KPartition* fBestPartition;

private:
	int32		fBestScore;
	VolumeInfo&	fVolumeInfo;
};


status_t
swap_file_add(const char* path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode* node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file* swap = (swap_file*)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
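	// Each swap file owns a disjoint range of the global slot address space;
	// this is what allows find_swap_file() to map a slot index back to its
	// file.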
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char* path)
{
	vnode* node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file* swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// If this file is currently in use, we can't delete it.
	// TODO: Mark this swap file as being deleted, and remove it once all of
	// its swap space has been released.
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block),
		sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device -- when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	bool swapEnabled = true;
	bool swapAutomatic = true;
	off_t swapSize = 0;

	dev_t swapDeviceID = -1;
	VolumeInfo selectedVolume = {};

	void* settings = load_driver_settings("virtual_memory");

	if (settings != NULL) {
		// We pass a lot of information on the swap device; this is mostly to
		// ensure that we are dealing with the same device that was
		// configured.

		// TODO: Some kind of BFS uuid would be great here :)
		const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL);

		if (enabled != NULL) {
			swapEnabled = get_driver_boolean_parameter(settings, "vm",
				true, false);
			swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto",
				true, false);

			if (swapEnabled && !swapAutomatic) {
				const char* size = get_driver_parameter(settings, "swap_size",
					NULL, NULL);
				const char* volume = get_driver_parameter(settings,
					"swap_volume_name", NULL, NULL);
				const char* device = get_driver_parameter(settings,
					"swap_volume_device", NULL, NULL);
				const char* filesystem = get_driver_parameter(settings,
					"swap_volume_filesystem", NULL, NULL);
				const char* capacity = get_driver_parameter(settings,
					"swap_volume_capacity", NULL, NULL);

				if (size != NULL && device != NULL && volume != NULL
					&& filesystem != NULL && capacity != NULL) {
					// User specified a size / volume that seems valid
					swapAutomatic = false;
					swapSize = atoll(size);
					strlcpy(selectedVolume.name, volume,
						sizeof(selectedVolume.name));
					strlcpy(selectedVolume.device, device,
						sizeof(selectedVolume.device));
					strlcpy(selectedVolume.filesystem, filesystem,
						sizeof(selectedVolume.filesystem));
					selectedVolume.capacity = atoll(capacity);
				} else {
					// Something isn't right with the swap config; fall back
					// to automatic swap.
					swapAutomatic = true;
					dprintf("%s: virtual_memory configuration is invalid, "
						"using automatic swap\n", __func__);
				}
			}
		}
		unload_driver_settings(settings);
	}

	if (swapAutomatic) {
		swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE;
		if (swapSize <= (1024 * 1024 * 1024)) {
			// Memory under 1GB? Double the swap.
			swapSize *= 2;
		}
		// Automatic swap defaults to the boot device.
		swapDeviceID = gBootDevice;
	}

	if (!swapEnabled || swapSize < B_PAGE_SIZE) {
		dprintf("%s: virtual_memory is disabled\n", __func__);
		return;
	}

	if (!swapAutomatic && swapDeviceID < 0) {
		// If the user specified a swap volume, and no swap device has been
		// chosen yet...
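		// ...scan all fixed disk devices and score their partitions against
		// the configured volume info (see PartitionScorer above).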
		KDiskDeviceManager::CreateDefault();
		KDiskDeviceManager* manager = KDiskDeviceManager::Default();
		PartitionScorer visitor(selectedVolume);

		KDiskDevice* device;
		int32 cookie = 0;
		while ((device = manager->NextDevice(&cookie)) != NULL) {
			if (device->IsReadOnlyMedia() || device->IsWriteOnce()
				|| device->IsRemovable()) {
				continue;
			}
			device->VisitEachDescendant(&visitor);
		}

		if (!visitor.fBestPartition) {
			dprintf("%s: Can't find configured swap partition '%s'\n",
				__func__, selectedVolume.name);
		} else {
			if (visitor.fBestPartition->IsMounted())
				swapDeviceID = visitor.fBestPartition->VolumeID();
			else {
				KPath devPath, mountPoint;
				visitor.fBestPartition->GetPath(&devPath);
				get_mount_point(visitor.fBestPartition, &mountPoint);
				const char* mountPath = mountPoint.Path();
				mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO);
				swapDeviceID = _kern_mount(mountPath, devPath.Path(),
					NULL, 0, NULL, 0);
				if (swapDeviceID < 0) {
					dprintf("%s: Can't mount configured swap partition '%s'\n",
						__func__, selectedVolume.name);
				}
			}
		}
	}

	if (swapDeviceID < 0)
		swapDeviceID = gBootDevice;

	// We now have a swapDeviceID which is used for the swap file.

	KPath path;
	struct fs_info info;
	_kern_read_fs_info(swapDeviceID, &info);
	if (swapDeviceID == gBootDevice)
		path = kDefaultSwapPath;
	else {
		vfs_entry_ref_to_path(info.dev, info.root, ".", true,
			path.LockBuffer(), path.BufferSize());
		path.UnlockBuffer();
		path.Append("swap");
	}

	const char* swapPath = path.Path();

	// Swap size limits prevent oversized swap files.
	if (swapAutomatic) {
		off_t existingSwapSize = 0;
		struct stat existingSwapStat;
		if (stat(swapPath, &existingSwapStat) == 0)
			existingSwapSize = existingSwapStat.st_size;

		off_t freeSpace = info.free_blocks * info.block_size
			+ existingSwapSize;

		// Adjust automatic swap to a maximum of 25% of the free space.
		if (swapSize > (freeSpace / 4))
			swapSize = (freeSpace / 4);
	}

	// Create the swap file.
	int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("%s: Can't open/create %s: %s\n", __func__,
			swapPath, strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = swapSize;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n",
			__func__, swapPath, swapSize, strerror(error));
	}

	close(fd);

	error = swap_file_add(swapPath);
	if (error != B_OK) {
		dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath,
			strerror(error));
	}
}


//!	Used by the page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page* page)
{
	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* swapFile = it.Next();) {
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;
	}

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}


#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(system_info* info)
{
#if ENABLE_SWAP_SUPPORT
	MutexLocker locker(sSwapFileListLock);
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* swapFile = it.Next();) {
		info->max_swap_pages += swapFile->last_slot - swapFile->first_slot;
		info->free_swap_pages += swapFile->bmp->free_slots;
	}
#else
	info->max_swap_pages = 0;
	info->free_swap_pages = 0;
#endif
}