1 /* 2 * Copyright 2008, Zhao Shuai, upczhsh@163.com. 3 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 4 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 5 * Distributed under the terms of the MIT License. 6 * 7 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 8 * Distributed under the terms of the NewOS License. 9 * 10 * Copyright 2011-2012 Haiku, Inc. All rights reserved. 11 * Distributed under the terms of the MIT License. 12 * 13 * Authors: 14 * Hamish Morrison, hamish@lavabit.com 15 * Alexander von Gluck IV, kallisti5@unixzen.com 16 */ 17 18 19 #include "VMAnonymousCache.h" 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include <FindDirectory.h> 28 #include <KernelExport.h> 29 #include <NodeMonitor.h> 30 31 #include <arch_config.h> 32 #include <boot_device.h> 33 #include <disk_device_manager/KDiskDevice.h> 34 #include <disk_device_manager/KDiskDeviceManager.h> 35 #include <disk_device_manager/KDiskSystem.h> 36 #include <disk_device_manager/KPartitionVisitor.h> 37 #include <driver_settings.h> 38 #include <fs/fd.h> 39 #include <fs/KPath.h> 40 #include <fs_info.h> 41 #include <fs_interface.h> 42 #include <heap.h> 43 #include <kernel_daemon.h> 44 #include <slab/Slab.h> 45 #include <syscalls.h> 46 #include <system_info.h> 47 #include <tracing.h> 48 #include <util/AutoLock.h> 49 #include <util/DoublyLinkedList.h> 50 #include <util/OpenHashTable.h> 51 #include <util/RadixBitmap.h> 52 #include <vfs.h> 53 #include <vm/vm.h> 54 #include <vm/vm_page.h> 55 #include <vm/vm_priv.h> 56 #include <vm/VMAddressSpace.h> 57 58 #include "IORequest.h" 59 #include "VMUtils.h" 60 61 62 #if ENABLE_SWAP_SUPPORT 63 64 //#define TRACE_VM_ANONYMOUS_CACHE 65 #ifdef TRACE_VM_ANONYMOUS_CACHE 66 # define TRACE(x...) dprintf(x) 67 #else 68 # define TRACE(x...) do { } while (false) 69 #endif 70 71 72 // number of free swap blocks the object cache shall minimally have 73 #define MIN_SWAP_BLOCK_RESERVE 4096 74 75 // interval the has resizer is triggered (in 0.1s) 76 #define SWAP_HASH_RESIZE_INTERVAL 5 77 78 #define INITIAL_SWAP_HASH_SIZE 1024 79 80 #define SWAP_SLOT_NONE RADIX_SLOT_NONE 81 82 #define SWAP_BLOCK_PAGES 32 83 #define SWAP_BLOCK_SHIFT 5 /* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */ 84 #define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1) 85 86 87 static const char* const kDefaultSwapPath = "/var/swap"; 88 89 struct swap_file : DoublyLinkedListLinkImpl<swap_file> { 90 int fd; 91 struct vnode* vnode; 92 void* cookie; 93 swap_addr_t first_slot; 94 swap_addr_t last_slot; 95 radix_bitmap* bmp; 96 }; 97 98 struct swap_hash_key { 99 VMAnonymousCache *cache; 100 off_t page_index; // page index in the cache 101 }; 102 103 // Each swap block contains swap address information for 104 // SWAP_BLOCK_PAGES continuous pages from the same cache 105 struct swap_block { 106 swap_block* hash_link; 107 swap_hash_key key; 108 uint32 used; 109 swap_addr_t swap_slots[SWAP_BLOCK_PAGES]; 110 }; 111 112 struct SwapHashTableDefinition { 113 typedef swap_hash_key KeyType; 114 typedef swap_block ValueType; 115 116 SwapHashTableDefinition() {} 117 118 size_t HashKey(const swap_hash_key& key) const 119 { 120 off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT; 121 VMAnonymousCache* cache = key.cache; 122 return blockIndex ^ (size_t)(int*)cache; 123 } 124 125 size_t Hash(const swap_block* value) const 126 { 127 return HashKey(value->key); 128 } 129 130 bool Compare(const swap_hash_key& key, const swap_block* value) const 131 { 132 return (key.page_index & ~(off_t)SWAP_BLOCK_MASK) 133 == (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK) 134 && key.cache == value->key.cache; 135 } 136 137 swap_block*& GetLink(swap_block* value) const 138 { 139 return value->hash_link; 140 } 141 }; 142 143 typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable; 144 typedef DoublyLinkedList<swap_file> SwapFileList; 145 146 static SwapHashTable sSwapHashTable; 147 static rw_lock sSwapHashLock; 148 149 static SwapFileList sSwapFileList; 150 static mutex sSwapFileListLock; 151 static swap_file* sSwapFileAlloc = NULL; // allocate from here 152 static uint32 sSwapFileCount = 0; 153 154 static off_t sAvailSwapSpace = 0; 155 static mutex sAvailSwapSpaceLock; 156 157 static object_cache* sSwapBlockCache; 158 159 160 #if SWAP_TRACING 161 namespace SwapTracing { 162 163 class SwapTraceEntry : public AbstractTraceEntry { 164 public: 165 SwapTraceEntry(VMAnonymousCache* cache) 166 : 167 fCache(cache) 168 { 169 } 170 171 protected: 172 VMAnonymousCache* fCache; 173 }; 174 175 176 class ReadPage : public SwapTraceEntry { 177 public: 178 ReadPage(VMAnonymousCache* cache, page_num_t pageIndex, 179 swap_addr_t swapSlotIndex) 180 : 181 SwapTraceEntry(cache), 182 fPageIndex(pageIndex), 183 fSwapSlotIndex(swapSlotIndex) 184 { 185 Initialized(); 186 } 187 188 virtual void AddDump(TraceOutput& out) 189 { 190 out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu", 191 fCache, fPageIndex, fSwapSlotIndex); 192 } 193 194 private: 195 page_num_t fPageIndex; 196 swap_addr_t fSwapSlotIndex; 197 }; 198 199 200 class WritePage : public SwapTraceEntry { 201 public: 202 WritePage(VMAnonymousCache* cache, page_num_t pageIndex, 203 swap_addr_t swapSlotIndex) 204 : 205 SwapTraceEntry(cache), 206 fPageIndex(pageIndex), 207 fSwapSlotIndex(swapSlotIndex) 208 { 209 Initialized(); 210 } 211 212 virtual void AddDump(TraceOutput& out) 213 { 214 out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu", 215 fCache, fPageIndex, fSwapSlotIndex); 216 } 217 218 private: 219 page_num_t fPageIndex; 220 swap_addr_t fSwapSlotIndex; 221 }; 222 223 } // namespace SwapTracing 224 225 # define T(x) new(std::nothrow) SwapTracing::x; 226 #else 227 # define T(x) ; 228 #endif 229 230 231 static int 232 dump_swap_info(int argc, char** argv) 233 { 234 swap_addr_t totalSwapPages = 0; 235 swap_addr_t freeSwapPages = 0; 236 237 kprintf("swap files:\n"); 238 239 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 240 swap_file* file = it.Next();) { 241 swap_addr_t total = file->last_slot - file->first_slot; 242 kprintf(" vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32 243 "\n", file->vnode, total, file->bmp->free_slots); 244 245 totalSwapPages += total; 246 freeSwapPages += file->bmp->free_slots; 247 } 248 249 kprintf("\n"); 250 kprintf("swap space in pages:\n"); 251 kprintf("total: %9" B_PRIu32 "\n", totalSwapPages); 252 kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE); 253 kprintf("reserved: %9" B_PRIdOFF "\n", 254 totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE); 255 kprintf("used: %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages); 256 kprintf("free: %9" B_PRIu32 "\n", freeSwapPages); 257 258 return 0; 259 } 260 261 262 static swap_addr_t 263 swap_slot_alloc(uint32 count) 264 { 265 mutex_lock(&sSwapFileListLock); 266 267 if (sSwapFileList.IsEmpty()) { 268 mutex_unlock(&sSwapFileListLock); 269 panic("swap_slot_alloc(): no swap file in the system\n"); 270 return SWAP_SLOT_NONE; 271 } 272 273 // since radix bitmap could not handle more than 32 pages, we return 274 // SWAP_SLOT_NONE, this forces Write() adjust allocation amount 275 if (count > BITMAP_RADIX) { 276 mutex_unlock(&sSwapFileListLock); 277 return SWAP_SLOT_NONE; 278 } 279 280 swap_addr_t j, addr = SWAP_SLOT_NONE; 281 for (j = 0; j < sSwapFileCount; j++) { 282 if (sSwapFileAlloc == NULL) 283 sSwapFileAlloc = sSwapFileList.First(); 284 285 addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count); 286 if (addr != SWAP_SLOT_NONE) { 287 addr += sSwapFileAlloc->first_slot; 288 break; 289 } 290 291 // this swap_file is full, find another 292 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 293 } 294 295 if (j == sSwapFileCount) { 296 mutex_unlock(&sSwapFileListLock); 297 panic("swap_slot_alloc: swap space exhausted!\n"); 298 return SWAP_SLOT_NONE; 299 } 300 301 // if this swap file has used more than 90% percent of its space 302 // switch to another 303 if (sSwapFileAlloc->bmp->free_slots 304 < (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) { 305 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 306 } 307 308 mutex_unlock(&sSwapFileListLock); 309 310 return addr; 311 } 312 313 314 static swap_file* 315 find_swap_file(swap_addr_t slotIndex) 316 { 317 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 318 swap_file* swapFile = it.Next();) { 319 if (slotIndex >= swapFile->first_slot 320 && slotIndex < swapFile->last_slot) { 321 return swapFile; 322 } 323 } 324 325 panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n", 326 slotIndex); 327 return NULL; 328 } 329 330 331 static void 332 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count) 333 { 334 if (slotIndex == SWAP_SLOT_NONE) 335 return; 336 337 mutex_lock(&sSwapFileListLock); 338 swap_file* swapFile = find_swap_file(slotIndex); 339 slotIndex -= swapFile->first_slot; 340 radix_bitmap_dealloc(swapFile->bmp, slotIndex, count); 341 mutex_unlock(&sSwapFileListLock); 342 } 343 344 345 static off_t 346 swap_space_reserve(off_t amount) 347 { 348 mutex_lock(&sAvailSwapSpaceLock); 349 if (sAvailSwapSpace >= amount) 350 sAvailSwapSpace -= amount; 351 else { 352 amount = sAvailSwapSpace; 353 sAvailSwapSpace = 0; 354 } 355 mutex_unlock(&sAvailSwapSpaceLock); 356 357 return amount; 358 } 359 360 361 static void 362 swap_space_unreserve(off_t amount) 363 { 364 mutex_lock(&sAvailSwapSpaceLock); 365 sAvailSwapSpace += amount; 366 mutex_unlock(&sAvailSwapSpaceLock); 367 } 368 369 370 static void 371 swap_hash_resizer(void*, int) 372 { 373 WriteLocker locker(sSwapHashLock); 374 375 size_t size; 376 void* allocation; 377 378 do { 379 size = sSwapHashTable.ResizeNeeded(); 380 if (size == 0) 381 return; 382 383 locker.Unlock(); 384 385 allocation = malloc(size); 386 if (allocation == NULL) 387 return; 388 389 locker.Lock(); 390 391 } while (!sSwapHashTable.Resize(allocation, size)); 392 } 393 394 395 // #pragma mark - 396 397 398 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback { 399 public: 400 WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback) 401 : 402 StackableAsyncIOCallback(callback), 403 fCache(cache) 404 { 405 } 406 407 void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot) 408 { 409 fPageIndex = pageIndex; 410 fSlotIndex = slotIndex; 411 fNewSlot = newSlot; 412 } 413 414 virtual void IOFinished(status_t status, bool partialTransfer, 415 generic_size_t bytesTransferred) 416 { 417 if (fNewSlot) { 418 if (status == B_OK) { 419 fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1); 420 } else { 421 AutoLocker<VMCache> locker(fCache); 422 fCache->fAllocatedSwapSize -= B_PAGE_SIZE; 423 locker.Unlock(); 424 425 swap_slot_dealloc(fSlotIndex, 1); 426 } 427 } 428 429 fNextCallback->IOFinished(status, partialTransfer, bytesTransferred); 430 431 delete this; 432 } 433 434 private: 435 VMAnonymousCache* fCache; 436 page_num_t fPageIndex; 437 swap_addr_t fSlotIndex; 438 bool fNewSlot; 439 }; 440 441 442 // #pragma mark - 443 444 445 VMAnonymousCache::~VMAnonymousCache() 446 { 447 // free allocated swap space and swap block 448 for (off_t offset = virtual_base, toFree = fAllocatedSwapSize; 449 offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) { 450 swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT); 451 if (slotIndex == SWAP_SLOT_NONE) 452 continue; 453 454 swap_slot_dealloc(slotIndex, 1); 455 _SwapBlockFree(offset >> PAGE_SHIFT, 1); 456 toFree -= B_PAGE_SIZE; 457 } 458 459 swap_space_unreserve(fCommittedSwapSize); 460 if (committed_size > fCommittedSwapSize) 461 vm_unreserve_memory(committed_size - fCommittedSwapSize); 462 } 463 464 465 status_t 466 VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages, 467 int32 numGuardPages, uint32 allocationFlags) 468 { 469 TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, " 470 "numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32 471 ")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages, 472 numGuardPages); 473 474 status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags); 475 if (error != B_OK) 476 return error; 477 478 fCanOvercommit = canOvercommit; 479 fHasPrecommitted = false; 480 fPrecommittedPages = min_c(numPrecommittedPages, 255); 481 fGuardedSize = numGuardPages * B_PAGE_SIZE; 482 fCommittedSwapSize = 0; 483 fAllocatedSwapSize = 0; 484 485 return B_OK; 486 } 487 488 489 status_t 490 VMAnonymousCache::Resize(off_t newSize, int priority) 491 { 492 // If the cache size shrinks, drop all swap pages beyond the new size. 493 if (fAllocatedSwapSize > 0) { 494 off_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 495 swap_block* swapBlock = NULL; 496 497 for (off_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 498 pageIndex < oldPageCount && fAllocatedSwapSize > 0; pageIndex++) { 499 500 WriteLocker locker(sSwapHashLock); 501 502 // Get the swap slot index for the page. 503 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 504 if (swapBlock == NULL || blockIndex == 0) { 505 swap_hash_key key = { this, pageIndex }; 506 swapBlock = sSwapHashTable.Lookup(key); 507 508 if (swapBlock == NULL) { 509 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES); 510 continue; 511 } 512 } 513 514 swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex]; 515 vm_page* page; 516 if (slotIndex != SWAP_SLOT_NONE 517 && ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL 518 || !page->busy)) { 519 // TODO: We skip (i.e. leak) swap space of busy pages, since 520 // there could be I/O going on (paging in/out). Waiting is 521 // not an option as 1. unlocking the cache means that new 522 // swap pages could be added in a range we've already 523 // cleared (since the cache still has the old size) and 2. 524 // we'd risk a deadlock in case we come from the file cache 525 // and the FS holds the node's write-lock. We should mark 526 // the page invalid and let the one responsible clean up. 527 // There's just no such mechanism yet. 528 swap_slot_dealloc(slotIndex, 1); 529 fAllocatedSwapSize -= B_PAGE_SIZE; 530 531 swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE; 532 if (--swapBlock->used == 0) { 533 // All swap pages have been freed -- we can discard the swap 534 // block. 535 sSwapHashTable.RemoveUnchecked(swapBlock); 536 object_cache_free(sSwapBlockCache, swapBlock, 537 CACHE_DONT_WAIT_FOR_MEMORY 538 | CACHE_DONT_LOCK_KERNEL_SPACE); 539 } 540 } 541 } 542 } 543 544 return VMCache::Resize(newSize, priority); 545 } 546 547 548 status_t 549 VMAnonymousCache::Commit(off_t size, int priority) 550 { 551 TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size); 552 553 // If we can overcommit, we don't commit here, but in Fault(). We always 554 // unreserve memory, if we're asked to shrink our commitment, though. 555 if (fCanOvercommit && size > committed_size) { 556 if (fHasPrecommitted) 557 return B_OK; 558 559 // pre-commit some pages to make a later failure less probable 560 fHasPrecommitted = true; 561 uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE; 562 if (size > precommitted) 563 size = precommitted; 564 } 565 566 return _Commit(size, priority); 567 } 568 569 570 bool 571 VMAnonymousCache::HasPage(off_t offset) 572 { 573 if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE) 574 return true; 575 576 return false; 577 } 578 579 580 bool 581 VMAnonymousCache::DebugHasPage(off_t offset) 582 { 583 off_t pageIndex = offset >> PAGE_SHIFT; 584 swap_hash_key key = { this, pageIndex }; 585 swap_block* swap = sSwapHashTable.Lookup(key); 586 if (swap == NULL) 587 return false; 588 589 return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE; 590 } 591 592 593 status_t 594 VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count, 595 uint32 flags, generic_size_t* _numBytes) 596 { 597 off_t pageIndex = offset >> PAGE_SHIFT; 598 599 for (uint32 i = 0, j = 0; i < count; i = j) { 600 swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i); 601 for (j = i + 1; j < count; j++) { 602 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j); 603 if (slotIndex != startSlotIndex + j - i) 604 break; 605 } 606 607 T(ReadPage(this, pageIndex, startSlotIndex)); 608 // TODO: Assumes that only one page is read. 609 610 swap_file* swapFile = find_swap_file(startSlotIndex); 611 612 off_t pos = (off_t)(startSlotIndex - swapFile->first_slot) 613 * B_PAGE_SIZE; 614 615 status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos, 616 vecs + i, j - i, flags, _numBytes); 617 if (status != B_OK) 618 return status; 619 } 620 621 return B_OK; 622 } 623 624 625 status_t 626 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count, 627 uint32 flags, generic_size_t* _numBytes) 628 { 629 off_t pageIndex = offset >> PAGE_SHIFT; 630 631 AutoLocker<VMCache> locker(this); 632 633 page_num_t totalPages = 0; 634 for (uint32 i = 0; i < count; i++) { 635 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 636 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages); 637 if (slotIndex != SWAP_SLOT_NONE) { 638 swap_slot_dealloc(slotIndex, pageCount); 639 _SwapBlockFree(pageIndex + totalPages, pageCount); 640 fAllocatedSwapSize -= pageCount * B_PAGE_SIZE; 641 } 642 643 totalPages += pageCount; 644 } 645 646 off_t totalSize = totalPages * B_PAGE_SIZE; 647 if (fAllocatedSwapSize + totalSize > fCommittedSwapSize) 648 return B_ERROR; 649 650 fAllocatedSwapSize += totalSize; 651 locker.Unlock(); 652 653 page_num_t pagesLeft = totalPages; 654 totalPages = 0; 655 656 for (uint32 i = 0; i < count; i++) { 657 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 658 659 generic_addr_t vectorBase = vecs[i].base; 660 generic_size_t vectorLength = vecs[i].length; 661 page_num_t n = pageCount; 662 663 for (page_num_t j = 0; j < pageCount; j += n) { 664 swap_addr_t slotIndex; 665 // try to allocate n slots, if fail, try to allocate n/2 666 while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2) 667 n >>= 1; 668 669 if (slotIndex == SWAP_SLOT_NONE) 670 panic("VMAnonymousCache::Write(): can't allocate swap space\n"); 671 672 T(WritePage(this, pageIndex, slotIndex)); 673 // TODO: Assumes that only one page is written. 674 675 swap_file* swapFile = find_swap_file(slotIndex); 676 677 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 678 679 generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE; 680 generic_io_vec vector[1]; 681 vector->base = vectorBase; 682 vector->length = length; 683 684 status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie, 685 pos, vector, 1, flags, &length); 686 if (status != B_OK) { 687 locker.Lock(); 688 fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE; 689 locker.Unlock(); 690 691 swap_slot_dealloc(slotIndex, n); 692 return status; 693 } 694 695 _SwapBlockBuild(pageIndex + totalPages, slotIndex, n); 696 pagesLeft -= n; 697 698 if (n != pageCount) { 699 vectorBase = vectorBase + n * B_PAGE_SIZE; 700 vectorLength -= n * B_PAGE_SIZE; 701 } 702 } 703 704 totalPages += pageCount; 705 } 706 707 ASSERT(pagesLeft == 0); 708 return B_OK; 709 } 710 711 712 status_t 713 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs, 714 size_t count, generic_size_t numBytes, uint32 flags, 715 AsyncIOCallback* _callback) 716 { 717 // TODO: Currently this method is only used for single pages. Either make 718 // more flexible use of it or change the interface! 719 // This implementation relies on the current usage! 720 ASSERT(count == 1); 721 ASSERT(numBytes <= B_PAGE_SIZE); 722 723 page_num_t pageIndex = offset >> PAGE_SHIFT; 724 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex); 725 bool newSlot = slotIndex == SWAP_SLOT_NONE; 726 727 // If the page doesn't have any swap space yet, allocate it. 728 if (newSlot) { 729 AutoLocker<VMCache> locker(this); 730 if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) { 731 _callback->IOFinished(B_ERROR, true, 0); 732 return B_ERROR; 733 } 734 735 fAllocatedSwapSize += B_PAGE_SIZE; 736 737 slotIndex = swap_slot_alloc(1); 738 } 739 740 // create our callback 741 WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0 742 ? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback) 743 : new(std::nothrow) WriteCallback(this, _callback); 744 if (callback == NULL) { 745 if (newSlot) { 746 AutoLocker<VMCache> locker(this); 747 fAllocatedSwapSize -= B_PAGE_SIZE; 748 locker.Unlock(); 749 750 swap_slot_dealloc(slotIndex, 1); 751 } 752 _callback->IOFinished(B_NO_MEMORY, true, 0); 753 return B_NO_MEMORY; 754 } 755 // TODO: If the page already had swap space assigned, we don't need an own 756 // callback. 757 758 callback->SetTo(pageIndex, slotIndex, newSlot); 759 760 T(WritePage(this, pageIndex, slotIndex)); 761 762 // write the page asynchrounously 763 swap_file* swapFile = find_swap_file(slotIndex); 764 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 765 766 return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos, 767 vecs, 1, numBytes, flags, callback); 768 } 769 770 771 bool 772 VMAnonymousCache::CanWritePage(off_t offset) 773 { 774 // We can write the page, if we have not used all of our committed swap 775 // space or the page already has a swap slot assigned. 776 return fAllocatedSwapSize < fCommittedSwapSize 777 || _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE; 778 } 779 780 781 int32 782 VMAnonymousCache::MaxPagesPerAsyncWrite() const 783 { 784 return 1; 785 } 786 787 788 status_t 789 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset) 790 { 791 if (fGuardedSize > 0) { 792 uint32 guardOffset; 793 794 #ifdef STACK_GROWS_DOWNWARDS 795 guardOffset = 0; 796 #elif defined(STACK_GROWS_UPWARDS) 797 guardOffset = virtual_size - fGuardedSize; 798 #else 799 # error Stack direction has not been defined in arch_config.h 800 #endif 801 // report stack fault, guard page hit! 802 if (offset >= guardOffset && offset < guardOffset + fGuardedSize) { 803 TRACE(("stack overflow!\n")); 804 return B_BAD_ADDRESS; 805 } 806 } 807 808 if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) { 809 if (fPrecommittedPages == 0) { 810 // never commit more than needed 811 if (committed_size / B_PAGE_SIZE > page_count) 812 return B_BAD_HANDLER; 813 814 // try to commit additional swap space/memory 815 if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) { 816 fCommittedSwapSize += B_PAGE_SIZE; 817 } else { 818 int priority = aspace == VMAddressSpace::Kernel() 819 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 820 if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) { 821 dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve " 822 "%d bytes of RAM.\n", this, (int)B_PAGE_SIZE); 823 return B_NO_MEMORY; 824 } 825 } 826 827 committed_size += B_PAGE_SIZE; 828 } else 829 fPrecommittedPages--; 830 } 831 832 // This will cause vm_soft_fault() to handle the fault 833 return B_BAD_HANDLER; 834 } 835 836 837 void 838 VMAnonymousCache::Merge(VMCache* _source) 839 { 840 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 841 if (source == NULL) { 842 panic("VMAnonymousCache::MergeStore(): merge with incompatible cache " 843 "%p requested", _source); 844 return; 845 } 846 847 // take over the source' committed size 848 fCommittedSwapSize += source->fCommittedSwapSize; 849 source->fCommittedSwapSize = 0; 850 committed_size += source->committed_size; 851 source->committed_size = 0; 852 853 off_t actualSize = virtual_end - virtual_base; 854 if (committed_size > actualSize) 855 _Commit(actualSize, VM_PRIORITY_USER); 856 857 // Move all not shadowed swap pages from the source to the consumer cache. 858 // Also remove all source pages that are shadowed by consumer swap pages. 859 _MergeSwapPages(source); 860 861 // Move all not shadowed pages from the source to the consumer cache. 862 if (source->page_count < page_count) 863 _MergePagesSmallerSource(source); 864 else 865 _MergePagesSmallerConsumer(source); 866 } 867 868 869 void 870 VMAnonymousCache::DeleteObject() 871 { 872 object_cache_delete(gAnonymousCacheObjectCache, this); 873 } 874 875 876 void 877 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex, 878 swap_addr_t startSlotIndex, uint32 count) 879 { 880 WriteLocker locker(sSwapHashLock); 881 882 uint32 left = count; 883 for (uint32 i = 0, j = 0; i < count; i += j) { 884 off_t pageIndex = startPageIndex + i; 885 swap_addr_t slotIndex = startSlotIndex + i; 886 887 swap_hash_key key = { this, pageIndex }; 888 889 swap_block* swap = sSwapHashTable.Lookup(key); 890 while (swap == NULL) { 891 swap = (swap_block*)object_cache_alloc(sSwapBlockCache, 892 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 893 if (swap == NULL) { 894 // Wait a short time until memory is available again. 895 locker.Unlock(); 896 snooze(10000); 897 locker.Lock(); 898 swap = sSwapHashTable.Lookup(key); 899 continue; 900 } 901 902 swap->key.cache = this; 903 swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 904 swap->used = 0; 905 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 906 swap->swap_slots[i] = SWAP_SLOT_NONE; 907 908 sSwapHashTable.InsertUnchecked(swap); 909 } 910 911 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 912 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 913 swap->swap_slots[blockIndex++] = slotIndex + j; 914 left--; 915 } 916 917 swap->used += j; 918 } 919 } 920 921 922 void 923 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count) 924 { 925 WriteLocker locker(sSwapHashLock); 926 927 uint32 left = count; 928 for (uint32 i = 0, j = 0; i < count; i += j) { 929 off_t pageIndex = startPageIndex + i; 930 swap_hash_key key = { this, pageIndex }; 931 swap_block* swap = sSwapHashTable.Lookup(key); 932 933 ASSERT(swap != NULL); 934 935 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 936 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 937 swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE; 938 left--; 939 } 940 941 swap->used -= j; 942 if (swap->used == 0) { 943 sSwapHashTable.RemoveUnchecked(swap); 944 object_cache_free(sSwapBlockCache, swap, 945 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 946 } 947 } 948 } 949 950 951 swap_addr_t 952 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex) 953 { 954 ReadLocker locker(sSwapHashLock); 955 956 swap_hash_key key = { this, pageIndex }; 957 swap_block* swap = sSwapHashTable.Lookup(key); 958 swap_addr_t slotIndex = SWAP_SLOT_NONE; 959 960 if (swap != NULL) { 961 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 962 slotIndex = swap->swap_slots[blockIndex]; 963 } 964 965 return slotIndex; 966 } 967 968 969 status_t 970 VMAnonymousCache::_Commit(off_t size, int priority) 971 { 972 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: " 973 "%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size, 974 fCommittedSwapSize); 975 976 // Basic strategy: reserve swap space first, only when running out of swap 977 // space, reserve real memory. 978 979 off_t committedMemory = committed_size - fCommittedSwapSize; 980 981 // Regardless of whether we're asked to grow or shrink the commitment, 982 // we always try to reserve as much as possible of the final commitment 983 // in the swap space. 984 if (size > fCommittedSwapSize) { 985 fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize); 986 committed_size = fCommittedSwapSize + committedMemory; 987 if (size > fCommittedSwapSize) { 988 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved " 989 "only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize); 990 } 991 } 992 993 if (committed_size == size) 994 return B_OK; 995 996 if (committed_size > size) { 997 // The commitment shrinks -- unreserve real memory first. 998 off_t toUnreserve = committed_size - size; 999 if (committedMemory > 0) { 1000 off_t unreserved = min_c(toUnreserve, committedMemory); 1001 vm_unreserve_memory(unreserved); 1002 committedMemory -= unreserved; 1003 committed_size -= unreserved; 1004 toUnreserve -= unreserved; 1005 } 1006 1007 // Unreserve swap space. 1008 if (toUnreserve > 0) { 1009 swap_space_unreserve(toUnreserve); 1010 fCommittedSwapSize -= toUnreserve; 1011 committed_size -= toUnreserve; 1012 } 1013 1014 return B_OK; 1015 } 1016 1017 // The commitment grows -- we have already tried to reserve swap space at 1018 // the start of the method, so we try to reserve real memory, now. 1019 1020 off_t toReserve = size - committed_size; 1021 if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) { 1022 dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to " 1023 "reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve); 1024 return B_NO_MEMORY; 1025 } 1026 1027 committed_size = size; 1028 return B_OK; 1029 } 1030 1031 1032 void 1033 VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source) 1034 { 1035 // The source cache has less pages than the consumer (this cache), so we 1036 // iterate through the source's pages and move the ones that are not 1037 // shadowed up to the consumer. 1038 1039 for (VMCachePagesTree::Iterator it = source->pages.GetIterator(); 1040 vm_page* page = it.Next();) { 1041 // Note: Removing the current node while iterating through a 1042 // IteratableSplayTree is safe. 1043 vm_page* consumerPage = LookupPage( 1044 (off_t)page->cache_offset << PAGE_SHIFT); 1045 if (consumerPage == NULL) { 1046 // the page is not yet in the consumer cache - move it upwards 1047 ASSERT_PRINT(!page->busy, "page: %p", page); 1048 MovePage(page); 1049 } 1050 } 1051 } 1052 1053 1054 void 1055 VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source) 1056 { 1057 // The consumer (this cache) has less pages than the source, so we move the 1058 // consumer's pages to the source (freeing shadowed ones) and finally just 1059 // all pages of the source back to the consumer. 1060 1061 for (VMCachePagesTree::Iterator it = pages.GetIterator(); 1062 vm_page* page = it.Next();) { 1063 // If a source page is in the way, remove and free it. 1064 vm_page* sourcePage = source->LookupPage( 1065 (off_t)page->cache_offset << PAGE_SHIFT); 1066 if (sourcePage != NULL) { 1067 DEBUG_PAGE_ACCESS_START(sourcePage); 1068 ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage); 1069 ASSERT_PRINT(sourcePage->WiredCount() == 0 1070 && sourcePage->mappings.IsEmpty(), 1071 "sourcePage: %p, page: %p", sourcePage, page); 1072 source->RemovePage(sourcePage); 1073 vm_page_free(source, sourcePage); 1074 } 1075 1076 // Note: Removing the current node while iterating through a 1077 // IteratableSplayTree is safe. 1078 source->MovePage(page); 1079 } 1080 1081 MoveAllPages(source); 1082 } 1083 1084 1085 void 1086 VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source) 1087 { 1088 // If neither source nor consumer have swap pages, we don't have to do 1089 // anything. 1090 if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0) 1091 return; 1092 1093 for (off_t offset = source->virtual_base 1094 & ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1); 1095 offset < source->virtual_end; 1096 offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) { 1097 1098 WriteLocker locker(sSwapHashLock); 1099 1100 off_t swapBlockPageIndex = offset >> PAGE_SHIFT; 1101 swap_hash_key key = { source, swapBlockPageIndex }; 1102 swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key); 1103 1104 // remove the source swap block -- we will either take over the swap 1105 // space (and the block) or free it 1106 if (sourceSwapBlock != NULL) 1107 sSwapHashTable.RemoveUnchecked(sourceSwapBlock); 1108 1109 key.cache = this; 1110 swap_block* swapBlock = sSwapHashTable.Lookup(key); 1111 1112 locker.Unlock(); 1113 1114 // remove all source pages that are shadowed by consumer swap pages 1115 if (swapBlock != NULL) { 1116 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1117 if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) { 1118 vm_page* page = source->LookupPage( 1119 (off_t)(swapBlockPageIndex + i) << PAGE_SHIFT); 1120 if (page != NULL) { 1121 DEBUG_PAGE_ACCESS_START(page); 1122 ASSERT_PRINT(!page->busy, "page: %p", page); 1123 source->RemovePage(page); 1124 vm_page_free(source, page); 1125 } 1126 } 1127 } 1128 } 1129 1130 if (sourceSwapBlock == NULL) 1131 continue; 1132 1133 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1134 off_t pageIndex = swapBlockPageIndex + i; 1135 swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i]; 1136 1137 if (sourceSlotIndex == SWAP_SLOT_NONE) 1138 continue; 1139 1140 if ((swapBlock != NULL 1141 && swapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1142 || LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) { 1143 // The consumer already has a page or a swapped out page 1144 // at this index. So we can free the source swap space. 1145 swap_slot_dealloc(sourceSlotIndex, 1); 1146 sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE; 1147 sourceSwapBlock->used--; 1148 } 1149 1150 // We've either freed the source swap page or are going to move it 1151 // to the consumer. At any rate, the source cache doesn't own it 1152 // anymore. 1153 source->fAllocatedSwapSize -= B_PAGE_SIZE; 1154 } 1155 1156 // All source swap pages that have not been freed yet are taken over by 1157 // the consumer. 1158 fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used; 1159 1160 if (sourceSwapBlock->used == 0) { 1161 // All swap pages have been freed -- we can discard the source swap 1162 // block. 1163 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1164 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1165 } else if (swapBlock == NULL) { 1166 // We need to take over some of the source's swap pages and there's 1167 // no swap block in the consumer cache. Just take over the source 1168 // swap block. 1169 sourceSwapBlock->key.cache = this; 1170 locker.Lock(); 1171 sSwapHashTable.InsertUnchecked(sourceSwapBlock); 1172 locker.Unlock(); 1173 } else { 1174 // We need to take over some of the source's swap pages and there's 1175 // already a swap block in the consumer cache. Copy the respective 1176 // swap addresses and discard the source swap block. 1177 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1178 if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1179 swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i]; 1180 } 1181 1182 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1183 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1184 } 1185 } 1186 } 1187 1188 1189 // #pragma mark - 1190 1191 1192 // TODO: This can be removed if we get BFS uuid's 1193 struct VolumeInfo { 1194 char name[B_FILE_NAME_LENGTH]; 1195 char device[B_FILE_NAME_LENGTH]; 1196 char filesystem[B_OS_NAME_LENGTH]; 1197 off_t capacity; 1198 }; 1199 1200 1201 class PartitionScorer : public KPartitionVisitor { 1202 public: 1203 PartitionScorer(VolumeInfo& volumeInfo) 1204 : 1205 fBestPartition(NULL), 1206 fBestScore(-1), 1207 fVolumeInfo(volumeInfo) 1208 { 1209 } 1210 1211 virtual bool VisitPre(KPartition* partition) 1212 { 1213 if (!partition->ContainsFileSystem()) 1214 return false; 1215 1216 KPath path; 1217 partition->GetPath(&path); 1218 1219 int score = 0; 1220 if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0) 1221 score += 4; 1222 if (strcmp(fVolumeInfo.device, path.Path()) == 0) 1223 score += 3; 1224 if (fVolumeInfo.capacity == partition->Size()) 1225 score += 2; 1226 if (strcmp(fVolumeInfo.filesystem, 1227 partition->DiskSystem()->ShortName()) == 0) { 1228 score += 1; 1229 } 1230 if (score >= 4 && score > fBestScore) { 1231 fBestPartition = partition; 1232 fBestScore = score; 1233 } 1234 1235 return false; 1236 } 1237 1238 KPartition* fBestPartition; 1239 1240 private: 1241 int32 fBestScore; 1242 VolumeInfo& fVolumeInfo; 1243 }; 1244 1245 1246 status_t 1247 swap_file_add(const char* path) 1248 { 1249 // open the file 1250 int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR); 1251 if (fd < 0) 1252 return errno; 1253 1254 // fstat() it and check whether we can use it 1255 struct stat st; 1256 if (fstat(fd, &st) < 0) { 1257 close(fd); 1258 return errno; 1259 } 1260 1261 if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { 1262 close(fd); 1263 return B_BAD_VALUE; 1264 } 1265 1266 if (st.st_size < B_PAGE_SIZE) { 1267 close(fd); 1268 return B_BAD_VALUE; 1269 } 1270 1271 // get file descriptor, vnode, and cookie 1272 file_descriptor* descriptor = get_fd(get_current_io_context(true), fd); 1273 put_fd(descriptor); 1274 1275 vnode* node = fd_vnode(descriptor); 1276 if (node == NULL) { 1277 close(fd); 1278 return B_BAD_VALUE; 1279 } 1280 1281 // do the allocations and prepare the swap_file structure 1282 swap_file* swap = (swap_file*)malloc(sizeof(swap_file)); 1283 if (swap == NULL) { 1284 close(fd); 1285 return B_NO_MEMORY; 1286 } 1287 1288 swap->fd = fd; 1289 swap->vnode = node; 1290 swap->cookie = descriptor->cookie; 1291 1292 uint32 pageCount = st.st_size >> PAGE_SHIFT; 1293 swap->bmp = radix_bitmap_create(pageCount); 1294 if (swap->bmp == NULL) { 1295 free(swap); 1296 close(fd); 1297 return B_NO_MEMORY; 1298 } 1299 1300 // set slot index and add this file to swap file list 1301 mutex_lock(&sSwapFileListLock); 1302 // TODO: Also check whether the swap file is already registered! 1303 if (sSwapFileList.IsEmpty()) { 1304 swap->first_slot = 0; 1305 swap->last_slot = pageCount; 1306 } else { 1307 // leave one page gap between two swap files 1308 swap->first_slot = sSwapFileList.Last()->last_slot + 1; 1309 swap->last_slot = swap->first_slot + pageCount; 1310 } 1311 sSwapFileList.Add(swap); 1312 sSwapFileCount++; 1313 mutex_unlock(&sSwapFileListLock); 1314 1315 mutex_lock(&sAvailSwapSpaceLock); 1316 sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE; 1317 mutex_unlock(&sAvailSwapSpaceLock); 1318 1319 return B_OK; 1320 } 1321 1322 1323 status_t 1324 swap_file_delete(const char* path) 1325 { 1326 vnode* node = NULL; 1327 status_t status = vfs_get_vnode_from_path(path, true, &node); 1328 if (status != B_OK) 1329 return status; 1330 1331 MutexLocker locker(sSwapFileListLock); 1332 1333 swap_file* swapFile = NULL; 1334 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1335 (swapFile = it.Next()) != NULL;) { 1336 if (swapFile->vnode == node) 1337 break; 1338 } 1339 1340 vfs_put_vnode(node); 1341 1342 if (swapFile == NULL) 1343 return B_ERROR; 1344 1345 // if this file is currently used, we can't delete 1346 // TODO: mark this swap file deleting, and remove it after releasing 1347 // all the swap space 1348 if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot) 1349 return B_ERROR; 1350 1351 sSwapFileList.Remove(swapFile); 1352 sSwapFileCount--; 1353 locker.Unlock(); 1354 1355 mutex_lock(&sAvailSwapSpaceLock); 1356 sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot) 1357 * B_PAGE_SIZE; 1358 mutex_unlock(&sAvailSwapSpaceLock); 1359 1360 close(swapFile->fd); 1361 radix_bitmap_destroy(swapFile->bmp); 1362 free(swapFile); 1363 1364 return B_OK; 1365 } 1366 1367 1368 void 1369 swap_init(void) 1370 { 1371 // create swap block cache 1372 sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block), 1373 sizeof(void*), NULL, NULL, NULL); 1374 if (sSwapBlockCache == NULL) 1375 panic("swap_init(): can't create object cache for swap blocks\n"); 1376 1377 status_t error = object_cache_set_minimum_reserve(sSwapBlockCache, 1378 MIN_SWAP_BLOCK_RESERVE); 1379 if (error != B_OK) { 1380 panic("swap_init(): object_cache_set_minimum_reserve() failed: %s", 1381 strerror(error)); 1382 } 1383 1384 // init swap hash table 1385 sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE); 1386 rw_lock_init(&sSwapHashLock, "swaphash"); 1387 1388 error = register_resource_resizer(swap_hash_resizer, NULL, 1389 SWAP_HASH_RESIZE_INTERVAL); 1390 if (error != B_OK) { 1391 panic("swap_init(): Failed to register swap hash resizer: %s", 1392 strerror(error)); 1393 } 1394 1395 // init swap file list 1396 mutex_init(&sSwapFileListLock, "swaplist"); 1397 sSwapFileAlloc = NULL; 1398 sSwapFileCount = 0; 1399 1400 // init available swap space 1401 mutex_init(&sAvailSwapSpaceLock, "avail swap space"); 1402 sAvailSwapSpace = 0; 1403 1404 add_debugger_command_etc("swap", &dump_swap_info, 1405 "Print infos about the swap usage", 1406 "\n" 1407 "Print infos about the swap usage.\n", 0); 1408 } 1409 1410 1411 void 1412 swap_init_post_modules() 1413 { 1414 // Never try to create a swap file on a read-only device - when booting 1415 // from CD, the write overlay is used. 1416 if (gReadOnlyBootDevice) 1417 return; 1418 1419 bool swapEnabled = true; 1420 bool swapAutomatic = true; 1421 off_t swapSize = 0; 1422 1423 dev_t swapDeviceID = -1; 1424 VolumeInfo selectedVolume = {}; 1425 1426 void* settings = load_driver_settings("virtual_memory"); 1427 1428 if (settings != NULL) { 1429 // We pass a lot of information on the swap device, this is mostly to 1430 // ensure that we are dealing with the same device that was configured. 1431 1432 // TODO: Some kind of BFS uuid would be great here :) 1433 const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL); 1434 1435 if (enabled != NULL) { 1436 swapEnabled = get_driver_boolean_parameter(settings, "vm", 1437 true, false); 1438 swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto", 1439 true, false); 1440 1441 if (swapEnabled && !swapAutomatic) { 1442 const char* size = get_driver_parameter(settings, "swap_size", 1443 NULL, NULL); 1444 const char* volume = get_driver_parameter(settings, 1445 "swap_volume_name", NULL, NULL); 1446 const char* device = get_driver_parameter(settings, 1447 "swap_volume_device", NULL, NULL); 1448 const char* filesystem = get_driver_parameter(settings, 1449 "swap_volume_filesystem", NULL, NULL); 1450 const char* capacity = get_driver_parameter(settings, 1451 "swap_volume_capacity", NULL, NULL); 1452 1453 if (size != NULL && device != NULL && volume != NULL 1454 && filesystem != NULL && capacity != NULL) { 1455 // User specified a size / volume that seems valid 1456 swapAutomatic = false; 1457 swapSize = atoll(size); 1458 strlcpy(selectedVolume.name, volume, 1459 sizeof(selectedVolume.name)); 1460 strlcpy(selectedVolume.device, device, 1461 sizeof(selectedVolume.device)); 1462 strlcpy(selectedVolume.filesystem, filesystem, 1463 sizeof(selectedVolume.filesystem)); 1464 selectedVolume.capacity = atoll(capacity); 1465 } else { 1466 // Something isn't right with swap config, go auto 1467 swapAutomatic = true; 1468 dprintf("%s: virtual_memory configuration is invalid, " 1469 "using automatic swap\n", __func__); 1470 } 1471 } 1472 } 1473 unload_driver_settings(settings); 1474 } 1475 1476 if (swapAutomatic) { 1477 swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE; 1478 if (swapSize <= (1024 * 1024 * 1024)) { 1479 // Memory under 1GB? double the swap 1480 swapSize *= 2; 1481 } 1482 // Automatic swap defaults to the boot device 1483 swapDeviceID = gBootDevice; 1484 } 1485 1486 if (!swapEnabled || swapSize < B_PAGE_SIZE) { 1487 dprintf("%s: virtual_memory is disabled\n", __func__); 1488 return; 1489 } 1490 1491 if (!swapAutomatic && swapDeviceID < 0) { 1492 // If user-specified swap, and no swap device has been chosen yet... 1493 KDiskDeviceManager::CreateDefault(); 1494 KDiskDeviceManager* manager = KDiskDeviceManager::Default(); 1495 PartitionScorer visitor(selectedVolume); 1496 1497 KDiskDevice* device; 1498 int32 cookie = 0; 1499 while ((device = manager->NextDevice(&cookie)) != NULL) { 1500 if (device->IsReadOnlyMedia() || device->IsWriteOnce() 1501 || device->IsRemovable()) { 1502 continue; 1503 } 1504 device->VisitEachDescendant(&visitor); 1505 } 1506 1507 if (!visitor.fBestPartition) { 1508 dprintf("%s: Can't find configured swap partition '%s'\n", 1509 __func__, selectedVolume.name); 1510 } else { 1511 if (visitor.fBestPartition->IsMounted()) 1512 swapDeviceID = visitor.fBestPartition->VolumeID(); 1513 else { 1514 KPath devPath, mountPoint; 1515 visitor.fBestPartition->GetPath(&devPath); 1516 get_mount_point(visitor.fBestPartition, &mountPoint); 1517 const char* mountPath = mountPoint.Path(); 1518 mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO); 1519 swapDeviceID = _kern_mount(mountPath, devPath.Path(), 1520 NULL, 0, NULL, 0); 1521 if (swapDeviceID < 0) { 1522 dprintf("%s: Can't mount configured swap partition '%s'\n", 1523 __func__, selectedVolume.name); 1524 } 1525 } 1526 } 1527 } 1528 1529 if (swapDeviceID < 0) 1530 swapDeviceID = gBootDevice; 1531 1532 // We now have a swapDeviceID which is used for the swap file 1533 1534 KPath path; 1535 struct fs_info info; 1536 _kern_read_fs_info(swapDeviceID, &info); 1537 if (swapDeviceID == gBootDevice) 1538 path = kDefaultSwapPath; 1539 else { 1540 vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(), 1541 path.BufferSize()); 1542 path.UnlockBuffer(); 1543 path.Append("swap"); 1544 } 1545 1546 const char* swapPath = path.Path(); 1547 1548 // Swap size limits prevent oversized swap files 1549 if (swapAutomatic) { 1550 off_t existingSwapSize = 0; 1551 struct stat existingSwapStat; 1552 if (stat(swapPath, &existingSwapStat) == 0) 1553 existingSwapSize = existingSwapStat.st_size; 1554 1555 off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize; 1556 1557 // Adjust automatic swap to a maximum of 25% of the free space 1558 if (swapSize > (freeSpace / 4)) 1559 swapSize = (freeSpace / 4); 1560 } 1561 1562 // Create swap file 1563 int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR); 1564 if (fd < 0) { 1565 dprintf("%s: Can't open/create %s: %s\n", __func__, 1566 swapPath, strerror(errno)); 1567 return; 1568 } 1569 1570 struct stat stat; 1571 stat.st_size = swapSize; 1572 status_t error = _kern_write_stat(fd, NULL, false, &stat, 1573 sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE); 1574 if (error != B_OK) { 1575 dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n", 1576 __func__, swapPath, swapSize, strerror(error)); 1577 } 1578 1579 close(fd); 1580 1581 error = swap_file_add(swapPath); 1582 if (error != B_OK) { 1583 dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath, 1584 strerror(error)); 1585 } 1586 } 1587 1588 1589 //! Used by page daemon to free swap space. 1590 bool 1591 swap_free_page_swap_space(vm_page* page) 1592 { 1593 VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache()); 1594 if (cache == NULL) 1595 return false; 1596 1597 swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset); 1598 if (slotIndex == SWAP_SLOT_NONE) 1599 return false; 1600 1601 swap_slot_dealloc(slotIndex, 1); 1602 cache->fAllocatedSwapSize -= B_PAGE_SIZE; 1603 cache->_SwapBlockFree(page->cache_offset, 1); 1604 1605 return true; 1606 } 1607 1608 1609 uint32 1610 swap_available_pages() 1611 { 1612 mutex_lock(&sAvailSwapSpaceLock); 1613 uint32 avail = sAvailSwapSpace >> PAGE_SHIFT; 1614 mutex_unlock(&sAvailSwapSpaceLock); 1615 1616 return avail; 1617 } 1618 1619 1620 uint32 1621 swap_total_swap_pages() 1622 { 1623 mutex_lock(&sSwapFileListLock); 1624 1625 uint32 totalSwapSlots = 0; 1626 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1627 swap_file* swapFile = it.Next();) { 1628 totalSwapSlots += swapFile->last_slot - swapFile->first_slot; 1629 } 1630 1631 mutex_unlock(&sSwapFileListLock); 1632 1633 return totalSwapSlots; 1634 } 1635 1636 1637 #endif // ENABLE_SWAP_SUPPORT 1638 1639 1640 void 1641 swap_get_info(system_info* info) 1642 { 1643 #if ENABLE_SWAP_SUPPORT 1644 info->max_swap_pages = swap_total_swap_pages(); 1645 info->free_swap_pages = swap_available_pages(); 1646 #else 1647 info->max_swap_space = 0; 1648 info->free_swap_space = 0; 1649 #endif 1650 } 1651 1652