1 /* 2 * Copyright 2008, Zhao Shuai, upczhsh@163.com. 3 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 4 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 5 * Distributed under the terms of the MIT License. 6 * 7 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 8 * Distributed under the terms of the NewOS License. 9 * 10 * Copyright 2011-2012 Haiku, Inc. All rights reserved. 11 * Distributed under the terms of the MIT License. 12 * 13 * Authors: 14 * Hamish Morrison, hamish@lavabit.com 15 * Alexander von Gluck IV, kallisti5@unixzen.com 16 */ 17 18 19 #include "VMAnonymousCache.h" 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include <FindDirectory.h> 28 #include <KernelExport.h> 29 #include <NodeMonitor.h> 30 31 #include <arch_config.h> 32 #include <boot_device.h> 33 #include <disk_device_manager/KDiskDevice.h> 34 #include <disk_device_manager/KDiskDeviceManager.h> 35 #include <disk_device_manager/KDiskSystem.h> 36 #include <disk_device_manager/KPartitionVisitor.h> 37 #include <driver_settings.h> 38 #include <fs/fd.h> 39 #include <fs/KPath.h> 40 #include <fs_info.h> 41 #include <fs_interface.h> 42 #include <heap.h> 43 #include <kernel_daemon.h> 44 #include <slab/Slab.h> 45 #include <syscalls.h> 46 #include <system_info.h> 47 #include <tracing.h> 48 #include <util/AutoLock.h> 49 #include <util/DoublyLinkedList.h> 50 #include <util/OpenHashTable.h> 51 #include <util/RadixBitmap.h> 52 #include <vfs.h> 53 #include <vm/vm.h> 54 #include <vm/vm_page.h> 55 #include <vm/vm_priv.h> 56 #include <vm/VMAddressSpace.h> 57 58 #include "IORequest.h" 59 60 61 #if ENABLE_SWAP_SUPPORT 62 63 //#define TRACE_VM_ANONYMOUS_CACHE 64 #ifdef TRACE_VM_ANONYMOUS_CACHE 65 # define TRACE(x...) dprintf(x) 66 #else 67 # define TRACE(x...) do { } while (false) 68 #endif 69 70 71 // number of free swap blocks the object cache shall minimally have 72 #define MIN_SWAP_BLOCK_RESERVE 4096 73 74 // interval the has resizer is triggered (in 0.1s) 75 #define SWAP_HASH_RESIZE_INTERVAL 5 76 77 #define INITIAL_SWAP_HASH_SIZE 1024 78 79 #define SWAP_SLOT_NONE RADIX_SLOT_NONE 80 81 #define SWAP_BLOCK_PAGES 32 82 #define SWAP_BLOCK_SHIFT 5 /* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */ 83 #define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1) 84 85 86 static const char* const kDefaultSwapPath = "/var/swap"; 87 88 struct swap_file : DoublyLinkedListLinkImpl<swap_file> { 89 int fd; 90 struct vnode* vnode; 91 void* cookie; 92 swap_addr_t first_slot; 93 swap_addr_t last_slot; 94 radix_bitmap* bmp; 95 }; 96 97 struct swap_hash_key { 98 VMAnonymousCache *cache; 99 off_t page_index; // page index in the cache 100 }; 101 102 // Each swap block contains swap address information for 103 // SWAP_BLOCK_PAGES continuous pages from the same cache 104 struct swap_block { 105 swap_block* hash_link; 106 swap_hash_key key; 107 uint32 used; 108 swap_addr_t swap_slots[SWAP_BLOCK_PAGES]; 109 }; 110 111 struct SwapHashTableDefinition { 112 typedef swap_hash_key KeyType; 113 typedef swap_block ValueType; 114 115 SwapHashTableDefinition() {} 116 117 size_t HashKey(const swap_hash_key& key) const 118 { 119 off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT; 120 VMAnonymousCache* cache = key.cache; 121 return blockIndex ^ (size_t)(int*)cache; 122 } 123 124 size_t Hash(const swap_block* value) const 125 { 126 return HashKey(value->key); 127 } 128 129 bool Compare(const swap_hash_key& key, const swap_block* value) const 130 { 131 return (key.page_index & ~(off_t)SWAP_BLOCK_MASK) 132 == (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK) 133 && key.cache == value->key.cache; 134 } 135 136 swap_block*& GetLink(swap_block* value) const 137 { 138 return value->hash_link; 139 } 140 }; 141 142 typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable; 143 typedef DoublyLinkedList<swap_file> SwapFileList; 144 145 static SwapHashTable sSwapHashTable; 146 static rw_lock sSwapHashLock; 147 148 static SwapFileList sSwapFileList; 149 static mutex sSwapFileListLock; 150 static swap_file* sSwapFileAlloc = NULL; // allocate from here 151 static uint32 sSwapFileCount = 0; 152 153 static off_t sAvailSwapSpace = 0; 154 static mutex sAvailSwapSpaceLock; 155 156 static object_cache* sSwapBlockCache; 157 158 159 #if SWAP_TRACING 160 namespace SwapTracing { 161 162 class SwapTraceEntry : public AbstractTraceEntry { 163 public: 164 SwapTraceEntry(VMAnonymousCache* cache) 165 : 166 fCache(cache) 167 { 168 } 169 170 protected: 171 VMAnonymousCache* fCache; 172 }; 173 174 175 class ReadPage : public SwapTraceEntry { 176 public: 177 ReadPage(VMAnonymousCache* cache, page_num_t pageIndex, 178 swap_addr_t swapSlotIndex) 179 : 180 SwapTraceEntry(cache), 181 fPageIndex(pageIndex), 182 fSwapSlotIndex(swapSlotIndex) 183 { 184 Initialized(); 185 } 186 187 virtual void AddDump(TraceOutput& out) 188 { 189 out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu", 190 fCache, fPageIndex, fSwapSlotIndex); 191 } 192 193 private: 194 page_num_t fPageIndex; 195 swap_addr_t fSwapSlotIndex; 196 }; 197 198 199 class WritePage : public SwapTraceEntry { 200 public: 201 WritePage(VMAnonymousCache* cache, page_num_t pageIndex, 202 swap_addr_t swapSlotIndex) 203 : 204 SwapTraceEntry(cache), 205 fPageIndex(pageIndex), 206 fSwapSlotIndex(swapSlotIndex) 207 { 208 Initialized(); 209 } 210 211 virtual void AddDump(TraceOutput& out) 212 { 213 out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu", 214 fCache, fPageIndex, fSwapSlotIndex); 215 } 216 217 private: 218 page_num_t fPageIndex; 219 swap_addr_t fSwapSlotIndex; 220 }; 221 222 } // namespace SwapTracing 223 224 # define T(x) new(std::nothrow) SwapTracing::x; 225 #else 226 # define T(x) ; 227 #endif 228 229 230 static int 231 dump_swap_info(int argc, char** argv) 232 { 233 swap_addr_t totalSwapPages = 0; 234 swap_addr_t freeSwapPages = 0; 235 236 kprintf("swap files:\n"); 237 238 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 239 swap_file* file = it.Next();) { 240 swap_addr_t total = file->last_slot - file->first_slot; 241 kprintf(" vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32 242 "\n", file->vnode, total, file->bmp->free_slots); 243 244 totalSwapPages += total; 245 freeSwapPages += file->bmp->free_slots; 246 } 247 248 kprintf("\n"); 249 kprintf("swap space in pages:\n"); 250 kprintf("total: %9" B_PRIu32 "\n", totalSwapPages); 251 kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE); 252 kprintf("reserved: %9" B_PRIdOFF "\n", 253 totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE); 254 kprintf("used: %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages); 255 kprintf("free: %9" B_PRIu32 "\n", freeSwapPages); 256 257 return 0; 258 } 259 260 261 static swap_addr_t 262 swap_slot_alloc(uint32 count) 263 { 264 mutex_lock(&sSwapFileListLock); 265 266 if (sSwapFileList.IsEmpty()) { 267 mutex_unlock(&sSwapFileListLock); 268 panic("swap_slot_alloc(): no swap file in the system\n"); 269 return SWAP_SLOT_NONE; 270 } 271 272 // since radix bitmap could not handle more than 32 pages, we return 273 // SWAP_SLOT_NONE, this forces Write() adjust allocation amount 274 if (count > BITMAP_RADIX) { 275 mutex_unlock(&sSwapFileListLock); 276 return SWAP_SLOT_NONE; 277 } 278 279 swap_addr_t j, addr = SWAP_SLOT_NONE; 280 for (j = 0; j < sSwapFileCount; j++) { 281 if (sSwapFileAlloc == NULL) 282 sSwapFileAlloc = sSwapFileList.First(); 283 284 addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count); 285 if (addr != SWAP_SLOT_NONE) { 286 addr += sSwapFileAlloc->first_slot; 287 break; 288 } 289 290 // this swap_file is full, find another 291 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 292 } 293 294 if (j == sSwapFileCount) { 295 mutex_unlock(&sSwapFileListLock); 296 panic("swap_slot_alloc: swap space exhausted!\n"); 297 return SWAP_SLOT_NONE; 298 } 299 300 // if this swap file has used more than 90% percent of its space 301 // switch to another 302 if (sSwapFileAlloc->bmp->free_slots 303 < (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) { 304 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 305 } 306 307 mutex_unlock(&sSwapFileListLock); 308 309 return addr; 310 } 311 312 313 static swap_file* 314 find_swap_file(swap_addr_t slotIndex) 315 { 316 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 317 swap_file* swapFile = it.Next();) { 318 if (slotIndex >= swapFile->first_slot 319 && slotIndex < swapFile->last_slot) { 320 return swapFile; 321 } 322 } 323 324 panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n", 325 slotIndex); 326 return NULL; 327 } 328 329 330 static void 331 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count) 332 { 333 if (slotIndex == SWAP_SLOT_NONE) 334 return; 335 336 mutex_lock(&sSwapFileListLock); 337 swap_file* swapFile = find_swap_file(slotIndex); 338 slotIndex -= swapFile->first_slot; 339 radix_bitmap_dealloc(swapFile->bmp, slotIndex, count); 340 mutex_unlock(&sSwapFileListLock); 341 } 342 343 344 static off_t 345 swap_space_reserve(off_t amount) 346 { 347 mutex_lock(&sAvailSwapSpaceLock); 348 if (sAvailSwapSpace >= amount) 349 sAvailSwapSpace -= amount; 350 else { 351 amount = sAvailSwapSpace; 352 sAvailSwapSpace = 0; 353 } 354 mutex_unlock(&sAvailSwapSpaceLock); 355 356 return amount; 357 } 358 359 360 static void 361 swap_space_unreserve(off_t amount) 362 { 363 mutex_lock(&sAvailSwapSpaceLock); 364 sAvailSwapSpace += amount; 365 mutex_unlock(&sAvailSwapSpaceLock); 366 } 367 368 369 static void 370 swap_hash_resizer(void*, int) 371 { 372 WriteLocker locker(sSwapHashLock); 373 374 size_t size; 375 void* allocation; 376 377 do { 378 size = sSwapHashTable.ResizeNeeded(); 379 if (size == 0) 380 return; 381 382 locker.Unlock(); 383 384 allocation = malloc(size); 385 if (allocation == NULL) 386 return; 387 388 locker.Lock(); 389 390 } while (!sSwapHashTable.Resize(allocation, size)); 391 } 392 393 394 // #pragma mark - 395 396 397 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback { 398 public: 399 WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback) 400 : 401 StackableAsyncIOCallback(callback), 402 fCache(cache) 403 { 404 } 405 406 void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot) 407 { 408 fPageIndex = pageIndex; 409 fSlotIndex = slotIndex; 410 fNewSlot = newSlot; 411 } 412 413 virtual void IOFinished(status_t status, bool partialTransfer, 414 generic_size_t bytesTransferred) 415 { 416 if (fNewSlot) { 417 if (status == B_OK) { 418 fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1); 419 } else { 420 AutoLocker<VMCache> locker(fCache); 421 fCache->fAllocatedSwapSize -= B_PAGE_SIZE; 422 locker.Unlock(); 423 424 swap_slot_dealloc(fSlotIndex, 1); 425 } 426 } 427 428 fNextCallback->IOFinished(status, partialTransfer, bytesTransferred); 429 430 delete this; 431 } 432 433 private: 434 VMAnonymousCache* fCache; 435 page_num_t fPageIndex; 436 swap_addr_t fSlotIndex; 437 bool fNewSlot; 438 }; 439 440 441 // #pragma mark - 442 443 444 VMAnonymousCache::~VMAnonymousCache() 445 { 446 // free allocated swap space and swap block 447 for (off_t offset = virtual_base, toFree = fAllocatedSwapSize; 448 offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) { 449 swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT); 450 if (slotIndex == SWAP_SLOT_NONE) 451 continue; 452 453 swap_slot_dealloc(slotIndex, 1); 454 _SwapBlockFree(offset >> PAGE_SHIFT, 1); 455 toFree -= B_PAGE_SIZE; 456 } 457 458 swap_space_unreserve(fCommittedSwapSize); 459 if (committed_size > fCommittedSwapSize) 460 vm_unreserve_memory(committed_size - fCommittedSwapSize); 461 } 462 463 464 status_t 465 VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages, 466 int32 numGuardPages, uint32 allocationFlags) 467 { 468 TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, " 469 "numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32 470 ")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages, 471 numGuardPages); 472 473 status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags); 474 if (error != B_OK) 475 return error; 476 477 fCanOvercommit = canOvercommit; 478 fHasPrecommitted = false; 479 fPrecommittedPages = min_c(numPrecommittedPages, 255); 480 fGuardedSize = numGuardPages * B_PAGE_SIZE; 481 fCommittedSwapSize = 0; 482 fAllocatedSwapSize = 0; 483 484 return B_OK; 485 } 486 487 488 status_t 489 VMAnonymousCache::Resize(off_t newSize, int priority) 490 { 491 // If the cache size shrinks, drop all swap pages beyond the new size. 492 if (fAllocatedSwapSize > 0) { 493 off_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 494 swap_block* swapBlock = NULL; 495 496 for (off_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 497 pageIndex < oldPageCount && fAllocatedSwapSize > 0; pageIndex++) { 498 499 WriteLocker locker(sSwapHashLock); 500 501 // Get the swap slot index for the page. 502 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 503 if (swapBlock == NULL || blockIndex == 0) { 504 swap_hash_key key = { this, pageIndex }; 505 swapBlock = sSwapHashTable.Lookup(key); 506 507 if (swapBlock == NULL) { 508 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES); 509 continue; 510 } 511 } 512 513 swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex]; 514 vm_page* page; 515 if (slotIndex != SWAP_SLOT_NONE 516 && ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL 517 || !page->busy)) { 518 // TODO: We skip (i.e. leak) swap space of busy pages, since 519 // there could be I/O going on (paging in/out). Waiting is 520 // not an option as 1. unlocking the cache means that new 521 // swap pages could be added in a range we've already 522 // cleared (since the cache still has the old size) and 2. 523 // we'd risk a deadlock in case we come from the file cache 524 // and the FS holds the node's write-lock. We should mark 525 // the page invalid and let the one responsible clean up. 526 // There's just no such mechanism yet. 527 swap_slot_dealloc(slotIndex, 1); 528 fAllocatedSwapSize -= B_PAGE_SIZE; 529 530 swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE; 531 if (--swapBlock->used == 0) { 532 // All swap pages have been freed -- we can discard the swap 533 // block. 534 sSwapHashTable.RemoveUnchecked(swapBlock); 535 object_cache_free(sSwapBlockCache, swapBlock, 536 CACHE_DONT_WAIT_FOR_MEMORY 537 | CACHE_DONT_LOCK_KERNEL_SPACE); 538 } 539 } 540 } 541 } 542 543 return VMCache::Resize(newSize, priority); 544 } 545 546 547 status_t 548 VMAnonymousCache::Commit(off_t size, int priority) 549 { 550 TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size); 551 552 // If we can overcommit, we don't commit here, but in Fault(). We always 553 // unreserve memory, if we're asked to shrink our commitment, though. 554 if (fCanOvercommit && size > committed_size) { 555 if (fHasPrecommitted) 556 return B_OK; 557 558 // pre-commit some pages to make a later failure less probable 559 fHasPrecommitted = true; 560 uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE; 561 if (size > precommitted) 562 size = precommitted; 563 } 564 565 return _Commit(size, priority); 566 } 567 568 569 bool 570 VMAnonymousCache::HasPage(off_t offset) 571 { 572 if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE) 573 return true; 574 575 return false; 576 } 577 578 579 bool 580 VMAnonymousCache::DebugHasPage(off_t offset) 581 { 582 off_t pageIndex = offset >> PAGE_SHIFT; 583 swap_hash_key key = { this, pageIndex }; 584 swap_block* swap = sSwapHashTable.Lookup(key); 585 if (swap == NULL) 586 return false; 587 588 return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE; 589 } 590 591 592 status_t 593 VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count, 594 uint32 flags, generic_size_t* _numBytes) 595 { 596 off_t pageIndex = offset >> PAGE_SHIFT; 597 598 for (uint32 i = 0, j = 0; i < count; i = j) { 599 swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i); 600 for (j = i + 1; j < count; j++) { 601 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j); 602 if (slotIndex != startSlotIndex + j - i) 603 break; 604 } 605 606 T(ReadPage(this, pageIndex, startSlotIndex)); 607 // TODO: Assumes that only one page is read. 608 609 swap_file* swapFile = find_swap_file(startSlotIndex); 610 611 off_t pos = (off_t)(startSlotIndex - swapFile->first_slot) 612 * B_PAGE_SIZE; 613 614 status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos, 615 vecs + i, j - i, flags, _numBytes); 616 if (status != B_OK) 617 return status; 618 } 619 620 return B_OK; 621 } 622 623 624 status_t 625 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count, 626 uint32 flags, generic_size_t* _numBytes) 627 { 628 off_t pageIndex = offset >> PAGE_SHIFT; 629 630 AutoLocker<VMCache> locker(this); 631 632 page_num_t totalPages = 0; 633 for (uint32 i = 0; i < count; i++) { 634 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 635 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages); 636 if (slotIndex != SWAP_SLOT_NONE) { 637 swap_slot_dealloc(slotIndex, pageCount); 638 _SwapBlockFree(pageIndex + totalPages, pageCount); 639 fAllocatedSwapSize -= pageCount * B_PAGE_SIZE; 640 } 641 642 totalPages += pageCount; 643 } 644 645 off_t totalSize = totalPages * B_PAGE_SIZE; 646 if (fAllocatedSwapSize + totalSize > fCommittedSwapSize) 647 return B_ERROR; 648 649 fAllocatedSwapSize += totalSize; 650 locker.Unlock(); 651 652 page_num_t pagesLeft = totalPages; 653 totalPages = 0; 654 655 for (uint32 i = 0; i < count; i++) { 656 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 657 658 generic_addr_t vectorBase = vecs[i].base; 659 generic_size_t vectorLength = vecs[i].length; 660 page_num_t n = pageCount; 661 662 for (page_num_t j = 0; j < pageCount; j += n) { 663 swap_addr_t slotIndex; 664 // try to allocate n slots, if fail, try to allocate n/2 665 while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2) 666 n >>= 1; 667 668 if (slotIndex == SWAP_SLOT_NONE) 669 panic("VMAnonymousCache::Write(): can't allocate swap space\n"); 670 671 T(WritePage(this, pageIndex, slotIndex)); 672 // TODO: Assumes that only one page is written. 673 674 swap_file* swapFile = find_swap_file(slotIndex); 675 676 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 677 678 generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE; 679 generic_io_vec vector[1]; 680 vector->base = vectorBase; 681 vector->length = length; 682 683 status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie, 684 pos, vector, 1, flags, &length); 685 if (status != B_OK) { 686 locker.Lock(); 687 fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE; 688 locker.Unlock(); 689 690 swap_slot_dealloc(slotIndex, n); 691 return status; 692 } 693 694 _SwapBlockBuild(pageIndex + totalPages, slotIndex, n); 695 pagesLeft -= n; 696 697 if (n != pageCount) { 698 vectorBase = vectorBase + n * B_PAGE_SIZE; 699 vectorLength -= n * B_PAGE_SIZE; 700 } 701 } 702 703 totalPages += pageCount; 704 } 705 706 ASSERT(pagesLeft == 0); 707 return B_OK; 708 } 709 710 711 status_t 712 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs, 713 size_t count, generic_size_t numBytes, uint32 flags, 714 AsyncIOCallback* _callback) 715 { 716 // TODO: Currently this method is only used for single pages. Either make 717 // more flexible use of it or change the interface! 718 // This implementation relies on the current usage! 719 ASSERT(count == 1); 720 ASSERT(numBytes <= B_PAGE_SIZE); 721 722 page_num_t pageIndex = offset >> PAGE_SHIFT; 723 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex); 724 bool newSlot = slotIndex == SWAP_SLOT_NONE; 725 726 // If the page doesn't have any swap space yet, allocate it. 727 if (newSlot) { 728 AutoLocker<VMCache> locker(this); 729 if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) { 730 _callback->IOFinished(B_ERROR, true, 0); 731 return B_ERROR; 732 } 733 734 fAllocatedSwapSize += B_PAGE_SIZE; 735 736 slotIndex = swap_slot_alloc(1); 737 } 738 739 // create our callback 740 WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0 741 ? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback) 742 : new(std::nothrow) WriteCallback(this, _callback); 743 if (callback == NULL) { 744 if (newSlot) { 745 AutoLocker<VMCache> locker(this); 746 fAllocatedSwapSize -= B_PAGE_SIZE; 747 locker.Unlock(); 748 749 swap_slot_dealloc(slotIndex, 1); 750 } 751 _callback->IOFinished(B_NO_MEMORY, true, 0); 752 return B_NO_MEMORY; 753 } 754 // TODO: If the page already had swap space assigned, we don't need an own 755 // callback. 756 757 callback->SetTo(pageIndex, slotIndex, newSlot); 758 759 T(WritePage(this, pageIndex, slotIndex)); 760 761 // write the page asynchrounously 762 swap_file* swapFile = find_swap_file(slotIndex); 763 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 764 765 return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos, 766 vecs, 1, numBytes, flags, callback); 767 } 768 769 770 bool 771 VMAnonymousCache::CanWritePage(off_t offset) 772 { 773 // We can write the page, if we have not used all of our committed swap 774 // space or the page already has a swap slot assigned. 775 return fAllocatedSwapSize < fCommittedSwapSize 776 || _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE; 777 } 778 779 780 int32 781 VMAnonymousCache::MaxPagesPerAsyncWrite() const 782 { 783 return 1; 784 } 785 786 787 status_t 788 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset) 789 { 790 if (fGuardedSize > 0) { 791 uint32 guardOffset; 792 793 #ifdef STACK_GROWS_DOWNWARDS 794 guardOffset = 0; 795 #elif defined(STACK_GROWS_UPWARDS) 796 guardOffset = virtual_size - fGuardedSize; 797 #else 798 # error Stack direction has not been defined in arch_config.h 799 #endif 800 // report stack fault, guard page hit! 801 if (offset >= guardOffset && offset < guardOffset + fGuardedSize) { 802 TRACE(("stack overflow!\n")); 803 return B_BAD_ADDRESS; 804 } 805 } 806 807 if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) { 808 if (fPrecommittedPages == 0) { 809 // never commit more than needed 810 if (committed_size / B_PAGE_SIZE > page_count) 811 return B_BAD_HANDLER; 812 813 // try to commit additional swap space/memory 814 if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) { 815 fCommittedSwapSize += B_PAGE_SIZE; 816 } else { 817 int priority = aspace == VMAddressSpace::Kernel() 818 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 819 if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) { 820 dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve " 821 "%d bytes of RAM.\n", this, (int)B_PAGE_SIZE); 822 return B_NO_MEMORY; 823 } 824 } 825 826 committed_size += B_PAGE_SIZE; 827 } else 828 fPrecommittedPages--; 829 } 830 831 // This will cause vm_soft_fault() to handle the fault 832 return B_BAD_HANDLER; 833 } 834 835 836 void 837 VMAnonymousCache::Merge(VMCache* _source) 838 { 839 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 840 if (source == NULL) { 841 panic("VMAnonymousCache::MergeStore(): merge with incompatible cache " 842 "%p requested", _source); 843 return; 844 } 845 846 // take over the source' committed size 847 fCommittedSwapSize += source->fCommittedSwapSize; 848 source->fCommittedSwapSize = 0; 849 committed_size += source->committed_size; 850 source->committed_size = 0; 851 852 off_t actualSize = virtual_end - virtual_base; 853 if (committed_size > actualSize) 854 _Commit(actualSize, VM_PRIORITY_USER); 855 856 // Move all not shadowed swap pages from the source to the consumer cache. 857 // Also remove all source pages that are shadowed by consumer swap pages. 858 _MergeSwapPages(source); 859 860 // Move all not shadowed pages from the source to the consumer cache. 861 if (source->page_count < page_count) 862 _MergePagesSmallerSource(source); 863 else 864 _MergePagesSmallerConsumer(source); 865 } 866 867 868 void 869 VMAnonymousCache::DeleteObject() 870 { 871 object_cache_delete(gAnonymousCacheObjectCache, this); 872 } 873 874 875 void 876 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex, 877 swap_addr_t startSlotIndex, uint32 count) 878 { 879 WriteLocker locker(sSwapHashLock); 880 881 uint32 left = count; 882 for (uint32 i = 0, j = 0; i < count; i += j) { 883 off_t pageIndex = startPageIndex + i; 884 swap_addr_t slotIndex = startSlotIndex + i; 885 886 swap_hash_key key = { this, pageIndex }; 887 888 swap_block* swap = sSwapHashTable.Lookup(key); 889 while (swap == NULL) { 890 swap = (swap_block*)object_cache_alloc(sSwapBlockCache, 891 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 892 if (swap == NULL) { 893 // Wait a short time until memory is available again. 894 locker.Unlock(); 895 snooze(10000); 896 locker.Lock(); 897 swap = sSwapHashTable.Lookup(key); 898 continue; 899 } 900 901 swap->key.cache = this; 902 swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 903 swap->used = 0; 904 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 905 swap->swap_slots[i] = SWAP_SLOT_NONE; 906 907 sSwapHashTable.InsertUnchecked(swap); 908 } 909 910 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 911 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 912 swap->swap_slots[blockIndex++] = slotIndex + j; 913 left--; 914 } 915 916 swap->used += j; 917 } 918 } 919 920 921 void 922 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count) 923 { 924 WriteLocker locker(sSwapHashLock); 925 926 uint32 left = count; 927 for (uint32 i = 0, j = 0; i < count; i += j) { 928 off_t pageIndex = startPageIndex + i; 929 swap_hash_key key = { this, pageIndex }; 930 swap_block* swap = sSwapHashTable.Lookup(key); 931 932 ASSERT(swap != NULL); 933 934 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 935 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 936 swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE; 937 left--; 938 } 939 940 swap->used -= j; 941 if (swap->used == 0) { 942 sSwapHashTable.RemoveUnchecked(swap); 943 object_cache_free(sSwapBlockCache, swap, 944 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 945 } 946 } 947 } 948 949 950 swap_addr_t 951 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex) 952 { 953 ReadLocker locker(sSwapHashLock); 954 955 swap_hash_key key = { this, pageIndex }; 956 swap_block* swap = sSwapHashTable.Lookup(key); 957 swap_addr_t slotIndex = SWAP_SLOT_NONE; 958 959 if (swap != NULL) { 960 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 961 slotIndex = swap->swap_slots[blockIndex]; 962 } 963 964 return slotIndex; 965 } 966 967 968 status_t 969 VMAnonymousCache::_Commit(off_t size, int priority) 970 { 971 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: " 972 "%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size, 973 fCommittedSwapSize); 974 975 // Basic strategy: reserve swap space first, only when running out of swap 976 // space, reserve real memory. 977 978 off_t committedMemory = committed_size - fCommittedSwapSize; 979 980 // Regardless of whether we're asked to grow or shrink the commitment, 981 // we always try to reserve as much as possible of the final commitment 982 // in the swap space. 983 if (size > fCommittedSwapSize) { 984 fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize); 985 committed_size = fCommittedSwapSize + committedMemory; 986 if (size > fCommittedSwapSize) { 987 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved " 988 "only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize); 989 } 990 } 991 992 if (committed_size == size) 993 return B_OK; 994 995 if (committed_size > size) { 996 // The commitment shrinks -- unreserve real memory first. 997 off_t toUnreserve = committed_size - size; 998 if (committedMemory > 0) { 999 off_t unreserved = min_c(toUnreserve, committedMemory); 1000 vm_unreserve_memory(unreserved); 1001 committedMemory -= unreserved; 1002 committed_size -= unreserved; 1003 toUnreserve -= unreserved; 1004 } 1005 1006 // Unreserve swap space. 1007 if (toUnreserve > 0) { 1008 swap_space_unreserve(toUnreserve); 1009 fCommittedSwapSize -= toUnreserve; 1010 committed_size -= toUnreserve; 1011 } 1012 1013 return B_OK; 1014 } 1015 1016 // The commitment grows -- we have already tried to reserve swap space at 1017 // the start of the method, so we try to reserve real memory, now. 1018 1019 off_t toReserve = size - committed_size; 1020 if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) { 1021 dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to " 1022 "reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve); 1023 return B_NO_MEMORY; 1024 } 1025 1026 committed_size = size; 1027 return B_OK; 1028 } 1029 1030 1031 void 1032 VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source) 1033 { 1034 // The source cache has less pages than the consumer (this cache), so we 1035 // iterate through the source's pages and move the ones that are not 1036 // shadowed up to the consumer. 1037 1038 for (VMCachePagesTree::Iterator it = source->pages.GetIterator(); 1039 vm_page* page = it.Next();) { 1040 // Note: Removing the current node while iterating through a 1041 // IteratableSplayTree is safe. 1042 vm_page* consumerPage = LookupPage( 1043 (off_t)page->cache_offset << PAGE_SHIFT); 1044 if (consumerPage == NULL) { 1045 // the page is not yet in the consumer cache - move it upwards 1046 ASSERT_PRINT(!page->busy, "page: %p", page); 1047 MovePage(page); 1048 } 1049 } 1050 } 1051 1052 1053 void 1054 VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source) 1055 { 1056 // The consumer (this cache) has less pages than the source, so we move the 1057 // consumer's pages to the source (freeing shadowed ones) and finally just 1058 // all pages of the source back to the consumer. 1059 1060 for (VMCachePagesTree::Iterator it = pages.GetIterator(); 1061 vm_page* page = it.Next();) { 1062 // If a source page is in the way, remove and free it. 1063 vm_page* sourcePage = source->LookupPage( 1064 (off_t)page->cache_offset << PAGE_SHIFT); 1065 if (sourcePage != NULL) { 1066 DEBUG_PAGE_ACCESS_START(sourcePage); 1067 ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage); 1068 source->RemovePage(sourcePage); 1069 vm_page_free(source, sourcePage); 1070 } 1071 1072 // Note: Removing the current node while iterating through a 1073 // IteratableSplayTree is safe. 1074 source->MovePage(page); 1075 } 1076 1077 MoveAllPages(source); 1078 } 1079 1080 1081 void 1082 VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source) 1083 { 1084 // If neither source nor consumer have swap pages, we don't have to do 1085 // anything. 1086 if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0) 1087 return; 1088 1089 for (off_t offset = source->virtual_base 1090 & ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1); 1091 offset < source->virtual_end; 1092 offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) { 1093 1094 WriteLocker locker(sSwapHashLock); 1095 1096 off_t swapBlockPageIndex = offset >> PAGE_SHIFT; 1097 swap_hash_key key = { source, swapBlockPageIndex }; 1098 swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key); 1099 1100 // remove the source swap block -- we will either take over the swap 1101 // space (and the block) or free it 1102 if (sourceSwapBlock != NULL) 1103 sSwapHashTable.RemoveUnchecked(sourceSwapBlock); 1104 1105 key.cache = this; 1106 swap_block* swapBlock = sSwapHashTable.Lookup(key); 1107 1108 locker.Unlock(); 1109 1110 // remove all source pages that are shadowed by consumer swap pages 1111 if (swapBlock != NULL) { 1112 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1113 if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) { 1114 vm_page* page = source->LookupPage( 1115 (off_t)(swapBlockPageIndex + i) << PAGE_SHIFT); 1116 if (page != NULL) { 1117 DEBUG_PAGE_ACCESS_START(page); 1118 ASSERT_PRINT(!page->busy, "page: %p", page); 1119 source->RemovePage(page); 1120 vm_page_free(source, page); 1121 } 1122 } 1123 } 1124 } 1125 1126 if (sourceSwapBlock == NULL) 1127 continue; 1128 1129 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1130 off_t pageIndex = swapBlockPageIndex + i; 1131 swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i]; 1132 1133 if (sourceSlotIndex == SWAP_SLOT_NONE) 1134 continue; 1135 1136 if ((swapBlock != NULL 1137 && swapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1138 || LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) { 1139 // The consumer already has a page or a swapped out page 1140 // at this index. So we can free the source swap space. 1141 swap_slot_dealloc(sourceSlotIndex, 1); 1142 sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE; 1143 sourceSwapBlock->used--; 1144 } 1145 1146 // We've either freed the source swap page or are going to move it 1147 // to the consumer. At any rate, the source cache doesn't own it 1148 // anymore. 1149 source->fAllocatedSwapSize -= B_PAGE_SIZE; 1150 } 1151 1152 // All source swap pages that have not been freed yet are taken over by 1153 // the consumer. 1154 fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used; 1155 1156 if (sourceSwapBlock->used == 0) { 1157 // All swap pages have been freed -- we can discard the source swap 1158 // block. 1159 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1160 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1161 } else if (swapBlock == NULL) { 1162 // We need to take over some of the source's swap pages and there's 1163 // no swap block in the consumer cache. Just take over the source 1164 // swap block. 1165 sourceSwapBlock->key.cache = this; 1166 locker.Lock(); 1167 sSwapHashTable.InsertUnchecked(sourceSwapBlock); 1168 locker.Unlock(); 1169 } else { 1170 // We need to take over some of the source's swap pages and there's 1171 // already a swap block in the consumer cache. Copy the respective 1172 // swap addresses and discard the source swap block. 1173 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1174 if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1175 swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i]; 1176 } 1177 1178 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1179 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1180 } 1181 } 1182 } 1183 1184 1185 // #pragma mark - 1186 1187 1188 // TODO: This can be removed if we get BFS uuid's 1189 struct VolumeInfo { 1190 char name[B_FILE_NAME_LENGTH]; 1191 char device[B_FILE_NAME_LENGTH]; 1192 char filesystem[B_OS_NAME_LENGTH]; 1193 off_t capacity; 1194 }; 1195 1196 1197 class PartitionScorer : public KPartitionVisitor { 1198 public: 1199 PartitionScorer(VolumeInfo& volumeInfo) 1200 : 1201 fBestPartition(NULL), 1202 fBestScore(-1), 1203 fVolumeInfo(volumeInfo) 1204 { 1205 } 1206 1207 virtual bool VisitPre(KPartition* partition) 1208 { 1209 if (!partition->ContainsFileSystem()) 1210 return false; 1211 1212 KPath path; 1213 partition->GetPath(&path); 1214 1215 int score = 0; 1216 if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0) 1217 score += 4; 1218 if (strcmp(fVolumeInfo.device, path.Path()) == 0) 1219 score += 3; 1220 if (fVolumeInfo.capacity == partition->Size()) 1221 score += 2; 1222 if (strcmp(fVolumeInfo.filesystem, 1223 partition->DiskSystem()->ShortName()) == 0) { 1224 score += 1; 1225 } 1226 if (score >= 4 && score > fBestScore) { 1227 fBestPartition = partition; 1228 fBestScore = score; 1229 } 1230 1231 return false; 1232 } 1233 1234 KPartition* fBestPartition; 1235 1236 private: 1237 int32 fBestScore; 1238 VolumeInfo fVolumeInfo; 1239 }; 1240 1241 1242 status_t 1243 get_mount_point(KPartition* partition, KPath* mountPoint) 1244 { 1245 if (!mountPoint || !partition->ContainsFileSystem()) 1246 return B_BAD_VALUE; 1247 1248 const char* volumeName = partition->ContentName(); 1249 if (!volumeName || strlen(volumeName) == 0) 1250 volumeName = partition->Name(); 1251 if (!volumeName || strlen(volumeName) == 0) 1252 volumeName = "unnamed volume"; 1253 1254 char basePath[B_PATH_NAME_LENGTH]; 1255 int32 len = snprintf(basePath, sizeof(basePath), "/%s", volumeName); 1256 for (int32 i = 1; i < len; i++) 1257 if (basePath[i] == '/') 1258 basePath[i] = '-'; 1259 char* path = mountPoint->LockBuffer(); 1260 int32 pathLen = mountPoint->BufferSize(); 1261 strncpy(path, basePath, pathLen); 1262 1263 struct stat dummy; 1264 for (int i = 1; ; i++) { 1265 if (stat(path, &dummy) != 0) 1266 break; 1267 snprintf(path, pathLen, "%s%d", basePath, i); 1268 } 1269 1270 mountPoint->UnlockBuffer(); 1271 return B_OK; 1272 } 1273 1274 1275 status_t 1276 swap_file_add(const char* path) 1277 { 1278 // open the file 1279 int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR); 1280 if (fd < 0) 1281 return errno; 1282 1283 // fstat() it and check whether we can use it 1284 struct stat st; 1285 if (fstat(fd, &st) < 0) { 1286 close(fd); 1287 return errno; 1288 } 1289 1290 if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { 1291 close(fd); 1292 return B_BAD_VALUE; 1293 } 1294 1295 if (st.st_size < B_PAGE_SIZE) { 1296 close(fd); 1297 return B_BAD_VALUE; 1298 } 1299 1300 // get file descriptor, vnode, and cookie 1301 file_descriptor* descriptor = get_fd(get_current_io_context(true), fd); 1302 put_fd(descriptor); 1303 1304 vnode* node = fd_vnode(descriptor); 1305 if (node == NULL) { 1306 close(fd); 1307 return B_BAD_VALUE; 1308 } 1309 1310 // do the allocations and prepare the swap_file structure 1311 swap_file* swap = (swap_file*)malloc(sizeof(swap_file)); 1312 if (swap == NULL) { 1313 close(fd); 1314 return B_NO_MEMORY; 1315 } 1316 1317 swap->fd = fd; 1318 swap->vnode = node; 1319 swap->cookie = descriptor->cookie; 1320 1321 uint32 pageCount = st.st_size >> PAGE_SHIFT; 1322 swap->bmp = radix_bitmap_create(pageCount); 1323 if (swap->bmp == NULL) { 1324 free(swap); 1325 close(fd); 1326 return B_NO_MEMORY; 1327 } 1328 1329 // set slot index and add this file to swap file list 1330 mutex_lock(&sSwapFileListLock); 1331 // TODO: Also check whether the swap file is already registered! 1332 if (sSwapFileList.IsEmpty()) { 1333 swap->first_slot = 0; 1334 swap->last_slot = pageCount; 1335 } else { 1336 // leave one page gap between two swap files 1337 swap->first_slot = sSwapFileList.Last()->last_slot + 1; 1338 swap->last_slot = swap->first_slot + pageCount; 1339 } 1340 sSwapFileList.Add(swap); 1341 sSwapFileCount++; 1342 mutex_unlock(&sSwapFileListLock); 1343 1344 mutex_lock(&sAvailSwapSpaceLock); 1345 sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE; 1346 mutex_unlock(&sAvailSwapSpaceLock); 1347 1348 return B_OK; 1349 } 1350 1351 1352 status_t 1353 swap_file_delete(const char* path) 1354 { 1355 vnode* node = NULL; 1356 status_t status = vfs_get_vnode_from_path(path, true, &node); 1357 if (status != B_OK) 1358 return status; 1359 1360 MutexLocker locker(sSwapFileListLock); 1361 1362 swap_file* swapFile = NULL; 1363 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1364 (swapFile = it.Next()) != NULL;) { 1365 if (swapFile->vnode == node) 1366 break; 1367 } 1368 1369 vfs_put_vnode(node); 1370 1371 if (swapFile == NULL) 1372 return B_ERROR; 1373 1374 // if this file is currently used, we can't delete 1375 // TODO: mark this swap file deleting, and remove it after releasing 1376 // all the swap space 1377 if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot) 1378 return B_ERROR; 1379 1380 sSwapFileList.Remove(swapFile); 1381 sSwapFileCount--; 1382 locker.Unlock(); 1383 1384 mutex_lock(&sAvailSwapSpaceLock); 1385 sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot) 1386 * PAGE_SIZE; 1387 mutex_unlock(&sAvailSwapSpaceLock); 1388 1389 close(swapFile->fd); 1390 radix_bitmap_destroy(swapFile->bmp); 1391 free(swapFile); 1392 1393 return B_OK; 1394 } 1395 1396 1397 void 1398 swap_init(void) 1399 { 1400 // create swap block cache 1401 sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block), 1402 sizeof(void*), NULL, NULL, NULL); 1403 if (sSwapBlockCache == NULL) 1404 panic("swap_init(): can't create object cache for swap blocks\n"); 1405 1406 status_t error = object_cache_set_minimum_reserve(sSwapBlockCache, 1407 MIN_SWAP_BLOCK_RESERVE); 1408 if (error != B_OK) { 1409 panic("swap_init(): object_cache_set_minimum_reserve() failed: %s", 1410 strerror(error)); 1411 } 1412 1413 // init swap hash table 1414 sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE); 1415 rw_lock_init(&sSwapHashLock, "swaphash"); 1416 1417 error = register_resource_resizer(swap_hash_resizer, NULL, 1418 SWAP_HASH_RESIZE_INTERVAL); 1419 if (error != B_OK) { 1420 panic("swap_init(): Failed to register swap hash resizer: %s", 1421 strerror(error)); 1422 } 1423 1424 // init swap file list 1425 mutex_init(&sSwapFileListLock, "swaplist"); 1426 sSwapFileAlloc = NULL; 1427 sSwapFileCount = 0; 1428 1429 // init available swap space 1430 mutex_init(&sAvailSwapSpaceLock, "avail swap space"); 1431 sAvailSwapSpace = 0; 1432 1433 add_debugger_command_etc("swap", &dump_swap_info, 1434 "Print infos about the swap usage", 1435 "\n" 1436 "Print infos about the swap usage.\n", 0); 1437 } 1438 1439 1440 void 1441 swap_init_post_modules() 1442 { 1443 // Never try to create a swap file on a read-only device - when booting 1444 // from CD, the write overlay is used. 1445 if (gReadOnlyBootDevice) 1446 return; 1447 1448 bool swapEnabled = true; 1449 bool swapAutomatic = true; 1450 off_t swapSize = 0; 1451 1452 dev_t swapDeviceID = -1; 1453 VolumeInfo selectedVolume = {}; 1454 1455 void* settings = load_driver_settings("virtual_memory"); 1456 1457 if (settings != NULL) { 1458 // We pass a lot of information on the swap device, this is mostly to 1459 // ensure that we are dealing with the same device that was configured. 1460 1461 // TODO: Some kind of BFS uuid would be great here :) 1462 const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL); 1463 1464 if (enabled != NULL) { 1465 swapEnabled = get_driver_boolean_parameter(settings, "vm", 1466 true, false); 1467 swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto", 1468 true, false); 1469 1470 if (swapEnabled && !swapAutomatic) { 1471 const char* size = get_driver_parameter(settings, "swap_size", 1472 NULL, NULL); 1473 const char* volume = get_driver_parameter(settings, 1474 "swap_volume_name", NULL, NULL); 1475 const char* device = get_driver_parameter(settings, 1476 "swap_volume_device", NULL, NULL); 1477 const char* filesystem = get_driver_parameter(settings, 1478 "swap_volume_filesystem", NULL, NULL); 1479 const char* capacity = get_driver_parameter(settings, 1480 "swap_volume_capacity", NULL, NULL); 1481 1482 if (size != NULL && device != NULL && volume != NULL 1483 && filesystem != NULL && capacity != NULL) { 1484 // User specified a size / volume that seems valid 1485 swapAutomatic = false; 1486 swapSize = atoll(size); 1487 strlcpy(selectedVolume.name, volume, 1488 sizeof(selectedVolume.name)); 1489 strlcpy(selectedVolume.device, device, 1490 sizeof(selectedVolume.device)); 1491 strlcpy(selectedVolume.filesystem, filesystem, 1492 sizeof(selectedVolume.filesystem)); 1493 selectedVolume.capacity = atoll(capacity); 1494 } else { 1495 // Something isn't right with swap config, go auto 1496 swapAutomatic = true; 1497 dprintf("%s: virtual_memory configuration is invalid, " 1498 "using automatic swap\n", __func__); 1499 } 1500 } 1501 } 1502 unload_driver_settings(settings); 1503 } 1504 1505 if (swapAutomatic) { 1506 swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE; 1507 if (swapSize <= (1024 * 1024 * 1024)) { 1508 // Memory under 1GB? double the swap 1509 swapSize *= 2; 1510 } 1511 // Automatic swap defaults to the boot device 1512 swapDeviceID = gBootDevice; 1513 } 1514 1515 if (!swapEnabled || swapSize < B_PAGE_SIZE) { 1516 dprintf("%s: virtual_memory is disabled\n", __func__); 1517 return; 1518 } 1519 1520 if (!swapAutomatic && swapDeviceID < 0) { 1521 // If user-specified swap, and no swap device has been chosen yet... 1522 KDiskDeviceManager::CreateDefault(); 1523 KDiskDeviceManager* manager = KDiskDeviceManager::Default(); 1524 PartitionScorer visitor(selectedVolume); 1525 1526 KDiskDevice* device; 1527 int32 cookie = 0; 1528 while ((device = manager->NextDevice(&cookie)) != NULL) { 1529 if (device->IsReadOnlyMedia() || device->IsWriteOnce() 1530 || device->IsRemovable()) { 1531 continue; 1532 } 1533 device->VisitEachDescendant(&visitor); 1534 } 1535 1536 if (!visitor.fBestPartition) { 1537 dprintf("%s: Can't find configured swap partition '%s'\n", 1538 __func__, selectedVolume.name); 1539 } else { 1540 if (visitor.fBestPartition->IsMounted()) 1541 swapDeviceID = visitor.fBestPartition->VolumeID(); 1542 else { 1543 KPath devPath, mountPoint; 1544 visitor.fBestPartition->GetPath(&devPath); 1545 get_mount_point(visitor.fBestPartition, &mountPoint); 1546 const char* mountPath = mountPoint.Path(); 1547 mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO); 1548 swapDeviceID = _kern_mount(mountPath, devPath.Path(), 1549 NULL, 0, NULL, 0); 1550 if (swapDeviceID < 0) { 1551 dprintf("%s: Can't mount configured swap partition '%s'\n", 1552 __func__, selectedVolume.name); 1553 } 1554 } 1555 } 1556 } 1557 1558 if (swapDeviceID < 0) 1559 swapDeviceID = gBootDevice; 1560 1561 // We now have a swapDeviceID which is used for the swap file 1562 1563 KPath path; 1564 struct fs_info info; 1565 _kern_read_fs_info(swapDeviceID, &info); 1566 if (swapDeviceID == gBootDevice) 1567 path = kDefaultSwapPath; 1568 else { 1569 vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(), 1570 path.BufferSize()); 1571 path.UnlockBuffer(); 1572 path.Append("swap"); 1573 } 1574 1575 const char* swapPath = path.Path(); 1576 1577 // Swap size limits prevent oversized swap files 1578 if (swapAutomatic) { 1579 off_t existingSwapSize = 0; 1580 struct stat existingSwapStat; 1581 if (stat(swapPath, &existingSwapStat) == 0) 1582 existingSwapSize = existingSwapStat.st_size; 1583 1584 off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize; 1585 1586 // Adjust automatic swap to a maximum of 25% of the free space 1587 if (swapSize > (freeSpace / 4)) 1588 swapSize = (freeSpace / 4); 1589 } 1590 1591 // Create swap file 1592 int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR); 1593 if (fd < 0) { 1594 dprintf("%s: Can't open/create %s: %s\n", __func__, 1595 swapPath, strerror(errno)); 1596 return; 1597 } 1598 1599 struct stat stat; 1600 stat.st_size = swapSize; 1601 status_t error = _kern_write_stat(fd, NULL, false, &stat, 1602 sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE); 1603 if (error != B_OK) { 1604 dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n", 1605 __func__, swapPath, swapSize, strerror(error)); 1606 } 1607 1608 close(fd); 1609 1610 error = swap_file_add(swapPath); 1611 if (error != B_OK) { 1612 dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath, 1613 strerror(error)); 1614 } 1615 } 1616 1617 1618 //! Used by page daemon to free swap space. 1619 bool 1620 swap_free_page_swap_space(vm_page* page) 1621 { 1622 VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache()); 1623 if (cache == NULL) 1624 return false; 1625 1626 swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset); 1627 if (slotIndex == SWAP_SLOT_NONE) 1628 return false; 1629 1630 swap_slot_dealloc(slotIndex, 1); 1631 cache->fAllocatedSwapSize -= B_PAGE_SIZE; 1632 cache->_SwapBlockFree(page->cache_offset, 1); 1633 1634 return true; 1635 } 1636 1637 1638 uint32 1639 swap_available_pages() 1640 { 1641 mutex_lock(&sAvailSwapSpaceLock); 1642 uint32 avail = sAvailSwapSpace >> PAGE_SHIFT; 1643 mutex_unlock(&sAvailSwapSpaceLock); 1644 1645 return avail; 1646 } 1647 1648 1649 uint32 1650 swap_total_swap_pages() 1651 { 1652 mutex_lock(&sSwapFileListLock); 1653 1654 uint32 totalSwapSlots = 0; 1655 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1656 swap_file* swapFile = it.Next();) { 1657 totalSwapSlots += swapFile->last_slot - swapFile->first_slot; 1658 } 1659 1660 mutex_unlock(&sSwapFileListLock); 1661 1662 return totalSwapSlots; 1663 } 1664 1665 1666 #endif // ENABLE_SWAP_SUPPORT 1667 1668 1669 void 1670 swap_get_info(struct system_memory_info* info) 1671 { 1672 #if ENABLE_SWAP_SUPPORT 1673 info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE; 1674 info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE; 1675 #else 1676 info->max_swap_space = 0; 1677 info->free_swap_space = 0; 1678 #endif 1679 } 1680 1681