1 /* 2 * Copyright 2008, Zhao Shuai, upczhsh@163.com. 3 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 4 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 5 * Distributed under the terms of the MIT License. 6 * 7 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 8 * Distributed under the terms of the NewOS License. 9 * 10 * Copyright 2011-2012 Haiku, Inc. All rights reserved. 11 * Distributed under the terms of the MIT License. 12 * 13 * Authors: 14 * Hamish Morrison, hamish@lavabit.com 15 * Alexander von Gluck IV, kallisti5@unixzen.com 16 */ 17 18 19 #include "VMAnonymousCache.h" 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include <FindDirectory.h> 28 #include <KernelExport.h> 29 #include <NodeMonitor.h> 30 31 #include <arch_config.h> 32 #include <boot_device.h> 33 #include <disk_device_manager/KDiskDevice.h> 34 #include <disk_device_manager/KDiskDeviceManager.h> 35 #include <disk_device_manager/KDiskSystem.h> 36 #include <disk_device_manager/KPartitionVisitor.h> 37 #include <driver_settings.h> 38 #include <fs/fd.h> 39 #include <fs/KPath.h> 40 #include <fs_info.h> 41 #include <fs_interface.h> 42 #include <heap.h> 43 #include <kernel_daemon.h> 44 #include <slab/Slab.h> 45 #include <syscalls.h> 46 #include <system_info.h> 47 #include <tracing.h> 48 #include <util/AutoLock.h> 49 #include <util/DoublyLinkedList.h> 50 #include <util/OpenHashTable.h> 51 #include <util/RadixBitmap.h> 52 #include <vfs.h> 53 #include <vm/vm.h> 54 #include <vm/vm_page.h> 55 #include <vm/vm_priv.h> 56 #include <vm/VMAddressSpace.h> 57 58 #include "IORequest.h" 59 60 61 #if ENABLE_SWAP_SUPPORT 62 63 //#define TRACE_VM_ANONYMOUS_CACHE 64 #ifdef TRACE_VM_ANONYMOUS_CACHE 65 # define TRACE(x...) dprintf(x) 66 #else 67 # define TRACE(x...) do { } while (false) 68 #endif 69 70 71 // number of free swap blocks the object cache shall minimally have 72 #define MIN_SWAP_BLOCK_RESERVE 4096 73 74 // interval the has resizer is triggered (in 0.1s) 75 #define SWAP_HASH_RESIZE_INTERVAL 5 76 77 #define INITIAL_SWAP_HASH_SIZE 1024 78 79 #define SWAP_SLOT_NONE RADIX_SLOT_NONE 80 81 #define SWAP_BLOCK_PAGES 32 82 #define SWAP_BLOCK_SHIFT 5 /* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */ 83 #define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1) 84 85 86 static const char* const kDefaultSwapPath = "/var/swap"; 87 88 struct swap_file : DoublyLinkedListLinkImpl<swap_file> { 89 int fd; 90 struct vnode* vnode; 91 void* cookie; 92 swap_addr_t first_slot; 93 swap_addr_t last_slot; 94 radix_bitmap* bmp; 95 }; 96 97 struct swap_hash_key { 98 VMAnonymousCache *cache; 99 off_t page_index; // page index in the cache 100 }; 101 102 // Each swap block contains swap address information for 103 // SWAP_BLOCK_PAGES continuous pages from the same cache 104 struct swap_block { 105 swap_block* hash_link; 106 swap_hash_key key; 107 uint32 used; 108 swap_addr_t swap_slots[SWAP_BLOCK_PAGES]; 109 }; 110 111 struct SwapHashTableDefinition { 112 typedef swap_hash_key KeyType; 113 typedef swap_block ValueType; 114 115 SwapHashTableDefinition() {} 116 117 size_t HashKey(const swap_hash_key& key) const 118 { 119 off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT; 120 VMAnonymousCache* cache = key.cache; 121 return blockIndex ^ (size_t)(int*)cache; 122 } 123 124 size_t Hash(const swap_block* value) const 125 { 126 return HashKey(value->key); 127 } 128 129 bool Compare(const swap_hash_key& key, const swap_block* value) const 130 { 131 return (key.page_index & ~(off_t)SWAP_BLOCK_MASK) 132 == (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK) 133 && key.cache == value->key.cache; 134 } 135 136 swap_block*& GetLink(swap_block* value) const 137 { 138 return value->hash_link; 139 } 140 }; 141 142 typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable; 143 typedef DoublyLinkedList<swap_file> SwapFileList; 144 145 static SwapHashTable sSwapHashTable; 146 static rw_lock sSwapHashLock; 147 148 static SwapFileList sSwapFileList; 149 static mutex sSwapFileListLock; 150 static swap_file* sSwapFileAlloc = NULL; // allocate from here 151 static uint32 sSwapFileCount = 0; 152 153 static off_t sAvailSwapSpace = 0; 154 static mutex sAvailSwapSpaceLock; 155 156 static object_cache* sSwapBlockCache; 157 158 159 #if SWAP_TRACING 160 namespace SwapTracing { 161 162 class SwapTraceEntry : public AbstractTraceEntry { 163 public: 164 SwapTraceEntry(VMAnonymousCache* cache) 165 : 166 fCache(cache) 167 { 168 } 169 170 protected: 171 VMAnonymousCache* fCache; 172 }; 173 174 175 class ReadPage : public SwapTraceEntry { 176 public: 177 ReadPage(VMAnonymousCache* cache, page_num_t pageIndex, 178 swap_addr_t swapSlotIndex) 179 : 180 SwapTraceEntry(cache), 181 fPageIndex(pageIndex), 182 fSwapSlotIndex(swapSlotIndex) 183 { 184 Initialized(); 185 } 186 187 virtual void AddDump(TraceOutput& out) 188 { 189 out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu", 190 fCache, fPageIndex, fSwapSlotIndex); 191 } 192 193 private: 194 page_num_t fPageIndex; 195 swap_addr_t fSwapSlotIndex; 196 }; 197 198 199 class WritePage : public SwapTraceEntry { 200 public: 201 WritePage(VMAnonymousCache* cache, page_num_t pageIndex, 202 swap_addr_t swapSlotIndex) 203 : 204 SwapTraceEntry(cache), 205 fPageIndex(pageIndex), 206 fSwapSlotIndex(swapSlotIndex) 207 { 208 Initialized(); 209 } 210 211 virtual void AddDump(TraceOutput& out) 212 { 213 out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu", 214 fCache, fPageIndex, fSwapSlotIndex); 215 } 216 217 private: 218 page_num_t fPageIndex; 219 swap_addr_t fSwapSlotIndex; 220 }; 221 222 } // namespace SwapTracing 223 224 # define T(x) new(std::nothrow) SwapTracing::x; 225 #else 226 # define T(x) ; 227 #endif 228 229 230 static int 231 dump_swap_info(int argc, char** argv) 232 { 233 swap_addr_t totalSwapPages = 0; 234 swap_addr_t freeSwapPages = 0; 235 236 kprintf("swap files:\n"); 237 238 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 239 swap_file* file = it.Next();) { 240 swap_addr_t total = file->last_slot - file->first_slot; 241 kprintf(" vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32 242 "\n", file->vnode, total, file->bmp->free_slots); 243 244 totalSwapPages += total; 245 freeSwapPages += file->bmp->free_slots; 246 } 247 248 kprintf("\n"); 249 kprintf("swap space in pages:\n"); 250 kprintf("total: %9" B_PRIu32 "\n", totalSwapPages); 251 kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE); 252 kprintf("reserved: %9" B_PRIdOFF "\n", 253 totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE); 254 kprintf("used: %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages); 255 kprintf("free: %9" B_PRIu32 "\n", freeSwapPages); 256 257 return 0; 258 } 259 260 261 static swap_addr_t 262 swap_slot_alloc(uint32 count) 263 { 264 mutex_lock(&sSwapFileListLock); 265 266 if (sSwapFileList.IsEmpty()) { 267 mutex_unlock(&sSwapFileListLock); 268 panic("swap_slot_alloc(): no swap file in the system\n"); 269 return SWAP_SLOT_NONE; 270 } 271 272 // since radix bitmap could not handle more than 32 pages, we return 273 // SWAP_SLOT_NONE, this forces Write() adjust allocation amount 274 if (count > BITMAP_RADIX) { 275 mutex_unlock(&sSwapFileListLock); 276 return SWAP_SLOT_NONE; 277 } 278 279 swap_addr_t j, addr = SWAP_SLOT_NONE; 280 for (j = 0; j < sSwapFileCount; j++) { 281 if (sSwapFileAlloc == NULL) 282 sSwapFileAlloc = sSwapFileList.First(); 283 284 addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count); 285 if (addr != SWAP_SLOT_NONE) { 286 addr += sSwapFileAlloc->first_slot; 287 break; 288 } 289 290 // this swap_file is full, find another 291 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 292 } 293 294 if (j == sSwapFileCount) { 295 mutex_unlock(&sSwapFileListLock); 296 panic("swap_slot_alloc: swap space exhausted!\n"); 297 return SWAP_SLOT_NONE; 298 } 299 300 // if this swap file has used more than 90% percent of its space 301 // switch to another 302 if (sSwapFileAlloc->bmp->free_slots 303 < (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) { 304 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 305 } 306 307 mutex_unlock(&sSwapFileListLock); 308 309 return addr; 310 } 311 312 313 static swap_file* 314 find_swap_file(swap_addr_t slotIndex) 315 { 316 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 317 swap_file* swapFile = it.Next();) { 318 if (slotIndex >= swapFile->first_slot 319 && slotIndex < swapFile->last_slot) { 320 return swapFile; 321 } 322 } 323 324 panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n", 325 slotIndex); 326 return NULL; 327 } 328 329 330 static void 331 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count) 332 { 333 if (slotIndex == SWAP_SLOT_NONE) 334 return; 335 336 mutex_lock(&sSwapFileListLock); 337 swap_file* swapFile = find_swap_file(slotIndex); 338 slotIndex -= swapFile->first_slot; 339 radix_bitmap_dealloc(swapFile->bmp, slotIndex, count); 340 mutex_unlock(&sSwapFileListLock); 341 } 342 343 344 static off_t 345 swap_space_reserve(off_t amount) 346 { 347 mutex_lock(&sAvailSwapSpaceLock); 348 if (sAvailSwapSpace >= amount) 349 sAvailSwapSpace -= amount; 350 else { 351 amount = sAvailSwapSpace; 352 sAvailSwapSpace = 0; 353 } 354 mutex_unlock(&sAvailSwapSpaceLock); 355 356 return amount; 357 } 358 359 360 static void 361 swap_space_unreserve(off_t amount) 362 { 363 mutex_lock(&sAvailSwapSpaceLock); 364 sAvailSwapSpace += amount; 365 mutex_unlock(&sAvailSwapSpaceLock); 366 } 367 368 369 static void 370 swap_hash_resizer(void*, int) 371 { 372 WriteLocker locker(sSwapHashLock); 373 374 size_t size; 375 void* allocation; 376 377 do { 378 size = sSwapHashTable.ResizeNeeded(); 379 if (size == 0) 380 return; 381 382 locker.Unlock(); 383 384 allocation = malloc(size); 385 if (allocation == NULL) 386 return; 387 388 locker.Lock(); 389 390 } while (!sSwapHashTable.Resize(allocation, size)); 391 } 392 393 394 // #pragma mark - 395 396 397 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback { 398 public: 399 WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback) 400 : 401 StackableAsyncIOCallback(callback), 402 fCache(cache) 403 { 404 } 405 406 void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot) 407 { 408 fPageIndex = pageIndex; 409 fSlotIndex = slotIndex; 410 fNewSlot = newSlot; 411 } 412 413 virtual void IOFinished(status_t status, bool partialTransfer, 414 generic_size_t bytesTransferred) 415 { 416 if (fNewSlot) { 417 if (status == B_OK) { 418 fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1); 419 } else { 420 AutoLocker<VMCache> locker(fCache); 421 fCache->fAllocatedSwapSize -= B_PAGE_SIZE; 422 locker.Unlock(); 423 424 swap_slot_dealloc(fSlotIndex, 1); 425 } 426 } 427 428 fNextCallback->IOFinished(status, partialTransfer, bytesTransferred); 429 430 delete this; 431 } 432 433 private: 434 VMAnonymousCache* fCache; 435 page_num_t fPageIndex; 436 swap_addr_t fSlotIndex; 437 bool fNewSlot; 438 }; 439 440 441 // #pragma mark - 442 443 444 VMAnonymousCache::~VMAnonymousCache() 445 { 446 // free allocated swap space and swap block 447 for (off_t offset = virtual_base, toFree = fAllocatedSwapSize; 448 offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) { 449 swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT); 450 if (slotIndex == SWAP_SLOT_NONE) 451 continue; 452 453 swap_slot_dealloc(slotIndex, 1); 454 _SwapBlockFree(offset >> PAGE_SHIFT, 1); 455 toFree -= B_PAGE_SIZE; 456 } 457 458 swap_space_unreserve(fCommittedSwapSize); 459 if (committed_size > fCommittedSwapSize) 460 vm_unreserve_memory(committed_size - fCommittedSwapSize); 461 } 462 463 464 status_t 465 VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages, 466 int32 numGuardPages, uint32 allocationFlags) 467 { 468 TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, " 469 "numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32 470 ")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages, 471 numGuardPages); 472 473 status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags); 474 if (error != B_OK) 475 return error; 476 477 fCanOvercommit = canOvercommit; 478 fHasPrecommitted = false; 479 fPrecommittedPages = min_c(numPrecommittedPages, 255); 480 fGuardedSize = numGuardPages * B_PAGE_SIZE; 481 fCommittedSwapSize = 0; 482 fAllocatedSwapSize = 0; 483 484 return B_OK; 485 } 486 487 488 status_t 489 VMAnonymousCache::Resize(off_t newSize, int priority) 490 { 491 // If the cache size shrinks, drop all swap pages beyond the new size. 492 if (fAllocatedSwapSize > 0) { 493 off_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 494 swap_block* swapBlock = NULL; 495 496 for (off_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 497 pageIndex < oldPageCount && fAllocatedSwapSize > 0; pageIndex++) { 498 499 WriteLocker locker(sSwapHashLock); 500 501 // Get the swap slot index for the page. 502 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 503 if (swapBlock == NULL || blockIndex == 0) { 504 swap_hash_key key = { this, pageIndex }; 505 swapBlock = sSwapHashTable.Lookup(key); 506 507 if (swapBlock == NULL) { 508 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES); 509 continue; 510 } 511 } 512 513 swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex]; 514 vm_page* page; 515 if (slotIndex != SWAP_SLOT_NONE 516 && ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL 517 || !page->busy)) { 518 // TODO: We skip (i.e. leak) swap space of busy pages, since 519 // there could be I/O going on (paging in/out). Waiting is 520 // not an option as 1. unlocking the cache means that new 521 // swap pages could be added in a range we've already 522 // cleared (since the cache still has the old size) and 2. 523 // we'd risk a deadlock in case we come from the file cache 524 // and the FS holds the node's write-lock. We should mark 525 // the page invalid and let the one responsible clean up. 526 // There's just no such mechanism yet. 527 swap_slot_dealloc(slotIndex, 1); 528 fAllocatedSwapSize -= B_PAGE_SIZE; 529 530 swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE; 531 if (--swapBlock->used == 0) { 532 // All swap pages have been freed -- we can discard the swap 533 // block. 534 sSwapHashTable.RemoveUnchecked(swapBlock); 535 object_cache_free(sSwapBlockCache, swapBlock, 536 CACHE_DONT_WAIT_FOR_MEMORY 537 | CACHE_DONT_LOCK_KERNEL_SPACE); 538 } 539 } 540 } 541 } 542 543 return VMCache::Resize(newSize, priority); 544 } 545 546 547 status_t 548 VMAnonymousCache::Commit(off_t size, int priority) 549 { 550 TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size); 551 552 // If we can overcommit, we don't commit here, but in Fault(). We always 553 // unreserve memory, if we're asked to shrink our commitment, though. 554 if (fCanOvercommit && size > committed_size) { 555 if (fHasPrecommitted) 556 return B_OK; 557 558 // pre-commit some pages to make a later failure less probable 559 fHasPrecommitted = true; 560 uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE; 561 if (size > precommitted) 562 size = precommitted; 563 } 564 565 return _Commit(size, priority); 566 } 567 568 569 bool 570 VMAnonymousCache::HasPage(off_t offset) 571 { 572 if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE) 573 return true; 574 575 return false; 576 } 577 578 579 bool 580 VMAnonymousCache::DebugHasPage(off_t offset) 581 { 582 off_t pageIndex = offset >> PAGE_SHIFT; 583 swap_hash_key key = { this, pageIndex }; 584 swap_block* swap = sSwapHashTable.Lookup(key); 585 if (swap == NULL) 586 return false; 587 588 return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE; 589 } 590 591 592 status_t 593 VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count, 594 uint32 flags, generic_size_t* _numBytes) 595 { 596 off_t pageIndex = offset >> PAGE_SHIFT; 597 598 for (uint32 i = 0, j = 0; i < count; i = j) { 599 swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i); 600 for (j = i + 1; j < count; j++) { 601 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j); 602 if (slotIndex != startSlotIndex + j - i) 603 break; 604 } 605 606 T(ReadPage(this, pageIndex, startSlotIndex)); 607 // TODO: Assumes that only one page is read. 608 609 swap_file* swapFile = find_swap_file(startSlotIndex); 610 611 off_t pos = (off_t)(startSlotIndex - swapFile->first_slot) 612 * B_PAGE_SIZE; 613 614 status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos, 615 vecs + i, j - i, flags, _numBytes); 616 if (status != B_OK) 617 return status; 618 } 619 620 return B_OK; 621 } 622 623 624 status_t 625 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count, 626 uint32 flags, generic_size_t* _numBytes) 627 { 628 off_t pageIndex = offset >> PAGE_SHIFT; 629 630 AutoLocker<VMCache> locker(this); 631 632 page_num_t totalPages = 0; 633 for (uint32 i = 0; i < count; i++) { 634 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 635 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages); 636 if (slotIndex != SWAP_SLOT_NONE) { 637 swap_slot_dealloc(slotIndex, pageCount); 638 _SwapBlockFree(pageIndex + totalPages, pageCount); 639 fAllocatedSwapSize -= pageCount * B_PAGE_SIZE; 640 } 641 642 totalPages += pageCount; 643 } 644 645 off_t totalSize = totalPages * B_PAGE_SIZE; 646 if (fAllocatedSwapSize + totalSize > fCommittedSwapSize) 647 return B_ERROR; 648 649 fAllocatedSwapSize += totalSize; 650 locker.Unlock(); 651 652 page_num_t pagesLeft = totalPages; 653 totalPages = 0; 654 655 for (uint32 i = 0; i < count; i++) { 656 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 657 658 generic_addr_t vectorBase = vecs[i].base; 659 generic_size_t vectorLength = vecs[i].length; 660 page_num_t n = pageCount; 661 662 for (page_num_t j = 0; j < pageCount; j += n) { 663 swap_addr_t slotIndex; 664 // try to allocate n slots, if fail, try to allocate n/2 665 while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2) 666 n >>= 1; 667 668 if (slotIndex == SWAP_SLOT_NONE) 669 panic("VMAnonymousCache::Write(): can't allocate swap space\n"); 670 671 T(WritePage(this, pageIndex, slotIndex)); 672 // TODO: Assumes that only one page is written. 673 674 swap_file* swapFile = find_swap_file(slotIndex); 675 676 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 677 678 generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE; 679 generic_io_vec vector[1]; 680 vector->base = vectorBase; 681 vector->length = length; 682 683 status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie, 684 pos, vector, 1, flags, &length); 685 if (status != B_OK) { 686 locker.Lock(); 687 fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE; 688 locker.Unlock(); 689 690 swap_slot_dealloc(slotIndex, n); 691 return status; 692 } 693 694 _SwapBlockBuild(pageIndex + totalPages, slotIndex, n); 695 pagesLeft -= n; 696 697 if (n != pageCount) { 698 vectorBase = vectorBase + n * B_PAGE_SIZE; 699 vectorLength -= n * B_PAGE_SIZE; 700 } 701 } 702 703 totalPages += pageCount; 704 } 705 706 ASSERT(pagesLeft == 0); 707 return B_OK; 708 } 709 710 711 status_t 712 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs, 713 size_t count, generic_size_t numBytes, uint32 flags, 714 AsyncIOCallback* _callback) 715 { 716 // TODO: Currently this method is only used for single pages. Either make 717 // more flexible use of it or change the interface! 718 // This implementation relies on the current usage! 719 ASSERT(count == 1); 720 ASSERT(numBytes <= B_PAGE_SIZE); 721 722 page_num_t pageIndex = offset >> PAGE_SHIFT; 723 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex); 724 bool newSlot = slotIndex == SWAP_SLOT_NONE; 725 726 // If the page doesn't have any swap space yet, allocate it. 727 if (newSlot) { 728 AutoLocker<VMCache> locker(this); 729 if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) { 730 _callback->IOFinished(B_ERROR, true, 0); 731 return B_ERROR; 732 } 733 734 fAllocatedSwapSize += B_PAGE_SIZE; 735 736 slotIndex = swap_slot_alloc(1); 737 } 738 739 // create our callback 740 WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0 741 ? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback) 742 : new(std::nothrow) WriteCallback(this, _callback); 743 if (callback == NULL) { 744 if (newSlot) { 745 AutoLocker<VMCache> locker(this); 746 fAllocatedSwapSize -= B_PAGE_SIZE; 747 locker.Unlock(); 748 749 swap_slot_dealloc(slotIndex, 1); 750 } 751 _callback->IOFinished(B_NO_MEMORY, true, 0); 752 return B_NO_MEMORY; 753 } 754 // TODO: If the page already had swap space assigned, we don't need an own 755 // callback. 756 757 callback->SetTo(pageIndex, slotIndex, newSlot); 758 759 T(WritePage(this, pageIndex, slotIndex)); 760 761 // write the page asynchrounously 762 swap_file* swapFile = find_swap_file(slotIndex); 763 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 764 765 return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos, 766 vecs, 1, numBytes, flags, callback); 767 } 768 769 770 bool 771 VMAnonymousCache::CanWritePage(off_t offset) 772 { 773 // We can write the page, if we have not used all of our committed swap 774 // space or the page already has a swap slot assigned. 775 return fAllocatedSwapSize < fCommittedSwapSize 776 || _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE; 777 } 778 779 780 int32 781 VMAnonymousCache::MaxPagesPerAsyncWrite() const 782 { 783 return 1; 784 } 785 786 787 status_t 788 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset) 789 { 790 if (fGuardedSize > 0) { 791 uint32 guardOffset; 792 793 #ifdef STACK_GROWS_DOWNWARDS 794 guardOffset = 0; 795 #elif defined(STACK_GROWS_UPWARDS) 796 guardOffset = virtual_size - fGuardedSize; 797 #else 798 # error Stack direction has not been defined in arch_config.h 799 #endif 800 // report stack fault, guard page hit! 801 if (offset >= guardOffset && offset < guardOffset + fGuardedSize) { 802 TRACE(("stack overflow!\n")); 803 return B_BAD_ADDRESS; 804 } 805 } 806 807 if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) { 808 if (fPrecommittedPages == 0) { 809 // never commit more than needed 810 if (committed_size / B_PAGE_SIZE > page_count) 811 return B_BAD_HANDLER; 812 813 // try to commit additional swap space/memory 814 if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) { 815 fCommittedSwapSize += B_PAGE_SIZE; 816 } else { 817 int priority = aspace == VMAddressSpace::Kernel() 818 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 819 if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) { 820 dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve " 821 "%d bytes of RAM.\n", this, (int)B_PAGE_SIZE); 822 return B_NO_MEMORY; 823 } 824 } 825 826 committed_size += B_PAGE_SIZE; 827 } else 828 fPrecommittedPages--; 829 } 830 831 // This will cause vm_soft_fault() to handle the fault 832 return B_BAD_HANDLER; 833 } 834 835 836 void 837 VMAnonymousCache::Merge(VMCache* _source) 838 { 839 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 840 if (source == NULL) { 841 panic("VMAnonymousCache::MergeStore(): merge with incompatible cache " 842 "%p requested", _source); 843 return; 844 } 845 846 // take over the source' committed size 847 fCommittedSwapSize += source->fCommittedSwapSize; 848 source->fCommittedSwapSize = 0; 849 committed_size += source->committed_size; 850 source->committed_size = 0; 851 852 off_t actualSize = virtual_end - virtual_base; 853 if (committed_size > actualSize) 854 _Commit(actualSize, VM_PRIORITY_USER); 855 856 // Move all not shadowed swap pages from the source to the consumer cache. 857 // Also remove all source pages that are shadowed by consumer swap pages. 858 _MergeSwapPages(source); 859 860 // Move all not shadowed pages from the source to the consumer cache. 861 if (source->page_count < page_count) 862 _MergePagesSmallerSource(source); 863 else 864 _MergePagesSmallerConsumer(source); 865 } 866 867 868 void 869 VMAnonymousCache::DeleteObject() 870 { 871 object_cache_delete(gAnonymousCacheObjectCache, this); 872 } 873 874 875 void 876 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex, 877 swap_addr_t startSlotIndex, uint32 count) 878 { 879 WriteLocker locker(sSwapHashLock); 880 881 uint32 left = count; 882 for (uint32 i = 0, j = 0; i < count; i += j) { 883 off_t pageIndex = startPageIndex + i; 884 swap_addr_t slotIndex = startSlotIndex + i; 885 886 swap_hash_key key = { this, pageIndex }; 887 888 swap_block* swap = sSwapHashTable.Lookup(key); 889 while (swap == NULL) { 890 swap = (swap_block*)object_cache_alloc(sSwapBlockCache, 891 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 892 if (swap == NULL) { 893 // Wait a short time until memory is available again. 894 locker.Unlock(); 895 snooze(10000); 896 locker.Lock(); 897 swap = sSwapHashTable.Lookup(key); 898 continue; 899 } 900 901 swap->key.cache = this; 902 swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 903 swap->used = 0; 904 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 905 swap->swap_slots[i] = SWAP_SLOT_NONE; 906 907 sSwapHashTable.InsertUnchecked(swap); 908 } 909 910 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 911 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 912 swap->swap_slots[blockIndex++] = slotIndex + j; 913 left--; 914 } 915 916 swap->used += j; 917 } 918 } 919 920 921 void 922 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count) 923 { 924 WriteLocker locker(sSwapHashLock); 925 926 uint32 left = count; 927 for (uint32 i = 0, j = 0; i < count; i += j) { 928 off_t pageIndex = startPageIndex + i; 929 swap_hash_key key = { this, pageIndex }; 930 swap_block* swap = sSwapHashTable.Lookup(key); 931 932 ASSERT(swap != NULL); 933 934 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 935 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 936 swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE; 937 left--; 938 } 939 940 swap->used -= j; 941 if (swap->used == 0) { 942 sSwapHashTable.RemoveUnchecked(swap); 943 object_cache_free(sSwapBlockCache, swap, 944 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 945 } 946 } 947 } 948 949 950 swap_addr_t 951 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex) 952 { 953 ReadLocker locker(sSwapHashLock); 954 955 swap_hash_key key = { this, pageIndex }; 956 swap_block* swap = sSwapHashTable.Lookup(key); 957 swap_addr_t slotIndex = SWAP_SLOT_NONE; 958 959 if (swap != NULL) { 960 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 961 slotIndex = swap->swap_slots[blockIndex]; 962 } 963 964 return slotIndex; 965 } 966 967 968 status_t 969 VMAnonymousCache::_Commit(off_t size, int priority) 970 { 971 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: " 972 "%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size, 973 fCommittedSwapSize); 974 975 // Basic strategy: reserve swap space first, only when running out of swap 976 // space, reserve real memory. 977 978 off_t committedMemory = committed_size - fCommittedSwapSize; 979 980 // Regardless of whether we're asked to grow or shrink the commitment, 981 // we always try to reserve as much as possible of the final commitment 982 // in the swap space. 983 if (size > fCommittedSwapSize) { 984 fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize); 985 committed_size = fCommittedSwapSize + committedMemory; 986 if (size > fCommittedSwapSize) { 987 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved " 988 "only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize); 989 } 990 } 991 992 if (committed_size == size) 993 return B_OK; 994 995 if (committed_size > size) { 996 // The commitment shrinks -- unreserve real memory first. 997 off_t toUnreserve = committed_size - size; 998 if (committedMemory > 0) { 999 off_t unreserved = min_c(toUnreserve, committedMemory); 1000 vm_unreserve_memory(unreserved); 1001 committedMemory -= unreserved; 1002 committed_size -= unreserved; 1003 toUnreserve -= unreserved; 1004 } 1005 1006 // Unreserve swap space. 1007 if (toUnreserve > 0) { 1008 swap_space_unreserve(toUnreserve); 1009 fCommittedSwapSize -= toUnreserve; 1010 committed_size -= toUnreserve; 1011 } 1012 1013 return B_OK; 1014 } 1015 1016 // The commitment grows -- we have already tried to reserve swap space at 1017 // the start of the method, so we try to reserve real memory, now. 1018 1019 off_t toReserve = size - committed_size; 1020 if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) { 1021 dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to " 1022 "reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve); 1023 return B_NO_MEMORY; 1024 } 1025 1026 committed_size = size; 1027 return B_OK; 1028 } 1029 1030 1031 void 1032 VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source) 1033 { 1034 // The source cache has less pages than the consumer (this cache), so we 1035 // iterate through the source's pages and move the ones that are not 1036 // shadowed up to the consumer. 1037 1038 for (VMCachePagesTree::Iterator it = source->pages.GetIterator(); 1039 vm_page* page = it.Next();) { 1040 // Note: Removing the current node while iterating through a 1041 // IteratableSplayTree is safe. 1042 vm_page* consumerPage = LookupPage( 1043 (off_t)page->cache_offset << PAGE_SHIFT); 1044 if (consumerPage == NULL) { 1045 // the page is not yet in the consumer cache - move it upwards 1046 ASSERT_PRINT(!page->busy, "page: %p", page); 1047 MovePage(page); 1048 } 1049 } 1050 } 1051 1052 1053 void 1054 VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source) 1055 { 1056 // The consumer (this cache) has less pages than the source, so we move the 1057 // consumer's pages to the source (freeing shadowed ones) and finally just 1058 // all pages of the source back to the consumer. 1059 1060 for (VMCachePagesTree::Iterator it = pages.GetIterator(); 1061 vm_page* page = it.Next();) { 1062 // If a source page is in the way, remove and free it. 1063 vm_page* sourcePage = source->LookupPage( 1064 (off_t)page->cache_offset << PAGE_SHIFT); 1065 if (sourcePage != NULL) { 1066 DEBUG_PAGE_ACCESS_START(sourcePage); 1067 ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage); 1068 ASSERT_PRINT(sourcePage->WiredCount() == 0 1069 && sourcePage->mappings.IsEmpty(), 1070 "sourcePage: %p, page: %p", sourcePage, page); 1071 source->RemovePage(sourcePage); 1072 vm_page_free(source, sourcePage); 1073 } 1074 1075 // Note: Removing the current node while iterating through a 1076 // IteratableSplayTree is safe. 1077 source->MovePage(page); 1078 } 1079 1080 MoveAllPages(source); 1081 } 1082 1083 1084 void 1085 VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source) 1086 { 1087 // If neither source nor consumer have swap pages, we don't have to do 1088 // anything. 1089 if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0) 1090 return; 1091 1092 for (off_t offset = source->virtual_base 1093 & ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1); 1094 offset < source->virtual_end; 1095 offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) { 1096 1097 WriteLocker locker(sSwapHashLock); 1098 1099 off_t swapBlockPageIndex = offset >> PAGE_SHIFT; 1100 swap_hash_key key = { source, swapBlockPageIndex }; 1101 swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key); 1102 1103 // remove the source swap block -- we will either take over the swap 1104 // space (and the block) or free it 1105 if (sourceSwapBlock != NULL) 1106 sSwapHashTable.RemoveUnchecked(sourceSwapBlock); 1107 1108 key.cache = this; 1109 swap_block* swapBlock = sSwapHashTable.Lookup(key); 1110 1111 locker.Unlock(); 1112 1113 // remove all source pages that are shadowed by consumer swap pages 1114 if (swapBlock != NULL) { 1115 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1116 if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) { 1117 vm_page* page = source->LookupPage( 1118 (off_t)(swapBlockPageIndex + i) << PAGE_SHIFT); 1119 if (page != NULL) { 1120 DEBUG_PAGE_ACCESS_START(page); 1121 ASSERT_PRINT(!page->busy, "page: %p", page); 1122 source->RemovePage(page); 1123 vm_page_free(source, page); 1124 } 1125 } 1126 } 1127 } 1128 1129 if (sourceSwapBlock == NULL) 1130 continue; 1131 1132 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1133 off_t pageIndex = swapBlockPageIndex + i; 1134 swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i]; 1135 1136 if (sourceSlotIndex == SWAP_SLOT_NONE) 1137 continue; 1138 1139 if ((swapBlock != NULL 1140 && swapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1141 || LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) { 1142 // The consumer already has a page or a swapped out page 1143 // at this index. So we can free the source swap space. 1144 swap_slot_dealloc(sourceSlotIndex, 1); 1145 sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE; 1146 sourceSwapBlock->used--; 1147 } 1148 1149 // We've either freed the source swap page or are going to move it 1150 // to the consumer. At any rate, the source cache doesn't own it 1151 // anymore. 1152 source->fAllocatedSwapSize -= B_PAGE_SIZE; 1153 } 1154 1155 // All source swap pages that have not been freed yet are taken over by 1156 // the consumer. 1157 fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used; 1158 1159 if (sourceSwapBlock->used == 0) { 1160 // All swap pages have been freed -- we can discard the source swap 1161 // block. 1162 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1163 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1164 } else if (swapBlock == NULL) { 1165 // We need to take over some of the source's swap pages and there's 1166 // no swap block in the consumer cache. Just take over the source 1167 // swap block. 1168 sourceSwapBlock->key.cache = this; 1169 locker.Lock(); 1170 sSwapHashTable.InsertUnchecked(sourceSwapBlock); 1171 locker.Unlock(); 1172 } else { 1173 // We need to take over some of the source's swap pages and there's 1174 // already a swap block in the consumer cache. Copy the respective 1175 // swap addresses and discard the source swap block. 1176 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1177 if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1178 swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i]; 1179 } 1180 1181 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1182 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1183 } 1184 } 1185 } 1186 1187 1188 // #pragma mark - 1189 1190 1191 // TODO: This can be removed if we get BFS uuid's 1192 struct VolumeInfo { 1193 char name[B_FILE_NAME_LENGTH]; 1194 char device[B_FILE_NAME_LENGTH]; 1195 char filesystem[B_OS_NAME_LENGTH]; 1196 off_t capacity; 1197 }; 1198 1199 1200 class PartitionScorer : public KPartitionVisitor { 1201 public: 1202 PartitionScorer(VolumeInfo& volumeInfo) 1203 : 1204 fBestPartition(NULL), 1205 fBestScore(-1), 1206 fVolumeInfo(volumeInfo) 1207 { 1208 } 1209 1210 virtual bool VisitPre(KPartition* partition) 1211 { 1212 if (!partition->ContainsFileSystem()) 1213 return false; 1214 1215 KPath path; 1216 partition->GetPath(&path); 1217 1218 int score = 0; 1219 if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0) 1220 score += 4; 1221 if (strcmp(fVolumeInfo.device, path.Path()) == 0) 1222 score += 3; 1223 if (fVolumeInfo.capacity == partition->Size()) 1224 score += 2; 1225 if (strcmp(fVolumeInfo.filesystem, 1226 partition->DiskSystem()->ShortName()) == 0) { 1227 score += 1; 1228 } 1229 if (score >= 4 && score > fBestScore) { 1230 fBestPartition = partition; 1231 fBestScore = score; 1232 } 1233 1234 return false; 1235 } 1236 1237 KPartition* fBestPartition; 1238 1239 private: 1240 int32 fBestScore; 1241 VolumeInfo fVolumeInfo; 1242 }; 1243 1244 1245 status_t 1246 get_mount_point(KPartition* partition, KPath* mountPoint) 1247 { 1248 if (!mountPoint || !partition->ContainsFileSystem()) 1249 return B_BAD_VALUE; 1250 1251 const char* volumeName = partition->ContentName(); 1252 if (!volumeName || strlen(volumeName) == 0) 1253 volumeName = partition->Name(); 1254 if (!volumeName || strlen(volumeName) == 0) 1255 volumeName = "unnamed volume"; 1256 1257 char basePath[B_PATH_NAME_LENGTH]; 1258 int32 len = snprintf(basePath, sizeof(basePath), "/%s", volumeName); 1259 for (int32 i = 1; i < len; i++) 1260 if (basePath[i] == '/') 1261 basePath[i] = '-'; 1262 char* path = mountPoint->LockBuffer(); 1263 int32 pathLen = mountPoint->BufferSize(); 1264 strncpy(path, basePath, pathLen); 1265 1266 struct stat dummy; 1267 for (int i = 1; ; i++) { 1268 if (stat(path, &dummy) != 0) 1269 break; 1270 snprintf(path, pathLen, "%s%d", basePath, i); 1271 } 1272 1273 mountPoint->UnlockBuffer(); 1274 return B_OK; 1275 } 1276 1277 1278 status_t 1279 swap_file_add(const char* path) 1280 { 1281 // open the file 1282 int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR); 1283 if (fd < 0) 1284 return errno; 1285 1286 // fstat() it and check whether we can use it 1287 struct stat st; 1288 if (fstat(fd, &st) < 0) { 1289 close(fd); 1290 return errno; 1291 } 1292 1293 if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { 1294 close(fd); 1295 return B_BAD_VALUE; 1296 } 1297 1298 if (st.st_size < B_PAGE_SIZE) { 1299 close(fd); 1300 return B_BAD_VALUE; 1301 } 1302 1303 // get file descriptor, vnode, and cookie 1304 file_descriptor* descriptor = get_fd(get_current_io_context(true), fd); 1305 put_fd(descriptor); 1306 1307 vnode* node = fd_vnode(descriptor); 1308 if (node == NULL) { 1309 close(fd); 1310 return B_BAD_VALUE; 1311 } 1312 1313 // do the allocations and prepare the swap_file structure 1314 swap_file* swap = (swap_file*)malloc(sizeof(swap_file)); 1315 if (swap == NULL) { 1316 close(fd); 1317 return B_NO_MEMORY; 1318 } 1319 1320 swap->fd = fd; 1321 swap->vnode = node; 1322 swap->cookie = descriptor->cookie; 1323 1324 uint32 pageCount = st.st_size >> PAGE_SHIFT; 1325 swap->bmp = radix_bitmap_create(pageCount); 1326 if (swap->bmp == NULL) { 1327 free(swap); 1328 close(fd); 1329 return B_NO_MEMORY; 1330 } 1331 1332 // set slot index and add this file to swap file list 1333 mutex_lock(&sSwapFileListLock); 1334 // TODO: Also check whether the swap file is already registered! 1335 if (sSwapFileList.IsEmpty()) { 1336 swap->first_slot = 0; 1337 swap->last_slot = pageCount; 1338 } else { 1339 // leave one page gap between two swap files 1340 swap->first_slot = sSwapFileList.Last()->last_slot + 1; 1341 swap->last_slot = swap->first_slot + pageCount; 1342 } 1343 sSwapFileList.Add(swap); 1344 sSwapFileCount++; 1345 mutex_unlock(&sSwapFileListLock); 1346 1347 mutex_lock(&sAvailSwapSpaceLock); 1348 sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE; 1349 mutex_unlock(&sAvailSwapSpaceLock); 1350 1351 return B_OK; 1352 } 1353 1354 1355 status_t 1356 swap_file_delete(const char* path) 1357 { 1358 vnode* node = NULL; 1359 status_t status = vfs_get_vnode_from_path(path, true, &node); 1360 if (status != B_OK) 1361 return status; 1362 1363 MutexLocker locker(sSwapFileListLock); 1364 1365 swap_file* swapFile = NULL; 1366 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1367 (swapFile = it.Next()) != NULL;) { 1368 if (swapFile->vnode == node) 1369 break; 1370 } 1371 1372 vfs_put_vnode(node); 1373 1374 if (swapFile == NULL) 1375 return B_ERROR; 1376 1377 // if this file is currently used, we can't delete 1378 // TODO: mark this swap file deleting, and remove it after releasing 1379 // all the swap space 1380 if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot) 1381 return B_ERROR; 1382 1383 sSwapFileList.Remove(swapFile); 1384 sSwapFileCount--; 1385 locker.Unlock(); 1386 1387 mutex_lock(&sAvailSwapSpaceLock); 1388 sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot) 1389 * PAGE_SIZE; 1390 mutex_unlock(&sAvailSwapSpaceLock); 1391 1392 close(swapFile->fd); 1393 radix_bitmap_destroy(swapFile->bmp); 1394 free(swapFile); 1395 1396 return B_OK; 1397 } 1398 1399 1400 void 1401 swap_init(void) 1402 { 1403 // create swap block cache 1404 sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block), 1405 sizeof(void*), NULL, NULL, NULL); 1406 if (sSwapBlockCache == NULL) 1407 panic("swap_init(): can't create object cache for swap blocks\n"); 1408 1409 status_t error = object_cache_set_minimum_reserve(sSwapBlockCache, 1410 MIN_SWAP_BLOCK_RESERVE); 1411 if (error != B_OK) { 1412 panic("swap_init(): object_cache_set_minimum_reserve() failed: %s", 1413 strerror(error)); 1414 } 1415 1416 // init swap hash table 1417 sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE); 1418 rw_lock_init(&sSwapHashLock, "swaphash"); 1419 1420 error = register_resource_resizer(swap_hash_resizer, NULL, 1421 SWAP_HASH_RESIZE_INTERVAL); 1422 if (error != B_OK) { 1423 panic("swap_init(): Failed to register swap hash resizer: %s", 1424 strerror(error)); 1425 } 1426 1427 // init swap file list 1428 mutex_init(&sSwapFileListLock, "swaplist"); 1429 sSwapFileAlloc = NULL; 1430 sSwapFileCount = 0; 1431 1432 // init available swap space 1433 mutex_init(&sAvailSwapSpaceLock, "avail swap space"); 1434 sAvailSwapSpace = 0; 1435 1436 add_debugger_command_etc("swap", &dump_swap_info, 1437 "Print infos about the swap usage", 1438 "\n" 1439 "Print infos about the swap usage.\n", 0); 1440 } 1441 1442 1443 void 1444 swap_init_post_modules() 1445 { 1446 // Never try to create a swap file on a read-only device - when booting 1447 // from CD, the write overlay is used. 1448 if (gReadOnlyBootDevice) 1449 return; 1450 1451 bool swapEnabled = true; 1452 bool swapAutomatic = true; 1453 off_t swapSize = 0; 1454 1455 dev_t swapDeviceID = -1; 1456 VolumeInfo selectedVolume = {}; 1457 1458 void* settings = load_driver_settings("virtual_memory"); 1459 1460 if (settings != NULL) { 1461 // We pass a lot of information on the swap device, this is mostly to 1462 // ensure that we are dealing with the same device that was configured. 1463 1464 // TODO: Some kind of BFS uuid would be great here :) 1465 const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL); 1466 1467 if (enabled != NULL) { 1468 swapEnabled = get_driver_boolean_parameter(settings, "vm", 1469 true, false); 1470 swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto", 1471 true, false); 1472 1473 if (swapEnabled && !swapAutomatic) { 1474 const char* size = get_driver_parameter(settings, "swap_size", 1475 NULL, NULL); 1476 const char* volume = get_driver_parameter(settings, 1477 "swap_volume_name", NULL, NULL); 1478 const char* device = get_driver_parameter(settings, 1479 "swap_volume_device", NULL, NULL); 1480 const char* filesystem = get_driver_parameter(settings, 1481 "swap_volume_filesystem", NULL, NULL); 1482 const char* capacity = get_driver_parameter(settings, 1483 "swap_volume_capacity", NULL, NULL); 1484 1485 if (size != NULL && device != NULL && volume != NULL 1486 && filesystem != NULL && capacity != NULL) { 1487 // User specified a size / volume that seems valid 1488 swapAutomatic = false; 1489 swapSize = atoll(size); 1490 strlcpy(selectedVolume.name, volume, 1491 sizeof(selectedVolume.name)); 1492 strlcpy(selectedVolume.device, device, 1493 sizeof(selectedVolume.device)); 1494 strlcpy(selectedVolume.filesystem, filesystem, 1495 sizeof(selectedVolume.filesystem)); 1496 selectedVolume.capacity = atoll(capacity); 1497 } else { 1498 // Something isn't right with swap config, go auto 1499 swapAutomatic = true; 1500 dprintf("%s: virtual_memory configuration is invalid, " 1501 "using automatic swap\n", __func__); 1502 } 1503 } 1504 } 1505 unload_driver_settings(settings); 1506 } 1507 1508 if (swapAutomatic) { 1509 swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE; 1510 if (swapSize <= (1024 * 1024 * 1024)) { 1511 // Memory under 1GB? double the swap 1512 swapSize *= 2; 1513 } 1514 // Automatic swap defaults to the boot device 1515 swapDeviceID = gBootDevice; 1516 } 1517 1518 if (!swapEnabled || swapSize < B_PAGE_SIZE) { 1519 dprintf("%s: virtual_memory is disabled\n", __func__); 1520 return; 1521 } 1522 1523 if (!swapAutomatic && swapDeviceID < 0) { 1524 // If user-specified swap, and no swap device has been chosen yet... 1525 KDiskDeviceManager::CreateDefault(); 1526 KDiskDeviceManager* manager = KDiskDeviceManager::Default(); 1527 PartitionScorer visitor(selectedVolume); 1528 1529 KDiskDevice* device; 1530 int32 cookie = 0; 1531 while ((device = manager->NextDevice(&cookie)) != NULL) { 1532 if (device->IsReadOnlyMedia() || device->IsWriteOnce() 1533 || device->IsRemovable()) { 1534 continue; 1535 } 1536 device->VisitEachDescendant(&visitor); 1537 } 1538 1539 if (!visitor.fBestPartition) { 1540 dprintf("%s: Can't find configured swap partition '%s'\n", 1541 __func__, selectedVolume.name); 1542 } else { 1543 if (visitor.fBestPartition->IsMounted()) 1544 swapDeviceID = visitor.fBestPartition->VolumeID(); 1545 else { 1546 KPath devPath, mountPoint; 1547 visitor.fBestPartition->GetPath(&devPath); 1548 get_mount_point(visitor.fBestPartition, &mountPoint); 1549 const char* mountPath = mountPoint.Path(); 1550 mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO); 1551 swapDeviceID = _kern_mount(mountPath, devPath.Path(), 1552 NULL, 0, NULL, 0); 1553 if (swapDeviceID < 0) { 1554 dprintf("%s: Can't mount configured swap partition '%s'\n", 1555 __func__, selectedVolume.name); 1556 } 1557 } 1558 } 1559 } 1560 1561 if (swapDeviceID < 0) 1562 swapDeviceID = gBootDevice; 1563 1564 // We now have a swapDeviceID which is used for the swap file 1565 1566 KPath path; 1567 struct fs_info info; 1568 _kern_read_fs_info(swapDeviceID, &info); 1569 if (swapDeviceID == gBootDevice) 1570 path = kDefaultSwapPath; 1571 else { 1572 vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(), 1573 path.BufferSize()); 1574 path.UnlockBuffer(); 1575 path.Append("swap"); 1576 } 1577 1578 const char* swapPath = path.Path(); 1579 1580 // Swap size limits prevent oversized swap files 1581 if (swapAutomatic) { 1582 off_t existingSwapSize = 0; 1583 struct stat existingSwapStat; 1584 if (stat(swapPath, &existingSwapStat) == 0) 1585 existingSwapSize = existingSwapStat.st_size; 1586 1587 off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize; 1588 1589 // Adjust automatic swap to a maximum of 25% of the free space 1590 if (swapSize > (freeSpace / 4)) 1591 swapSize = (freeSpace / 4); 1592 } 1593 1594 // Create swap file 1595 int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR); 1596 if (fd < 0) { 1597 dprintf("%s: Can't open/create %s: %s\n", __func__, 1598 swapPath, strerror(errno)); 1599 return; 1600 } 1601 1602 struct stat stat; 1603 stat.st_size = swapSize; 1604 status_t error = _kern_write_stat(fd, NULL, false, &stat, 1605 sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE); 1606 if (error != B_OK) { 1607 dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n", 1608 __func__, swapPath, swapSize, strerror(error)); 1609 } 1610 1611 close(fd); 1612 1613 error = swap_file_add(swapPath); 1614 if (error != B_OK) { 1615 dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath, 1616 strerror(error)); 1617 } 1618 } 1619 1620 1621 //! Used by page daemon to free swap space. 1622 bool 1623 swap_free_page_swap_space(vm_page* page) 1624 { 1625 VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache()); 1626 if (cache == NULL) 1627 return false; 1628 1629 swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset); 1630 if (slotIndex == SWAP_SLOT_NONE) 1631 return false; 1632 1633 swap_slot_dealloc(slotIndex, 1); 1634 cache->fAllocatedSwapSize -= B_PAGE_SIZE; 1635 cache->_SwapBlockFree(page->cache_offset, 1); 1636 1637 return true; 1638 } 1639 1640 1641 uint32 1642 swap_available_pages() 1643 { 1644 mutex_lock(&sAvailSwapSpaceLock); 1645 uint32 avail = sAvailSwapSpace >> PAGE_SHIFT; 1646 mutex_unlock(&sAvailSwapSpaceLock); 1647 1648 return avail; 1649 } 1650 1651 1652 uint32 1653 swap_total_swap_pages() 1654 { 1655 mutex_lock(&sSwapFileListLock); 1656 1657 uint32 totalSwapSlots = 0; 1658 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1659 swap_file* swapFile = it.Next();) { 1660 totalSwapSlots += swapFile->last_slot - swapFile->first_slot; 1661 } 1662 1663 mutex_unlock(&sSwapFileListLock); 1664 1665 return totalSwapSlots; 1666 } 1667 1668 1669 #endif // ENABLE_SWAP_SUPPORT 1670 1671 1672 void 1673 swap_get_info(system_info* info) 1674 { 1675 #if ENABLE_SWAP_SUPPORT 1676 info->max_swap_pages = swap_total_swap_pages(); 1677 info->free_swap_pages = swap_available_pages(); 1678 #else 1679 info->max_swap_space = 0; 1680 info->free_swap_space = 0; 1681 #endif 1682 } 1683 1684