1 /* 2 * Copyright 2008, Zhao Shuai, upczhsh@163.com. 3 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 4 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 5 * Distributed under the terms of the MIT License. 6 * 7 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 8 * Distributed under the terms of the NewOS License. 9 * 10 * Copyright 2011-2012 Haiku, Inc. All rights reserved. 11 * Distributed under the terms of the MIT License. 12 * 13 * Authors: 14 * Hamish Morrison, hamish@lavabit.com 15 * Alexander von Gluck IV, kallisti5@unixzen.com 16 */ 17 18 19 #include "VMAnonymousCache.h" 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include <FindDirectory.h> 28 #include <KernelExport.h> 29 #include <NodeMonitor.h> 30 #include <StackOrHeapArray.h> 31 32 #include <arch_config.h> 33 #include <boot_device.h> 34 #include <disk_device_manager/KDiskDevice.h> 35 #include <disk_device_manager/KDiskDeviceManager.h> 36 #include <disk_device_manager/KDiskSystem.h> 37 #include <disk_device_manager/KPartitionVisitor.h> 38 #include <driver_settings.h> 39 #include <fs/fd.h> 40 #include <fs/KPath.h> 41 #include <fs_info.h> 42 #include <fs_interface.h> 43 #include <heap.h> 44 #include <kernel_daemon.h> 45 #include <slab/Slab.h> 46 #include <syscalls.h> 47 #include <system_info.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/DoublyLinkedList.h> 51 #include <util/OpenHashTable.h> 52 #include <util/RadixBitmap.h> 53 #include <vfs.h> 54 #include <vm/vm.h> 55 #include <vm/vm_page.h> 56 #include <vm/vm_priv.h> 57 #include <vm/VMAddressSpace.h> 58 59 #include "IORequest.h" 60 61 62 #if ENABLE_SWAP_SUPPORT 63 64 //#define TRACE_VM_ANONYMOUS_CACHE 65 #ifdef TRACE_VM_ANONYMOUS_CACHE 66 # define TRACE(x...) dprintf(x) 67 #else 68 # define TRACE(x...) 
do { } while (false) 69 #endif 70 71 72 // number of free swap blocks the object cache shall minimally have 73 #define MIN_SWAP_BLOCK_RESERVE 4096 74 75 // interval the has resizer is triggered (in 0.1s) 76 #define SWAP_HASH_RESIZE_INTERVAL 5 77 78 #define INITIAL_SWAP_HASH_SIZE 1024 79 80 #define SWAP_SLOT_NONE RADIX_SLOT_NONE 81 82 #define SWAP_BLOCK_PAGES 32 83 #define SWAP_BLOCK_SHIFT 5 /* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */ 84 #define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1) 85 86 87 static const char* const kDefaultSwapPath = "/var/swap"; 88 89 struct swap_file : DoublyLinkedListLinkImpl<swap_file> { 90 int fd; 91 struct vnode* vnode; 92 void* cookie; 93 swap_addr_t first_slot; 94 swap_addr_t last_slot; 95 radix_bitmap* bmp; 96 }; 97 98 struct swap_hash_key { 99 VMAnonymousCache *cache; 100 off_t page_index; // page index in the cache 101 }; 102 103 // Each swap block contains swap address information for 104 // SWAP_BLOCK_PAGES continuous pages from the same cache 105 struct swap_block { 106 swap_block* hash_link; 107 swap_hash_key key; 108 uint32 used; 109 swap_addr_t swap_slots[SWAP_BLOCK_PAGES]; 110 }; 111 112 struct SwapHashTableDefinition { 113 typedef swap_hash_key KeyType; 114 typedef swap_block ValueType; 115 116 SwapHashTableDefinition() {} 117 118 size_t HashKey(const swap_hash_key& key) const 119 { 120 off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT; 121 VMAnonymousCache* cache = key.cache; 122 return blockIndex ^ (size_t)(int*)cache; 123 } 124 125 size_t Hash(const swap_block* value) const 126 { 127 return HashKey(value->key); 128 } 129 130 bool Compare(const swap_hash_key& key, const swap_block* value) const 131 { 132 return (key.page_index & ~(off_t)SWAP_BLOCK_MASK) 133 == (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK) 134 && key.cache == value->key.cache; 135 } 136 137 swap_block*& GetLink(swap_block* value) const 138 { 139 return value->hash_link; 140 } 141 }; 142 143 typedef BOpenHashTable<SwapHashTableDefinition> 
SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

// Global cache-page-index -> swap-slot mapping, guarded by sSwapHashLock.
static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

// Registered swap files; sSwapFileListLock also guards the per-file bitmaps.
static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL; // allocate from here
static uint32 sSwapFileCount = 0;

// Unreserved swap space (in bytes), guarded by sAvailSwapSpaceLock.
static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

// Slab cache for swap_block structures.
static object_cache* sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

// Base entry for swap tracing; records the cache the operation acts on.
class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


// Traces a single page being read back from a swap slot.
class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};


// Traces a single page being written out to a swap slot.
class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t		fPageIndex;
	swap_addr_t		fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


// KDL command: print per-swap-file and aggregate swap usage statistics.
static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf(" vnode: %p, pages: total: %" B_PRIu32 ", free: %" B_PRIu32
			"\n", file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total: %9" B_PRIu32 "\n", totalSwapPages);
	kprintf("available: %9" B_PRIdOFF "\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved: %9" B_PRIdOFF "\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used: %9" B_PRIu32 "\n", totalSwapPages - freeSwapPages);
	kprintf("free: %9" B_PRIu32 "\n", freeSwapPages);

	return 0;
}


/*!	Allocates \a count contiguous swap slots from one of the registered swap
	files and returns the global slot index of the first one, or
	SWAP_SLOT_NONE if the request cannot be satisfied as a whole (the caller
	is expected to retry with a smaller count). Panics if there is no swap
	file at all or the swap space is exhausted.
*/
static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// since radix bitmap could not handle more than 32 pages, we return
	// SWAP_SLOT_NONE, this forces Write() adjust allocation amount
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		// sSwapFileAlloc may be NULL initially or after reaching the list end
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			// bitmap indices are file-local; make the result global
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% percent of its space
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
		< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) {
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


/*!	Returns the swap file covering the given global slot index.
	Caller must hold sSwapFileListLock. Panics if no file matches.
*/
static swap_file*
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
			&& slotIndex < swapFile->last_slot) {
			return swapFile;
		}
	}

	panic("find_swap_file(): can't find swap file for slot %" B_PRIu32 "\n",
		slotIndex);
	return NULL;
}


/*!	Releases \a count contiguous swap slots starting at the global slot index
	\a slotIndex. A SWAP_SLOT_NONE index is silently ignored.
*/
static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file* swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


/*!	Reserves up to \a amount bytes of swap space and returns how much was
	actually reserved (possibly less, down to 0, if swap is short).
*/
static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


//! Returns \a amount bytes of previously reserved swap space to the pool.
static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


/*!	Kernel daemon hook: grows the swap hash table when needed. The allocation
	is done with the lock dropped, so the needed size is re-checked after
	re-locking (hence the loop).
*/
static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


/*!	Async I/O completion callback for WriteAsync(): commits the swap-block
	mapping on success, or rolls back the slot allocation on failure, before
	chaining to the caller's callback. Deletes itself when done.
*/
class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				// the write failed -- undo the accounting and free the slot
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	// free allocated swap space and swap block
	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
		offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
469 TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, " 470 "numPrecommittedPages = %" B_PRId32 ", numGuardPages = %" B_PRId32 471 ")\n", this, canOvercommit ? "yes" : "no", numPrecommittedPages, 472 numGuardPages); 473 474 status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags); 475 if (error != B_OK) 476 return error; 477 478 fCanOvercommit = canOvercommit; 479 fHasPrecommitted = false; 480 fPrecommittedPages = min_c(numPrecommittedPages, 255); 481 fGuardedSize = numGuardPages * B_PAGE_SIZE; 482 fCommittedSwapSize = 0; 483 fAllocatedSwapSize = 0; 484 485 return B_OK; 486 } 487 488 489 status_t 490 VMAnonymousCache::Resize(off_t newSize, int priority) 491 { 492 // If the cache size shrinks, drop all swap pages beyond the new size. 493 if (fAllocatedSwapSize > 0) { 494 off_t oldPageCount = (virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 495 swap_block* swapBlock = NULL; 496 497 for (off_t pageIndex = (newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 498 pageIndex < oldPageCount && fAllocatedSwapSize > 0; pageIndex++) { 499 500 WriteLocker locker(sSwapHashLock); 501 502 // Get the swap slot index for the page. 503 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 504 if (swapBlock == NULL || blockIndex == 0) { 505 swap_hash_key key = { this, pageIndex }; 506 swapBlock = sSwapHashTable.Lookup(key); 507 508 if (swapBlock == NULL) { 509 pageIndex = ROUNDUP(pageIndex + 1, SWAP_BLOCK_PAGES); 510 continue; 511 } 512 } 513 514 swap_addr_t slotIndex = swapBlock->swap_slots[blockIndex]; 515 vm_page* page; 516 if (slotIndex != SWAP_SLOT_NONE 517 && ((page = LookupPage((off_t)pageIndex * B_PAGE_SIZE)) == NULL 518 || !page->busy)) { 519 // TODO: We skip (i.e. leak) swap space of busy pages, since 520 // there could be I/O going on (paging in/out). Waiting is 521 // not an option as 1. unlocking the cache means that new 522 // swap pages could be added in a range we've already 523 // cleared (since the cache still has the old size) and 2. 
524 // we'd risk a deadlock in case we come from the file cache 525 // and the FS holds the node's write-lock. We should mark 526 // the page invalid and let the one responsible clean up. 527 // There's just no such mechanism yet. 528 swap_slot_dealloc(slotIndex, 1); 529 fAllocatedSwapSize -= B_PAGE_SIZE; 530 531 swapBlock->swap_slots[blockIndex] = SWAP_SLOT_NONE; 532 if (--swapBlock->used == 0) { 533 // All swap pages have been freed -- we can discard the swap 534 // block. 535 sSwapHashTable.RemoveUnchecked(swapBlock); 536 object_cache_free(sSwapBlockCache, swapBlock, 537 CACHE_DONT_WAIT_FOR_MEMORY 538 | CACHE_DONT_LOCK_KERNEL_SPACE); 539 } 540 } 541 } 542 } 543 544 return VMCache::Resize(newSize, priority); 545 } 546 547 548 status_t 549 VMAnonymousCache::Commit(off_t size, int priority) 550 { 551 TRACE("%p->VMAnonymousCache::Commit(%" B_PRIdOFF ")\n", this, size); 552 553 // If we can overcommit, we don't commit here, but in Fault(). We always 554 // unreserve memory, if we're asked to shrink our commitment, though. 
555 if (fCanOvercommit && size > committed_size) { 556 if (fHasPrecommitted) 557 return B_OK; 558 559 // pre-commit some pages to make a later failure less probable 560 fHasPrecommitted = true; 561 uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE; 562 if (size > precommitted) 563 size = precommitted; 564 } 565 566 return _Commit(size, priority); 567 } 568 569 570 bool 571 VMAnonymousCache::HasPage(off_t offset) 572 { 573 if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE) 574 return true; 575 576 return false; 577 } 578 579 580 bool 581 VMAnonymousCache::DebugHasPage(off_t offset) 582 { 583 off_t pageIndex = offset >> PAGE_SHIFT; 584 swap_hash_key key = { this, pageIndex }; 585 swap_block* swap = sSwapHashTable.Lookup(key); 586 if (swap == NULL) 587 return false; 588 589 return swap->swap_slots[pageIndex & SWAP_BLOCK_MASK] != SWAP_SLOT_NONE; 590 } 591 592 593 status_t 594 VMAnonymousCache::Read(off_t offset, const generic_io_vec* vecs, size_t count, 595 uint32 flags, generic_size_t* _numBytes) 596 { 597 off_t pageIndex = offset >> PAGE_SHIFT; 598 599 for (uint32 i = 0, j = 0; i < count; i = j) { 600 swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i); 601 for (j = i + 1; j < count; j++) { 602 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j); 603 if (slotIndex != startSlotIndex + j - i) 604 break; 605 } 606 607 T(ReadPage(this, pageIndex, startSlotIndex)); 608 // TODO: Assumes that only one page is read. 
609 610 swap_file* swapFile = find_swap_file(startSlotIndex); 611 612 off_t pos = (off_t)(startSlotIndex - swapFile->first_slot) 613 * B_PAGE_SIZE; 614 615 status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos, 616 vecs + i, j - i, flags, _numBytes); 617 if (status != B_OK) 618 return status; 619 } 620 621 return B_OK; 622 } 623 624 625 status_t 626 VMAnonymousCache::Write(off_t offset, const generic_io_vec* vecs, size_t count, 627 uint32 flags, generic_size_t* _numBytes) 628 { 629 off_t pageIndex = offset >> PAGE_SHIFT; 630 631 AutoLocker<VMCache> locker(this); 632 633 page_num_t totalPages = 0; 634 for (uint32 i = 0; i < count; i++) { 635 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 636 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages); 637 if (slotIndex != SWAP_SLOT_NONE) { 638 swap_slot_dealloc(slotIndex, pageCount); 639 _SwapBlockFree(pageIndex + totalPages, pageCount); 640 fAllocatedSwapSize -= pageCount * B_PAGE_SIZE; 641 } 642 643 totalPages += pageCount; 644 } 645 646 off_t totalSize = totalPages * B_PAGE_SIZE; 647 if (fAllocatedSwapSize + totalSize > fCommittedSwapSize) 648 return B_ERROR; 649 650 fAllocatedSwapSize += totalSize; 651 locker.Unlock(); 652 653 page_num_t pagesLeft = totalPages; 654 totalPages = 0; 655 656 for (uint32 i = 0; i < count; i++) { 657 page_num_t pageCount = (vecs[i].length + B_PAGE_SIZE - 1) >> PAGE_SHIFT; 658 659 generic_addr_t vectorBase = vecs[i].base; 660 generic_size_t vectorLength = vecs[i].length; 661 page_num_t n = pageCount; 662 663 for (page_num_t j = 0; j < pageCount; j += n) { 664 swap_addr_t slotIndex; 665 // try to allocate n slots, if fail, try to allocate n/2 666 while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2) 667 n >>= 1; 668 669 if (slotIndex == SWAP_SLOT_NONE) 670 panic("VMAnonymousCache::Write(): can't allocate swap space\n"); 671 672 T(WritePage(this, pageIndex, slotIndex)); 673 // TODO: Assumes that only one page is 
written. 674 675 swap_file* swapFile = find_swap_file(slotIndex); 676 677 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 678 679 generic_size_t length = (phys_addr_t)n * B_PAGE_SIZE; 680 generic_io_vec vector[1]; 681 vector->base = vectorBase; 682 vector->length = length; 683 684 status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie, 685 pos, vector, 1, flags, &length); 686 if (status != B_OK) { 687 locker.Lock(); 688 fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE; 689 locker.Unlock(); 690 691 swap_slot_dealloc(slotIndex, n); 692 return status; 693 } 694 695 _SwapBlockBuild(pageIndex + totalPages, slotIndex, n); 696 pagesLeft -= n; 697 698 if (n != pageCount) { 699 vectorBase = vectorBase + n * B_PAGE_SIZE; 700 vectorLength -= n * B_PAGE_SIZE; 701 } 702 } 703 704 totalPages += pageCount; 705 } 706 707 ASSERT(pagesLeft == 0); 708 return B_OK; 709 } 710 711 712 status_t 713 VMAnonymousCache::WriteAsync(off_t offset, const generic_io_vec* vecs, 714 size_t count, generic_size_t numBytes, uint32 flags, 715 AsyncIOCallback* _callback) 716 { 717 // TODO: Currently this method is only used for single pages. Either make 718 // more flexible use of it or change the interface! 719 // This implementation relies on the current usage! 720 ASSERT(count == 1); 721 ASSERT(numBytes <= B_PAGE_SIZE); 722 723 page_num_t pageIndex = offset >> PAGE_SHIFT; 724 swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex); 725 bool newSlot = slotIndex == SWAP_SLOT_NONE; 726 727 // If the page doesn't have any swap space yet, allocate it. 728 if (newSlot) { 729 AutoLocker<VMCache> locker(this); 730 if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) { 731 _callback->IOFinished(B_ERROR, true, 0); 732 return B_ERROR; 733 } 734 735 fAllocatedSwapSize += B_PAGE_SIZE; 736 737 slotIndex = swap_slot_alloc(1); 738 } 739 740 // create our callback 741 WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0 742 ? 
new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback) 743 : new(std::nothrow) WriteCallback(this, _callback); 744 if (callback == NULL) { 745 if (newSlot) { 746 AutoLocker<VMCache> locker(this); 747 fAllocatedSwapSize -= B_PAGE_SIZE; 748 locker.Unlock(); 749 750 swap_slot_dealloc(slotIndex, 1); 751 } 752 _callback->IOFinished(B_NO_MEMORY, true, 0); 753 return B_NO_MEMORY; 754 } 755 // TODO: If the page already had swap space assigned, we don't need an own 756 // callback. 757 758 callback->SetTo(pageIndex, slotIndex, newSlot); 759 760 T(WritePage(this, pageIndex, slotIndex)); 761 762 // write the page asynchrounously 763 swap_file* swapFile = find_swap_file(slotIndex); 764 off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE; 765 766 return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos, 767 vecs, 1, numBytes, flags, callback); 768 } 769 770 771 bool 772 VMAnonymousCache::CanWritePage(off_t offset) 773 { 774 // We can write the page, if we have not used all of our committed swap 775 // space or the page already has a swap slot assigned. 776 return fAllocatedSwapSize < fCommittedSwapSize 777 || _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE; 778 } 779 780 781 int32 782 VMAnonymousCache::MaxPagesPerAsyncWrite() const 783 { 784 return 1; 785 } 786 787 788 status_t 789 VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset) 790 { 791 if (fGuardedSize > 0) { 792 uint32 guardOffset; 793 794 #ifdef STACK_GROWS_DOWNWARDS 795 guardOffset = 0; 796 #elif defined(STACK_GROWS_UPWARDS) 797 guardOffset = virtual_size - fGuardedSize; 798 #else 799 # error Stack direction has not been defined in arch_config.h 800 #endif 801 // report stack fault, guard page hit! 
802 if (offset >= guardOffset && offset < guardOffset + fGuardedSize) { 803 TRACE(("stack overflow!\n")); 804 return B_BAD_ADDRESS; 805 } 806 } 807 808 if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) { 809 if (fPrecommittedPages == 0) { 810 // never commit more than needed 811 if (committed_size / B_PAGE_SIZE > page_count) 812 return B_BAD_HANDLER; 813 814 // try to commit additional swap space/memory 815 if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) { 816 fCommittedSwapSize += B_PAGE_SIZE; 817 } else { 818 int priority = aspace == VMAddressSpace::Kernel() 819 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 820 if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK) { 821 dprintf("%p->VMAnonymousCache::Fault(): Failed to reserve " 822 "%d bytes of RAM.\n", this, (int)B_PAGE_SIZE); 823 return B_NO_MEMORY; 824 } 825 } 826 827 committed_size += B_PAGE_SIZE; 828 } else 829 fPrecommittedPages--; 830 } 831 832 // This will cause vm_soft_fault() to handle the fault 833 return B_BAD_HANDLER; 834 } 835 836 837 void 838 VMAnonymousCache::Merge(VMCache* _source) 839 { 840 VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source); 841 if (source == NULL) { 842 panic("VMAnonymousCache::MergeStore(): merge with incompatible cache " 843 "%p requested", _source); 844 return; 845 } 846 847 // take over the source' committed size 848 fCommittedSwapSize += source->fCommittedSwapSize; 849 source->fCommittedSwapSize = 0; 850 committed_size += source->committed_size; 851 source->committed_size = 0; 852 853 off_t actualSize = virtual_end - virtual_base; 854 if (committed_size > actualSize) 855 _Commit(actualSize, VM_PRIORITY_USER); 856 857 // Move all not shadowed swap pages from the source to the consumer cache. 858 // Also remove all source pages that are shadowed by consumer swap pages. 859 _MergeSwapPages(source); 860 861 // Move all not shadowed pages from the source to the consumer cache. 
862 if (source->page_count < page_count) 863 _MergePagesSmallerSource(source); 864 else 865 _MergePagesSmallerConsumer(source); 866 } 867 868 869 void 870 VMAnonymousCache::DeleteObject() 871 { 872 object_cache_delete(gAnonymousCacheObjectCache, this); 873 } 874 875 876 void 877 VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex, 878 swap_addr_t startSlotIndex, uint32 count) 879 { 880 WriteLocker locker(sSwapHashLock); 881 882 uint32 left = count; 883 for (uint32 i = 0, j = 0; i < count; i += j) { 884 off_t pageIndex = startPageIndex + i; 885 swap_addr_t slotIndex = startSlotIndex + i; 886 887 swap_hash_key key = { this, pageIndex }; 888 889 swap_block* swap = sSwapHashTable.Lookup(key); 890 while (swap == NULL) { 891 swap = (swap_block*)object_cache_alloc(sSwapBlockCache, 892 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 893 if (swap == NULL) { 894 // Wait a short time until memory is available again. 895 locker.Unlock(); 896 snooze(10000); 897 locker.Lock(); 898 swap = sSwapHashTable.Lookup(key); 899 continue; 900 } 901 902 swap->key.cache = this; 903 swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK; 904 swap->used = 0; 905 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) 906 swap->swap_slots[i] = SWAP_SLOT_NONE; 907 908 sSwapHashTable.InsertUnchecked(swap); 909 } 910 911 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 912 for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 913 swap->swap_slots[blockIndex++] = slotIndex + j; 914 left--; 915 } 916 917 swap->used += j; 918 } 919 } 920 921 922 void 923 VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count) 924 { 925 WriteLocker locker(sSwapHashLock); 926 927 uint32 left = count; 928 for (uint32 i = 0, j = 0; i < count; i += j) { 929 off_t pageIndex = startPageIndex + i; 930 swap_hash_key key = { this, pageIndex }; 931 swap_block* swap = sSwapHashTable.Lookup(key); 932 933 ASSERT(swap != NULL); 934 935 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 936 for 
(j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) { 937 swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE; 938 left--; 939 } 940 941 swap->used -= j; 942 if (swap->used == 0) { 943 sSwapHashTable.RemoveUnchecked(swap); 944 object_cache_free(sSwapBlockCache, swap, 945 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 946 } 947 } 948 } 949 950 951 swap_addr_t 952 VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex) 953 { 954 ReadLocker locker(sSwapHashLock); 955 956 swap_hash_key key = { this, pageIndex }; 957 swap_block* swap = sSwapHashTable.Lookup(key); 958 swap_addr_t slotIndex = SWAP_SLOT_NONE; 959 960 if (swap != NULL) { 961 swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK; 962 slotIndex = swap->swap_slots[blockIndex]; 963 } 964 965 return slotIndex; 966 } 967 968 969 status_t 970 VMAnonymousCache::_Commit(off_t size, int priority) 971 { 972 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), already committed: " 973 "%" B_PRIdOFF " (%" B_PRIdOFF " swap)\n", this, size, committed_size, 974 fCommittedSwapSize); 975 976 // Basic strategy: reserve swap space first, only when running out of swap 977 // space, reserve real memory. 978 979 off_t committedMemory = committed_size - fCommittedSwapSize; 980 981 // Regardless of whether we're asked to grow or shrink the commitment, 982 // we always try to reserve as much as possible of the final commitment 983 // in the swap space. 984 if (size > fCommittedSwapSize) { 985 fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize); 986 committed_size = fCommittedSwapSize + committedMemory; 987 if (size > fCommittedSwapSize) { 988 TRACE("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "), reserved " 989 "only %" B_PRIdOFF " swap\n", this, size, fCommittedSwapSize); 990 } 991 } 992 993 if (committed_size == size) 994 return B_OK; 995 996 if (committed_size > size) { 997 // The commitment shrinks -- unreserve real memory first. 
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory, now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%" B_PRIdOFF "): Failed to "
			"reserve %" B_PRIdOFF " bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has less pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through a
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			MovePage(page);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has less pages than the source, so we move the
	// consumer's pages to the source (freeing shadowed ones) and finally just
	// all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			ASSERT_PRINT(sourcePage->WiredCount() == 0
					&& sourcePage->mappings.IsEmpty(),
				"sourcePage: %p, page: %p", sourcePage, page);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through a
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither source nor consumer have swap pages, we don't have to do
	// anything.
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

	// Iterate over the source's range one swap block (SWAP_BLOCK_PAGES pages)
	// at a time, aligned to swap block boundaries.
	for (off_t offset = source->virtual_base
		& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
		offset < source->virtual_end;
		offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		off_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		// NOTE(review): the hash lock is dropped here; the blocks are no
		// longer (sourceSwapBlock) or still (swapBlock) reachable only for
		// this cache pair -- presumably protected by the caller holding the
		// cache locks. TODO: confirm.
		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over by
		// the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
1177 for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) { 1178 if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE) 1179 swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i]; 1180 } 1181 1182 object_cache_free(sSwapBlockCache, sourceSwapBlock, 1183 CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE); 1184 } 1185 } 1186 } 1187 1188 1189 // #pragma mark - 1190 1191 1192 // TODO: This can be removed if we get BFS uuid's 1193 struct VolumeInfo { 1194 char name[B_FILE_NAME_LENGTH]; 1195 char device[B_FILE_NAME_LENGTH]; 1196 char filesystem[B_OS_NAME_LENGTH]; 1197 off_t capacity; 1198 }; 1199 1200 1201 class PartitionScorer : public KPartitionVisitor { 1202 public: 1203 PartitionScorer(VolumeInfo& volumeInfo) 1204 : 1205 fBestPartition(NULL), 1206 fBestScore(-1), 1207 fVolumeInfo(volumeInfo) 1208 { 1209 } 1210 1211 virtual bool VisitPre(KPartition* partition) 1212 { 1213 if (!partition->ContainsFileSystem()) 1214 return false; 1215 1216 KPath path; 1217 partition->GetPath(&path); 1218 1219 int score = 0; 1220 if (strcmp(fVolumeInfo.name, partition->ContentName()) == 0) 1221 score += 4; 1222 if (strcmp(fVolumeInfo.device, path.Path()) == 0) 1223 score += 3; 1224 if (fVolumeInfo.capacity == partition->Size()) 1225 score += 2; 1226 if (strcmp(fVolumeInfo.filesystem, 1227 partition->DiskSystem()->ShortName()) == 0) { 1228 score += 1; 1229 } 1230 if (score >= 4 && score > fBestScore) { 1231 fBestPartition = partition; 1232 fBestScore = score; 1233 } 1234 1235 return false; 1236 } 1237 1238 KPartition* fBestPartition; 1239 1240 private: 1241 int32 fBestScore; 1242 VolumeInfo& fVolumeInfo; 1243 }; 1244 1245 1246 status_t 1247 get_mount_point(KPartition* partition, KPath* mountPoint) 1248 { 1249 if (!mountPoint || !partition->ContainsFileSystem()) 1250 return B_BAD_VALUE; 1251 1252 int nameLength = 0; 1253 const char* volumeName = partition->ContentName(); 1254 if (volumeName != NULL) 1255 nameLength = strlen(volumeName); 1256 if (nameLength == 0) { 1257 volumeName = 
partition->Name(); 1258 if (volumeName != NULL) 1259 nameLength = strlen(volumeName); 1260 if (nameLength == 0) { 1261 volumeName = "unnamed volume"; 1262 nameLength = strlen(volumeName); 1263 } 1264 } 1265 1266 BStackOrHeapArray<char, 128> basePath(nameLength + 1); 1267 if (!basePath.IsValid()) 1268 return B_NO_MEMORY; 1269 int32 len = snprintf(basePath, nameLength + 1, "/%s", volumeName); 1270 for (int32 i = 1; i < len; i++) 1271 if (basePath[i] == '/') 1272 basePath[i] = '-'; 1273 char* path = mountPoint->LockBuffer(); 1274 int32 pathLen = mountPoint->BufferSize(); 1275 strncpy(path, basePath, pathLen); 1276 1277 struct stat dummy; 1278 for (int i = 1; ; i++) { 1279 if (stat(path, &dummy) != 0) 1280 break; 1281 snprintf(path, pathLen, "%s%d", (char*)basePath, i); 1282 } 1283 1284 mountPoint->UnlockBuffer(); 1285 return B_OK; 1286 } 1287 1288 1289 status_t 1290 swap_file_add(const char* path) 1291 { 1292 // open the file 1293 int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR); 1294 if (fd < 0) 1295 return errno; 1296 1297 // fstat() it and check whether we can use it 1298 struct stat st; 1299 if (fstat(fd, &st) < 0) { 1300 close(fd); 1301 return errno; 1302 } 1303 1304 if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { 1305 close(fd); 1306 return B_BAD_VALUE; 1307 } 1308 1309 if (st.st_size < B_PAGE_SIZE) { 1310 close(fd); 1311 return B_BAD_VALUE; 1312 } 1313 1314 // get file descriptor, vnode, and cookie 1315 file_descriptor* descriptor = get_fd(get_current_io_context(true), fd); 1316 put_fd(descriptor); 1317 1318 vnode* node = fd_vnode(descriptor); 1319 if (node == NULL) { 1320 close(fd); 1321 return B_BAD_VALUE; 1322 } 1323 1324 // do the allocations and prepare the swap_file structure 1325 swap_file* swap = (swap_file*)malloc(sizeof(swap_file)); 1326 if (swap == NULL) { 1327 close(fd); 1328 return B_NO_MEMORY; 1329 } 1330 1331 swap->fd = fd; 1332 swap->vnode = node; 1333 swap->cookie = descriptor->cookie; 1334 1335 uint32 
pageCount = st.st_size >> PAGE_SHIFT; 1336 swap->bmp = radix_bitmap_create(pageCount); 1337 if (swap->bmp == NULL) { 1338 free(swap); 1339 close(fd); 1340 return B_NO_MEMORY; 1341 } 1342 1343 // set slot index and add this file to swap file list 1344 mutex_lock(&sSwapFileListLock); 1345 // TODO: Also check whether the swap file is already registered! 1346 if (sSwapFileList.IsEmpty()) { 1347 swap->first_slot = 0; 1348 swap->last_slot = pageCount; 1349 } else { 1350 // leave one page gap between two swap files 1351 swap->first_slot = sSwapFileList.Last()->last_slot + 1; 1352 swap->last_slot = swap->first_slot + pageCount; 1353 } 1354 sSwapFileList.Add(swap); 1355 sSwapFileCount++; 1356 mutex_unlock(&sSwapFileListLock); 1357 1358 mutex_lock(&sAvailSwapSpaceLock); 1359 sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE; 1360 mutex_unlock(&sAvailSwapSpaceLock); 1361 1362 return B_OK; 1363 } 1364 1365 1366 status_t 1367 swap_file_delete(const char* path) 1368 { 1369 vnode* node = NULL; 1370 status_t status = vfs_get_vnode_from_path(path, true, &node); 1371 if (status != B_OK) 1372 return status; 1373 1374 MutexLocker locker(sSwapFileListLock); 1375 1376 swap_file* swapFile = NULL; 1377 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1378 (swapFile = it.Next()) != NULL;) { 1379 if (swapFile->vnode == node) 1380 break; 1381 } 1382 1383 vfs_put_vnode(node); 1384 1385 if (swapFile == NULL) 1386 return B_ERROR; 1387 1388 // if this file is currently used, we can't delete 1389 // TODO: mark this swap file deleting, and remove it after releasing 1390 // all the swap space 1391 if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot) 1392 return B_ERROR; 1393 1394 sSwapFileList.Remove(swapFile); 1395 sSwapFileCount--; 1396 locker.Unlock(); 1397 1398 mutex_lock(&sAvailSwapSpaceLock); 1399 sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot) 1400 * B_PAGE_SIZE; 1401 mutex_unlock(&sAvailSwapSpaceLock); 1402 1403 close(swapFile->fd); 
1404 radix_bitmap_destroy(swapFile->bmp); 1405 free(swapFile); 1406 1407 return B_OK; 1408 } 1409 1410 1411 void 1412 swap_init(void) 1413 { 1414 // create swap block cache 1415 sSwapBlockCache = create_object_cache("swapblock", sizeof(swap_block), 1416 sizeof(void*), NULL, NULL, NULL); 1417 if (sSwapBlockCache == NULL) 1418 panic("swap_init(): can't create object cache for swap blocks\n"); 1419 1420 status_t error = object_cache_set_minimum_reserve(sSwapBlockCache, 1421 MIN_SWAP_BLOCK_RESERVE); 1422 if (error != B_OK) { 1423 panic("swap_init(): object_cache_set_minimum_reserve() failed: %s", 1424 strerror(error)); 1425 } 1426 1427 // init swap hash table 1428 sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE); 1429 rw_lock_init(&sSwapHashLock, "swaphash"); 1430 1431 error = register_resource_resizer(swap_hash_resizer, NULL, 1432 SWAP_HASH_RESIZE_INTERVAL); 1433 if (error != B_OK) { 1434 panic("swap_init(): Failed to register swap hash resizer: %s", 1435 strerror(error)); 1436 } 1437 1438 // init swap file list 1439 mutex_init(&sSwapFileListLock, "swaplist"); 1440 sSwapFileAlloc = NULL; 1441 sSwapFileCount = 0; 1442 1443 // init available swap space 1444 mutex_init(&sAvailSwapSpaceLock, "avail swap space"); 1445 sAvailSwapSpace = 0; 1446 1447 add_debugger_command_etc("swap", &dump_swap_info, 1448 "Print infos about the swap usage", 1449 "\n" 1450 "Print infos about the swap usage.\n", 0); 1451 } 1452 1453 1454 void 1455 swap_init_post_modules() 1456 { 1457 // Never try to create a swap file on a read-only device - when booting 1458 // from CD, the write overlay is used. 
1459 if (gReadOnlyBootDevice) 1460 return; 1461 1462 bool swapEnabled = true; 1463 bool swapAutomatic = true; 1464 off_t swapSize = 0; 1465 1466 dev_t swapDeviceID = -1; 1467 VolumeInfo selectedVolume = {}; 1468 1469 void* settings = load_driver_settings("virtual_memory"); 1470 1471 if (settings != NULL) { 1472 // We pass a lot of information on the swap device, this is mostly to 1473 // ensure that we are dealing with the same device that was configured. 1474 1475 // TODO: Some kind of BFS uuid would be great here :) 1476 const char* enabled = get_driver_parameter(settings, "vm", NULL, NULL); 1477 1478 if (enabled != NULL) { 1479 swapEnabled = get_driver_boolean_parameter(settings, "vm", 1480 true, false); 1481 swapAutomatic = get_driver_boolean_parameter(settings, "swap_auto", 1482 true, false); 1483 1484 if (swapEnabled && !swapAutomatic) { 1485 const char* size = get_driver_parameter(settings, "swap_size", 1486 NULL, NULL); 1487 const char* volume = get_driver_parameter(settings, 1488 "swap_volume_name", NULL, NULL); 1489 const char* device = get_driver_parameter(settings, 1490 "swap_volume_device", NULL, NULL); 1491 const char* filesystem = get_driver_parameter(settings, 1492 "swap_volume_filesystem", NULL, NULL); 1493 const char* capacity = get_driver_parameter(settings, 1494 "swap_volume_capacity", NULL, NULL); 1495 1496 if (size != NULL && device != NULL && volume != NULL 1497 && filesystem != NULL && capacity != NULL) { 1498 // User specified a size / volume that seems valid 1499 swapAutomatic = false; 1500 swapSize = atoll(size); 1501 strlcpy(selectedVolume.name, volume, 1502 sizeof(selectedVolume.name)); 1503 strlcpy(selectedVolume.device, device, 1504 sizeof(selectedVolume.device)); 1505 strlcpy(selectedVolume.filesystem, filesystem, 1506 sizeof(selectedVolume.filesystem)); 1507 selectedVolume.capacity = atoll(capacity); 1508 } else { 1509 // Something isn't right with swap config, go auto 1510 swapAutomatic = true; 1511 dprintf("%s: virtual_memory 
configuration is invalid, " 1512 "using automatic swap\n", __func__); 1513 } 1514 } 1515 } 1516 unload_driver_settings(settings); 1517 } 1518 1519 if (swapAutomatic) { 1520 swapSize = (off_t)vm_page_num_pages() * B_PAGE_SIZE; 1521 if (swapSize <= (1024 * 1024 * 1024)) { 1522 // Memory under 1GB? double the swap 1523 swapSize *= 2; 1524 } 1525 // Automatic swap defaults to the boot device 1526 swapDeviceID = gBootDevice; 1527 } 1528 1529 if (!swapEnabled || swapSize < B_PAGE_SIZE) { 1530 dprintf("%s: virtual_memory is disabled\n", __func__); 1531 return; 1532 } 1533 1534 if (!swapAutomatic && swapDeviceID < 0) { 1535 // If user-specified swap, and no swap device has been chosen yet... 1536 KDiskDeviceManager::CreateDefault(); 1537 KDiskDeviceManager* manager = KDiskDeviceManager::Default(); 1538 PartitionScorer visitor(selectedVolume); 1539 1540 KDiskDevice* device; 1541 int32 cookie = 0; 1542 while ((device = manager->NextDevice(&cookie)) != NULL) { 1543 if (device->IsReadOnlyMedia() || device->IsWriteOnce() 1544 || device->IsRemovable()) { 1545 continue; 1546 } 1547 device->VisitEachDescendant(&visitor); 1548 } 1549 1550 if (!visitor.fBestPartition) { 1551 dprintf("%s: Can't find configured swap partition '%s'\n", 1552 __func__, selectedVolume.name); 1553 } else { 1554 if (visitor.fBestPartition->IsMounted()) 1555 swapDeviceID = visitor.fBestPartition->VolumeID(); 1556 else { 1557 KPath devPath, mountPoint; 1558 visitor.fBestPartition->GetPath(&devPath); 1559 get_mount_point(visitor.fBestPartition, &mountPoint); 1560 const char* mountPath = mountPoint.Path(); 1561 mkdir(mountPath, S_IRWXU | S_IRWXG | S_IRWXO); 1562 swapDeviceID = _kern_mount(mountPath, devPath.Path(), 1563 NULL, 0, NULL, 0); 1564 if (swapDeviceID < 0) { 1565 dprintf("%s: Can't mount configured swap partition '%s'\n", 1566 __func__, selectedVolume.name); 1567 } 1568 } 1569 } 1570 } 1571 1572 if (swapDeviceID < 0) 1573 swapDeviceID = gBootDevice; 1574 1575 // We now have a swapDeviceID which is used 
for the swap file 1576 1577 KPath path; 1578 struct fs_info info; 1579 _kern_read_fs_info(swapDeviceID, &info); 1580 if (swapDeviceID == gBootDevice) 1581 path = kDefaultSwapPath; 1582 else { 1583 vfs_entry_ref_to_path(info.dev, info.root, ".", true, path.LockBuffer(), 1584 path.BufferSize()); 1585 path.UnlockBuffer(); 1586 path.Append("swap"); 1587 } 1588 1589 const char* swapPath = path.Path(); 1590 1591 // Swap size limits prevent oversized swap files 1592 if (swapAutomatic) { 1593 off_t existingSwapSize = 0; 1594 struct stat existingSwapStat; 1595 if (stat(swapPath, &existingSwapStat) == 0) 1596 existingSwapSize = existingSwapStat.st_size; 1597 1598 off_t freeSpace = info.free_blocks * info.block_size + existingSwapSize; 1599 1600 // Adjust automatic swap to a maximum of 25% of the free space 1601 if (swapSize > (freeSpace / 4)) 1602 swapSize = (freeSpace / 4); 1603 } 1604 1605 // Create swap file 1606 int fd = open(swapPath, O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR); 1607 if (fd < 0) { 1608 dprintf("%s: Can't open/create %s: %s\n", __func__, 1609 swapPath, strerror(errno)); 1610 return; 1611 } 1612 1613 struct stat stat; 1614 stat.st_size = swapSize; 1615 status_t error = _kern_write_stat(fd, NULL, false, &stat, 1616 sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE); 1617 if (error != B_OK) { 1618 dprintf("%s: Failed to resize %s to %" B_PRIdOFF " bytes: %s\n", 1619 __func__, swapPath, swapSize, strerror(error)); 1620 } 1621 1622 close(fd); 1623 1624 error = swap_file_add(swapPath); 1625 if (error != B_OK) { 1626 dprintf("%s: Failed to add swap file %s: %s\n", __func__, swapPath, 1627 strerror(error)); 1628 } 1629 } 1630 1631 1632 //! Used by page daemon to free swap space. 
1633 bool 1634 swap_free_page_swap_space(vm_page* page) 1635 { 1636 VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache()); 1637 if (cache == NULL) 1638 return false; 1639 1640 swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset); 1641 if (slotIndex == SWAP_SLOT_NONE) 1642 return false; 1643 1644 swap_slot_dealloc(slotIndex, 1); 1645 cache->fAllocatedSwapSize -= B_PAGE_SIZE; 1646 cache->_SwapBlockFree(page->cache_offset, 1); 1647 1648 return true; 1649 } 1650 1651 1652 uint32 1653 swap_available_pages() 1654 { 1655 mutex_lock(&sAvailSwapSpaceLock); 1656 uint32 avail = sAvailSwapSpace >> PAGE_SHIFT; 1657 mutex_unlock(&sAvailSwapSpaceLock); 1658 1659 return avail; 1660 } 1661 1662 1663 uint32 1664 swap_total_swap_pages() 1665 { 1666 mutex_lock(&sSwapFileListLock); 1667 1668 uint32 totalSwapSlots = 0; 1669 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 1670 swap_file* swapFile = it.Next();) { 1671 totalSwapSlots += swapFile->last_slot - swapFile->first_slot; 1672 } 1673 1674 mutex_unlock(&sSwapFileListLock); 1675 1676 return totalSwapSlots; 1677 } 1678 1679 1680 #endif // ENABLE_SWAP_SUPPORT 1681 1682 1683 void 1684 swap_get_info(system_info* info) 1685 { 1686 #if ENABLE_SWAP_SUPPORT 1687 info->max_swap_pages = swap_total_swap_pages(); 1688 info->free_swap_pages = swap_available_pages(); 1689 #else 1690 info->max_swap_space = 0; 1691 info->free_swap_space = 0; 1692 #endif 1693 } 1694 1695