/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_priv.h>

#include "IORequest.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE 4096

// interval the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL 5

#define INITIAL_SWAP_HASH_SIZE 1024

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5	/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK (SWAP_BLOCK_PAGES - 1)

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode	*vnode;
	void			*cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap	*bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;	// page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES continuous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache *cache = key.cache;
		return blockIndex ^ (size_t)(int *)cache;
	}

	size_t Hash(const swap_block *value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block *value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block *value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static mutex sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file *sSwapFileAlloc = NULL;	// allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;
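
// sSwapBlockCache below backs swap_block allocations (it is created in
// swap_init()). For a given cache offset, page_index = offset >> PAGE_SHIFT;
// the owning swap block is keyed by page_index & ~SWAP_BLOCK_MASK and the
// slot within the block is page_index & SWAP_BLOCK_MASK. With
// SWAP_BLOCK_SHIFT == 5, page index 37 falls into the block starting at
// page 32, slot 5.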
static object_cache *sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t	fPageIndex;
	swap_addr_t	fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t	fPageIndex;
	swap_addr_t	fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
			file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9lu\n", totalSwapPages);
	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9llu\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9lu\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// if this swap file has used more than 90% of its space,
	// switch to another
	if (sSwapFileAlloc->bmp->free_slots
		< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);

	mutex_unlock(&sSwapFileListLock);

	return addr;
}


static swap_file *
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file *swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
			&& slotIndex < swapFile->last_slot)
			return swapFile;
	}

	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file *swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	MutexLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

	void operator delete(void* address, size_t size)
	{
		io_request_free(address);
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};


// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	// free allocated swap space and swap block
	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
		offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);

	// if we can overcommit, we don't commit here, but in anonymous_fault()
	if (fCanOvercommit) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


status_t
VMAnonymousCache::Read(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.
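
		// The inner loop above extended the run [i, j) for as long as the
		// swap slots remain contiguous, so the whole run can be transferred
		// with a single vfs_read_pages() call below.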

		swap_file *swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie,
			pos, vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	// free any swap slots the pages to be written currently occupy
	for (uint32 i = 0; i < count; i++) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + i);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, 1);
			_SwapBlockFree(pageIndex + i, 1);
			fAllocatedSwapSize -= B_PAGE_SIZE;
		}
	}

	if (fAllocatedSwapSize + (off_t)count * B_PAGE_SIZE > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += (off_t)count * B_PAGE_SIZE;
	locker.Unlock();

	uint32 n = count;
	for (uint32 i = 0; i < count; i += n) {
		swap_addr_t slotIndex;
		// try to allocate n slots; if that fails, try to allocate n/2
		while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
			n >>= 1;
		if (slotIndex == SWAP_SLOT_NONE)
			panic("VMAnonymousCache::Write(): can't allocate swap space\n");

		T(WritePage(this, pageIndex, slotIndex));
			// TODO: Assumes that only one page is written.

		swap_file *swapFile = find_swap_file(slotIndex);

		off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

		status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
			pos, vecs + i, n, flags, _numBytes);
		if (status != B_OK) {
			locker.Lock();
			fAllocatedSwapSize -= (off_t)n * B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, n);
			return status;
		}

		_SwapBlockBuild(pageIndex + i, slotIndex, n);
	}

	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const iovec* vecs, size_t count,
	size_t numBytes, uint32 flags, AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(vip_io_alloc) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our
	// own callback.
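
	// Record the page/slot pairing so that IOFinished() can register the
	// newly allocated slot on success, or release it and roll back
	// fAllocatedSwapSize on failure (see WriteCallback::IOFinished()).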
	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	// We can write the page, if we have not used all of our committed swap
	// space or the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Fault(struct vm_address_space *aspace, off_t offset)
{
	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fGuardedSize > 0) {
			uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
			guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
			guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif

			// report stack fault, guard page hit!
			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
				TRACE("stack overflow!\n");
				return B_BAD_ADDRESS;
			}
		}

		if (fPrecommittedPages == 0) {
			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE)
				fCommittedSwapSize += B_PAGE_SIZE;
			else if (vm_try_reserve_memory(B_PAGE_SIZE, 0) != B_OK)
				return B_NO_MEMORY;

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize);

	// Move all not shadowed pages from the source to the consumer cache.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
		vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through a
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		swap_addr_t consumerSwapSlot = _SwapBlockGetAddress(page->cache_offset);
		if (consumerPage == NULL && consumerSwapSlot == SWAP_SLOT_NONE) {
			// the page is not yet in the consumer cache - move it upwards
			source->RemovePage(page);
			InsertPage(page, (off_t)page->cache_offset << PAGE_SHIFT);

			// If the moved-up page has a swap page associated, we mark it, so
			// that the swap page is moved upwards, too. We would lose if the
			// page was modified and written to swap, and is now not marked
			// modified.
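			// (page->merge_swap is evaluated and cleared again in the swap
			// block loop below.)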
			if (source->_SwapBlockGetAddress(page->cache_offset)
					!= SWAP_SLOT_NONE) {
				page->merge_swap = true;
			}
#if DEBUG_PAGE_CACHE_TRANSITIONS
		} else {
			page->debug_flags = 0;
			if (consumerPage->state == PAGE_STATE_BUSY)
				page->debug_flags |= 0x1;
			if (consumerPage->type == PAGE_TYPE_DUMMY)
				page->debug_flags |= 0x2;
			page->collided_page = consumerPage;
			consumerPage->collided_page = page;
#endif	// DEBUG_PAGE_CACHE_TRANSITIONS
		}
	}

	// Move all not shadowed swap pages from the source to the consumer cache.

	for (off_t offset = source->virtual_base
			& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
		offset < source->virtual_end;
		offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		MutexLocker locker(sSwapHashLock);

		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		if (sourceSwapBlock == NULL)
			continue;

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				// this page is not swapped out
				continue;

			vm_page* page = LookupPage((off_t)pageIndex << PAGE_SHIFT);

			bool keepSwapPage = true;
			if (page != NULL && !page->merge_swap) {
				// The consumer already has a page at this index and it wasn't
				// one taken over from the source. So we can simply free the
				// swap space.
				keepSwapPage = false;
			} else {
				if (page != NULL) {
					// The page was taken over from the source cache. Clear the
					// indicator flag. We'll take over the swap page too.
					page->merge_swap = false;
				} else if (swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					// There's no page in the consumer cache, but a swap page.
					// Free the source swap page.
					keepSwapPage = false;
				}
			}

			if (!keepSwapPage) {
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over
		// by the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		}
	}
}


void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	mutex_lock(&sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block *swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block *)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_SLEEP);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
				mutex_unlock(&sSwapHashLock);
				snooze(10000);
				mutex_lock(&sSwapHashLock);
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}

	mutex_unlock(&sSwapHashLock);
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	mutex_lock(&sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block *swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap);
		}
	}

	mutex_unlock(&sSwapHashLock);
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	mutex_lock(&sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block *swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	mutex_unlock(&sSwapHashLock);

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
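	// E.g. shrinking a commitment of 10 pages (6 swap + 4 RAM) to 5 pages
	// first unreserves the 4 RAM pages below, then 1 page of swap, leaving
	// the remaining commitment entirely in swap.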
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
				"swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
			"bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


// #pragma mark -


status_t
swap_file_add(const char *path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode *node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file *swap = (swap_file *)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
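	// Slot indices form one global namespace across all swap files: each
	// file owns the range [first_slot, last_slot), which is how
	// find_swap_file() maps a slot index back to its file.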
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char *path)
{
	vnode *node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file *swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently in use, we can't delete it
	// TODO: mark this swap file as deleting, and remove it after releasing
	// all the swap space
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock",
		sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	mutex_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	off_t size = 0;

	void *settings = load_driver_settings("virtual_memory");
	if (settings != NULL) {
		if (!get_driver_boolean_parameter(settings, "vm", false, false)) {
			unload_driver_settings(settings);
			return;
		}

		const char *string = get_driver_parameter(settings, "swap_size", NULL,
			NULL);
		size = string ? atoll(string) : 0;

		unload_driver_settings(settings);
	} else
		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;

	if (size < B_PAGE_SIZE)
		return;

	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = size;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
			strerror(error));
	}

	close(fd);

	error = swap_file_add("/var/swap");
	if (error != B_OK)
		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
}


//!	Used by the page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page *page)
{
	VMAnonymousCache *cache = dynamic_cast<VMAnonymousCache *>(page->cache);
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
		swap_file *swapFile = it.Next();)
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}

#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(struct system_memory_info *info)
{
#if ENABLE_SWAP_SUPPORT
	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
#else
	info->max_swap_space = 0;
	info->free_swap_space = 0;
#endif
}