/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>

#include "IORequest.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// number of free swap blocks the object cache shall minimally have
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval the hash resizer is triggered (in 0.1s)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE	1024

#define SWAP_BLOCK_PAGES	32
#define SWAP_BLOCK_SHIFT	5	/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK		(SWAP_BLOCK_PAGES - 1)


struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode*	vnode;
	void*			cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap*	bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;	// page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES continuous pages from the same cache
struct swap_block {
	swap_block*		hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache* cache = key.cache;
		return blockIndex ^ (size_t)(int*)cache;
	}

	size_t Hash(const swap_block* value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block* value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block* value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file* sSwapFileAlloc = NULL;	// allocate from here
static uint32 sSwapFileCount = 0;

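// Swap space (in bytes) that is not yet reserved by any cache; guarded by
// sAvailSwapSpaceLock.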
static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache* sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t	fPageIndex;
	swap_addr_t	fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t	fPageIndex;
	swap_addr_t	fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
			file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9lu\n", totalSwapPages);
	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9llu\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9lu\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages, we return
	// SWAP_SLOT_NONE; this forces Write() to adjust the allocation amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
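		// Every registered swap file was tried without finding a free run of
		// the requested size.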
panic("swap_slot_alloc: swap space exhausted!\n"); 279 return SWAP_SLOT_NONE; 280 } 281 282 // if this swap file has used more than 90% percent of its space 283 // switch to another 284 if (sSwapFileAlloc->bmp->free_slots 285 < (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10) 286 sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc); 287 288 mutex_unlock(&sSwapFileListLock); 289 290 return addr; 291 } 292 293 294 static swap_file* 295 find_swap_file(swap_addr_t slotIndex) 296 { 297 for (SwapFileList::Iterator it = sSwapFileList.GetIterator(); 298 swap_file* swapFile = it.Next();) { 299 if (slotIndex >= swapFile->first_slot 300 && slotIndex < swapFile->last_slot) 301 return swapFile; 302 } 303 304 panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex); 305 return NULL; 306 } 307 308 309 static void 310 swap_slot_dealloc(swap_addr_t slotIndex, uint32 count) 311 { 312 if (slotIndex == SWAP_SLOT_NONE) 313 return; 314 315 mutex_lock(&sSwapFileListLock); 316 swap_file* swapFile = find_swap_file(slotIndex); 317 slotIndex -= swapFile->first_slot; 318 radix_bitmap_dealloc(swapFile->bmp, slotIndex, count); 319 mutex_unlock(&sSwapFileListLock); 320 } 321 322 323 static off_t 324 swap_space_reserve(off_t amount) 325 { 326 mutex_lock(&sAvailSwapSpaceLock); 327 if (sAvailSwapSpace >= amount) 328 sAvailSwapSpace -= amount; 329 else { 330 amount = sAvailSwapSpace; 331 sAvailSwapSpace = 0; 332 } 333 mutex_unlock(&sAvailSwapSpaceLock); 334 335 return amount; 336 } 337 338 339 static void 340 swap_space_unreserve(off_t amount) 341 { 342 mutex_lock(&sAvailSwapSpaceLock); 343 sAvailSwapSpace += amount; 344 mutex_unlock(&sAvailSwapSpaceLock); 345 } 346 347 348 static void 349 swap_hash_resizer(void*, int) 350 { 351 WriteLocker locker(sSwapHashLock); 352 353 size_t size; 354 void* allocation; 355 356 do { 357 size = sSwapHashTable.ResizeNeeded(); 358 if (size == 0) 359 return; 360 361 locker.Unlock(); 362 363 allocation = malloc(size); 364 if (allocation == NULL) 365 return; 366 367 locker.Lock(); 368 369 } while (!sSwapHashTable.Resize(allocation, size)); 370 } 371 372 373 // #pragma mark - 374 375 376 class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback { 377 public: 378 WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback) 379 : 380 StackableAsyncIOCallback(callback), 381 fCache(cache) 382 { 383 } 384 385 void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot) 386 { 387 fPageIndex = pageIndex; 388 fSlotIndex = slotIndex; 389 fNewSlot = newSlot; 390 } 391 392 virtual void IOFinished(status_t status, bool partialTransfer, 393 size_t bytesTransferred) 394 { 395 if (fNewSlot) { 396 if (status == B_OK) { 397 fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1); 398 } else { 399 AutoLocker<VMCache> locker(fCache); 400 fCache->fAllocatedSwapSize -= B_PAGE_SIZE; 401 locker.Unlock(); 402 403 swap_slot_dealloc(fSlotIndex, 1); 404 } 405 } 406 407 fNextCallback->IOFinished(status, partialTransfer, bytesTransferred); 408 409 delete this; 410 } 411 412 private: 413 VMAnonymousCache* fCache; 414 page_num_t fPageIndex; 415 swap_addr_t fSlotIndex; 416 bool fNewSlot; 417 }; 418 419 420 // #pragma mark - 421 422 423 VMAnonymousCache::~VMAnonymousCache() 424 { 425 // free allocated swap space and swap block 426 for (off_t offset = virtual_base, toFree = fAllocatedSwapSize; 427 offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) { 428 swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT); 429 if (slotIndex == SWAP_SLOT_NONE) 430 
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages, uint32 allocationFlags)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM, allocationFlags);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);

	// if we can overcommit, we don't commit here, but in anonymous_fault()
	if (fCanOvercommit) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size, priority);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}


status_t
VMAnonymousCache::Read(off_t offset, const iovec* vecs, size_t count,
	uint32 flags, size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.
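
		// Map the starting slot of the run to its swap file; the read
		// position is relative to that file's first slot.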
		swap_file* swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie, pos,
			vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const iovec* vecs, size_t count,
	uint32 flags, size_t* _numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	uint32 totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	uint32 pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		void* vectorBase = vecs[i].iov_base;
		size_t vectorLength = vecs[i].iov_len;
		uint32 n = pageCount;

		for (uint32 j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// try to allocate n slots; if that fails, retry with n/2
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.

			swap_file* swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

			size_t length = n * B_PAGE_SIZE;
			iovec vector[1];
			vector->iov_base = vectorBase;
			vector->iov_len = length;

			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
				pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = (void*)((addr_t)vectorBase + n * B_PAGE_SIZE);
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}


status_t
VMAnonymousCache::WriteAsync(off_t offset, const iovec* vecs, size_t count,
	size_t numBytes, uint32 flags, AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
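	// The new slot counts against this cache's committed swap size; if that
	// is exhausted, the asynchronous write is refused.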
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(malloc_flags(HEAP_PRIORITY_VIP)) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need our
	// own callback.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	// We can write the page, if we have not used all of our committed swap
	// space or the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
}


int32
VMAnonymousCache::MaxPagesPerAsyncWrite() const
{
	return 1;
}


status_t
VMAnonymousCache::Fault(struct VMAddressSpace* aspace, off_t offset)
{
	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fGuardedSize > 0) {
			uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
			guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
			guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif

			// report stack fault, guard page hit!
			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
				TRACE("stack overflow!\n");
				return B_BAD_ADDRESS;
			}
		}

		if (fPrecommittedPages == 0) {
			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE) {
				fCommittedSwapSize += B_PAGE_SIZE;
			} else {
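				// No swap space was left to reserve; fall back to reserving
				// plain memory for this page instead.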
				int priority = aspace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
				if (vm_try_reserve_memory(B_PAGE_SIZE, priority, 0) != B_OK)
					return B_NO_MEMORY;
			}

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}


void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::MergeStore(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize, VM_PRIORITY_USER);

	// Move all not shadowed swap pages from the source to the consumer cache.
	// Also remove all source pages that are shadowed by consumer swap pages.
	_MergeSwapPages(source);

	// Move all not shadowed pages from the source to the consumer cache.
	if (source->page_count < page_count)
		_MergePagesSmallerSource(source);
	else
		_MergePagesSmallerConsumer(source);
}


void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block* swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block*)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
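				// Drop the hash lock while sleeping so others can make
				// progress, then retry the lookup/allocation.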
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}


void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block* swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block* swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size, int priority)
{
	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);

	// Basic strategy: reserve swap space first, only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
				"swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
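		// Whatever couldn't be satisfied by unreserving memory comes out of
		// the swap reservation.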
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory, now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, priority, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
			"bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}


void
VMAnonymousCache::_MergePagesSmallerSource(VMAnonymousCache* source)
{
	// The source cache has fewer pages than the consumer (this cache), so we
	// iterate through the source's pages and move the ones that are not
	// shadowed up to the consumer.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			ASSERT_PRINT(!page->busy, "page: %p", page);
			source->RemovePage(page);
			InsertPage(page, (off_t)page->cache_offset << PAGE_SHIFT);
		}
	}
}


void
VMAnonymousCache::_MergePagesSmallerConsumer(VMAnonymousCache* source)
{
	// The consumer (this cache) has fewer pages than the source, so we move
	// the consumer's pages to the source (freeing shadowed ones) and finally
	// just move all pages of the source back to the consumer.

	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		// If a source page is in the way, remove and free it.
		vm_page* sourcePage = source->LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (sourcePage != NULL) {
			DEBUG_PAGE_ACCESS_START(sourcePage);
			ASSERT_PRINT(!sourcePage->busy, "page: %p", sourcePage);
			source->RemovePage(sourcePage);
			vm_page_free(source, sourcePage);
		}

		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		source->MovePage(page);
	}

	MoveAllPages(source);
}


void
VMAnonymousCache::_MergeSwapPages(VMAnonymousCache* source)
{
	// If neither source nor consumer have swap pages, we don't have to do
	// anything.
	if (source->fAllocatedSwapSize == 0 && fAllocatedSwapSize == 0)
		return;

	for (off_t offset = source->virtual_base
			& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		if (sourceSwapBlock != NULL)
			sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		// remove all source pages that are shadowed by consumer swap pages
		if (swapBlock != NULL) {
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					vm_page* page = source->LookupPage(
						(off_t)(swapBlockPageIndex + i) << PAGE_SHIFT);
					if (page != NULL) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT_PRINT(!page->busy, "page: %p", page);
						source->RemovePage(page);
						vm_page_free(source, page);
					}
				}
			}
		}

		if (sourceSwapBlock == NULL)
			continue;

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				continue;

			if ((swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE)
				|| LookupPage((off_t)pageIndex << PAGE_SHIFT) != NULL) {
				// The consumer already has a page or a swapped out page
				// at this index. So we can free the source swap space.
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over by
		// the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock,
				CACHE_DONT_WAIT_FOR_MEMORY | CACHE_DONT_LOCK_KERNEL_SPACE);
		}
	}
}


// #pragma mark -


status_t
swap_file_add(const char* path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode* node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file* swap = (swap_file*)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
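	// All swap files share a single global slot namespace; a new file's slot
	// range is appended after the last registered file.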
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}


status_t
swap_file_delete(const char* path)
{
	vnode* node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file* swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// if this file is currently used, we can't delete
	// TODO: mark this swap file deleting, and remove it after releasing
	// all the swap space
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock",
		sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}


void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
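	// (gReadOnlyBootDevice is set during boot when the boot volume is not
	// writable.)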
	if (gReadOnlyBootDevice)
		return;

	off_t size = 0;

	void* settings = load_driver_settings("virtual_memory");
	if (settings != NULL) {
		if (!get_driver_boolean_parameter(settings, "vm", false, false))
			return;

		const char* string = get_driver_parameter(settings, "swap_size", NULL,
			NULL);
		size = string ? atoll(string) : 0;

		unload_driver_settings(settings);
	} else
		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;

	if (size < B_PAGE_SIZE)
		return;

	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = size;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
			strerror(error));
	}

	close(fd);

	error = swap_file_add("/var/swap");
	if (error != B_OK)
		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
}


//! Used by page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page* page)
{
	VMAnonymousCache* cache = dynamic_cast<VMAnonymousCache*>(page->Cache());
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* swapFile = it.Next();)
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}

#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(struct system_memory_info* info)
{
#if ENABLE_SWAP_SUPPORT
	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
#else
	info->max_swap_space = 0;
	info->free_swap_space = 0;
#endif
}