/*
 * Copyright 2008, Zhao Shuai, upczhsh@163.com.
 * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

#include "VMAnonymousCache.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <KernelExport.h>
#include <NodeMonitor.h>

#include <arch_config.h>
#include <boot_device.h>
#include <driver_settings.h>
#include <fs/fd.h>
#include <fs_interface.h>
#include <heap.h>
#include <kernel_daemon.h>
#include <slab/Slab.h>
#include <syscalls.h>
#include <system_info.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <util/RadixBitmap.h>
#include <vfs.h>
#include <vm.h>
#include <vm_page.h>
#include <vm_priv.h>

#include "IORequest.h"


#if ENABLE_SWAP_SUPPORT

//#define TRACE_VM_ANONYMOUS_CACHE
#ifdef TRACE_VM_ANONYMOUS_CACHE
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) do { } while (false)
#endif


// minimum number of free swap blocks the object cache shall keep
#define MIN_SWAP_BLOCK_RESERVE	4096

// interval at which the hash resizer is triggered (in 0.1 s units)
#define SWAP_HASH_RESIZE_INTERVAL	5

#define INITIAL_SWAP_HASH_SIZE	1024

#define SWAP_BLOCK_PAGES 32
#define SWAP_BLOCK_SHIFT 5	/* 1 << SWAP_BLOCK_SHIFT == SWAP_BLOCK_PAGES */
#define SWAP_BLOCK_MASK  (SWAP_BLOCK_PAGES - 1)

struct swap_file : DoublyLinkedListLinkImpl<swap_file> {
	int				fd;
	struct vnode	*vnode;
	void			*cookie;
	swap_addr_t		first_slot;
	swap_addr_t		last_slot;
	radix_bitmap	*bmp;
};

struct swap_hash_key {
	VMAnonymousCache	*cache;
	off_t				page_index;	// page index in the cache
};

// Each swap block contains swap address information for
// SWAP_BLOCK_PAGES contiguous pages from the same cache.
struct swap_block {
	swap_block		*hash_link;
	swap_hash_key	key;
	uint32			used;
	swap_addr_t		swap_slots[SWAP_BLOCK_PAGES];
};

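// A worked example of the mapping (illustrative numbers only): with
// SWAP_BLOCK_PAGES == 32, cache page index 70 belongs to the swap block
// keyed with page_index 64 (70 & ~SWAP_BLOCK_MASK) and occupies
// swap_slots[6] (70 & SWAP_BLOCK_MASK) within that block.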
struct SwapHashTableDefinition {
	typedef swap_hash_key KeyType;
	typedef swap_block ValueType;

	SwapHashTableDefinition() {}

	size_t HashKey(const swap_hash_key& key) const
	{
		off_t blockIndex = key.page_index >> SWAP_BLOCK_SHIFT;
		VMAnonymousCache *cache = key.cache;
		return blockIndex ^ (size_t)(addr_t)cache;
	}

	size_t Hash(const swap_block *value) const
	{
		return HashKey(value->key);
	}

	bool Compare(const swap_hash_key& key, const swap_block *value) const
	{
		return (key.page_index & ~(off_t)SWAP_BLOCK_MASK)
				== (value->key.page_index & ~(off_t)SWAP_BLOCK_MASK)
			&& key.cache == value->key.cache;
	}

	swap_block*& GetLink(swap_block *value) const
	{
		return value->hash_link;
	}
};

typedef BOpenHashTable<SwapHashTableDefinition> SwapHashTable;
typedef DoublyLinkedList<swap_file> SwapFileList;

static SwapHashTable sSwapHashTable;
static rw_lock sSwapHashLock;

static SwapFileList sSwapFileList;
static mutex sSwapFileListLock;
static swap_file *sSwapFileAlloc = NULL;	// allocate from here
static uint32 sSwapFileCount = 0;

static off_t sAvailSwapSpace = 0;
static mutex sAvailSwapSpaceLock;

static object_cache *sSwapBlockCache;


#if SWAP_TRACING
namespace SwapTracing {

class SwapTraceEntry : public AbstractTraceEntry {
public:
	SwapTraceEntry(VMAnonymousCache* cache)
		:
		fCache(cache)
	{
	}

protected:
	VMAnonymousCache*	fCache;
};


class ReadPage : public SwapTraceEntry {
public:
	ReadPage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap read: cache %p, page index: %lu <- swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t	fPageIndex;
	swap_addr_t	fSwapSlotIndex;
};


class WritePage : public SwapTraceEntry {
public:
	WritePage(VMAnonymousCache* cache, page_num_t pageIndex,
		swap_addr_t swapSlotIndex)
		:
		SwapTraceEntry(cache),
		fPageIndex(pageIndex),
		fSwapSlotIndex(swapSlotIndex)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("swap write: cache %p, page index: %lu -> swap slot: %lu",
			fCache, fPageIndex, fSwapSlotIndex);
	}

private:
	page_num_t	fPageIndex;
	swap_addr_t	fSwapSlotIndex;
};

}	// namespace SwapTracing

#	define T(x) new(std::nothrow) SwapTracing::x;
#else
#	define T(x) ;
#endif


static int
dump_swap_info(int argc, char** argv)
{
	swap_addr_t totalSwapPages = 0;
	swap_addr_t freeSwapPages = 0;

	kprintf("swap files:\n");

	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file* file = it.Next();) {
		swap_addr_t total = file->last_slot - file->first_slot;
		kprintf("  vnode: %p, pages: total: %lu, free: %lu\n",
			file->vnode, total, file->bmp->free_slots);

		totalSwapPages += total;
		freeSwapPages += file->bmp->free_slots;
	}

	kprintf("\n");
	kprintf("swap space in pages:\n");
	kprintf("total:     %9lu\n", totalSwapPages);
	kprintf("available: %9llu\n", sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("reserved:  %9llu\n",
		totalSwapPages - sAvailSwapSpace / B_PAGE_SIZE);
	kprintf("used:      %9lu\n", totalSwapPages - freeSwapPages);
	kprintf("free:      %9lu\n", freeSwapPages);

	return 0;
}


static swap_addr_t
swap_slot_alloc(uint32 count)
{
	mutex_lock(&sSwapFileListLock);

	if (sSwapFileList.IsEmpty()) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc(): no swap file in the system\n");
		return SWAP_SLOT_NONE;
	}

	// Since the radix bitmap cannot handle more than 32 pages at a time, we
	// return SWAP_SLOT_NONE; this forces Write() to adjust the allocation
	// amount.
	if (count > BITMAP_RADIX) {
		mutex_unlock(&sSwapFileListLock);
		return SWAP_SLOT_NONE;
	}

	swap_addr_t j, addr = SWAP_SLOT_NONE;
	for (j = 0; j < sSwapFileCount; j++) {
		if (sSwapFileAlloc == NULL)
			sSwapFileAlloc = sSwapFileList.First();

		addr = radix_bitmap_alloc(sSwapFileAlloc->bmp, count);
		if (addr != SWAP_SLOT_NONE) {
			addr += sSwapFileAlloc->first_slot;
			break;
		}

		// this swap_file is full, find another
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);
	}

	if (j == sSwapFileCount) {
		mutex_unlock(&sSwapFileListLock);
		panic("swap_slot_alloc: swap space exhausted!\n");
		return SWAP_SLOT_NONE;
	}

	// If this swap file has used more than 90 percent of its space,
	// switch to another.
	if (sSwapFileAlloc->bmp->free_slots
			< (sSwapFileAlloc->last_slot - sSwapFileAlloc->first_slot) / 10)
		sSwapFileAlloc = sSwapFileList.GetNext(sSwapFileAlloc);

	mutex_unlock(&sSwapFileListLock);

	return addr;
}

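// Maps a global slot index back to the swap file whose slot range covers it.
// Panics if no registered swap file matches.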
static swap_file *
find_swap_file(swap_addr_t slotIndex)
{
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file *swapFile = it.Next();) {
		if (slotIndex >= swapFile->first_slot
			&& slotIndex < swapFile->last_slot)
			return swapFile;
	}

	panic("find_swap_file(): can't find swap file for slot %ld\n", slotIndex);
	return NULL;
}


static void
swap_slot_dealloc(swap_addr_t slotIndex, uint32 count)
{
	if (slotIndex == SWAP_SLOT_NONE)
		return;

	mutex_lock(&sSwapFileListLock);
	swap_file *swapFile = find_swap_file(slotIndex);
	slotIndex -= swapFile->first_slot;
	radix_bitmap_dealloc(swapFile->bmp, slotIndex, count);
	mutex_unlock(&sSwapFileListLock);
}


static off_t
swap_space_reserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	if (sAvailSwapSpace >= amount)
		sAvailSwapSpace -= amount;
	else {
		amount = sAvailSwapSpace;
		sAvailSwapSpace = 0;
	}
	mutex_unlock(&sAvailSwapSpaceLock);

	return amount;
}


static void
swap_space_unreserve(off_t amount)
{
	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += amount;
	mutex_unlock(&sAvailSwapSpaceLock);
}


static void
swap_hash_resizer(void*, int)
{
	WriteLocker locker(sSwapHashLock);

	size_t size;
	void* allocation;

	do {
		size = sSwapHashTable.ResizeNeeded();
		if (size == 0)
			return;

		locker.Unlock();

		allocation = malloc(size);
		if (allocation == NULL)
			return;

		locker.Lock();

	} while (!sSwapHashTable.Resize(allocation, size));
}


// #pragma mark -


class VMAnonymousCache::WriteCallback : public StackableAsyncIOCallback {
public:
	WriteCallback(VMAnonymousCache* cache, AsyncIOCallback* callback)
		:
		StackableAsyncIOCallback(callback),
		fCache(cache)
	{
	}

	void SetTo(page_num_t pageIndex, swap_addr_t slotIndex, bool newSlot)
	{
		fPageIndex = pageIndex;
		fSlotIndex = slotIndex;
		fNewSlot = newSlot;
	}

	virtual void IOFinished(status_t status, bool partialTransfer,
		size_t bytesTransferred)
	{
		if (fNewSlot) {
			if (status == B_OK) {
				fCache->_SwapBlockBuild(fPageIndex, fSlotIndex, 1);
			} else {
				AutoLocker<VMCache> locker(fCache);
				fCache->fAllocatedSwapSize -= B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(fSlotIndex, 1);
			}
		}

		fNextCallback->IOFinished(status, partialTransfer, bytesTransferred);

		delete this;
	}

	void operator delete(void* address, size_t size)
	{
		io_request_free(address);
	}

private:
	VMAnonymousCache*	fCache;
	page_num_t			fPageIndex;
	swap_addr_t			fSlotIndex;
	bool				fNewSlot;
};

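// Note: IOFinished() forwards the result to the chained callback and then
// deletes the WriteCallback itself; the class-level operator delete above
// returns the memory via io_request_free().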
// #pragma mark -


VMAnonymousCache::~VMAnonymousCache()
{
	// free the allocated swap space and swap blocks
	for (off_t offset = virtual_base, toFree = fAllocatedSwapSize;
			offset < virtual_end && toFree > 0; offset += B_PAGE_SIZE) {
		swap_addr_t slotIndex = _SwapBlockGetAddress(offset >> PAGE_SHIFT);
		if (slotIndex == SWAP_SLOT_NONE)
			continue;

		swap_slot_dealloc(slotIndex, 1);
		_SwapBlockFree(offset >> PAGE_SHIFT, 1);
		toFree -= B_PAGE_SIZE;
	}

	swap_space_unreserve(fCommittedSwapSize);
	if (committed_size > fCommittedSwapSize)
		vm_unreserve_memory(committed_size - fCommittedSwapSize);
}


status_t
VMAnonymousCache::Init(bool canOvercommit, int32 numPrecommittedPages,
	int32 numGuardPages)
{
	TRACE("%p->VMAnonymousCache::Init(canOvercommit = %s, "
		"numPrecommittedPages = %ld, numGuardPages = %ld)\n", this,
		canOvercommit ? "yes" : "no", numPrecommittedPages, numGuardPages);

	status_t error = VMCache::Init(CACHE_TYPE_RAM);
	if (error != B_OK)
		return error;

	fCanOvercommit = canOvercommit;
	fHasPrecommitted = false;
	fPrecommittedPages = min_c(numPrecommittedPages, 255);
	fGuardedSize = numGuardPages * B_PAGE_SIZE;
	fCommittedSwapSize = 0;
	fAllocatedSwapSize = 0;

	return B_OK;
}


status_t
VMAnonymousCache::Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::Commit(%lld)\n", this, size);

	// if we can overcommit, we don't commit here, but in anonymous_fault()
	if (fCanOvercommit) {
		if (fHasPrecommitted)
			return B_OK;

		// pre-commit some pages to make a later failure less probable
		fHasPrecommitted = true;
		uint32 precommitted = fPrecommittedPages * B_PAGE_SIZE;
		if (size > precommitted)
			size = precommitted;
	}

	return _Commit(size);
}


bool
VMAnonymousCache::HasPage(off_t offset)
{
	if (_SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE)
		return true;

	return false;
}

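// Reads swapped-out pages back in. Consecutive pages whose swap slots are
// contiguous are grouped, so that a single vfs_read_pages() call can serve
// several I/O vectors at once.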
status_t
VMAnonymousCache::Read(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	for (uint32 i = 0, j = 0; i < count; i = j) {
		swap_addr_t startSlotIndex = _SwapBlockGetAddress(pageIndex + i);
		for (j = i + 1; j < count; j++) {
			swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + j);
			if (slotIndex != startSlotIndex + j - i)
				break;
		}

		T(ReadPage(this, pageIndex, startSlotIndex));
			// TODO: Assumes that only one page is read.

		swap_file *swapFile = find_swap_file(startSlotIndex);

		off_t pos = (off_t)(startSlotIndex - swapFile->first_slot)
			* B_PAGE_SIZE;

		status_t status = vfs_read_pages(swapFile->vnode, swapFile->cookie,
			pos, vecs + i, j - i, flags, _numBytes);
		if (status != B_OK)
			return status;
	}

	return B_OK;
}


status_t
VMAnonymousCache::Write(off_t offset, const iovec *vecs, size_t count,
	uint32 flags, size_t *_numBytes)
{
	off_t pageIndex = offset >> PAGE_SHIFT;

	AutoLocker<VMCache> locker(this);

	uint32 totalPages = 0;
	for (uint32 i = 0; i < count; i++) {
		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;
		swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex + totalPages);
		if (slotIndex != SWAP_SLOT_NONE) {
			swap_slot_dealloc(slotIndex, pageCount);
			_SwapBlockFree(pageIndex + totalPages, pageCount);
			fAllocatedSwapSize -= pageCount * B_PAGE_SIZE;
		}

		totalPages += pageCount;
	}

	off_t totalSize = totalPages * B_PAGE_SIZE;
	if (fAllocatedSwapSize + totalSize > fCommittedSwapSize)
		return B_ERROR;

	fAllocatedSwapSize += totalSize;
	locker.Unlock();

	uint32 pagesLeft = totalPages;
	totalPages = 0;

	for (uint32 i = 0; i < count; i++) {
		uint32 pageCount = (vecs[i].iov_len + B_PAGE_SIZE - 1) >> PAGE_SHIFT;

		void *vectorBase = vecs[i].iov_base;
		size_t vectorLength = vecs[i].iov_len;
		uint32 n = pageCount;

		for (uint32 j = 0; j < pageCount; j += n) {
			swap_addr_t slotIndex;
			// Try to allocate n slots; if that fails, retry with half as
			// many.
			while ((slotIndex = swap_slot_alloc(n)) == SWAP_SLOT_NONE && n >= 2)
				n >>= 1;

			if (slotIndex == SWAP_SLOT_NONE)
				panic("VMAnonymousCache::Write(): can't allocate swap space\n");

			T(WritePage(this, pageIndex, slotIndex));
				// TODO: Assumes that only one page is written.

			swap_file *swapFile = find_swap_file(slotIndex);

			off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

			size_t length = n * B_PAGE_SIZE;
			iovec vector[1];
			vector->iov_base = vectorBase;
			vector->iov_len = length;

			status_t status = vfs_write_pages(swapFile->vnode, swapFile->cookie,
				pos, vector, 1, flags, &length);
			if (status != B_OK) {
				locker.Lock();
				fAllocatedSwapSize -= (off_t)pagesLeft * B_PAGE_SIZE;
				locker.Unlock();

				swap_slot_dealloc(slotIndex, n);
				return status;
			}

			_SwapBlockBuild(pageIndex + totalPages, slotIndex, n);
			pagesLeft -= n;

			if (n != pageCount) {
				vectorBase = (void *)((addr_t)vectorBase + n * B_PAGE_SIZE);
				vectorLength -= n * B_PAGE_SIZE;
			}
		}

		totalPages += pageCount;
	}

	ASSERT(pagesLeft == 0);
	return B_OK;
}

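// Writes a single page to swap asynchronously. If the page has no swap slot
// yet, one is allocated (and charged against the committed swap size) before
// the write is issued; on I/O failure the WriteCallback undoes both steps.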
status_t
VMAnonymousCache::WriteAsync(off_t offset, const iovec* vecs, size_t count,
	size_t numBytes, uint32 flags, AsyncIOCallback* _callback)
{
	// TODO: Currently this method is only used for single pages. Either make
	// more flexible use of it or change the interface!
	// This implementation relies on the current usage!
	ASSERT(count == 1);
	ASSERT(numBytes <= B_PAGE_SIZE);

	page_num_t pageIndex = offset >> PAGE_SHIFT;
	swap_addr_t slotIndex = _SwapBlockGetAddress(pageIndex);
	bool newSlot = slotIndex == SWAP_SLOT_NONE;

	// If the page doesn't have any swap space yet, allocate it.
	if (newSlot) {
		AutoLocker<VMCache> locker(this);
		if (fAllocatedSwapSize + B_PAGE_SIZE > fCommittedSwapSize) {
			_callback->IOFinished(B_ERROR, true, 0);
			return B_ERROR;
		}

		fAllocatedSwapSize += B_PAGE_SIZE;

		slotIndex = swap_slot_alloc(1);
	}

	// create our callback
	WriteCallback* callback = (flags & B_VIP_IO_REQUEST) != 0
		? new(vip_io_alloc) WriteCallback(this, _callback)
		: new(std::nothrow) WriteCallback(this, _callback);
	if (callback == NULL) {
		if (newSlot) {
			AutoLocker<VMCache> locker(this);
			fAllocatedSwapSize -= B_PAGE_SIZE;
			locker.Unlock();

			swap_slot_dealloc(slotIndex, 1);
		}
		_callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}
	// TODO: If the page already had swap space assigned, we don't need a
	// callback of our own.

	callback->SetTo(pageIndex, slotIndex, newSlot);

	T(WritePage(this, pageIndex, slotIndex));

	// write the page asynchronously
	swap_file* swapFile = find_swap_file(slotIndex);
	off_t pos = (off_t)(slotIndex - swapFile->first_slot) * B_PAGE_SIZE;

	return vfs_asynchronous_write_pages(swapFile->vnode, swapFile->cookie, pos,
		vecs, 1, numBytes, flags, callback);
}


bool
VMAnonymousCache::CanWritePage(off_t offset)
{
	// We can write the page, if we have not used all of our committed swap
	// space or the page already has a swap slot assigned.
	return fAllocatedSwapSize < fCommittedSwapSize
		|| _SwapBlockGetAddress(offset >> PAGE_SHIFT) != SWAP_SLOT_NONE;
}


status_t
VMAnonymousCache::Fault(struct vm_address_space *aspace, off_t offset)
{
	if (fCanOvercommit && LookupPage(offset) == NULL && !HasPage(offset)) {
		if (fGuardedSize > 0) {
			uint32 guardOffset;

#ifdef STACK_GROWS_DOWNWARDS
			guardOffset = 0;
#elif defined(STACK_GROWS_UPWARDS)
			guardOffset = virtual_size - fGuardedSize;
#else
#	error Stack direction has not been defined in arch_config.h
#endif

			// report stack fault, guard page hit!
			if (offset >= guardOffset && offset < guardOffset + fGuardedSize) {
				TRACE("stack overflow!\n");
				return B_BAD_ADDRESS;
			}
		}

		if (fPrecommittedPages == 0) {
			// try to commit additional swap space/memory
			if (swap_space_reserve(B_PAGE_SIZE) == B_PAGE_SIZE)
				fCommittedSwapSize += B_PAGE_SIZE;
			else if (vm_try_reserve_memory(B_PAGE_SIZE, 0) != B_OK)
				return B_NO_MEMORY;

			committed_size += B_PAGE_SIZE;
		} else
			fPrecommittedPages--;
	}

	// This will cause vm_soft_fault() to handle the fault
	return B_BAD_HANDLER;
}

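// Merges the source cache into this (the consumer) cache in two passes:
// first, every page that is not shadowed by a consumer page or swap slot is
// moved up; then the source's swap blocks are walked and each remaining swap
// slot is either taken over by the consumer or freed.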
void
VMAnonymousCache::Merge(VMCache* _source)
{
	VMAnonymousCache* source = dynamic_cast<VMAnonymousCache*>(_source);
	if (source == NULL) {
		panic("VMAnonymousCache::Merge(): merge with incompatible cache "
			"%p requested", _source);
		return;
	}

	// take over the source's committed size
	fCommittedSwapSize += source->fCommittedSwapSize;
	source->fCommittedSwapSize = 0;
	committed_size += source->committed_size;
	source->committed_size = 0;

	off_t actualSize = virtual_end - virtual_base;
	if (committed_size > actualSize)
		_Commit(actualSize);

	// Move all not shadowed pages from the source to the consumer cache.

	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		swap_addr_t consumerSwapSlot = _SwapBlockGetAddress(page->cache_offset);
		if (consumerPage == NULL && consumerSwapSlot == SWAP_SLOT_NONE) {
			// the page is not yet in the consumer cache - move it upwards
			source->RemovePage(page);
			InsertPage(page, (off_t)page->cache_offset << PAGE_SHIFT);

			// If the moved-up page has a swap page associated, we mark it, so
			// that the swap page is moved upwards, too. We would lose data if
			// the page was modified and written to swap, and is now no longer
			// marked modified.
			if (source->_SwapBlockGetAddress(page->cache_offset)
					!= SWAP_SLOT_NONE) {
				page->merge_swap = true;
			}
#if DEBUG_PAGE_CACHE_TRANSITIONS
		} else {
			page->debug_flags = 0;
			if (consumerPage->state == PAGE_STATE_BUSY)
				page->debug_flags |= 0x1;
			if (consumerPage->type == PAGE_TYPE_DUMMY)
				page->debug_flags |= 0x2;
			page->collided_page = consumerPage;
			consumerPage->collided_page = page;
#endif	// DEBUG_PAGE_CACHE_TRANSITIONS
		}
	}

	// Move all not shadowed swap pages from the source to the consumer cache.

	for (off_t offset = source->virtual_base
				& ~(off_t)(B_PAGE_SIZE * SWAP_BLOCK_PAGES - 1);
			offset < source->virtual_end;
			offset += B_PAGE_SIZE * SWAP_BLOCK_PAGES) {

		WriteLocker locker(sSwapHashLock);

		page_num_t swapBlockPageIndex = offset >> PAGE_SHIFT;
		swap_hash_key key = { source, swapBlockPageIndex };
		swap_block* sourceSwapBlock = sSwapHashTable.Lookup(key);

		if (sourceSwapBlock == NULL)
			continue;

		// remove the source swap block -- we will either take over the swap
		// space (and the block) or free it
		sSwapHashTable.RemoveUnchecked(sourceSwapBlock);

		key.cache = this;
		swap_block* swapBlock = sSwapHashTable.Lookup(key);

		locker.Unlock();

		for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
			off_t pageIndex = swapBlockPageIndex + i;
			swap_addr_t sourceSlotIndex = sourceSwapBlock->swap_slots[i];

			if (sourceSlotIndex == SWAP_SLOT_NONE)
				// this page is not swapped out
				continue;

			vm_page* page = LookupPage((off_t)pageIndex << PAGE_SHIFT);

			bool keepSwapPage = true;
			if (page != NULL && !page->merge_swap) {
				// The consumer already has a page at this index and it wasn't
				// one taken over from the source. So we can simply free the
				// swap space.
				keepSwapPage = false;
			} else {
				if (page != NULL) {
					// The page was taken over from the source cache. Clear the
					// indicator flag. We'll take over the swap page too.
					page->merge_swap = false;
				} else if (swapBlock != NULL
					&& swapBlock->swap_slots[i] != SWAP_SLOT_NONE) {
					// There's no page in the consumer cache, but a swap page.
					// Free the source swap page.
					keepSwapPage = false;
				}
			}

			if (!keepSwapPage) {
				swap_slot_dealloc(sourceSlotIndex, 1);
				sourceSwapBlock->swap_slots[i] = SWAP_SLOT_NONE;
				sourceSwapBlock->used--;
			}

			// We've either freed the source swap page or are going to move it
			// to the consumer. At any rate, the source cache doesn't own it
			// anymore.
			source->fAllocatedSwapSize -= B_PAGE_SIZE;
		}

		// All source swap pages that have not been freed yet are taken over
		// by the consumer.
		fAllocatedSwapSize += B_PAGE_SIZE * (off_t)sourceSwapBlock->used;

		if (sourceSwapBlock->used == 0) {
			// All swap pages have been freed -- we can discard the source swap
			// block.
			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		} else if (swapBlock == NULL) {
			// We need to take over some of the source's swap pages and there's
			// no swap block in the consumer cache. Just take over the source
			// swap block.
			sourceSwapBlock->key.cache = this;
			locker.Lock();
			sSwapHashTable.InsertUnchecked(sourceSwapBlock);
			locker.Unlock();
		} else {
			// We need to take over some of the source's swap pages and there's
			// already a swap block in the consumer cache. Copy the respective
			// swap addresses and discard the source swap block.
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++) {
				if (sourceSwapBlock->swap_slots[i] != SWAP_SLOT_NONE)
					swapBlock->swap_slots[i] = sourceSwapBlock->swap_slots[i];
			}

			object_cache_free(sSwapBlockCache, sourceSwapBlock);
		}
	}
}

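// Records, for count pages starting at startPageIndex, the swap slots
// starting at startSlotIndex, creating new swap blocks from the object cache
// on demand.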
void
VMAnonymousCache::_SwapBlockBuild(off_t startPageIndex,
	swap_addr_t startSlotIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_addr_t slotIndex = startSlotIndex + i;

		swap_hash_key key = { this, pageIndex };

		swap_block *swap = sSwapHashTable.Lookup(key);
		while (swap == NULL) {
			swap = (swap_block *)object_cache_alloc(sSwapBlockCache,
				CACHE_DONT_SLEEP);
			if (swap == NULL) {
				// Wait a short time until memory is available again.
				locker.Unlock();
				snooze(10000);
				locker.Lock();
				swap = sSwapHashTable.Lookup(key);
				continue;
			}

			swap->key.cache = this;
			swap->key.page_index = pageIndex & ~(off_t)SWAP_BLOCK_MASK;
			swap->used = 0;
			for (uint32 i = 0; i < SWAP_BLOCK_PAGES; i++)
				swap->swap_slots[i] = SWAP_SLOT_NONE;

			sSwapHashTable.InsertUnchecked(swap);
		}

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = slotIndex + j;
			left--;
		}

		swap->used += j;
	}
}

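// Clears the swap slot entries for count pages starting at startPageIndex
// and returns swap blocks that no longer hold any slots to the object cache.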
void
VMAnonymousCache::_SwapBlockFree(off_t startPageIndex, uint32 count)
{
	WriteLocker locker(sSwapHashLock);

	uint32 left = count;
	for (uint32 i = 0, j = 0; i < count; i += j) {
		off_t pageIndex = startPageIndex + i;
		swap_hash_key key = { this, pageIndex };
		swap_block *swap = sSwapHashTable.Lookup(key);

		ASSERT(swap != NULL);

		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		for (j = 0; blockIndex < SWAP_BLOCK_PAGES && left > 0; j++) {
			swap->swap_slots[blockIndex++] = SWAP_SLOT_NONE;
			left--;
		}

		swap->used -= j;
		if (swap->used == 0) {
			sSwapHashTable.RemoveUnchecked(swap);
			object_cache_free(sSwapBlockCache, swap);
		}
	}
}


swap_addr_t
VMAnonymousCache::_SwapBlockGetAddress(off_t pageIndex)
{
	ReadLocker locker(sSwapHashLock);

	swap_hash_key key = { this, pageIndex };
	swap_block *swap = sSwapHashTable.Lookup(key);
	swap_addr_t slotIndex = SWAP_SLOT_NONE;

	if (swap != NULL) {
		swap_addr_t blockIndex = pageIndex & SWAP_BLOCK_MASK;
		slotIndex = swap->swap_slots[blockIndex];
	}

	return slotIndex;
}


status_t
VMAnonymousCache::_Commit(off_t size)
{
	TRACE("%p->VMAnonymousCache::_Commit(%lld), already committed: %lld "
		"(%lld swap)\n", this, size, committed_size, fCommittedSwapSize);

	// Basic strategy: reserve swap space first; only when running out of swap
	// space, reserve real memory.

	off_t committedMemory = committed_size - fCommittedSwapSize;

	// Regardless of whether we're asked to grow or shrink the commitment,
	// we always try to reserve as much as possible of the final commitment
	// in the swap space.
	if (size > fCommittedSwapSize) {
		fCommittedSwapSize += swap_space_reserve(size - fCommittedSwapSize);
		committed_size = fCommittedSwapSize + committedMemory;
		if (size > fCommittedSwapSize) {
			TRACE("%p->VMAnonymousCache::_Commit(%lld), reserved only %lld "
				"swap\n", this, size, fCommittedSwapSize);
		}
	}

	if (committed_size == size)
		return B_OK;

	if (committed_size > size) {
		// The commitment shrinks -- unreserve real memory first.
		off_t toUnreserve = committed_size - size;
		if (committedMemory > 0) {
			off_t unreserved = min_c(toUnreserve, committedMemory);
			vm_unreserve_memory(unreserved);
			committedMemory -= unreserved;
			committed_size -= unreserved;
			toUnreserve -= unreserved;
		}

		// Unreserve swap space.
		if (toUnreserve > 0) {
			swap_space_unreserve(toUnreserve);
			fCommittedSwapSize -= toUnreserve;
			committed_size -= toUnreserve;
		}

		return B_OK;
	}

	// The commitment grows -- we have already tried to reserve swap space at
	// the start of the method, so we try to reserve real memory now.

	off_t toReserve = size - committed_size;
	if (vm_try_reserve_memory(toReserve, 1000000) != B_OK) {
		dprintf("%p->VMAnonymousCache::_Commit(%lld): Failed to reserve %lld "
			"bytes of RAM\n", this, size, toReserve);
		return B_NO_MEMORY;
	}

	committed_size = size;
	return B_OK;
}

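// A worked example of the strategy above (illustrative numbers): committing
// 10 pages when only 6 pages of swap space are left reserves those 6 pages
// of swap and then tries to reserve the remaining 4 pages as real memory.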
// #pragma mark -


status_t
swap_file_add(const char *path)
{
	// open the file
	int fd = open(path, O_RDWR | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return errno;

	// fstat() it and check whether we can use it
	struct stat st;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return errno;
	}

	if (!(S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
		close(fd);
		return B_BAD_VALUE;
	}

	if (st.st_size < B_PAGE_SIZE) {
		close(fd);
		return B_BAD_VALUE;
	}

	// get file descriptor, vnode, and cookie
	file_descriptor* descriptor = get_fd(get_current_io_context(true), fd);
	put_fd(descriptor);

	vnode *node = fd_vnode(descriptor);
	if (node == NULL) {
		close(fd);
		return B_BAD_VALUE;
	}

	// do the allocations and prepare the swap_file structure
	swap_file *swap = (swap_file *)malloc(sizeof(swap_file));
	if (swap == NULL) {
		close(fd);
		return B_NO_MEMORY;
	}

	swap->fd = fd;
	swap->vnode = node;
	swap->cookie = descriptor->cookie;

	uint32 pageCount = st.st_size >> PAGE_SHIFT;
	swap->bmp = radix_bitmap_create(pageCount);
	if (swap->bmp == NULL) {
		free(swap);
		close(fd);
		return B_NO_MEMORY;
	}

	// set slot index and add this file to the swap file list
	mutex_lock(&sSwapFileListLock);
	// TODO: Also check whether the swap file is already registered!
	if (sSwapFileList.IsEmpty()) {
		swap->first_slot = 0;
		swap->last_slot = pageCount;
	} else {
		// leave one page gap between two swap files
		swap->first_slot = sSwapFileList.Last()->last_slot + 1;
		swap->last_slot = swap->first_slot + pageCount;
	}
	sSwapFileList.Add(swap);
	sSwapFileCount++;
	mutex_unlock(&sSwapFileListLock);

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace += (off_t)pageCount * B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	return B_OK;
}

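// Unregisters the swap file at the given path again. Fails if any of the
// file's slots are still in use.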
status_t
swap_file_delete(const char *path)
{
	vnode *node = NULL;
	status_t status = vfs_get_vnode_from_path(path, true, &node);
	if (status != B_OK)
		return status;

	MutexLocker locker(sSwapFileListLock);

	swap_file *swapFile = NULL;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			(swapFile = it.Next()) != NULL;) {
		if (swapFile->vnode == node)
			break;
	}

	vfs_put_vnode(node);

	if (swapFile == NULL)
		return B_ERROR;

	// If this file is currently in use, we can't delete it.
	// TODO: Mark this swap file as being deleted, and remove it after
	// releasing all the swap space.
	if (swapFile->bmp->free_slots < swapFile->last_slot - swapFile->first_slot)
		return B_ERROR;

	sSwapFileList.Remove(swapFile);
	sSwapFileCount--;
	locker.Unlock();

	mutex_lock(&sAvailSwapSpaceLock);
	sAvailSwapSpace -= (off_t)(swapFile->last_slot - swapFile->first_slot)
		* B_PAGE_SIZE;
	mutex_unlock(&sAvailSwapSpaceLock);

	close(swapFile->fd);
	radix_bitmap_destroy(swapFile->bmp);
	free(swapFile);

	return B_OK;
}


void
swap_init(void)
{
	// create swap block cache
	sSwapBlockCache = create_object_cache("swapblock",
		sizeof(swap_block), sizeof(void*), NULL, NULL, NULL);
	if (sSwapBlockCache == NULL)
		panic("swap_init(): can't create object cache for swap blocks\n");

	status_t error = object_cache_set_minimum_reserve(sSwapBlockCache,
		MIN_SWAP_BLOCK_RESERVE);
	if (error != B_OK) {
		panic("swap_init(): object_cache_set_minimum_reserve() failed: %s",
			strerror(error));
	}

	// init swap hash table
	sSwapHashTable.Init(INITIAL_SWAP_HASH_SIZE);
	rw_lock_init(&sSwapHashLock, "swaphash");

	error = register_resource_resizer(swap_hash_resizer, NULL,
		SWAP_HASH_RESIZE_INTERVAL);
	if (error != B_OK) {
		panic("swap_init(): Failed to register swap hash resizer: %s",
			strerror(error));
	}

	// init swap file list
	mutex_init(&sSwapFileListLock, "swaplist");
	sSwapFileAlloc = NULL;
	sSwapFileCount = 0;

	// init available swap space
	mutex_init(&sAvailSwapSpaceLock, "avail swap space");
	sAvailSwapSpace = 0;

	add_debugger_command_etc("swap", &dump_swap_info,
		"Print info about the swap usage",
		"\n"
		"Print info about the swap usage.\n", 0);
}

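// A minimal example of the "virtual_memory" driver settings file parsed by
// swap_init_post_modules() below (the on-disk location, typically under
// ~/config/settings/kernel/drivers/, is an assumption; the two parameter
// names come straight from the code):
//
//	vm on
//	swap_size 1073741824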
void
swap_init_post_modules()
{
	// Never try to create a swap file on a read-only device - when booting
	// from CD, the write overlay is used.
	if (gReadOnlyBootDevice)
		return;

	off_t size = 0;

	void *settings = load_driver_settings("virtual_memory");
	if (settings != NULL) {
		if (!get_driver_boolean_parameter(settings, "vm", false, false)) {
			unload_driver_settings(settings);
			return;
		}

		const char *string = get_driver_parameter(settings, "swap_size", NULL,
			NULL);
		size = string ? atoll(string) : 0;

		unload_driver_settings(settings);
	} else
		size = (off_t)vm_page_num_pages() * B_PAGE_SIZE * 2;

	if (size < B_PAGE_SIZE)
		return;

	int fd = open("/var/swap", O_RDWR | O_CREAT | O_NOCACHE, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		dprintf("Can't open/create /var/swap: %s\n", strerror(errno));
		return;
	}

	struct stat stat;
	stat.st_size = size;
	status_t error = _kern_write_stat(fd, NULL, false, &stat,
		sizeof(struct stat), B_STAT_SIZE | B_STAT_SIZE_INSECURE);
	if (error != B_OK) {
		dprintf("Failed to resize /var/swap to %lld bytes: %s\n", size,
			strerror(error));
	}

	close(fd);

	error = swap_file_add("/var/swap");
	if (error != B_OK)
		dprintf("Failed to add swap file /var/swap: %s\n", strerror(error));
}


//!	Used by the page daemon to free swap space.
bool
swap_free_page_swap_space(vm_page *page)
{
	VMAnonymousCache *cache = dynamic_cast<VMAnonymousCache *>(page->cache);
	if (cache == NULL)
		return false;

	swap_addr_t slotIndex = cache->_SwapBlockGetAddress(page->cache_offset);
	if (slotIndex == SWAP_SLOT_NONE)
		return false;

	swap_slot_dealloc(slotIndex, 1);
	cache->fAllocatedSwapSize -= B_PAGE_SIZE;
	cache->_SwapBlockFree(page->cache_offset, 1);

	return true;
}


uint32
swap_available_pages()
{
	mutex_lock(&sAvailSwapSpaceLock);
	uint32 avail = sAvailSwapSpace >> PAGE_SHIFT;
	mutex_unlock(&sAvailSwapSpaceLock);

	return avail;
}


uint32
swap_total_swap_pages()
{
	mutex_lock(&sSwapFileListLock);

	uint32 totalSwapSlots = 0;
	for (SwapFileList::Iterator it = sSwapFileList.GetIterator();
			swap_file *swapFile = it.Next();)
		totalSwapSlots += swapFile->last_slot - swapFile->first_slot;

	mutex_unlock(&sSwapFileListLock);

	return totalSwapSlots;
}

#endif	// ENABLE_SWAP_SUPPORT


void
swap_get_info(struct system_memory_info *info)
{
#if ENABLE_SWAP_SUPPORT
	info->max_swap_space = (uint64)swap_total_swap_pages() * B_PAGE_SIZE;
	info->free_swap_space = (uint64)swap_available_pages() * B_PAGE_SIZE;
#else
	info->max_swap_space = 0;
	info->free_swap_space = 0;
#endif
}