/*
 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <string.h>
#include <stdlib.h>

#include <algorithm>

#include <KernelExport.h>
#include <OS.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm_translation_map.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <condition_variable.h>
#include <elf.h>
#include <heap.h>
#include <kernel.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_priv.h>
#include <vm/vm_page.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "IORequest.h"
#include "PageCacheLocker.h"
#include "VMAnonymousCache.h"
#include "VMPageQueue.h"


//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

//#define TRACE_VM_DAEMONS
#ifdef TRACE_VM_DAEMONS
#define TRACE_DAEMON(x...) dprintf(x)
#else
#define TRACE_DAEMON(x...) do {} while (false)
#endif

//#define TRACK_PAGE_USAGE_STATS 1

#define PAGE_ASSERT(page, condition)	\
	ASSERT_PRINT((condition), "page: %p", (page))

#define SCRUB_SIZE 32
	// this many pages will be cleared at once in the page scrubber thread

#define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY
	// maximum I/O priority of the page writer
#define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000
	// the maximum I/O priority shall be reached when this many pages need to
	// be written


// The page reserve an allocation of a certain priority must not touch.
static const size_t kPageReserveForPriority[] = {
	VM_PAGE_RESERVE_USER,	// user
	VM_PAGE_RESERVE_SYSTEM,	// system
	0						// VIP
};

// Minimum number of free pages the page daemon will try to achieve.
static uint32 sFreePagesTarget;
static uint32 sFreeOrCachedPagesTarget;
static uint32 sInactivePagesTarget;

// Wait interval between page daemon runs.
static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec

// Number of idle runs after which we want to have processed the full active
// queue.
static const uint32 kIdleRunsForFullQueue = 20;

// Maximum limit for the vm_page::usage_count.
static const int32 kPageUsageMax = 64;
// vm_page::usage_count buff an accessed page receives in a scan.
static const int32 kPageUsageAdvance = 3;
// vm_page::usage_count debuff an unaccessed page receives in a scan.
static const int32 kPageUsageDecline = 1;

int32 gMappedPagesCount;

static VMPageQueue sPageQueues[PAGE_STATE_COUNT];

static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];

static vm_page *sPages;
static page_num_t sPhysicalPageOffset;
static page_num_t sNumPages;
static page_num_t sNonExistingPages;
	// pages in the sPages array that aren't backed by physical memory
static uint64 sIgnoredPages;
	// pages of physical memory ignored by the boot loader (and thus not
	// available here)
static int32 sUnreservedFreePages;
static int32 sUnsatisfiedPageReservations;
static int32 sModifiedTemporaryPages;

static ConditionVariable sFreePageCondition;
static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");

// This lock must be used whenever the free or clear page queues are changed.
// If you need to work on both queues at the same time, you need to hold a write
// lock, otherwise, a read lock suffices (each queue still has a spinlock to
// guard against concurrent changes).
static rw_lock sFreePageQueuesLock
	= RW_LOCK_INITIALIZER("free/clear page queues");

#ifdef TRACK_PAGE_USAGE_STATS
static page_num_t sPageUsageArrays[512];
static page_num_t* sPageUsage = sPageUsageArrays;
static page_num_t sPageUsagePageCount;
static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
static page_num_t sNextPageUsagePageCount;
#endif


#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

struct caller_info {
	addr_t	caller;
	size_t	count;
};

static const int32 kCallerInfoTableSize = 1024;
static caller_info sCallerInfoTable[kCallerInfoTableSize];
static int32 sCallerInfoCount = 0;

static caller_info* get_caller_info(addr_t caller);


RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)

static const addr_t kVMPageCodeAddressRange[] = {
	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
};

#endif


RANGE_MARKER_FUNCTION_BEGIN(vm_page)


struct page_stats {
	int32 totalFreePages;
	int32 unsatisfiedReservations;
	int32 cachedPages;
};


struct PageReservationWaiter
		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
	Thread*	thread;
	uint32	dontTouch;		// reserve not to touch
	uint32	missing;		// pages missing for the reservation
	int32	threadPriority;

	bool operator<(const PageReservationWaiter& other) const
	{
		// Implies an order by descending VM priority (ascending dontTouch)
		// and (secondarily) descending thread priority.
		if (dontTouch != other.dontTouch)
			return dontTouch < other.dontTouch;
		return threadPriority > other.threadPriority;
	}
};

typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
static PageReservationWaiterList sPageReservationWaiters;


struct DaemonCondition {
	void Init(const char* name)
	{
		mutex_init(&fLock, "daemon condition");
		fCondition.Init(this, name);
		fActivated = false;
	}

	bool Lock()
	{
		return mutex_lock(&fLock) == B_OK;
	}

	void Unlock()
	{
		mutex_unlock(&fLock);
	}

	bool Wait(bigtime_t timeout, bool clearActivated)
	{
		MutexLocker locker(fLock);
		if (clearActivated)
			fActivated = false;
		else if (fActivated)
			return true;

		ConditionVariableEntry entry;
		fCondition.Add(&entry);

		locker.Unlock();

		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
	}

	void WakeUp()
	{
		if (fActivated)
			return;

		MutexLocker locker(fLock);
		fActivated = true;
		fCondition.NotifyOne();
	}

	void ClearActivated()
	{
		MutexLocker locker(fLock);
		fActivated = false;
	}

private:
	mutex				fLock;
	ConditionVariable	fCondition;
	bool				fActivated;
};


static DaemonCondition sPageWriterCondition;
static DaemonCondition sPageDaemonCondition;


#if PAGE_ALLOCATION_TRACING

namespace PageAllocationTracing {

class ReservePages : public AbstractTraceEntry {
public:
	ReservePages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page reserve: %" B_PRIu32, fCount);
	}

private:
	uint32	fCount;
};


class UnreservePages : public AbstractTraceEntry {
public:
	UnreservePages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page unreserve: %" B_PRId32, fCount);
	}

private:
	uint32	fCount;
};


class AllocatePage
	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
public:
	AllocatePage(page_num_t pageNumber)
		:
		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
		fPageNumber(pageNumber)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
	}

private:
	page_num_t	fPageNumber;
};


class AllocatePageRun
	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
public:
	AllocatePageRun(page_num_t startPage, uint32 length)
		:
		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
		fStartPage(startPage),
		fLength(length)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
			B_PRIu32, fStartPage, fLength);
	}

private:
	page_num_t	fStartPage;
	uint32		fLength;
};


class FreePage
	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
public:
	FreePage(page_num_t pageNumber)
		:
		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
		fPageNumber(pageNumber)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
	}

private:
	page_num_t	fPageNumber;
};


class ScrubbingPages : public AbstractTraceEntry {
public:
	ScrubbingPages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page scrubbing: %" B_PRId32, fCount);
	}

private:
	uint32	fCount;
};


class ScrubbedPages : public AbstractTraceEntry {
public:
	ScrubbedPages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page scrubbed: %" B_PRId32, fCount);
	}

private:
	uint32	fCount;
};


class StolenPage : public AbstractTraceEntry {
public:
	StolenPage()
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page stolen");
	}
};

}	// namespace PageAllocationTracing

#	define TA(x) new(std::nothrow) PageAllocationTracing::x

#else
#	define TA(x)
#endif	// PAGE_ALLOCATION_TRACING


#if PAGE_DAEMON_TRACING

namespace PageDaemonTracing {

class ActivatePage : public AbstractTraceEntry {
public:
	ActivatePage(vm_page* page)
		:
		fCache(page->cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page activated: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};


class DeactivatePage : public AbstractTraceEntry {
public:
	DeactivatePage(vm_page* page)
		:
		fCache(page->cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page deactivated: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};


class FreedPageSwap : public AbstractTraceEntry {
public:
	FreedPageSwap(vm_page* page)
		:
		fCache(page->cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page swap freed: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};

}	// namespace PageDaemonTracing

#	define TD(x) new(std::nothrow) PageDaemonTracing::x

#else
#	define TD(x)
#endif	// PAGE_DAEMON_TRACING


#if PAGE_WRITER_TRACING

namespace PageWriterTracing {

class WritePage : public AbstractTraceEntry {
public:
	WritePage(vm_page* page)
		:
		fCache(page->Cache()),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page write: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};

}	// namespace PageWriterTracing

#	define TPW(x) new(std::nothrow) PageWriterTracing::x

#else
#	define TPW(x)
#endif	// PAGE_WRITER_TRACING


#if PAGE_STATE_TRACING

namespace PageStateTracing {

class SetPageState : public AbstractTraceEntry {
public:
	SetPageState(vm_page* page, uint8 newState)
		:
		fPage(page),
		fOldState(page->State()),
		fNewState(newState),
		fBusy(page->busy),
		fWired(page->WiredCount() > 0),
		fMapped(!page->mappings.IsEmpty()),
		fAccessed(page->accessed),
		fModified(page->modified)
	{
#if PAGE_STATE_TRACING_STACK_TRACE
		fStackTrace = capture_tracing_stack_trace(
			PAGE_STATE_TRACING_STACK_TRACE, 0, true);
			// Don't capture userland stack trace to avoid potential
			// deadlocks.
#endif
		Initialized();
	}

#if PAGE_STATE_TRACING_STACK_TRACE
	virtual void DumpStackTrace(TraceOutput& out)
	{
		out.PrintStackTrace(fStackTrace);
	}
#endif

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
			fBusy ? 'b' : '-',
			fWired ? 'w' : '-',
			fMapped ? 'm' : '-',
			fAccessed ? 'a' : '-',
			fModified ? 'm' : '-',
			page_state_to_string(fOldState),
			page_state_to_string(fNewState));
	}

private:
	vm_page*	fPage;
#if PAGE_STATE_TRACING_STACK_TRACE
	tracing_stack_trace*	fStackTrace;
#endif
	uint8		fOldState;
	uint8		fNewState;
	bool		fBusy : 1;
	bool		fWired : 1;
	bool		fMapped : 1;
	bool		fAccessed : 1;
	bool		fModified : 1;
};

}	// namespace PageStateTracing

#	define TPS(x) new(std::nothrow) PageStateTracing::x

#else
#	define TPS(x)
#endif	// PAGE_STATE_TRACING


#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

namespace BKernel {

class AllocationTrackingCallback {
public:
	virtual ~AllocationTrackingCallback();

	virtual bool ProcessTrackingInfo(
		AllocationTrackingInfo* info,
		page_num_t pageNumber) = 0;
};

}

using BKernel::AllocationTrackingCallback;


class AllocationCollectorCallback : public AllocationTrackingCallback {
public:
	AllocationCollectorCallback(bool resetInfos)
		:
		fResetInfos(resetInfos)
	{
	}

	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
		page_num_t pageNumber)
	{
		if (!info->IsInitialized())
			return true;

		addr_t caller = 0;
		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();

		if (traceEntry != NULL && info->IsTraceEntryValid()) {
			caller = tracing_find_caller_in_stack_trace(
				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
		}

		caller_info* callerInfo = get_caller_info(caller);
		if (callerInfo == NULL) {
			kprintf("out of space for caller infos\n");
			return false;
		}

		callerInfo->count++;

		if (fResetInfos)
			info->Clear();

		return true;
	}

private:
	bool	fResetInfos;
};


class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
public:
	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
		team_id teamFilter, thread_id threadFilter)
		:
		fPrintStackTrace(printStackTrace),
		fPageFilter(pageFilter),
		fTeamFilter(teamFilter),
		fThreadFilter(threadFilter)
	{
	}

	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
		page_num_t pageNumber)
	{
		if (!info->IsInitialized())
			return true;

		if (fPageFilter != 0 && pageNumber != fPageFilter)
			return true;

		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
		if (traceEntry != NULL && !info->IsTraceEntryValid())
			traceEntry = NULL;

		if (traceEntry != NULL) {
			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
				return true;
			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
				return true;
		} else {
			// we need the info if we have filters set
			if (fTeamFilter != -1 || fThreadFilter != -1)
				return true;
		}

		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);

		if (traceEntry != NULL) {
			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
				", time %" B_PRId64 "\n", traceEntry->TeamID(),
				traceEntry->ThreadID(), traceEntry->Time());

			if (fPrintStackTrace)
				tracing_print_stack_trace(traceEntry->StackTrace());
		} else
			kprintf("\n");

		return true;
	}

private:
	bool		fPrintStackTrace;
	page_num_t	fPageFilter;
	team_id		fTeamFilter;
	thread_id	fThreadFilter;
};


class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
public:
	AllocationDetailPrinterCallback(addr_t caller)
		:
		fCaller(caller)
	{
	}

	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
		page_num_t pageNumber)
	{
		if (!info->IsInitialized())
			return true;

		addr_t caller = 0;
		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
		if (traceEntry != NULL && !info->IsTraceEntryValid())
			traceEntry = NULL;

		if (traceEntry != NULL) {
			caller = tracing_find_caller_in_stack_trace(
				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
		}

		if (caller != fCaller)
			return true;

		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
		if (traceEntry != NULL)
			tracing_print_stack_trace(traceEntry->StackTrace());

		return true;
	}

private:
	addr_t	fCaller;
};

#endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE


static void
list_page(vm_page* page)
{
	kprintf("0x%08" B_PRIxADDR " ",
		(addr_t)(page->physical_page_number * B_PAGE_SIZE));
	switch (page->State()) {
		case PAGE_STATE_ACTIVE:   kprintf("A"); break;
		case PAGE_STATE_INACTIVE: kprintf("I"); break;
		case PAGE_STATE_MODIFIED: kprintf("M"); break;
		case PAGE_STATE_CACHED:   kprintf("C"); break;
		case PAGE_STATE_FREE:     kprintf("F"); break;
		case PAGE_STATE_CLEAR:    kprintf("L"); break;
		case PAGE_STATE_WIRED:    kprintf("W"); break;
		case PAGE_STATE_UNUSED:   kprintf("-"); break;
	}
	kprintf(" ");
	if (page->busy) kprintf("B"); else kprintf("-");
	if (page->busy_writing) kprintf("W"); else kprintf("-");
	if (page->accessed) kprintf("A"); else kprintf("-");
	if (page->modified) kprintf("M"); else kprintf("-");
	if (page->unused) kprintf("U"); else kprintf("-");

	kprintf(" usage:%3u", page->usage_count);
	kprintf(" wired:%5u", page->WiredCount());

	bool first = true;
	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
	vm_page_mapping* mapping;
	while ((mapping = iterator.Next()) != NULL) {
		if (first) {
			kprintf(": ");
			first = false;
		} else
			kprintf(", ");

		kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
		mapping = mapping->page_link.next;
	}
}


static int
dump_page_list(int argc, char **argv)
{
	kprintf("page table:\n");
	for (page_num_t i = 0; i < sNumPages; i++) {
		if (sPages[i].State() != PAGE_STATE_UNUSED) {
			list_page(&sPages[i]);
			kprintf("\n");
		}
	}
	kprintf("end of page table\n");

	return 0;
}


static int
find_page(int argc, char **argv)
{
	struct vm_page *page;
	addr_t address;
	int32 index = 1;
	int i;

	struct {
		const char*		name;
		VMPageQueue*	queue;
	} pageQueueInfos[] = {
		{ "free",		&sFreePageQueue },
		{ "clear",		&sClearPageQueue },
		{ "modified",	&sModifiedPageQueue },
		{ "active",		&sActivePageQueue },
		{ "inactive",	&sInactivePageQueue },
"cached", &sCachedPageQueue }, 838 { NULL, NULL } 839 }; 840 841 if (argc < 2 842 || strlen(argv[index]) <= 2 843 || argv[index][0] != '0' 844 || argv[index][1] != 'x') { 845 kprintf("usage: find_page <address>\n"); 846 return 0; 847 } 848 849 address = strtoul(argv[index], NULL, 0); 850 page = (vm_page*)address; 851 852 for (i = 0; pageQueueInfos[i].name; i++) { 853 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 854 while (vm_page* p = it.Next()) { 855 if (p == page) { 856 kprintf("found page %p in queue %p (%s)\n", page, 857 pageQueueInfos[i].queue, pageQueueInfos[i].name); 858 return 0; 859 } 860 } 861 } 862 863 kprintf("page %p isn't in any queue\n", page); 864 865 return 0; 866 } 867 868 869 const char * 870 page_state_to_string(int state) 871 { 872 switch(state) { 873 case PAGE_STATE_ACTIVE: 874 return "active"; 875 case PAGE_STATE_INACTIVE: 876 return "inactive"; 877 case PAGE_STATE_MODIFIED: 878 return "modified"; 879 case PAGE_STATE_CACHED: 880 return "cached"; 881 case PAGE_STATE_FREE: 882 return "free"; 883 case PAGE_STATE_CLEAR: 884 return "clear"; 885 case PAGE_STATE_WIRED: 886 return "wired"; 887 case PAGE_STATE_UNUSED: 888 return "unused"; 889 default: 890 return "unknown"; 891 } 892 } 893 894 895 static int 896 dump_page_long(int argc, char **argv) 897 { 898 bool addressIsPointer = true; 899 bool physical = false; 900 bool searchMappings = false; 901 int32 index = 1; 902 903 while (index < argc) { 904 if (argv[index][0] != '-') 905 break; 906 907 if (!strcmp(argv[index], "-p")) { 908 addressIsPointer = false; 909 physical = true; 910 } else if (!strcmp(argv[index], "-v")) { 911 addressIsPointer = false; 912 } else if (!strcmp(argv[index], "-m")) { 913 searchMappings = true; 914 } else { 915 print_debugger_command_usage(argv[0]); 916 return 0; 917 } 918 919 index++; 920 } 921 922 if (index + 1 != argc) { 923 print_debugger_command_usage(argv[0]); 924 return 0; 925 } 926 927 uint64 value; 928 if (!evaluate_debug_expression(argv[index], &value, false)) 929 return 0; 930 931 uint64 pageAddress = value; 932 struct vm_page* page; 933 934 if (addressIsPointer) { 935 page = (struct vm_page *)(addr_t)pageAddress; 936 } else { 937 if (!physical) { 938 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 939 940 if (debug_get_debugged_thread()->team->address_space != NULL) 941 addressSpace = debug_get_debugged_thread()->team->address_space; 942 943 uint32 flags = 0; 944 phys_addr_t physicalAddress; 945 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 946 &physicalAddress, &flags) != B_OK 947 || (flags & PAGE_PRESENT) == 0) { 948 kprintf("Virtual address not mapped to a physical page in this " 949 "address space.\n"); 950 return 0; 951 } 952 pageAddress = physicalAddress; 953 } 954 955 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 956 } 957 958 const page_num_t expected = sPhysicalPageOffset + (page - sPages); 959 960 kprintf("PAGE: %p\n", page); 961 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 962 page->queue_link.previous); 963 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", page->physical_page_number); 964 if (page->physical_page_number != expected) 965 kprintf("\t(expected %#" B_PRIxPHYSADDR ")!\n", expected); 966 kprintf("cache: %p\n", page->Cache()); 967 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset); 968 kprintf("cache_next: %p\n", page->cache_next); 969 kprintf("state: %s\n", page_state_to_string(page->State())); 970 kprintf("wired_count: %d\n", page->WiredCount()); 971 kprintf("usage_count: %d\n", 
	kprintf("usage_count: %d\n", page->usage_count);
	kprintf("busy: %d\n", page->busy);
	kprintf("busy_writing: %d\n", page->busy_writing);
	kprintf("accessed: %d\n", page->accessed);
	kprintf("modified: %d\n", page->modified);
#if DEBUG_PAGE_QUEUE
	kprintf("queue: %p\n", page->queue);
#endif
#if DEBUG_PAGE_ACCESS
	kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread);
#endif
	kprintf("area mappings:\n");

	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
	vm_page_mapping *mapping;
	while ((mapping = iterator.Next()) != NULL) {
		kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
		mapping = mapping->page_link.next;
	}

	if (searchMappings) {
		kprintf("all mappings:\n");
		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
		while (addressSpace != NULL) {
			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
			for (addr_t address = addressSpace->Base(); pageCount != 0;
					address += B_PAGE_SIZE, pageCount--) {
				phys_addr_t physicalAddress;
				uint32 flags = 0;
				if (addressSpace->TranslationMap()->QueryInterrupt(address,
						&physicalAddress, &flags) == B_OK
					&& (flags & PAGE_PRESENT) != 0
					&& physicalAddress / B_PAGE_SIZE
						== page->physical_page_number) {
					VMArea* area = addressSpace->LookupArea(address);
					kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
						area != NULL ? area->id : -1, address,
						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
				}
			}
			addressSpace = VMAddressSpace::DebugNext(addressSpace);
		}
	}

	set_debug_variable("_cache", (addr_t)page->Cache());
#if DEBUG_PAGE_ACCESS
	set_debug_variable("_accessor", page->accessing_thread);
#endif

	return 0;
}


static int
dump_page_queue(int argc, char **argv)
{
	struct VMPageQueue *queue;

	if (argc < 2) {
		kprintf("usage: page_queue <address/name> [list]\n");
		return 0;
	}

	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
	else if (!strcmp(argv[1], "free"))
		queue = &sFreePageQueue;
	else if (!strcmp(argv[1], "clear"))
		queue = &sClearPageQueue;
	else if (!strcmp(argv[1], "modified"))
		queue = &sModifiedPageQueue;
	else if (!strcmp(argv[1], "active"))
		queue = &sActivePageQueue;
	else if (!strcmp(argv[1], "inactive"))
		queue = &sInactivePageQueue;
	else if (!strcmp(argv[1], "cached"))
		queue = &sCachedPageQueue;
	else {
		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
		return 0;
	}

	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
		queue->Count());

	if (argc == 3) {
		struct vm_page *page = queue->Head();

		kprintf("page cache type state wired usage\n");
		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
			kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(),
				vm_cache_type_to_string(page->Cache()->type),
				page_state_to_string(page->State()),
				page->WiredCount(), page->usage_count);
		}
	}
	return 0;
}


static int
dump_page_stats(int argc, char **argv)
{
	page_num_t swappableModified = 0;
	page_num_t swappableModifiedInactive = 0;

	size_t counter[8];
	size_t busyCounter[8];
	memset(counter, 0, sizeof(counter));
	memset(busyCounter, 0, sizeof(busyCounter));

	struct page_run {
		page_num_t	start;
		page_num_t	end;

		page_num_t Length() const { return end - start; }
	};

	page_run currentFreeRun = { 0, 0 };
	page_run currentCachedRun = { 0, 0 };
	page_run longestFreeRun = { 0, 0 };
	page_run longestCachedRun = { 0, 0 };

	for (page_num_t i = 0; i < sNumPages; i++) {
		if (sPages[i].State() > 7) {
			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
				&sPages[i]);
		}

		uint32 pageState = sPages[i].State();

		counter[pageState]++;
		if (sPages[i].busy)
			busyCounter[pageState]++;

		if (pageState == PAGE_STATE_MODIFIED
			&& sPages[i].Cache() != NULL
			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
			swappableModified++;
			if (sPages[i].usage_count == 0)
				swappableModifiedInactive++;
		}

		// track free and cached pages runs
		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
			currentFreeRun.end = i + 1;
			currentCachedRun.end = i + 1;
		} else {
			if (currentFreeRun.Length() > longestFreeRun.Length())
				longestFreeRun = currentFreeRun;
			currentFreeRun.start = currentFreeRun.end = i + 1;

			if (pageState == PAGE_STATE_CACHED) {
				currentCachedRun.end = i + 1;
			} else {
				if (currentCachedRun.Length() > longestCachedRun.Length())
					longestCachedRun = currentCachedRun;
				currentCachedRun.start = currentCachedRun.end = i + 1;
			}
		}
	}

	kprintf("page stats:\n");
	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);

	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);

	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
		sUnsatisfiedPageReservations);
	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
		sPages[longestFreeRun.start].physical_page_number);
	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
		sPages[longestCachedRun.start].physical_page_number);

	kprintf("waiting threads:\n");
	for (PageReservationWaiterList::Iterator it
			= sPageReservationWaiters.GetIterator();
		PageReservationWaiter* waiter = it.Next();) {
		kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32
			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
			waiter->missing, waiter->dontTouch);
	}

	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
		sFreePageQueue.Count());
	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
		sClearPageQueue.Count());
	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
		&sActivePageQueue, sActivePageQueue.Count());
	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
		&sInactivePageQueue, sInactivePageQueue.Count());
	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
		&sCachedPageQueue, sCachedPageQueue.Count());
	return 0;
}


#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

static caller_info*
get_caller_info(addr_t caller)
{
	// find the caller info
	for (int32 i = 0; i < sCallerInfoCount; i++) {
		if (caller == sCallerInfoTable[i].caller)
			return &sCallerInfoTable[i];
	}

	// not found, add a new entry, if there are free slots
	if (sCallerInfoCount >= kCallerInfoTableSize)
		return NULL;

	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
	info->caller = caller;
	info->count = 0;

	return info;
}


static int
caller_info_compare_count(const void* _a, const void* _b)
{
	const caller_info* a = (const caller_info*)_a;
	const caller_info* b = (const caller_info*)_b;
	return (int)(b->count - a->count);
}


static int
dump_page_allocations_per_caller(int argc, char** argv)
{
	bool resetAllocationInfos = false;
	bool printDetails = false;
	addr_t caller = 0;

	for (int32 i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-d") == 0) {
			uint64 callerAddress;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			caller = callerAddress;
			printDetails = true;
		} else if (strcmp(argv[i], "-r") == 0) {
			resetAllocationInfos = true;
		} else {
			print_debugger_command_usage(argv[0]);
			return 0;
		}
	}

	sCallerInfoCount = 0;

	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
	AllocationDetailPrinterCallback detailsCallback(caller);
	AllocationTrackingCallback& callback = printDetails
		? (AllocationTrackingCallback&)detailsCallback
		: (AllocationTrackingCallback&)collectorCallback;

	for (page_num_t i = 0; i < sNumPages; i++)
		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);

	if (printDetails)
		return 0;

	// sort the array
	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
		&caller_info_compare_count);

	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);

	size_t totalAllocationCount = 0;

	kprintf(" count caller\n");
	kprintf("----------------------------------\n");
	for (int32 i = 0; i < sCallerInfoCount; i++) {
		caller_info& info = sCallerInfoTable[i];
		kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller);

		const char* symbol;
		const char* imageName;
		bool exactMatch;
		addr_t baseAddress;

		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
				&imageName, &exactMatch) == B_OK) {
			kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
				info.caller - baseAddress, imageName,
				exactMatch ? "" : " (nearest)");
		} else
			kprintf("\n");

		totalAllocationCount += info.count;
	}

	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
		totalAllocationCount);

	return 0;
}


static int
dump_page_allocation_infos(int argc, char** argv)
{
	page_num_t pageFilter = 0;
	team_id teamFilter = -1;
	thread_id threadFilter = -1;
	bool printStackTraces = false;

	for (int32 i = 1; i < argc; i++) {
		if (strcmp(argv[i], "--stacktrace") == 0)
			printStackTraces = true;
		else if (strcmp(argv[i], "-p") == 0) {
			uint64 pageNumber;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			pageFilter = pageNumber;
		} else if (strcmp(argv[i], "--team") == 0) {
			uint64 team;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &team, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			teamFilter = team;
		} else if (strcmp(argv[i], "--thread") == 0) {
			uint64 thread;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &thread, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			threadFilter = thread;
		} else {
			print_debugger_command_usage(argv[0]);
			return 0;
		}
	}

	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
		teamFilter, threadFilter);

	for (page_num_t i = 0; i < sNumPages; i++)
		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);

	return 0;
}

#endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE


#ifdef TRACK_PAGE_USAGE_STATS

static void
track_page_usage(vm_page* page)
{
	if (page->WiredCount() == 0) {
		sNextPageUsage[(int32)page->usage_count + 128]++;
		sNextPageUsagePageCount++;
	}
}


static void
update_page_usage_stats()
{
	std::swap(sPageUsage, sNextPageUsage);
	sPageUsagePageCount = sNextPageUsagePageCount;

	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
	sNextPageUsagePageCount = 0;

	// compute average
	if (sPageUsagePageCount > 0) {
		int64 sum = 0;
		for (int32 i = 0; i < 256; i++)
			sum += (int64)sPageUsage[i] * (i - 128);

TRACE_DAEMON("average page usage: %f (%lu pages)\n", 1387 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 1388 } 1389 } 1390 1391 1392 static int 1393 dump_page_usage_stats(int argc, char** argv) 1394 { 1395 kprintf("distribution of page usage counts (%lu pages):", 1396 sPageUsagePageCount); 1397 1398 int64 sum = 0; 1399 for (int32 i = 0; i < 256; i++) { 1400 if (i % 8 == 0) 1401 kprintf("\n%4ld:", i - 128); 1402 1403 int64 count = sPageUsage[i]; 1404 sum += count * (i - 128); 1405 1406 kprintf(" %9llu", count); 1407 } 1408 1409 kprintf("\n\n"); 1410 1411 kprintf("average usage count: %f\n", 1412 sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0); 1413 1414 return 0; 1415 } 1416 1417 #endif // TRACK_PAGE_USAGE_STATS 1418 1419 1420 // #pragma mark - vm_page 1421 1422 1423 inline void 1424 vm_page::InitState(uint8 newState) 1425 { 1426 state = newState; 1427 } 1428 1429 1430 inline void 1431 vm_page::SetState(uint8 newState) 1432 { 1433 TPS(SetPageState(this, newState)); 1434 1435 state = newState; 1436 } 1437 1438 1439 // #pragma mark - 1440 1441 1442 static void 1443 get_page_stats(page_stats& _pageStats) 1444 { 1445 _pageStats.totalFreePages = sUnreservedFreePages; 1446 _pageStats.cachedPages = sCachedPageQueue.Count(); 1447 _pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations; 1448 // TODO: We don't get an actual snapshot here! 1449 } 1450 1451 1452 static bool 1453 do_active_paging(const page_stats& pageStats) 1454 { 1455 return pageStats.totalFreePages + pageStats.cachedPages 1456 < pageStats.unsatisfiedReservations 1457 + (int32)sFreeOrCachedPagesTarget; 1458 } 1459 1460 1461 /*! Reserves as many pages as possible from \c sUnreservedFreePages up to 1462 \a count. Doesn't touch the last \a dontTouch pages of 1463 \c sUnreservedFreePages, though. 1464 \return The number of actually reserved pages. 1465 */ 1466 static uint32 1467 reserve_some_pages(uint32 count, uint32 dontTouch) 1468 { 1469 while (true) { 1470 int32 freePages = atomic_get(&sUnreservedFreePages); 1471 if (freePages <= (int32)dontTouch) 1472 return 0; 1473 1474 int32 toReserve = std::min(count, freePages - dontTouch); 1475 if (atomic_test_and_set(&sUnreservedFreePages, 1476 freePages - toReserve, freePages) 1477 == freePages) { 1478 return toReserve; 1479 } 1480 1481 // the count changed in the meantime -- retry 1482 } 1483 } 1484 1485 1486 static void 1487 wake_up_page_reservation_waiters() 1488 { 1489 MutexLocker pageDeficitLocker(sPageDeficitLock); 1490 1491 // TODO: If this is a low priority thread, we might want to disable 1492 // interrupts or otherwise ensure that we aren't unscheduled. Otherwise 1493 // high priority threads wait be kept waiting while a medium priority thread 1494 // prevents us from running. 

	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
		int32 reserved = reserve_some_pages(waiter->missing,
			waiter->dontTouch);
		if (reserved == 0)
			return;

		atomic_add(&sUnsatisfiedPageReservations, -reserved);
		waiter->missing -= reserved;

		if (waiter->missing > 0)
			return;

		sPageReservationWaiters.Remove(waiter);

		thread_unblock(waiter->thread, B_OK);
	}
}


static inline void
unreserve_pages(uint32 count)
{
	atomic_add(&sUnreservedFreePages, count);
	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
		wake_up_page_reservation_waiters();
}


static void
free_page(vm_page* page, bool clear)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	PAGE_ASSERT(page, !page->IsMapped());

	VMPageQueue* fromQueue;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			fromQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			fromQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			fromQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			fromQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("free_page(): page %p already free", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			fromQueue = NULL;
			break;
		default:
			panic("free_page(): page %p in invalid state %d",
				page, page->State());
			return;
	}

	if (page->CacheRef() != NULL)
		panic("to be freed page %p has cache", page);
	if (page->IsMapped())
		panic("to be freed page %p has mappings", page);

	if (fromQueue != NULL)
		fromQueue->RemoveUnlocked(page);

	TA(FreePage(page->physical_page_number));

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	page->allocation_tracking_info.Clear();
#endif

	ReadLocker locker(sFreePageQueuesLock);

	DEBUG_PAGE_ACCESS_END(page);

	if (clear) {
		page->SetState(PAGE_STATE_CLEAR);
		sClearPageQueue.PrependUnlocked(page);
	} else {
		page->SetState(PAGE_STATE_FREE);
		sFreePageQueue.PrependUnlocked(page);
		sFreePageCondition.NotifyAll();
	}

	locker.Unlock();
}


/*!	The caller must make sure that no-one else tries to change the page's state
	while the function is called. If the page has a cache, this can be done by
	locking the cache.
*/
static void
set_page_state(vm_page *page, int pageState)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	if (pageState == page->State())
		return;

	VMPageQueue* fromQueue;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			fromQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			fromQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			fromQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			fromQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("set_page_state(): page %p is free/clear", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			fromQueue = NULL;
			break;
		default:
			panic("set_page_state(): page %p in invalid state %d",
				page, page->State());
			return;
	}

	VMPageQueue* toQueue;

	switch (pageState) {
		case PAGE_STATE_ACTIVE:
			toQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			toQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			toQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			PAGE_ASSERT(page, !page->IsMapped());
			PAGE_ASSERT(page, !page->modified);
			toQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("set_page_state(): target state is free/clear");
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			toQueue = NULL;
			break;
		default:
			panic("set_page_state(): invalid target state %d", pageState);
			return;
	}

	VMCache* cache = page->Cache();
	if (cache != NULL && cache->temporary) {
		if (pageState == PAGE_STATE_MODIFIED)
			atomic_add(&sModifiedTemporaryPages, 1);
		else if (page->State() == PAGE_STATE_MODIFIED)
			atomic_add(&sModifiedTemporaryPages, -1);
	}

	// move the page
	if (toQueue == fromQueue) {
		// Note: Theoretically we are required to lock when changing the page
		// state, even if we don't change the queue. We actually don't have to
		// do this, though, since only for the active queue there are different
		// page states and active pages have a cache that must be locked at
		// this point. So we rely on the fact that everyone must lock the cache
		// before trying to change/interpret the page state.
		PAGE_ASSERT(page, cache != NULL);
		cache->AssertLocked();
		page->SetState(pageState);
	} else {
		if (fromQueue != NULL)
			fromQueue->RemoveUnlocked(page);

		page->SetState(pageState);

		if (toQueue != NULL)
			toQueue->AppendUnlocked(page);
	}
}


/*!	Moves a previously modified page into a now appropriate queue.
	The page queues must not be locked.
*/
static void
move_page_to_appropriate_queue(vm_page *page)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	// Note, this logic must be in sync with what the page daemon does.
	int32 state;
	if (page->IsMapped())
		state = PAGE_STATE_ACTIVE;
	else if (page->modified)
		state = PAGE_STATE_MODIFIED;
	else
		state = PAGE_STATE_CACHED;

	// TODO: If free + cached pages are low, we might directly want to free the
	// page.
	set_page_state(page, state);
}


static void
clear_page(struct vm_page *page)
{
	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
		B_PAGE_SIZE);
}


static status_t
mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
{
	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
		B_PRIxPHYSADDR "\n", startPage, length));

	if (sPhysicalPageOffset > startPage) {
		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			"): start page is before free list\n", startPage, length);
		if (sPhysicalPageOffset - startPage >= length)
			return B_OK;
		length -= sPhysicalPageOffset - startPage;
		startPage = sPhysicalPageOffset;
	}

	startPage -= sPhysicalPageOffset;

	if (startPage + length > sNumPages) {
		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			"): range would extend past free list\n", startPage, length);
		if (startPage >= sNumPages)
			return B_OK;
		length = sNumPages - startPage;
	}

	WriteLocker locker(sFreePageQueuesLock);

	for (page_num_t i = 0; i < length; i++) {
		vm_page *page = &sPages[startPage + i];
		switch (page->State()) {
			case PAGE_STATE_FREE:
			case PAGE_STATE_CLEAR:
			{
				// TODO: This violates the page reservation policy, since we
				// remove pages from the free/clear queues without having
				// reserved them before. This should happen in the early boot
				// process only, though.
				DEBUG_PAGE_ACCESS_START(page);
				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
					? sFreePageQueue : sClearPageQueue;
				queue.Remove(page);
				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
				page->busy = false;
				atomic_add(&sUnreservedFreePages, -1);
				DEBUG_PAGE_ACCESS_END(page);
				break;
			}
			case PAGE_STATE_WIRED:
			case PAGE_STATE_UNUSED:
				break;
			case PAGE_STATE_ACTIVE:
			case PAGE_STATE_INACTIVE:
			case PAGE_STATE_MODIFIED:
			case PAGE_STATE_CACHED:
			default:
				// uh
				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
					" in non-free state %d!\n", startPage + i, page->State());
				break;
		}
	}

	return B_OK;
}


/*!
	This is a background thread that wakes up when its condition is notified
	and moves some pages from the free queue over to the clear queue.
	Given enough time, it will clear out all pages from the free queue - we
	could probably slow it down after having reached a certain threshold.
*/
static int32
page_scrubber(void *unused)
{
	(void)(unused);

	TRACE(("page_scrubber starting...\n"));

	ConditionVariableEntry entry;
	for (;;) {
		while (sFreePageQueue.Count() == 0
				|| atomic_get(&sUnreservedFreePages)
					< (int32)sFreePagesTarget) {
			sFreePageCondition.Add(&entry);
			entry.Wait();
		}

		// Since we temporarily remove pages from the free pages reserve,
		// we must make sure we don't cause a violation of the page
		// reservation warranty. The following is usually stricter than
		// necessary, because we don't have information on how many of the
		// reserved pages have already been allocated.
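		// Using the user-priority reserve as the "don't touch" limit below
		// means the scrubber competes like a user-priority allocation and
		// never dips into the pages kept back for system or VIP allocations.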
		int32 reserved = reserve_some_pages(SCRUB_SIZE,
			kPageReserveForPriority[VM_PRIORITY_USER]);
		if (reserved == 0)
			continue;

		// get some pages from the free queue, mostly sorted
		ReadLocker locker(sFreePageQueuesLock);

		vm_page *page[SCRUB_SIZE];
		int32 scrubCount = 0;
		for (int32 i = 0; i < reserved; i++) {
			page[i] = sFreePageQueue.RemoveHeadUnlocked();
			if (page[i] == NULL)
				break;

			DEBUG_PAGE_ACCESS_START(page[i]);

			page[i]->SetState(PAGE_STATE_ACTIVE);
			page[i]->busy = true;
			scrubCount++;
		}

		locker.Unlock();

		if (scrubCount == 0) {
			unreserve_pages(reserved);
			continue;
		}

		TA(ScrubbingPages(scrubCount));

		// clear them
		for (int32 i = 0; i < scrubCount; i++)
			clear_page(page[i]);

		locker.Lock();

		// and put them into the clear queue
		// process the array reversed when prepending to preserve sequential order
		for (int32 i = scrubCount - 1; i >= 0; i--) {
			page[i]->SetState(PAGE_STATE_CLEAR);
			page[i]->busy = false;
			DEBUG_PAGE_ACCESS_END(page[i]);
			sClearPageQueue.PrependUnlocked(page[i]);
		}

		locker.Unlock();

		unreserve_pages(reserved);

		TA(ScrubbedPages(scrubCount));

		// wait at least 100ms between runs
		snooze(100 * 1000);
	}

	return 0;
}


static void
init_page_marker(vm_page &marker)
{
	marker.SetCacheRef(NULL);
	marker.InitState(PAGE_STATE_UNUSED);
	marker.busy = true;
#if DEBUG_PAGE_QUEUE
	marker.queue = NULL;
#endif
#if DEBUG_PAGE_ACCESS
	marker.accessing_thread = thread_get_current_thread_id();
#endif
}


static void
remove_page_marker(struct vm_page &marker)
{
	DEBUG_PAGE_ACCESS_CHECK(&marker);

	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
		sPageQueues[marker.State()].RemoveUnlocked(&marker);

	marker.SetState(PAGE_STATE_UNUSED);
}


static vm_page*
next_modified_page(page_num_t& maxPagesToSee)
{
	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());

	while (maxPagesToSee > 0) {
		vm_page* page = sModifiedPageQueue.Head();
		if (page == NULL)
			return NULL;

		sModifiedPageQueue.Requeue(page, true);

		maxPagesToSee--;

		if (!page->busy)
			return page;
	}

	return NULL;
}


// #pragma mark -


class PageWriteTransfer;
class PageWriteWrapper;


class PageWriterRun {
public:
	status_t Init(uint32 maxPages);

	void PrepareNextRun();
	void AddPage(vm_page* page);
	uint32 Go();

	void PageWritten(PageWriteTransfer* transfer, status_t status,
		bool partialTransfer, size_t bytesTransferred);

private:
	uint32				fMaxPages;
	uint32				fWrapperCount;
	uint32				fTransferCount;
	int32				fPendingTransfers;
	PageWriteWrapper*	fWrappers;
	PageWriteTransfer*	fTransfers;
	ConditionVariable	fAllFinishedCondition;
};


class PageWriteTransfer : public AsyncIOCallback {
public:
	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
	bool AddPage(vm_page* page);

	status_t Schedule(uint32 flags);

	void SetStatus(status_t status, size_t transferred);

	status_t Status() const { return fStatus; }
	struct VMCache* Cache() const { return fCache; }
	uint32 PageCount() const { return fPageCount; }

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred);

private:
	PageWriterRun*		fRun;
	struct VMCache*		fCache;
	off_t				fOffset;
	uint32				fPageCount;
	int32				fMaxPages;
	status_t			fStatus;
	uint32				fVecCount;
	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
};


class PageWriteWrapper {
public:
	PageWriteWrapper();
	~PageWriteWrapper();
	void SetTo(vm_page* page);
	bool Done(status_t result);

private:
	vm_page*			fPage;
	struct VMCache*		fCache;
	bool				fIsActive;
};


PageWriteWrapper::PageWriteWrapper()
	:
	fIsActive(false)
{
}


PageWriteWrapper::~PageWriteWrapper()
{
	if (fIsActive)
		panic("page write wrapper going out of scope but isn't completed");
}


/*!	The page's cache must be locked.
*/
void
PageWriteWrapper::SetTo(vm_page* page)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	if (page->busy)
		panic("setting page write wrapper to busy page");

	if (fIsActive)
		panic("re-setting page write wrapper that isn't completed");

	fPage = page;
	fCache = page->Cache();
	fIsActive = true;

	fPage->busy = true;
	fPage->busy_writing = true;

	// We have a modified page -- however, while we're writing it back,
	// the page might still be mapped. In order not to lose any changes to the
	// page, we mark it clean before actually writing it back; if
	// writing the page fails for some reason, we'll just keep it in the
	// modified page list, but that should happen only rarely.

	// If the page is changed after we cleared the dirty flag, but before we
	// had the chance to write it back, then we'll write it again later -- that
	// will probably not happen that often, though.

	vm_clear_map_flags(fPage, PAGE_MODIFIED);
}


/*!	The page's cache must be locked.
	The page queues must not be locked.
	\return \c true if the page was written successfully or could otherwise be
		handled, \c false otherwise.
*/
bool
PageWriteWrapper::Done(status_t result)
{
	if (!fIsActive)
		panic("completing page write wrapper that is not active");

	DEBUG_PAGE_ACCESS_START(fPage);

	fPage->busy = false;
		// Set unbusy and notify later by hand, since we might free the page.

	bool success = true;

	if (result == B_OK) {
		// put it into the active/inactive queue
		move_page_to_appropriate_queue(fPage);
		fPage->busy_writing = false;
		DEBUG_PAGE_ACCESS_END(fPage);
	} else {
		// Writing the page failed. One reason would be that the cache has been
		// shrunk and the page no longer belongs to the file. Otherwise the
		// actual I/O failed, in which case we'll simply keep the page modified.

		if (!fPage->busy_writing) {
			// The busy_writing flag was cleared. That means the cache has been
			// shrunk while we were trying to write the page and we have to free
			// it now.
			vm_remove_all_page_mappings(fPage);
				// TODO: Unmapping should already happen when resizing the cache!
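			// Note: free_page() only moves the page to the free queue; the
			// unreserve_pages(1) below is what returns it to the unreserved
			// pool (sUnreservedFreePages) and wakes any reservation waiters.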
			fCache->RemovePage(fPage);
			free_page(fPage, false);
			unreserve_pages(1);
		} else {
			// Writing the page failed -- mark the page modified and move it to
			// an appropriate queue other than the modified queue, so we don't
			// keep trying to write it over and over again. We keep
			// non-temporary pages in the modified queue, though, so they don't
			// get lost in the inactive queue.
			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
				strerror(result));

			fPage->modified = true;
			if (!fCache->temporary)
				set_page_state(fPage, PAGE_STATE_MODIFIED);
			else if (fPage->IsMapped())
				set_page_state(fPage, PAGE_STATE_ACTIVE);
			else
				set_page_state(fPage, PAGE_STATE_INACTIVE);

			fPage->busy_writing = false;
			DEBUG_PAGE_ACCESS_END(fPage);

			success = false;
		}
	}

	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
	fIsActive = false;

	return success;
}


/*!	The page's cache must be locked.
*/
void
PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
{
	fRun = run;
	fCache = page->Cache();
	fOffset = page->cache_offset;
	fPageCount = 1;
	fMaxPages = maxPages;
	fStatus = B_OK;

	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
	fVecs[0].length = B_PAGE_SIZE;
	fVecCount = 1;
}


/*!	The page's cache must be locked.
*/
bool
PageWriteTransfer::AddPage(vm_page* page)
{
	if (page->Cache() != fCache
		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
		return false;

	phys_addr_t nextBase = fVecs[fVecCount - 1].base
		+ fVecs[fVecCount - 1].length;

	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
		&& (off_t)page->cache_offset == fOffset + fPageCount) {
		// append to last iovec
		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
		fPageCount++;
		return true;
	}

	nextBase = fVecs[0].base - B_PAGE_SIZE;
	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
		&& (off_t)page->cache_offset == fOffset - 1) {
		// prepend to first iovec and adjust offset
		fVecs[0].base = nextBase;
		fVecs[0].length += B_PAGE_SIZE;
		fOffset = page->cache_offset;
		fPageCount++;
		return true;
	}

	if (((off_t)page->cache_offset == fOffset + fPageCount
			|| (off_t)page->cache_offset == fOffset - 1)
		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
		// not physically contiguous or not in the right order
		uint32 vectorIndex;
		if ((off_t)page->cache_offset < fOffset) {
			// we are pre-pending another vector, move the other vecs
			for (uint32 i = fVecCount; i > 0; i--)
				fVecs[i] = fVecs[i - 1];

			fOffset = page->cache_offset;
			vectorIndex = 0;
		} else
			vectorIndex = fVecCount;

		fVecs[vectorIndex].base
			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
		fVecs[vectorIndex].length = B_PAGE_SIZE;

		fVecCount++;
		fPageCount++;
		return true;
	}

	return false;
}


status_t
PageWriteTransfer::Schedule(uint32 flags)
{
	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;

	if (fRun != NULL) {
		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
this); 2197 } 2198 2199 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 2200 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 2201 2202 SetStatus(status, writeLength); 2203 return fStatus; 2204 } 2205 2206 2207 void 2208 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 2209 { 2210 // only succeed if all pages up to the last one have been written fully 2211 // and the last page has at least been written partially 2212 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2213 status = B_ERROR; 2214 2215 fStatus = status; 2216 } 2217 2218 2219 void 2220 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2221 generic_size_t bytesTransferred) 2222 { 2223 SetStatus(status, bytesTransferred); 2224 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2225 } 2226 2227 2228 status_t 2229 PageWriterRun::Init(uint32 maxPages) 2230 { 2231 fMaxPages = maxPages; 2232 fWrapperCount = 0; 2233 fTransferCount = 0; 2234 fPendingTransfers = 0; 2235 2236 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2237 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 2238 if (fWrappers == NULL || fTransfers == NULL) 2239 return B_NO_MEMORY; 2240 2241 return B_OK; 2242 } 2243 2244 2245 void 2246 PageWriterRun::PrepareNextRun() 2247 { 2248 fWrapperCount = 0; 2249 fTransferCount = 0; 2250 fPendingTransfers = 0; 2251 } 2252 2253 2254 /*! The page's cache must be locked. 2255 */ 2256 void 2257 PageWriterRun::AddPage(vm_page* page) 2258 { 2259 fWrappers[fWrapperCount++].SetTo(page); 2260 2261 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2262 fTransfers[fTransferCount++].SetTo(this, page, 2263 page->Cache()->MaxPagesPerAsyncWrite()); 2264 } 2265 } 2266 2267 2268 /*! Writes all pages previously added. 2269 \return The number of pages that could not be written or otherwise handled. 2270 */ 2271 uint32 2272 PageWriterRun::Go() 2273 { 2274 atomic_set(&fPendingTransfers, fTransferCount); 2275 2276 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2277 ConditionVariableEntry waitEntry; 2278 fAllFinishedCondition.Add(&waitEntry); 2279 2280 // schedule writes 2281 for (uint32 i = 0; i < fTransferCount; i++) 2282 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2283 2284 // wait until all pages have been written 2285 waitEntry.Wait(); 2286 2287 // mark pages depending on whether they could be written or not 2288 2289 uint32 failedPages = 0; 2290 uint32 wrapperIndex = 0; 2291 for (uint32 i = 0; i < fTransferCount; i++) { 2292 PageWriteTransfer& transfer = fTransfers[i]; 2293 transfer.Cache()->Lock(); 2294 2295 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2296 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2297 failedPages++; 2298 } 2299 2300 transfer.Cache()->Unlock(); 2301 } 2302 2303 ASSERT(wrapperIndex == fWrapperCount); 2304 2305 for (uint32 i = 0; i < fTransferCount; i++) { 2306 PageWriteTransfer& transfer = fTransfers[i]; 2307 struct VMCache* cache = transfer.Cache(); 2308 2309 // We've acquired a references for each page 2310 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2311 // We release the cache references after all pages were made 2312 // unbusy again - otherwise releasing a vnode could deadlock. 
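			// Note: these two releases are meant to balance the per-page
			// AcquireUnreferencedStoreRef() and AcquireRefLocked() calls that
			// page_writer() makes while collecting the pages for this run.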
2313 cache->ReleaseStoreRef(); 2314 cache->ReleaseRef(); 2315 } 2316 } 2317 2318 return failedPages; 2319 } 2320 2321 2322 void 2323 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2324 bool partialTransfer, size_t bytesTransferred) 2325 { 2326 if (atomic_add(&fPendingTransfers, -1) == 1) 2327 fAllFinishedCondition.NotifyAll(); 2328 } 2329 2330 2331 /*! The page writer continuously takes some pages from the modified 2332 queue, writes them back, and moves them back to the active queue. 2333 It runs in its own thread, and is only there to keep the number 2334 of modified pages low, so that more pages can be reused with 2335 fewer costs. 2336 */ 2337 status_t 2338 page_writer(void* /*unused*/) 2339 { 2340 const uint32 kNumPages = 256; 2341 #ifdef TRACE_VM_PAGE 2342 uint32 writtenPages = 0; 2343 bigtime_t lastWrittenTime = 0; 2344 bigtime_t pageCollectionTime = 0; 2345 bigtime_t pageWritingTime = 0; 2346 #endif 2347 2348 PageWriterRun run; 2349 if (run.Init(kNumPages) != B_OK) { 2350 panic("page writer: Failed to init PageWriterRun!"); 2351 return B_ERROR; 2352 } 2353 2354 page_num_t pagesSinceLastSuccessfulWrite = 0; 2355 2356 while (true) { 2357 // TODO: Maybe wait shorter when memory is low! 2358 if (sModifiedPageQueue.Count() < kNumPages) { 2359 sPageWriterCondition.Wait(3000000, true); 2360 // all 3 seconds when no one triggers us 2361 } 2362 2363 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2364 if (modifiedPages == 0) 2365 continue; 2366 2367 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2368 // We ran through the whole queue without being able to write a 2369 // single page. Take a break. 2370 snooze(500000); 2371 pagesSinceLastSuccessfulWrite = 0; 2372 } 2373 2374 #if ENABLE_SWAP_SUPPORT 2375 page_stats pageStats; 2376 get_page_stats(pageStats); 2377 bool activePaging = do_active_paging(pageStats); 2378 #endif 2379 2380 // depending on how urgent it becomes to get pages to disk, we adjust 2381 // our I/O priority 2382 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2383 int32 ioPriority = B_IDLE_PRIORITY; 2384 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2385 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2386 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2387 } else { 2388 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2389 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2390 } 2391 2392 thread_set_io_priority(ioPriority); 2393 2394 uint32 numPages = 0; 2395 run.PrepareNextRun(); 2396 2397 // TODO: make this laptop friendly, too (ie. only start doing 2398 // something if someone else did something or there is really 2399 // enough to do). 2400 2401 // collect pages to be written 2402 #ifdef TRACE_VM_PAGE 2403 pageCollectionTime -= system_time(); 2404 #endif 2405 2406 page_num_t maxPagesToSee = modifiedPages; 2407 2408 while (numPages < kNumPages && maxPagesToSee > 0) { 2409 vm_page *page = next_modified_page(maxPagesToSee); 2410 if (page == NULL) 2411 break; 2412 2413 PageCacheLocker cacheLocker(page, false); 2414 if (!cacheLocker.IsLocked()) 2415 continue; 2416 2417 VMCache *cache = page->Cache(); 2418 2419 // If the page is busy or its state has changed while we were 2420 // locking the cache, just ignore it. 2421 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2422 continue; 2423 2424 DEBUG_PAGE_ACCESS_START(page); 2425 2426 // Don't write back wired (locked) pages. 
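		// (They cannot be reclaimed while wired anyway, and their contents may
		// presumably still change; such pages are simply put back on the active
		// queue below.)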
2427 if (page->WiredCount() > 0) { 2428 set_page_state(page, PAGE_STATE_ACTIVE); 2429 DEBUG_PAGE_ACCESS_END(page); 2430 continue; 2431 } 2432 2433 // Write back temporary pages only when we're actively paging. 2434 if (cache->temporary 2435 #if ENABLE_SWAP_SUPPORT 2436 && (!activePaging 2437 || !cache->CanWritePage( 2438 (off_t)page->cache_offset << PAGE_SHIFT)) 2439 #endif 2440 ) { 2441 // We can't/don't want to do anything with this page, so move it 2442 // to one of the other queues. 2443 if (page->mappings.IsEmpty()) 2444 set_page_state(page, PAGE_STATE_INACTIVE); 2445 else 2446 set_page_state(page, PAGE_STATE_ACTIVE); 2447 2448 DEBUG_PAGE_ACCESS_END(page); 2449 continue; 2450 } 2451 2452 // We need our own reference to the store, as it might currently be 2453 // destroyed. 2454 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2455 DEBUG_PAGE_ACCESS_END(page); 2456 cacheLocker.Unlock(); 2457 thread_yield(); 2458 continue; 2459 } 2460 2461 run.AddPage(page); 2462 // TODO: We're possibly adding pages of different caches and 2463 // thus maybe of different underlying file systems here. This 2464 // is a potential problem for loop file systems/devices, since 2465 // we could mark a page busy that would need to be accessed 2466 // when writing back another page, thus causing a deadlock. 2467 2468 DEBUG_PAGE_ACCESS_END(page); 2469 2470 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2471 TPW(WritePage(page)); 2472 2473 cache->AcquireRefLocked(); 2474 numPages++; 2475 } 2476 2477 #ifdef TRACE_VM_PAGE 2478 pageCollectionTime += system_time(); 2479 #endif 2480 if (numPages == 0) 2481 continue; 2482 2483 // write pages to disk and do all the cleanup 2484 #ifdef TRACE_VM_PAGE 2485 pageWritingTime -= system_time(); 2486 #endif 2487 uint32 failedPages = run.Go(); 2488 #ifdef TRACE_VM_PAGE 2489 pageWritingTime += system_time(); 2490 2491 // debug output only... 2492 writtenPages += numPages; 2493 if (writtenPages >= 1024) { 2494 bigtime_t now = system_time(); 2495 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2496 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2497 (now - lastWrittenTime) / 1000, 2498 pageCollectionTime / 1000, pageWritingTime / 1000)); 2499 lastWrittenTime = now; 2500 2501 writtenPages -= 1024; 2502 pageCollectionTime = 0; 2503 pageWritingTime = 0; 2504 } 2505 #endif 2506 2507 if (failedPages == numPages) 2508 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2509 else 2510 pagesSinceLastSuccessfulWrite = 0; 2511 } 2512 2513 return B_OK; 2514 } 2515 2516 2517 // #pragma mark - 2518 2519 2520 // TODO: This should be done in the page daemon! 2521 #if 0 2522 #if ENABLE_SWAP_SUPPORT 2523 static bool 2524 free_page_swap_space(int32 index) 2525 { 2526 vm_page *page = vm_page_at_index(index); 2527 PageCacheLocker locker(page); 2528 if (!locker.IsLocked()) 2529 return false; 2530 2531 DEBUG_PAGE_ACCESS_START(page); 2532 2533 VMCache* cache = page->Cache(); 2534 if (cache->temporary && page->WiredCount() == 0 2535 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2536 && page->usage_count > 0) { 2537 // TODO: how to judge a page is highly active? 2538 if (swap_free_page_swap_space(page)) { 2539 // We need to mark the page modified, since otherwise it could be 2540 // stolen and we'd lose its data. 
2541 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2542 TD(FreedPageSwap(page)); 2543 DEBUG_PAGE_ACCESS_END(page); 2544 return true; 2545 } 2546 } 2547 DEBUG_PAGE_ACCESS_END(page); 2548 return false; 2549 } 2550 #endif 2551 #endif // 0 2552 2553 2554 static vm_page * 2555 find_cached_page_candidate(struct vm_page &marker) 2556 { 2557 DEBUG_PAGE_ACCESS_CHECK(&marker); 2558 2559 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2560 vm_page *page; 2561 2562 if (marker.State() == PAGE_STATE_UNUSED) { 2563 // Get the first free pages of the (in)active queue 2564 page = sCachedPageQueue.Head(); 2565 } else { 2566 // Get the next page of the current queue 2567 if (marker.State() != PAGE_STATE_CACHED) { 2568 panic("invalid marker %p state", &marker); 2569 return NULL; 2570 } 2571 2572 page = sCachedPageQueue.Next(&marker); 2573 sCachedPageQueue.Remove(&marker); 2574 marker.SetState(PAGE_STATE_UNUSED); 2575 } 2576 2577 while (page != NULL) { 2578 if (!page->busy) { 2579 // we found a candidate, insert marker 2580 marker.SetState(PAGE_STATE_CACHED); 2581 sCachedPageQueue.InsertAfter(page, &marker); 2582 return page; 2583 } 2584 2585 page = sCachedPageQueue.Next(page); 2586 } 2587 2588 return NULL; 2589 } 2590 2591 2592 static bool 2593 free_cached_page(vm_page *page, bool dontWait) 2594 { 2595 // try to lock the page's cache 2596 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2597 return false; 2598 VMCache* cache = page->Cache(); 2599 2600 AutoLocker<VMCache> cacheLocker(cache, true); 2601 MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache); 2602 2603 // check again if that page is still a candidate 2604 if (page->busy || page->State() != PAGE_STATE_CACHED) 2605 return false; 2606 2607 DEBUG_PAGE_ACCESS_START(page); 2608 2609 PAGE_ASSERT(page, !page->IsMapped()); 2610 PAGE_ASSERT(page, !page->modified); 2611 2612 // we can now steal this page 2613 2614 cache->RemovePage(page); 2615 // Now the page doesn't have cache anymore, so no one else (e.g. 2616 // vm_page_allocate_page_run() can pick it up), since they would be 2617 // required to lock the cache first, which would fail. 2618 2619 sCachedPageQueue.RemoveUnlocked(page); 2620 return true; 2621 } 2622 2623 2624 static uint32 2625 free_cached_pages(uint32 pagesToFree, bool dontWait) 2626 { 2627 vm_page marker; 2628 init_page_marker(marker); 2629 2630 uint32 pagesFreed = 0; 2631 2632 while (pagesFreed < pagesToFree) { 2633 vm_page *page = find_cached_page_candidate(marker); 2634 if (page == NULL) 2635 break; 2636 2637 if (free_cached_page(page, dontWait)) { 2638 ReadLocker locker(sFreePageQueuesLock); 2639 page->SetState(PAGE_STATE_FREE); 2640 DEBUG_PAGE_ACCESS_END(page); 2641 sFreePageQueue.PrependUnlocked(page); 2642 locker.Unlock(); 2643 2644 TA(StolenPage()); 2645 2646 pagesFreed++; 2647 } 2648 } 2649 2650 remove_page_marker(marker); 2651 2652 sFreePageCondition.NotifyAll(); 2653 2654 return pagesFreed; 2655 } 2656 2657 2658 static void 2659 idle_scan_active_pages(page_stats& pageStats) 2660 { 2661 VMPageQueue& queue = sActivePageQueue; 2662 2663 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2664 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2665 2666 while (maxToScan > 0) { 2667 maxToScan--; 2668 2669 // Get the next page. Note that we don't bother to lock here. We go with 2670 // the assumption that on all architectures reading/writing pointers is 2671 // atomic. Beyond that it doesn't really matter. 
We have to unlock the 2672 // queue anyway to lock the page's cache, and we'll recheck afterwards. 2673 vm_page* page = queue.Head(); 2674 if (page == NULL) 2675 break; 2676 2677 // lock the page's cache 2678 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2679 if (cache == NULL) 2680 continue; 2681 2682 if (page->State() != PAGE_STATE_ACTIVE) { 2683 // page is no longer in the cache or in this queue 2684 cache->ReleaseRefAndUnlock(); 2685 continue; 2686 } 2687 2688 if (page->busy) { 2689 // page is busy -- requeue at the end 2690 vm_page_requeue(page, true); 2691 cache->ReleaseRefAndUnlock(); 2692 continue; 2693 } 2694 2695 DEBUG_PAGE_ACCESS_START(page); 2696 2697 // Get the page active/modified flags and update the page's usage count. 2698 // We completely unmap inactive temporary pages. This saves us to 2699 // iterate through the inactive list as well, since we'll be notified 2700 // via page fault whenever such an inactive page is used again. 2701 // We don't remove the mappings of non-temporary pages, since we 2702 // wouldn't notice when those would become unused and could thus be 2703 // moved to the cached list. 2704 int32 usageCount; 2705 if (page->WiredCount() > 0 || page->usage_count > 0 2706 || !cache->temporary) { 2707 usageCount = vm_clear_page_mapping_accessed_flags(page); 2708 } else 2709 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2710 2711 if (usageCount > 0) { 2712 usageCount += page->usage_count + kPageUsageAdvance; 2713 if (usageCount > kPageUsageMax) 2714 usageCount = kPageUsageMax; 2715 // TODO: This would probably also be the place to reclaim swap space. 2716 } else { 2717 usageCount += page->usage_count - (int32)kPageUsageDecline; 2718 if (usageCount < 0) { 2719 usageCount = 0; 2720 set_page_state(page, PAGE_STATE_INACTIVE); 2721 } 2722 } 2723 2724 page->usage_count = usageCount; 2725 2726 DEBUG_PAGE_ACCESS_END(page); 2727 2728 cache->ReleaseRefAndUnlock(); 2729 } 2730 } 2731 2732 2733 static void 2734 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel) 2735 { 2736 int32 pagesToFree = pageStats.unsatisfiedReservations 2737 + sFreeOrCachedPagesTarget 2738 - (pageStats.totalFreePages + pageStats.cachedPages); 2739 if (pagesToFree <= 0) 2740 return; 2741 2742 bigtime_t time = system_time(); 2743 uint32 pagesScanned = 0; 2744 uint32 pagesToCached = 0; 2745 uint32 pagesToModified = 0; 2746 uint32 pagesToActive = 0; 2747 2748 // Determine how many pages at maximum to send to the modified queue. Since 2749 // it is relatively expensive to page out pages, we do that on a grander 2750 // scale only when things get desperate. 2751 uint32 maxToFlush = despairLevel <= 1 ? 
32 : 10000;
2752 
2753 	vm_page marker;
2754 	init_page_marker(marker);
2755 
2756 	VMPageQueue& queue = sInactivePageQueue;
2757 	InterruptsSpinLocker queueLocker(queue.GetLock());
2758 	uint32 maxToScan = queue.Count();
2759 
2760 	vm_page* nextPage = queue.Head();
2761 
2762 	while (pagesToFree > 0 && maxToScan > 0) {
2763 		maxToScan--;
2764 
2765 		// get the next page
2766 		vm_page* page = nextPage;
2767 		if (page == NULL)
2768 			break;
2769 		nextPage = queue.Next(page);
2770 
2771 		if (page->busy)
2772 			continue;
2773 
2774 		// mark the position
2775 		queue.InsertAfter(page, &marker);
2776 		queueLocker.Unlock();
2777 
2778 		// lock the page's cache
2779 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2780 		if (cache == NULL || page->busy
2781 			|| page->State() != PAGE_STATE_INACTIVE) {
2782 			if (cache != NULL)
2783 				cache->ReleaseRefAndUnlock();
2784 			queueLocker.Lock();
2785 			nextPage = queue.Next(&marker);
2786 			queue.Remove(&marker);
2787 			continue;
2788 		}
2789 
2790 		pagesScanned++;
2791 
2792 		DEBUG_PAGE_ACCESS_START(page);
2793 
2794 		// Get the accessed count, clear the accessed/modified flags and
2795 		// unmap the page, if it hasn't been accessed.
2796 		int32 usageCount;
2797 		if (page->WiredCount() > 0)
2798 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2799 		else
2800 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2801 
2802 		// update usage count
2803 		if (usageCount > 0) {
2804 			usageCount += page->usage_count + kPageUsageAdvance;
2805 			if (usageCount > kPageUsageMax)
2806 				usageCount = kPageUsageMax;
2807 		} else {
2808 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2809 			if (usageCount < 0)
2810 				usageCount = 0;
2811 		}
2812 
2813 		page->usage_count = usageCount;
2814 
2815 		// Move to fitting queue or requeue:
2816 		// * Active mapped pages go to the active queue.
2817 		// * Inactive mapped (i.e. wired) pages are requeued.
2818 		// * The remaining pages are cachable. Thus, if unmodified they go to
2819 		//   the cached queue, otherwise to the modified queue (up to a limit).
2820 		//   Note that unlike in the idle scanning we don't exempt pages of
2821 		//   temporary caches. Apparently we really need memory, so we better
2822 		//   page out memory as well.
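		// The "up to a limit" above is maxToFlush: on a first, low-despair full
		// scan only a small batch is handed to the modified queue (and thus to
		// the page writer); later, more desperate scans flush far more
		// aggressively.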
2823 bool isMapped = page->IsMapped(); 2824 if (usageCount > 0) { 2825 if (isMapped) { 2826 set_page_state(page, PAGE_STATE_ACTIVE); 2827 pagesToActive++; 2828 } else 2829 vm_page_requeue(page, true); 2830 } else if (isMapped) { 2831 vm_page_requeue(page, true); 2832 } else if (!page->modified) { 2833 set_page_state(page, PAGE_STATE_CACHED); 2834 pagesToFree--; 2835 pagesToCached++; 2836 } else if (maxToFlush > 0) { 2837 set_page_state(page, PAGE_STATE_MODIFIED); 2838 maxToFlush--; 2839 pagesToModified++; 2840 } else 2841 vm_page_requeue(page, true); 2842 2843 DEBUG_PAGE_ACCESS_END(page); 2844 2845 cache->ReleaseRefAndUnlock(); 2846 2847 // remove the marker 2848 queueLocker.Lock(); 2849 nextPage = queue.Next(&marker); 2850 queue.Remove(&marker); 2851 } 2852 2853 queueLocker.Unlock(); 2854 2855 time = system_time() - time; 2856 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2857 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2858 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2859 pagesToModified, pagesToActive); 2860 2861 // wake up the page writer, if we tossed it some pages 2862 if (pagesToModified > 0) 2863 sPageWriterCondition.WakeUp(); 2864 } 2865 2866 2867 static void 2868 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2869 { 2870 vm_page marker; 2871 init_page_marker(marker); 2872 2873 VMPageQueue& queue = sActivePageQueue; 2874 InterruptsSpinLocker queueLocker(queue.GetLock()); 2875 uint32 maxToScan = queue.Count(); 2876 2877 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2878 + sFreeOrCachedPagesTarget 2879 - (pageStats.totalFreePages + pageStats.cachedPages) 2880 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2881 if (pagesToDeactivate <= 0) 2882 return; 2883 2884 bigtime_t time = system_time(); 2885 uint32 pagesAccessed = 0; 2886 uint32 pagesToInactive = 0; 2887 uint32 pagesScanned = 0; 2888 2889 vm_page* nextPage = queue.Head(); 2890 2891 while (pagesToDeactivate > 0 && maxToScan > 0) { 2892 maxToScan--; 2893 2894 // get the next page 2895 vm_page* page = nextPage; 2896 if (page == NULL) 2897 break; 2898 nextPage = queue.Next(page); 2899 2900 if (page->busy) 2901 continue; 2902 2903 // mark the position 2904 queue.InsertAfter(page, &marker); 2905 queueLocker.Unlock(); 2906 2907 // lock the page's cache 2908 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2909 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2910 if (cache != NULL) 2911 cache->ReleaseRefAndUnlock(); 2912 queueLocker.Lock(); 2913 nextPage = queue.Next(&marker); 2914 queue.Remove(&marker); 2915 continue; 2916 } 2917 2918 pagesScanned++; 2919 2920 DEBUG_PAGE_ACCESS_START(page); 2921 2922 // Get the page active/modified flags and update the page's usage count. 2923 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2924 2925 if (usageCount > 0) { 2926 usageCount += page->usage_count + kPageUsageAdvance; 2927 if (usageCount > kPageUsageMax) 2928 usageCount = kPageUsageMax; 2929 pagesAccessed++; 2930 // TODO: This would probably also be the place to reclaim swap space. 
2931 } else { 2932 usageCount += page->usage_count - (int32)kPageUsageDecline; 2933 if (usageCount <= 0) { 2934 usageCount = 0; 2935 set_page_state(page, PAGE_STATE_INACTIVE); 2936 pagesToInactive++; 2937 } 2938 } 2939 2940 page->usage_count = usageCount; 2941 2942 DEBUG_PAGE_ACCESS_END(page); 2943 2944 cache->ReleaseRefAndUnlock(); 2945 2946 // remove the marker 2947 queueLocker.Lock(); 2948 nextPage = queue.Next(&marker); 2949 queue.Remove(&marker); 2950 } 2951 2952 time = system_time() - time; 2953 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2954 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed" 2955 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed); 2956 } 2957 2958 2959 static void 2960 page_daemon_idle_scan(page_stats& pageStats) 2961 { 2962 TRACE_DAEMON("page daemon: idle run\n"); 2963 2964 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2965 // We want more actually free pages, so free some from the cached 2966 // ones. 2967 uint32 freed = free_cached_pages( 2968 sFreePagesTarget - pageStats.totalFreePages, false); 2969 if (freed > 0) 2970 unreserve_pages(freed); 2971 get_page_stats(pageStats); 2972 } 2973 2974 // Walk the active list and move pages to the inactive queue. 2975 get_page_stats(pageStats); 2976 idle_scan_active_pages(pageStats); 2977 } 2978 2979 2980 static void 2981 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2982 { 2983 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %" 2984 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages, 2985 pageStats.cachedPages, pageStats.unsatisfiedReservations 2986 + sFreeOrCachedPagesTarget 2987 - (pageStats.totalFreePages + pageStats.cachedPages)); 2988 2989 // Walk the inactive list and transfer pages to the cached and modified 2990 // queues. 2991 full_scan_inactive_pages(pageStats, despairLevel); 2992 2993 // Free cached pages. Also wake up reservation waiters. 2994 get_page_stats(pageStats); 2995 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2996 - (pageStats.totalFreePages); 2997 if (pagesToFree > 0) { 2998 uint32 freed = free_cached_pages(pagesToFree, true); 2999 if (freed > 0) 3000 unreserve_pages(freed); 3001 } 3002 3003 // Walk the active list and move pages to the inactive queue. 3004 get_page_stats(pageStats); 3005 full_scan_active_pages(pageStats, despairLevel); 3006 } 3007 3008 3009 static status_t 3010 page_daemon(void* /*unused*/) 3011 { 3012 int32 despairLevel = 0; 3013 3014 while (true) { 3015 sPageDaemonCondition.ClearActivated(); 3016 3017 // evaluate the free pages situation 3018 page_stats pageStats; 3019 get_page_stats(pageStats); 3020 3021 if (!do_active_paging(pageStats)) { 3022 // Things look good -- just maintain statistics and keep the pool 3023 // of actually free pages full enough. 3024 despairLevel = 0; 3025 page_daemon_idle_scan(pageStats); 3026 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 3027 } else { 3028 // Not enough free pages. We need to do some real work. 3029 despairLevel = std::max(despairLevel + 1, (int32)3); 3030 page_daemon_full_scan(pageStats, despairLevel); 3031 3032 // Don't wait after the first full scan, but rather immediately 3033 // check whether we were successful in freeing enough pages and 3034 // re-run with increased despair level. The first scan is 3035 // conservative with respect to moving inactive modified pages to 3036 // the modified list to avoid thrashing. The second scan, however, 3037 // will not hold back. 
3038 if (despairLevel > 1) 3039 snooze(kBusyScanWaitInterval); 3040 } 3041 } 3042 3043 return B_OK; 3044 } 3045 3046 3047 /*! Returns how many pages could *not* be reserved. 3048 */ 3049 static uint32 3050 reserve_pages(uint32 count, int priority, bool dontWait) 3051 { 3052 int32 dontTouch = kPageReserveForPriority[priority]; 3053 3054 while (true) { 3055 count -= reserve_some_pages(count, dontTouch); 3056 if (count == 0) 3057 return 0; 3058 3059 if (sUnsatisfiedPageReservations == 0) { 3060 count -= free_cached_pages(count, dontWait); 3061 if (count == 0) 3062 return count; 3063 } 3064 3065 if (dontWait) 3066 return count; 3067 3068 // we need to wait for pages to become available 3069 3070 MutexLocker pageDeficitLocker(sPageDeficitLock); 3071 3072 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3073 sUnsatisfiedPageReservations += count; 3074 3075 if (atomic_get(&sUnreservedFreePages) > dontTouch) { 3076 // the situation changed 3077 sUnsatisfiedPageReservations -= count; 3078 continue; 3079 } 3080 3081 PageReservationWaiter waiter; 3082 waiter.dontTouch = dontTouch; 3083 waiter.missing = count; 3084 waiter.thread = thread_get_current_thread(); 3085 waiter.threadPriority = waiter.thread->priority; 3086 3087 // insert ordered (i.e. after all waiters with higher or equal priority) 3088 PageReservationWaiter* otherWaiter = NULL; 3089 for (PageReservationWaiterList::Iterator it 3090 = sPageReservationWaiters.GetIterator(); 3091 (otherWaiter = it.Next()) != NULL;) { 3092 if (waiter < *otherWaiter) 3093 break; 3094 } 3095 3096 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3097 3098 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3099 "waiting for pages"); 3100 3101 if (notifyDaemon) 3102 sPageDaemonCondition.WakeUp(); 3103 3104 pageDeficitLocker.Unlock(); 3105 3106 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3107 thread_block(); 3108 3109 pageDeficitLocker.Lock(); 3110 3111 return 0; 3112 } 3113 } 3114 3115 3116 // #pragma mark - private kernel API 3117 3118 3119 /*! Writes a range of modified pages of a cache to disk. 3120 You need to hold the VMCache lock when calling this function. 3121 Note that the cache lock is released in this function. 3122 \param cache The cache. 3123 \param firstPage Offset (in page size units) of the first page in the range. 3124 \param endPage End offset (in page size units) of the page range. The page 3125 at this offset is not included. 
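	A minimal usage sketch (assuming \a cache is already locked, as required;
	this mirrors what vm_page_write_modified_pages() below does for a whole
	cache):
	\code
	vm_page_write_modified_page_range(cache, 0,
		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	\endcode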
3126 */ 3127 status_t 3128 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3129 uint32 endPage) 3130 { 3131 static const int32 kMaxPages = 256; 3132 int32 maxPages = cache->MaxPagesPerWrite(); 3133 if (maxPages < 0 || maxPages > kMaxPages) 3134 maxPages = kMaxPages; 3135 3136 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3137 | HEAP_DONT_LOCK_KERNEL_SPACE; 3138 3139 PageWriteWrapper stackWrappersPool[2]; 3140 PageWriteWrapper* stackWrappers[1]; 3141 PageWriteWrapper* wrapperPool 3142 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3143 PageWriteWrapper** wrappers 3144 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages]; 3145 if (wrapperPool == NULL || wrappers == NULL) { 3146 // don't fail, just limit our capabilities 3147 delete[] wrapperPool; 3148 delete[] wrappers; 3149 wrapperPool = stackWrappersPool; 3150 wrappers = stackWrappers; 3151 maxPages = 1; 3152 } 3153 3154 int32 nextWrapper = 0; 3155 int32 usedWrappers = 0; 3156 3157 PageWriteTransfer transfer; 3158 bool transferEmpty = true; 3159 3160 VMCachePagesTree::Iterator it 3161 = cache->pages.GetIterator(firstPage, true, true); 3162 3163 while (true) { 3164 vm_page* page = it.Next(); 3165 if (page == NULL || page->cache_offset >= endPage) { 3166 if (transferEmpty) 3167 break; 3168 3169 page = NULL; 3170 } 3171 3172 if (page != NULL) { 3173 if (page->busy 3174 || (page->State() != PAGE_STATE_MODIFIED 3175 && !vm_test_map_modification(page))) { 3176 page = NULL; 3177 } 3178 } 3179 3180 PageWriteWrapper* wrapper = NULL; 3181 if (page != NULL) { 3182 wrapper = &wrapperPool[nextWrapper++]; 3183 if (nextWrapper > maxPages) 3184 nextWrapper = 0; 3185 3186 DEBUG_PAGE_ACCESS_START(page); 3187 3188 wrapper->SetTo(page); 3189 3190 if (transferEmpty || transfer.AddPage(page)) { 3191 if (transferEmpty) { 3192 transfer.SetTo(NULL, page, maxPages); 3193 transferEmpty = false; 3194 } 3195 3196 DEBUG_PAGE_ACCESS_END(page); 3197 3198 wrappers[usedWrappers++] = wrapper; 3199 continue; 3200 } 3201 3202 DEBUG_PAGE_ACCESS_END(page); 3203 } 3204 3205 if (transferEmpty) 3206 continue; 3207 3208 cache->Unlock(); 3209 status_t status = transfer.Schedule(0); 3210 cache->Lock(); 3211 3212 for (int32 i = 0; i < usedWrappers; i++) 3213 wrappers[i]->Done(status); 3214 3215 usedWrappers = 0; 3216 3217 if (page != NULL) { 3218 transfer.SetTo(NULL, page, maxPages); 3219 wrappers[usedWrappers++] = wrapper; 3220 } else 3221 transferEmpty = true; 3222 } 3223 3224 if (wrapperPool != stackWrappersPool) { 3225 delete[] wrapperPool; 3226 delete[] wrappers; 3227 } 3228 3229 return B_OK; 3230 } 3231 3232 3233 /*! You need to hold the VMCache lock when calling this function. 3234 Note that the cache lock is released in this function. 3235 */ 3236 status_t 3237 vm_page_write_modified_pages(VMCache *cache) 3238 { 3239 return vm_page_write_modified_page_range(cache, 0, 3240 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3241 } 3242 3243 3244 /*! Schedules the page writer to write back the specified \a page. 3245 Note, however, that it might not do this immediately, and it can well 3246 take several seconds until the page is actually written out. 3247 */ 3248 void 3249 vm_page_schedule_write_page(vm_page *page) 3250 { 3251 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3252 3253 vm_page_requeue(page, false); 3254 3255 sPageWriterCondition.WakeUp(); 3256 } 3257 3258 3259 /*! Cache must be locked. 
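	Like vm_page_schedule_write_page(), this only requeues the modified pages
	in the given range and wakes up the page writer; it does not wait for the
	pages to actually be written back.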
3260 */ 3261 void 3262 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3263 uint32 endPage) 3264 { 3265 uint32 modified = 0; 3266 for (VMCachePagesTree::Iterator it 3267 = cache->pages.GetIterator(firstPage, true, true); 3268 vm_page *page = it.Next();) { 3269 if (page->cache_offset >= endPage) 3270 break; 3271 3272 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3273 DEBUG_PAGE_ACCESS_START(page); 3274 vm_page_requeue(page, false); 3275 modified++; 3276 DEBUG_PAGE_ACCESS_END(page); 3277 } 3278 } 3279 3280 if (modified > 0) 3281 sPageWriterCondition.WakeUp(); 3282 } 3283 3284 3285 void 3286 vm_page_init_num_pages(kernel_args *args) 3287 { 3288 // calculate the size of memory by looking at the physical_memory_range array 3289 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3290 page_num_t physicalPagesEnd = sPhysicalPageOffset 3291 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3292 3293 sNonExistingPages = 0; 3294 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3295 3296 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3297 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3298 if (start > physicalPagesEnd) 3299 sNonExistingPages += start - physicalPagesEnd; 3300 physicalPagesEnd = start 3301 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3302 3303 #ifdef LIMIT_AVAILABLE_MEMORY 3304 page_num_t available 3305 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3306 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3307 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3308 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3309 break; 3310 } 3311 #endif 3312 } 3313 3314 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3315 sPhysicalPageOffset, physicalPagesEnd)); 3316 3317 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3318 } 3319 3320 3321 status_t 3322 vm_page_init(kernel_args *args) 3323 { 3324 TRACE(("vm_page_init: entry\n")); 3325 3326 // init page queues 3327 sModifiedPageQueue.Init("modified pages queue"); 3328 sInactivePageQueue.Init("inactive pages queue"); 3329 sActivePageQueue.Init("active pages queue"); 3330 sCachedPageQueue.Init("cached pages queue"); 3331 sFreePageQueue.Init("free pages queue"); 3332 sClearPageQueue.Init("clear pages queue"); 3333 3334 new (&sPageReservationWaiters) PageReservationWaiterList; 3335 3336 // map in the new free page table 3337 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3338 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3339 3340 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3341 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3342 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3343 3344 // initialize the free page table 3345 for (uint32 i = 0; i < sNumPages; i++) { 3346 sPages[i].Init(sPhysicalPageOffset + i); 3347 sFreePageQueue.Append(&sPages[i]); 3348 3349 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3350 sPages[i].allocation_tracking_info.Clear(); 3351 #endif 3352 } 3353 3354 sUnreservedFreePages = sNumPages; 3355 3356 TRACE(("initialized table\n")); 3357 3358 // mark the ranges between usable physical memory unused 3359 phys_addr_t previousEnd = 0; 3360 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3361 phys_addr_t base = args->physical_memory_range[i].start; 3362 phys_size_t size = args->physical_memory_range[i].size; 3363 if (base > previousEnd) { 3364 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3365 (base - previousEnd) / B_PAGE_SIZE, false); 3366 } 3367 previousEnd = base + size; 3368 } 3369 3370 // mark the allocated physical page ranges wired 3371 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3372 mark_page_range_in_use( 3373 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3374 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3375 } 3376 3377 // prevent future allocations from the kernel args ranges 3378 args->num_physical_allocated_ranges = 0; 3379 3380 // The target of actually free pages. This must be at least the system 3381 // reserve, but should be a few more pages, so we don't have to extract 3382 // a cached page with each allocation. 3383 sFreePagesTarget = VM_PAGE_RESERVE_USER 3384 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3385 3386 // The target of free + cached and inactive pages. On low-memory machines 3387 // keep things tight. free + cached is the pool of immediately allocatable 3388 // pages. We want a few inactive pages, so when we're actually paging, we 3389 // have a reasonably large set of pages to work with. 3390 if (sUnreservedFreePages < 16 * 1024) { 3391 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3392 sInactivePagesTarget = sFreePagesTarget / 3; 3393 } else { 3394 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3395 sInactivePagesTarget = sFreePagesTarget / 2; 3396 } 3397 3398 TRACE(("vm_page_init: exit\n")); 3399 3400 return B_OK; 3401 } 3402 3403 3404 status_t 3405 vm_page_init_post_area(kernel_args *args) 3406 { 3407 void *dummy; 3408 3409 dummy = sPages; 3410 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3411 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3412 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3413 3414 add_debugger_command("list_pages", &dump_page_list, 3415 "List physical pages"); 3416 add_debugger_command("page_stats", &dump_page_stats, 3417 "Dump statistics about page usage"); 3418 add_debugger_command_etc("page", &dump_page_long, 3419 "Dump page info", 3420 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3421 "Prints information for the physical page. If neither \"-p\" nor\n" 3422 "\"-v\" are given, the provided address is interpreted as address of\n" 3423 "the vm_page data structure for the page in question. If \"-p\" is\n" 3424 "given, the address is the physical address of the page. If \"-v\" is\n" 3425 "given, the address is interpreted as virtual address in the current\n" 3426 "thread's address space and for the page it is mapped to (if any)\n" 3427 "information are printed. 
If \"-m\" is specified, the command will\n" 3428 "search all known address spaces for mappings to that page and print\n" 3429 "them.\n", 0); 3430 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3431 add_debugger_command("find_page", &find_page, 3432 "Find out which queue a page is actually in"); 3433 3434 #ifdef TRACK_PAGE_USAGE_STATS 3435 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3436 "Dumps statistics about page usage counts", 3437 "\n" 3438 "Dumps statistics about page usage counts.\n", 3439 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3440 #endif 3441 3442 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3443 add_debugger_command_etc("page_allocations_per_caller", 3444 &dump_page_allocations_per_caller, 3445 "Dump current page allocations summed up per caller", 3446 "[ -d <caller> ] [ -r ]\n" 3447 "The current allocations will by summed up by caller (their count)\n" 3448 "printed in decreasing order by count.\n" 3449 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3450 "including the respective stack trace.\n" 3451 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3452 "the information, so the next command invocation will only show the\n" 3453 "allocations made after the reset.\n", 0); 3454 add_debugger_command_etc("page_allocation_infos", 3455 &dump_page_allocation_infos, 3456 "Dump current page allocations", 3457 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3458 "[ --thread <thread ID> ]\n" 3459 "The current allocations filtered by optional values will be printed.\n" 3460 "The optional \"-p\" page number filters for a specific page,\n" 3461 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3462 "and/or threads can be filtered (these only work if a corresponding\n" 3463 "tracing entry is still available).\n" 3464 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3465 "callers are printed, where available\n", 0); 3466 #endif 3467 3468 return B_OK; 3469 } 3470 3471 3472 status_t 3473 vm_page_init_post_thread(kernel_args *args) 3474 { 3475 new (&sFreePageCondition) ConditionVariable; 3476 3477 // create a kernel thread to clear out pages 3478 3479 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3480 B_LOWEST_ACTIVE_PRIORITY, NULL); 3481 resume_thread(thread); 3482 3483 // start page writer 3484 3485 sPageWriterCondition.Init("page writer"); 3486 3487 thread = spawn_kernel_thread(&page_writer, "page writer", 3488 B_NORMAL_PRIORITY + 1, NULL); 3489 resume_thread(thread); 3490 3491 // start page daemon 3492 3493 sPageDaemonCondition.Init("page daemon"); 3494 3495 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3496 B_NORMAL_PRIORITY, NULL); 3497 resume_thread(thread); 3498 3499 return B_OK; 3500 } 3501 3502 3503 status_t 3504 vm_mark_page_inuse(page_num_t page) 3505 { 3506 return vm_mark_page_range_inuse(page, 1); 3507 } 3508 3509 3510 status_t 3511 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3512 { 3513 return mark_page_range_in_use(startPage, length, false); 3514 } 3515 3516 3517 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3518 */ 3519 void 3520 vm_page_unreserve_pages(vm_page_reservation* reservation) 3521 { 3522 uint32 count = reservation->count; 3523 reservation->count = 0; 3524 3525 if (count == 0) 3526 return; 3527 3528 TA(UnreservePages(count)); 3529 3530 unreserve_pages(count); 3531 } 3532 3533 3534 /*! With this call, you can reserve a number of free pages in the system. 
3535 They will only be handed out to someone who has actually reserved them. 3536 This call returns as soon as the number of requested pages has been 3537 reached. 3538 The caller must not hold any cache lock or the function might deadlock. 3539 */ 3540 void 3541 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3542 int priority) 3543 { 3544 reservation->count = count; 3545 3546 if (count == 0) 3547 return; 3548 3549 TA(ReservePages(count)); 3550 3551 reserve_pages(count, priority, false); 3552 } 3553 3554 3555 bool 3556 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3557 int priority) 3558 { 3559 if (count == 0) { 3560 reservation->count = count; 3561 return true; 3562 } 3563 3564 uint32 remaining = reserve_pages(count, priority, true); 3565 if (remaining == 0) { 3566 TA(ReservePages(count)); 3567 reservation->count = count; 3568 return true; 3569 } 3570 3571 unreserve_pages(count - remaining); 3572 3573 return false; 3574 } 3575 3576 3577 vm_page * 3578 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3579 { 3580 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3581 ASSERT(pageState != PAGE_STATE_FREE); 3582 ASSERT(pageState != PAGE_STATE_CLEAR); 3583 3584 ASSERT(reservation->count > 0); 3585 reservation->count--; 3586 3587 VMPageQueue* queue; 3588 VMPageQueue* otherQueue; 3589 3590 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3591 queue = &sClearPageQueue; 3592 otherQueue = &sFreePageQueue; 3593 } else { 3594 queue = &sFreePageQueue; 3595 otherQueue = &sClearPageQueue; 3596 } 3597 3598 ReadLocker locker(sFreePageQueuesLock); 3599 3600 vm_page* page = queue->RemoveHeadUnlocked(); 3601 if (page == NULL) { 3602 // if the primary queue was empty, grab the page from the 3603 // secondary queue 3604 page = otherQueue->RemoveHeadUnlocked(); 3605 3606 if (page == NULL) { 3607 // Unlikely, but possible: the page we have reserved has moved 3608 // between the queues after we checked the first queue. Grab the 3609 // write locker to make sure this doesn't happen again. 
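			// The read lock has to be dropped before the write lock can be
			// taken (rw_locks are presumably not upgradable in place). With the
			// write lock held both queues are stable, and our reservation
			// guarantees that one of them still holds a page for us.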
3610 			locker.Unlock();
3611 			WriteLocker writeLocker(sFreePageQueuesLock);
3612 
3613 			page = queue->RemoveHead();
3614 			if (page == NULL)
3615 				page = otherQueue->RemoveHead();
3616 
3617 			if (page == NULL) {
3618 				panic("Had reserved page, but there is none!");
3619 				return NULL;
3620 			}
3621 
3622 			// downgrade to read lock
3623 			locker.Lock();
3624 		}
3625 	}
3626 
3627 	if (page->CacheRef() != NULL)
3628 		panic("supposed to be free page %p has cache\n", page);
3629 
3630 	DEBUG_PAGE_ACCESS_START(page);
3631 
3632 	int oldPageState = page->State();
3633 	page->SetState(pageState);
3634 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3635 	page->usage_count = 0;
3636 	page->accessed = false;
3637 	page->modified = false;
3638 
3639 	locker.Unlock();
3640 
3641 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3642 		sPageQueues[pageState].AppendUnlocked(page);
3643 
3644 	// clear the page, if we had to take it from the free queue and a clear
3645 	// page was requested
3646 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3647 		clear_page(page);
3648 
3649 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3650 	page->allocation_tracking_info.Init(
3651 		TA(AllocatePage(page->physical_page_number)));
3652 #else
3653 	TA(AllocatePage(page->physical_page_number));
3654 #endif
3655 
3656 	return page;
3657 }
3658 
3659 
3660 static void
3661 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3662 	VMPageQueue::PageList& clearPages)
3663 {
3664 	// Page lists are sorted, so remove tails before prepending to the respective queue.
3665 
3666 	while (vm_page* page = freePages.RemoveTail()) {
3667 		page->busy = false;
3668 		page->SetState(PAGE_STATE_FREE);
3669 		DEBUG_PAGE_ACCESS_END(page);
3670 		sFreePageQueue.PrependUnlocked(page);
3671 	}
3672 
3673 	while (vm_page* page = clearPages.RemoveTail()) {
3674 		page->busy = false;
3675 		page->SetState(PAGE_STATE_CLEAR);
3676 		DEBUG_PAGE_ACCESS_END(page);
3677 		sClearPageQueue.PrependUnlocked(page);
3678 	}
3679 
3680 	sFreePageCondition.NotifyAll();
3681 }
3682 
3683 
3684 /*! Tries to allocate a contiguous run of \a length pages starting at
3685 	index \a start.
3686 
3687 	The caller must have write-locked the free/clear page queues. The function
3688 	will unlock them regardless of whether it succeeds or fails.
3689 
3690 	If the function fails, it cleans up after itself, i.e. it will free all
3691 	pages it managed to allocate.
3692 
3693 	\param start The start index (into \c sPages) of the run.
3694 	\param length The number of pages to allocate.
3695 	\param flags Page allocation flags. Encodes the state the function shall
3696 		set the allocated pages to, whether the pages shall be marked busy
3697 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3698 		(VM_PAGE_ALLOC_CLEAR).
3699 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3700 		in locked state. Will be unlocked by the function.
3701 	\return The index of the first page that could not be allocated. \a length
3702 		is returned when the function was successful.
3703 */
3704 static page_num_t
3705 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3706 	WriteLocker& freeClearQueueLocker)
3707 {
3708 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3709 	ASSERT(pageState != PAGE_STATE_FREE);
3710 	ASSERT(pageState != PAGE_STATE_CLEAR);
3711 	ASSERT(start + length <= sNumPages);
3712 
3713 	// Pull the free/clear pages out of their respective queues. Cached pages
3714 	// are allocated later.
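	// Rough sketch of the loop below: FREE/CLEAR pages are pulled out right
	// away, CACHED pages are merely counted for a second pass, and any other
	// page state aborts the run.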
3715 page_num_t cachedPages = 0; 3716 VMPageQueue::PageList freePages; 3717 VMPageQueue::PageList clearPages; 3718 page_num_t i = 0; 3719 for (; i < length; i++) { 3720 bool pageAllocated = true; 3721 bool noPage = false; 3722 vm_page& page = sPages[start + i]; 3723 switch (page.State()) { 3724 case PAGE_STATE_CLEAR: 3725 DEBUG_PAGE_ACCESS_START(&page); 3726 sClearPageQueue.Remove(&page); 3727 clearPages.Add(&page); 3728 break; 3729 case PAGE_STATE_FREE: 3730 DEBUG_PAGE_ACCESS_START(&page); 3731 sFreePageQueue.Remove(&page); 3732 freePages.Add(&page); 3733 break; 3734 case PAGE_STATE_CACHED: 3735 // We allocate cached pages later. 3736 cachedPages++; 3737 pageAllocated = false; 3738 break; 3739 3740 default: 3741 // Probably a page was cached when our caller checked. Now it's 3742 // gone and we have to abort. 3743 noPage = true; 3744 break; 3745 } 3746 3747 if (noPage) 3748 break; 3749 3750 if (pageAllocated) { 3751 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3752 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3753 page.usage_count = 0; 3754 page.accessed = false; 3755 page.modified = false; 3756 } 3757 } 3758 3759 if (i < length) { 3760 // failed to allocate a page -- free all that we've got 3761 allocate_page_run_cleanup(freePages, clearPages); 3762 return i; 3763 } 3764 3765 freeClearQueueLocker.Unlock(); 3766 3767 if (cachedPages > 0) { 3768 // allocate the pages that weren't free but cached 3769 page_num_t freedCachedPages = 0; 3770 page_num_t nextIndex = start; 3771 vm_page* freePage = freePages.Head(); 3772 vm_page* clearPage = clearPages.Head(); 3773 while (cachedPages > 0) { 3774 // skip, if we've already got the page 3775 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3776 freePage = freePages.GetNext(freePage); 3777 nextIndex++; 3778 continue; 3779 } 3780 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3781 clearPage = clearPages.GetNext(clearPage); 3782 nextIndex++; 3783 continue; 3784 } 3785 3786 // free the page, if it is still cached 3787 vm_page& page = sPages[nextIndex]; 3788 if (!free_cached_page(&page, false)) { 3789 // TODO: if the page turns out to have been freed already, 3790 // there would be no need to fail 3791 break; 3792 } 3793 3794 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3795 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3796 page.usage_count = 0; 3797 page.accessed = false; 3798 page.modified = false; 3799 3800 freePages.InsertBefore(freePage, &page); 3801 freedCachedPages++; 3802 cachedPages--; 3803 nextIndex++; 3804 } 3805 3806 // If we have freed cached pages, we need to balance things. 3807 if (freedCachedPages > 0) 3808 unreserve_pages(freedCachedPages); 3809 3810 if (nextIndex - start < length) { 3811 // failed to allocate all cached pages -- free all that we've got 3812 freeClearQueueLocker.Lock(); 3813 allocate_page_run_cleanup(freePages, clearPages); 3814 freeClearQueueLocker.Unlock(); 3815 3816 return nextIndex - start; 3817 } 3818 } 3819 3820 // clear pages, if requested 3821 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3822 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3823 vm_page* page = it.Next();) { 3824 clear_page(page); 3825 } 3826 } 3827 3828 // add pages to target queue 3829 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3830 freePages.MoveFrom(&clearPages); 3831 sPageQueues[pageState].AppendUnlocked(freePages, length); 3832 } 3833 3834 // Note: We don't unreserve the pages since we pulled them out of the 3835 // free/clear queues without adjusting sUnreservedFreePages. 
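	// The caller's reservation (cf. vm_page_allocate_page_run()) should already
	// account for them; only the formerly cached pages were balanced via
	// unreserve_pages() above.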
3836 3837 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3838 AbstractTraceEntryWithStackTrace* traceEntry 3839 = TA(AllocatePageRun(start, length)); 3840 3841 for (page_num_t i = start; i < start + length; i++) 3842 sPages[i].allocation_tracking_info.Init(traceEntry); 3843 #else 3844 TA(AllocatePageRun(start, length)); 3845 #endif 3846 3847 return length; 3848 } 3849 3850 3851 /*! Allocate a physically contiguous range of pages. 3852 3853 \param flags Page allocation flags. Encodes the state the function shall 3854 set the allocated pages to, whether the pages shall be marked busy 3855 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3856 (VM_PAGE_ALLOC_CLEAR). 3857 \param length The number of contiguous pages to allocate. 3858 \param restrictions Restrictions to the physical addresses of the page run 3859 to allocate, including \c low_address, the first acceptable physical 3860 address where the page run may start, \c high_address, the last 3861 acceptable physical address where the page run may end (i.e. it must 3862 hold \code runStartAddress + length <= high_address \endcode), 3863 \c alignment, the alignment of the page run start address, and 3864 \c boundary, multiples of which the page run must not cross. 3865 Values set to \c 0 are ignored. 3866 \param priority The page reservation priority (as passed to 3867 vm_page_reserve_pages()). 3868 \return The first page of the allocated page run on success; \c NULL 3869 when the allocation failed. 3870 */ 3871 vm_page* 3872 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3873 const physical_address_restrictions* restrictions, int priority) 3874 { 3875 // compute start and end page index 3876 page_num_t requestedStart 3877 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3878 - sPhysicalPageOffset; 3879 page_num_t start = requestedStart; 3880 page_num_t end; 3881 if (restrictions->high_address > 0) { 3882 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3883 sPhysicalPageOffset) 3884 - sPhysicalPageOffset; 3885 end = std::min(end, sNumPages); 3886 } else 3887 end = sNumPages; 3888 3889 // compute alignment mask 3890 page_num_t alignmentMask 3891 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3892 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3893 // alignment must be a power of 2 3894 3895 // compute the boundary mask 3896 uint32 boundaryMask = 0; 3897 if (restrictions->boundary != 0) { 3898 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3899 // boundary must be a power of two and not less than alignment and 3900 // length 3901 ASSERT(((boundary - 1) & boundary) == 0); 3902 ASSERT(boundary >= alignmentMask + 1); 3903 ASSERT(boundary >= length); 3904 3905 boundaryMask = -boundary; 3906 } 3907 3908 vm_page_reservation reservation; 3909 vm_page_reserve_pages(&reservation, length, priority); 3910 3911 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3912 3913 // First we try to get a run with free pages only. If that fails, we also 3914 // consider cached pages. If there are only few free pages and many cached 3915 // ones, the odds are that we won't find enough contiguous ones, so we skip 3916 // the first iteration in this case. 3917 int32 freePages = sUnreservedFreePages; 3918 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3919 3920 for (;;) { 3921 if (alignmentMask != 0 || boundaryMask != 0) { 3922 page_num_t offsetStart = start + sPhysicalPageOffset; 3923 3924 // enforce alignment 3925 if ((offsetStart & alignmentMask) != 0) 3926 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3927 3928 // enforce boundary 3929 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3930 + length - 1)) & boundaryMask) != 0) { 3931 offsetStart = (offsetStart + length - 1) & boundaryMask; 3932 } 3933 3934 start = offsetStart - sPhysicalPageOffset; 3935 } 3936 3937 if (start + length > end) { 3938 if (useCached == 0) { 3939 // The first iteration with free pages only was unsuccessful. 3940 // Try again also considering cached pages. 3941 useCached = 1; 3942 start = requestedStart; 3943 continue; 3944 } 3945 3946 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3947 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3948 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3949 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart, 3950 end, restrictions->alignment, restrictions->boundary); 3951 3952 freeClearQueueLocker.Unlock(); 3953 vm_page_unreserve_pages(&reservation); 3954 return NULL; 3955 } 3956 3957 bool foundRun = true; 3958 page_num_t i; 3959 for (i = 0; i < length; i++) { 3960 uint32 pageState = sPages[start + i].State(); 3961 if (pageState != PAGE_STATE_FREE 3962 && pageState != PAGE_STATE_CLEAR 3963 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3964 foundRun = false; 3965 break; 3966 } 3967 } 3968 3969 if (foundRun) { 3970 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3971 if (i == length) 3972 return &sPages[start]; 3973 3974 // apparently a cached page couldn't be allocated -- skip it and 3975 // continue 3976 freeClearQueueLocker.Lock(); 3977 } 3978 3979 start += i + 1; 3980 } 3981 } 3982 3983 3984 vm_page * 3985 vm_page_at_index(int32 index) 3986 { 3987 return &sPages[index]; 3988 } 3989 3990 3991 vm_page * 3992 vm_lookup_page(page_num_t pageNumber) 3993 { 3994 if (pageNumber < sPhysicalPageOffset) 3995 return NULL; 3996 3997 pageNumber -= sPhysicalPageOffset; 3998 if (pageNumber >= sNumPages) 3999 return NULL; 4000 4001 return &sPages[pageNumber]; 4002 } 4003 4004 4005 bool 4006 vm_page_is_dummy(struct vm_page *page) 4007 { 4008 return page < sPages || page >= sPages + sNumPages; 4009 } 4010 4011 4012 /*! Free the page that belonged to a certain cache. 4013 You can use vm_page_set_state() manually if you prefer, but only 4014 if the page does not equal PAGE_STATE_MODIFIED. 4015 4016 \param cache The cache the page was previously owned by or NULL. The page 4017 must have been removed from its cache before calling this method in 4018 either case. 4019 \param page The page to free. 4020 \param reservation If not NULL, the page count of the reservation will be 4021 incremented, thus allowing to allocate another page for the freed one at 4022 a later time. 
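	A minimal usage sketch (assuming the page was removed from its locked
	cache right before, and no reservation is kept):
	\code
	cache->RemovePage(page);
	vm_page_free_etc(cache, page, NULL);
		// NULL reservation -- the page is returned to the free pool
	\endcode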
4023 */ 4024 void 4025 vm_page_free_etc(VMCache* cache, vm_page* page, 4026 vm_page_reservation* reservation) 4027 { 4028 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 4029 && page->State() != PAGE_STATE_CLEAR); 4030 4031 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 4032 atomic_add(&sModifiedTemporaryPages, -1); 4033 4034 free_page(page, false); 4035 if (reservation == NULL) 4036 unreserve_pages(1); 4037 } 4038 4039 4040 void 4041 vm_page_set_state(vm_page *page, int pageState) 4042 { 4043 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 4044 && page->State() != PAGE_STATE_CLEAR); 4045 4046 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 4047 free_page(page, pageState == PAGE_STATE_CLEAR); 4048 unreserve_pages(1); 4049 } else 4050 set_page_state(page, pageState); 4051 } 4052 4053 4054 /*! Moves a page to either the tail of the head of its current queue, 4055 depending on \a tail. 4056 The page must have a cache and the cache must be locked! 4057 */ 4058 void 4059 vm_page_requeue(struct vm_page *page, bool tail) 4060 { 4061 PAGE_ASSERT(page, page->Cache() != NULL); 4062 page->Cache()->AssertLocked(); 4063 // DEBUG_PAGE_ACCESS_CHECK(page); 4064 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 4065 // when it requeues busy pages. The reason is that vm_soft_fault() 4066 // (respectively fault_get_page()) and the file cache keep newly 4067 // allocated pages accessed while they are reading them from disk. It 4068 // would probably be better to change that code and reenable this 4069 // check. 4070 4071 VMPageQueue *queue = NULL; 4072 4073 switch (page->State()) { 4074 case PAGE_STATE_ACTIVE: 4075 queue = &sActivePageQueue; 4076 break; 4077 case PAGE_STATE_INACTIVE: 4078 queue = &sInactivePageQueue; 4079 break; 4080 case PAGE_STATE_MODIFIED: 4081 queue = &sModifiedPageQueue; 4082 break; 4083 case PAGE_STATE_CACHED: 4084 queue = &sCachedPageQueue; 4085 break; 4086 case PAGE_STATE_FREE: 4087 case PAGE_STATE_CLEAR: 4088 panic("vm_page_requeue() called for free/clear page %p", page); 4089 return; 4090 case PAGE_STATE_WIRED: 4091 case PAGE_STATE_UNUSED: 4092 return; 4093 default: 4094 panic("vm_page_touch: vm_page %p in invalid state %d\n", 4095 page, page->State()); 4096 break; 4097 } 4098 4099 queue->RequeueUnlocked(page, tail); 4100 } 4101 4102 4103 page_num_t 4104 vm_page_num_pages(void) 4105 { 4106 return sNumPages - sNonExistingPages; 4107 } 4108 4109 4110 /*! There is a subtle distinction between the page counts returned by 4111 this function and vm_page_num_free_pages(): 4112 The latter returns the number of pages that are completely uncommitted, 4113 whereas this one returns the number of pages that are available for 4114 use by being reclaimed as well (IOW it factors in things like cache pages 4115 as available). 4116 */ 4117 page_num_t 4118 vm_page_num_available_pages(void) 4119 { 4120 return vm_available_memory() / B_PAGE_SIZE; 4121 } 4122 4123 4124 page_num_t 4125 vm_page_num_free_pages(void) 4126 { 4127 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 4128 return count > 0 ? count : 0; 4129 } 4130 4131 4132 page_num_t 4133 vm_page_num_unused_pages(void) 4134 { 4135 int32 count = sUnreservedFreePages; 4136 return count > 0 ? count : 0; 4137 } 4138 4139 4140 void 4141 vm_page_get_stats(system_info *info) 4142 { 4143 // Note: there's no locking protecting any of the queues or counters here, 4144 // so we run the risk of getting bogus values when evaluating them 4145 // throughout this function. 
As these stats are for informational purposes
4146 	// only, it is not really worth introducing such locking. Therefore we just
4147 	// ensure that we don't under- or overflow any of the values.
4148 
4149 	// The pages used for the block cache buffers. Those should not be counted
4150 	// as used but as cached pages.
4151 	// TODO: We should subtract the blocks that are in use ATM, since those
4152 	// can't really be freed in a low memory situation.
4153 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4154 	info->block_cache_pages = blockCachePages;
4155 
4156 	// Non-temporary modified pages are special as they represent pages that
4157 	// can be written back, so they could be freed if necessary, basically
4158 	// making them into cached pages with a higher overhead. The
4159 	// modified queue count is therefore split into temporary and non-temporary
4160 	// counts that are then added to the corresponding number.
4161 	page_num_t modifiedNonTemporaryPages
4162 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4163 
4164 	info->max_pages = vm_page_num_pages();
4165 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4166 		+ blockCachePages;
4167 
4168 	// max_pages is composed of:
4169 	//  active + inactive + unused + wired + modified + cached + free + clear
4170 	// So taking out the cached (including modified non-temporary), free and
4171 	// clear ones leaves us with all used pages.
4172 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4173 		+ sClearPageQueue.Count();
4174 	info->used_pages = subtractPages > info->max_pages
4175 		? 0 : info->max_pages - subtractPages;
4176 
4177 	if (info->used_pages + info->cached_pages > info->max_pages) {
4178 		// Something was shuffled around while we were summing up the counts.
4179 		// Make the values sane, preferring the worse case of more used pages.
4180 		info->cached_pages = info->max_pages - info->used_pages;
4181 	}
4182 
4183 	info->page_faults = vm_num_page_faults();
4184 	info->ignored_pages = sIgnoredPages;
4185 
4186 	// TODO: We don't consider pages used for page directories/tables yet.
4187 }
4188 
4189 
4190 /*! Returns the greatest address within the last page of accessible physical
4191 	memory.
4192 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4193 	means that the last page ends at exactly 4 GB.
4194 */
4195 phys_addr_t
4196 vm_page_max_address()
4197 {
4198 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4199 }
4200 
4201 
4202 RANGE_MARKER_FUNCTION_END(vm_page)
4203 