/*
 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <string.h>
#include <stdlib.h>

#include <algorithm>

#include <KernelExport.h>
#include <OS.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm_translation_map.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <condition_variable.h>
#include <elf.h>
#include <heap.h>
#include <kernel.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_priv.h>
#include <vm/vm_page.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "IORequest.h"
#include "PageCacheLocker.h"
#include "VMAnonymousCache.h"
#include "VMPageQueue.h"


//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

//#define TRACE_VM_DAEMONS
#ifdef TRACE_VM_DAEMONS
#define TRACE_DAEMON(x...) dprintf(x)
#else
#define TRACE_DAEMON(x...) do {} while (false)
#endif

//#define TRACK_PAGE_USAGE_STATS 1

#define PAGE_ASSERT(page, condition) \
	ASSERT_PRINT((condition), "page: %p", (page))

#define SCRUB_SIZE 32
	// this many pages will be cleared at once in the page scrubber thread

#define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
	// maximum I/O priority of the page writer
#define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
	// the maximum I/O priority shall be reached when this many pages need to
	// be written


// The number of reserved pages an allocation of the given priority must not
// touch.
static const size_t kPageReserveForPriority[] = {
	VM_PAGE_RESERVE_USER,		// user
	VM_PAGE_RESERVE_SYSTEM,		// system
	0							// VIP
};

// Minimum number of free pages the page daemon will try to achieve.
static uint32 sFreePagesTarget;
static uint32 sFreeOrCachedPagesTarget;
static uint32 sInactivePagesTarget;

// Wait interval between page daemon runs.
static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec

// Number of idle runs after which we want to have processed the full active
// queue.
static const uint32 kIdleRunsForFullQueue = 20;

// Maximum limit for the vm_page::usage_count.
static const int32 kPageUsageMax = 64;
// The vm_page::usage_count bonus an accessed page receives in a scan.
static const int32 kPageUsageAdvance = 3;
// The vm_page::usage_count penalty an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1; 102 103 int32 gMappedPagesCount; 104 105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 106 107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 113 114 static vm_page *sPages; 115 static page_num_t sPhysicalPageOffset; 116 static page_num_t sNumPages; 117 static page_num_t sNonExistingPages; 118 // pages in the sPages array that aren't backed by physical memory 119 static uint64 sIgnoredPages; 120 // pages of physical memory ignored by the boot loader (and thus not 121 // available here) 122 static int32 sUnreservedFreePages; 123 static int32 sUnsatisfiedPageReservations; 124 static int32 sModifiedTemporaryPages; 125 126 static ConditionVariable sFreePageCondition; 127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 128 129 // This lock must be used whenever the free or clear page queues are changed. 130 // If you need to work on both queues at the same time, you need to hold a write 131 // lock, otherwise, a read lock suffices (each queue still has a spinlock to 132 // guard against concurrent changes). 133 static rw_lock sFreePageQueuesLock 134 = RW_LOCK_INITIALIZER("free/clear page queues"); 135 136 #ifdef TRACK_PAGE_USAGE_STATS 137 static page_num_t sPageUsageArrays[512]; 138 static page_num_t* sPageUsage = sPageUsageArrays; 139 static page_num_t sPageUsagePageCount; 140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 141 static page_num_t sNextPageUsagePageCount; 142 #endif 143 144 145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 146 147 struct caller_info { 148 addr_t caller; 149 size_t count; 150 }; 151 152 static const int32 kCallerInfoTableSize = 1024; 153 static caller_info sCallerInfoTable[kCallerInfoTableSize]; 154 static int32 sCallerInfoCount = 0; 155 156 static caller_info* get_caller_info(addr_t caller); 157 158 159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page) 160 161 static const addr_t kVMPageCodeAddressRange[] = { 162 RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page) 163 }; 164 165 #endif 166 167 168 RANGE_MARKER_FUNCTION_BEGIN(vm_page) 169 170 171 struct page_stats { 172 int32 totalFreePages; 173 int32 unsatisfiedReservations; 174 int32 cachedPages; 175 }; 176 177 178 struct PageReservationWaiter 179 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 180 Thread* thread; 181 uint32 dontTouch; // reserve not to touch 182 uint32 missing; // pages missing for the reservation 183 int32 threadPriority; 184 185 bool operator<(const PageReservationWaiter& other) const 186 { 187 // Implies an order by descending VM priority (ascending dontTouch) 188 // and (secondarily) descending thread priority. 
189 if (dontTouch != other.dontTouch) 190 return dontTouch < other.dontTouch; 191 return threadPriority > other.threadPriority; 192 } 193 }; 194 195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 196 static PageReservationWaiterList sPageReservationWaiters; 197 198 199 struct DaemonCondition { 200 void Init(const char* name) 201 { 202 mutex_init(&fLock, "daemon condition"); 203 fCondition.Init(this, name); 204 fActivated = false; 205 } 206 207 bool Lock() 208 { 209 return mutex_lock(&fLock) == B_OK; 210 } 211 212 void Unlock() 213 { 214 mutex_unlock(&fLock); 215 } 216 217 bool Wait(bigtime_t timeout, bool clearActivated) 218 { 219 MutexLocker locker(fLock); 220 if (clearActivated) 221 fActivated = false; 222 else if (fActivated) 223 return true; 224 225 ConditionVariableEntry entry; 226 fCondition.Add(&entry); 227 228 locker.Unlock(); 229 230 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 231 } 232 233 void WakeUp() 234 { 235 if (fActivated) 236 return; 237 238 MutexLocker locker(fLock); 239 fActivated = true; 240 fCondition.NotifyOne(); 241 } 242 243 void ClearActivated() 244 { 245 MutexLocker locker(fLock); 246 fActivated = false; 247 } 248 249 private: 250 mutex fLock; 251 ConditionVariable fCondition; 252 bool fActivated; 253 }; 254 255 256 static DaemonCondition sPageWriterCondition; 257 static DaemonCondition sPageDaemonCondition; 258 259 260 #if PAGE_ALLOCATION_TRACING 261 262 namespace PageAllocationTracing { 263 264 class ReservePages : public AbstractTraceEntry { 265 public: 266 ReservePages(uint32 count) 267 : 268 fCount(count) 269 { 270 Initialized(); 271 } 272 273 virtual void AddDump(TraceOutput& out) 274 { 275 out.Print("page reserve: %" B_PRIu32, fCount); 276 } 277 278 private: 279 uint32 fCount; 280 }; 281 282 283 class UnreservePages : public AbstractTraceEntry { 284 public: 285 UnreservePages(uint32 count) 286 : 287 fCount(count) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("page unreserve: %" B_PRId32, fCount); 295 } 296 297 private: 298 uint32 fCount; 299 }; 300 301 302 class AllocatePage 303 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 304 public: 305 AllocatePage(page_num_t pageNumber) 306 : 307 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 308 fPageNumber(pageNumber) 309 { 310 Initialized(); 311 } 312 313 virtual void AddDump(TraceOutput& out) 314 { 315 out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber); 316 } 317 318 private: 319 page_num_t fPageNumber; 320 }; 321 322 323 class AllocatePageRun 324 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 325 public: 326 AllocatePageRun(page_num_t startPage, uint32 length) 327 : 328 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 329 fStartPage(startPage), 330 fLength(length) 331 { 332 Initialized(); 333 } 334 335 virtual void AddDump(TraceOutput& out) 336 { 337 out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %" 338 B_PRIu32, fStartPage, fLength); 339 } 340 341 private: 342 page_num_t fStartPage; 343 uint32 fLength; 344 }; 345 346 347 class FreePage 348 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 349 public: 350 FreePage(page_num_t pageNumber) 351 : 352 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 353 fPageNumber(pageNumber) 354 { 355 Initialized(); 356 } 357 358 virtual void AddDump(TraceOutput& out) 359 { 360 out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber); 361 } 362 363 private: 364 page_num_t 
fPageNumber; 365 }; 366 367 368 class ScrubbingPages : public AbstractTraceEntry { 369 public: 370 ScrubbingPages(uint32 count) 371 : 372 fCount(count) 373 { 374 Initialized(); 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("page scrubbing: %" B_PRId32, fCount); 380 } 381 382 private: 383 uint32 fCount; 384 }; 385 386 387 class ScrubbedPages : public AbstractTraceEntry { 388 public: 389 ScrubbedPages(uint32 count) 390 : 391 fCount(count) 392 { 393 Initialized(); 394 } 395 396 virtual void AddDump(TraceOutput& out) 397 { 398 out.Print("page scrubbed: %" B_PRId32, fCount); 399 } 400 401 private: 402 uint32 fCount; 403 }; 404 405 406 class StolenPage : public AbstractTraceEntry { 407 public: 408 StolenPage() 409 { 410 Initialized(); 411 } 412 413 virtual void AddDump(TraceOutput& out) 414 { 415 out.Print("page stolen"); 416 } 417 }; 418 419 } // namespace PageAllocationTracing 420 421 # define TA(x) new(std::nothrow) PageAllocationTracing::x 422 423 #else 424 # define TA(x) 425 #endif // PAGE_ALLOCATION_TRACING 426 427 428 #if PAGE_DAEMON_TRACING 429 430 namespace PageDaemonTracing { 431 432 class ActivatePage : public AbstractTraceEntry { 433 public: 434 ActivatePage(vm_page* page) 435 : 436 fCache(page->cache), 437 fPage(page) 438 { 439 Initialized(); 440 } 441 442 virtual void AddDump(TraceOutput& out) 443 { 444 out.Print("page activated: %p, cache: %p", fPage, fCache); 445 } 446 447 private: 448 VMCache* fCache; 449 vm_page* fPage; 450 }; 451 452 453 class DeactivatePage : public AbstractTraceEntry { 454 public: 455 DeactivatePage(vm_page* page) 456 : 457 fCache(page->cache), 458 fPage(page) 459 { 460 Initialized(); 461 } 462 463 virtual void AddDump(TraceOutput& out) 464 { 465 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 466 } 467 468 private: 469 VMCache* fCache; 470 vm_page* fPage; 471 }; 472 473 474 class FreedPageSwap : public AbstractTraceEntry { 475 public: 476 FreedPageSwap(vm_page* page) 477 : 478 fCache(page->cache), 479 fPage(page) 480 { 481 Initialized(); 482 } 483 484 virtual void AddDump(TraceOutput& out) 485 { 486 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 487 } 488 489 private: 490 VMCache* fCache; 491 vm_page* fPage; 492 }; 493 494 } // namespace PageDaemonTracing 495 496 # define TD(x) new(std::nothrow) PageDaemonTracing::x 497 498 #else 499 # define TD(x) 500 #endif // PAGE_DAEMON_TRACING 501 502 503 #if PAGE_WRITER_TRACING 504 505 namespace PageWriterTracing { 506 507 class WritePage : public AbstractTraceEntry { 508 public: 509 WritePage(vm_page* page) 510 : 511 fCache(page->Cache()), 512 fPage(page) 513 { 514 Initialized(); 515 } 516 517 virtual void AddDump(TraceOutput& out) 518 { 519 out.Print("page write: %p, cache: %p", fPage, fCache); 520 } 521 522 private: 523 VMCache* fCache; 524 vm_page* fPage; 525 }; 526 527 } // namespace PageWriterTracing 528 529 # define TPW(x) new(std::nothrow) PageWriterTracing::x 530 531 #else 532 # define TPW(x) 533 #endif // PAGE_WRITER_TRACING 534 535 536 #if PAGE_STATE_TRACING 537 538 namespace PageStateTracing { 539 540 class SetPageState : public AbstractTraceEntry { 541 public: 542 SetPageState(vm_page* page, uint8 newState) 543 : 544 fPage(page), 545 fOldState(page->State()), 546 fNewState(newState), 547 fBusy(page->busy), 548 fWired(page->WiredCount() > 0), 549 fMapped(!page->mappings.IsEmpty()), 550 fAccessed(page->accessed), 551 fModified(page->modified) 552 { 553 #if PAGE_STATE_TRACING_STACK_TRACE 554 fStackTrace = capture_tracing_stack_trace( 555 
PAGE_STATE_TRACING_STACK_TRACE, 0, true); 556 // Don't capture userland stack trace to avoid potential 557 // deadlocks. 558 #endif 559 Initialized(); 560 } 561 562 #if PAGE_STATE_TRACING_STACK_TRACE 563 virtual void DumpStackTrace(TraceOutput& out) 564 { 565 out.PrintStackTrace(fStackTrace); 566 } 567 #endif 568 569 virtual void AddDump(TraceOutput& out) 570 { 571 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 572 fBusy ? 'b' : '-', 573 fWired ? 'w' : '-', 574 fMapped ? 'm' : '-', 575 fAccessed ? 'a' : '-', 576 fModified ? 'm' : '-', 577 page_state_to_string(fOldState), 578 page_state_to_string(fNewState)); 579 } 580 581 private: 582 vm_page* fPage; 583 #if PAGE_STATE_TRACING_STACK_TRACE 584 tracing_stack_trace* fStackTrace; 585 #endif 586 uint8 fOldState; 587 uint8 fNewState; 588 bool fBusy : 1; 589 bool fWired : 1; 590 bool fMapped : 1; 591 bool fAccessed : 1; 592 bool fModified : 1; 593 }; 594 595 } // namespace PageStateTracing 596 597 # define TPS(x) new(std::nothrow) PageStateTracing::x 598 599 #else 600 # define TPS(x) 601 #endif // PAGE_STATE_TRACING 602 603 604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 605 606 namespace BKernel { 607 608 class AllocationTrackingCallback { 609 public: 610 virtual ~AllocationTrackingCallback(); 611 612 virtual bool ProcessTrackingInfo( 613 AllocationTrackingInfo* info, 614 page_num_t pageNumber) = 0; 615 }; 616 617 } 618 619 using BKernel::AllocationTrackingCallback; 620 621 622 class AllocationCollectorCallback : public AllocationTrackingCallback { 623 public: 624 AllocationCollectorCallback(bool resetInfos) 625 : 626 fResetInfos(resetInfos) 627 { 628 } 629 630 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 631 page_num_t pageNumber) 632 { 633 if (!info->IsInitialized()) 634 return true; 635 636 addr_t caller = 0; 637 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 638 639 if (traceEntry != NULL && info->IsTraceEntryValid()) { 640 caller = tracing_find_caller_in_stack_trace( 641 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 642 } 643 644 caller_info* callerInfo = get_caller_info(caller); 645 if (callerInfo == NULL) { 646 kprintf("out of space for caller infos\n"); 647 return false; 648 } 649 650 callerInfo->count++; 651 652 if (fResetInfos) 653 info->Clear(); 654 655 return true; 656 } 657 658 private: 659 bool fResetInfos; 660 }; 661 662 663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback { 664 public: 665 AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter, 666 team_id teamFilter, thread_id threadFilter) 667 : 668 fPrintStackTrace(printStackTrace), 669 fPageFilter(pageFilter), 670 fTeamFilter(teamFilter), 671 fThreadFilter(threadFilter) 672 { 673 } 674 675 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 676 page_num_t pageNumber) 677 { 678 if (!info->IsInitialized()) 679 return true; 680 681 if (fPageFilter != 0 && pageNumber != fPageFilter) 682 return true; 683 684 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 685 if (traceEntry != NULL && !info->IsTraceEntryValid()) 686 traceEntry = NULL; 687 688 if (traceEntry != NULL) { 689 if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter) 690 return true; 691 if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter) 692 return true; 693 } else { 694 // we need the info if we have filters set 695 if (fTeamFilter != -1 || fThreadFilter != -1) 696 return true; 697 } 698 699 kprintf("page number %#" B_PRIxPHYSADDR, pageNumber); 700 701 if (traceEntry 
!= NULL) { 702 kprintf(", team: %" B_PRId32 ", thread %" B_PRId32 703 ", time %" B_PRId64 "\n", traceEntry->TeamID(), 704 traceEntry->ThreadID(), traceEntry->Time()); 705 706 if (fPrintStackTrace) 707 tracing_print_stack_trace(traceEntry->StackTrace()); 708 } else 709 kprintf("\n"); 710 711 return true; 712 } 713 714 private: 715 bool fPrintStackTrace; 716 page_num_t fPageFilter; 717 team_id fTeamFilter; 718 thread_id fThreadFilter; 719 }; 720 721 722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback { 723 public: 724 AllocationDetailPrinterCallback(addr_t caller) 725 : 726 fCaller(caller) 727 { 728 } 729 730 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 731 page_num_t pageNumber) 732 { 733 if (!info->IsInitialized()) 734 return true; 735 736 addr_t caller = 0; 737 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 738 if (traceEntry != NULL && !info->IsTraceEntryValid()) 739 traceEntry = NULL; 740 741 if (traceEntry != NULL) { 742 caller = tracing_find_caller_in_stack_trace( 743 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 744 } 745 746 if (caller != fCaller) 747 return true; 748 749 kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber); 750 if (traceEntry != NULL) 751 tracing_print_stack_trace(traceEntry->StackTrace()); 752 753 return true; 754 } 755 756 private: 757 addr_t fCaller; 758 }; 759 760 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 761 762 763 static int 764 find_page(int argc, char **argv) 765 { 766 struct vm_page *page; 767 addr_t address; 768 int32 index = 1; 769 int i; 770 771 struct { 772 const char* name; 773 VMPageQueue* queue; 774 } pageQueueInfos[] = { 775 { "free", &sFreePageQueue }, 776 { "clear", &sClearPageQueue }, 777 { "modified", &sModifiedPageQueue }, 778 { "active", &sActivePageQueue }, 779 { "inactive", &sInactivePageQueue }, 780 { "cached", &sCachedPageQueue }, 781 { NULL, NULL } 782 }; 783 784 if (argc < 2 785 || strlen(argv[index]) <= 2 786 || argv[index][0] != '0' 787 || argv[index][1] != 'x') { 788 kprintf("usage: find_page <address>\n"); 789 return 0; 790 } 791 792 address = strtoul(argv[index], NULL, 0); 793 page = (vm_page*)address; 794 795 for (i = 0; pageQueueInfos[i].name; i++) { 796 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 797 while (vm_page* p = it.Next()) { 798 if (p == page) { 799 kprintf("found page %p in queue %p (%s)\n", page, 800 pageQueueInfos[i].queue, pageQueueInfos[i].name); 801 return 0; 802 } 803 } 804 } 805 806 kprintf("page %p isn't in any queue\n", page); 807 808 return 0; 809 } 810 811 812 const char * 813 page_state_to_string(int state) 814 { 815 switch(state) { 816 case PAGE_STATE_ACTIVE: 817 return "active"; 818 case PAGE_STATE_INACTIVE: 819 return "inactive"; 820 case PAGE_STATE_MODIFIED: 821 return "modified"; 822 case PAGE_STATE_CACHED: 823 return "cached"; 824 case PAGE_STATE_FREE: 825 return "free"; 826 case PAGE_STATE_CLEAR: 827 return "clear"; 828 case PAGE_STATE_WIRED: 829 return "wired"; 830 case PAGE_STATE_UNUSED: 831 return "unused"; 832 default: 833 return "unknown"; 834 } 835 } 836 837 838 static int 839 dump_page(int argc, char **argv) 840 { 841 bool addressIsPointer = true; 842 bool physical = false; 843 bool searchMappings = false; 844 int32 index = 1; 845 846 while (index < argc) { 847 if (argv[index][0] != '-') 848 break; 849 850 if (!strcmp(argv[index], "-p")) { 851 addressIsPointer = false; 852 physical = true; 853 } else if (!strcmp(argv[index], "-v")) { 854 addressIsPointer = false; 855 } else if 
(!strcmp(argv[index], "-m")) { 856 searchMappings = true; 857 } else { 858 print_debugger_command_usage(argv[0]); 859 return 0; 860 } 861 862 index++; 863 } 864 865 if (index + 1 != argc) { 866 print_debugger_command_usage(argv[0]); 867 return 0; 868 } 869 870 uint64 value; 871 if (!evaluate_debug_expression(argv[index], &value, false)) 872 return 0; 873 874 uint64 pageAddress = value; 875 struct vm_page* page; 876 877 if (addressIsPointer) { 878 page = (struct vm_page *)(addr_t)pageAddress; 879 } else { 880 if (!physical) { 881 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 882 883 if (debug_get_debugged_thread()->team->address_space != NULL) 884 addressSpace = debug_get_debugged_thread()->team->address_space; 885 886 uint32 flags = 0; 887 phys_addr_t physicalAddress; 888 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 889 &physicalAddress, &flags) != B_OK 890 || (flags & PAGE_PRESENT) == 0) { 891 kprintf("Virtual address not mapped to a physical page in this " 892 "address space.\n"); 893 return 0; 894 } 895 pageAddress = physicalAddress; 896 } 897 898 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 899 } 900 901 kprintf("PAGE: %p\n", page); 902 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 903 page->queue_link.previous); 904 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", 905 page->physical_page_number); 906 kprintf("cache: %p\n", page->Cache()); 907 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset); 908 kprintf("cache_next: %p\n", page->cache_next); 909 kprintf("state: %s\n", page_state_to_string(page->State())); 910 kprintf("wired_count: %d\n", page->WiredCount()); 911 kprintf("usage_count: %d\n", page->usage_count); 912 kprintf("busy: %d\n", page->busy); 913 kprintf("busy_writing: %d\n", page->busy_writing); 914 kprintf("accessed: %d\n", page->accessed); 915 kprintf("modified: %d\n", page->modified); 916 #if DEBUG_PAGE_QUEUE 917 kprintf("queue: %p\n", page->queue); 918 #endif 919 #if DEBUG_PAGE_ACCESS 920 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 921 #endif 922 kprintf("area mappings:\n"); 923 924 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 925 vm_page_mapping *mapping; 926 while ((mapping = iterator.Next()) != NULL) { 927 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 928 mapping = mapping->page_link.next; 929 } 930 931 if (searchMappings) { 932 kprintf("all mappings:\n"); 933 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 934 while (addressSpace != NULL) { 935 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 936 for (addr_t address = addressSpace->Base(); pageCount != 0; 937 address += B_PAGE_SIZE, pageCount--) { 938 phys_addr_t physicalAddress; 939 uint32 flags = 0; 940 if (addressSpace->TranslationMap()->QueryInterrupt(address, 941 &physicalAddress, &flags) == B_OK 942 && (flags & PAGE_PRESENT) != 0 943 && physicalAddress / B_PAGE_SIZE 944 == page->physical_page_number) { 945 VMArea* area = addressSpace->LookupArea(address); 946 kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#" 947 B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(), 948 area != NULL ? area->id : -1, address, 949 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 950 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 951 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 952 (flags & PAGE_ACCESSED) != 0 ? 
" accessed" : ""); 953 } 954 } 955 addressSpace = VMAddressSpace::DebugNext(addressSpace); 956 } 957 } 958 959 set_debug_variable("_cache", (addr_t)page->Cache()); 960 #if DEBUG_PAGE_ACCESS 961 set_debug_variable("_accessor", page->accessing_thread); 962 #endif 963 964 return 0; 965 } 966 967 968 static int 969 dump_page_queue(int argc, char **argv) 970 { 971 struct VMPageQueue *queue; 972 973 if (argc < 2) { 974 kprintf("usage: page_queue <address/name> [list]\n"); 975 return 0; 976 } 977 978 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 979 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 980 else if (!strcmp(argv[1], "free")) 981 queue = &sFreePageQueue; 982 else if (!strcmp(argv[1], "clear")) 983 queue = &sClearPageQueue; 984 else if (!strcmp(argv[1], "modified")) 985 queue = &sModifiedPageQueue; 986 else if (!strcmp(argv[1], "active")) 987 queue = &sActivePageQueue; 988 else if (!strcmp(argv[1], "inactive")) 989 queue = &sInactivePageQueue; 990 else if (!strcmp(argv[1], "cached")) 991 queue = &sCachedPageQueue; 992 else { 993 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 994 return 0; 995 } 996 997 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %" 998 B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(), 999 queue->Count()); 1000 1001 if (argc == 3) { 1002 struct vm_page *page = queue->Head(); 1003 1004 kprintf("page cache type state wired usage\n"); 1005 for (page_num_t i = 0; page; i++, page = queue->Next(page)) { 1006 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 1007 vm_cache_type_to_string(page->Cache()->type), 1008 page_state_to_string(page->State()), 1009 page->WiredCount(), page->usage_count); 1010 } 1011 } 1012 return 0; 1013 } 1014 1015 1016 static int 1017 dump_page_stats(int argc, char **argv) 1018 { 1019 page_num_t swappableModified = 0; 1020 page_num_t swappableModifiedInactive = 0; 1021 1022 size_t counter[8]; 1023 size_t busyCounter[8]; 1024 memset(counter, 0, sizeof(counter)); 1025 memset(busyCounter, 0, sizeof(busyCounter)); 1026 1027 struct page_run { 1028 page_num_t start; 1029 page_num_t end; 1030 1031 page_num_t Length() const { return end - start; } 1032 }; 1033 1034 page_run currentFreeRun = { 0, 0 }; 1035 page_run currentCachedRun = { 0, 0 }; 1036 page_run longestFreeRun = { 0, 0 }; 1037 page_run longestCachedRun = { 0, 0 }; 1038 1039 for (page_num_t i = 0; i < sNumPages; i++) { 1040 if (sPages[i].State() > 7) { 1041 panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i, 1042 &sPages[i]); 1043 } 1044 1045 uint32 pageState = sPages[i].State(); 1046 1047 counter[pageState]++; 1048 if (sPages[i].busy) 1049 busyCounter[pageState]++; 1050 1051 if (pageState == PAGE_STATE_MODIFIED 1052 && sPages[i].Cache() != NULL 1053 && sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) { 1054 swappableModified++; 1055 if (sPages[i].usage_count == 0) 1056 swappableModifiedInactive++; 1057 } 1058 1059 // track free and cached pages runs 1060 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 1061 currentFreeRun.end = i + 1; 1062 currentCachedRun.end = i + 1; 1063 } else { 1064 if (currentFreeRun.Length() > longestFreeRun.Length()) 1065 longestFreeRun = currentFreeRun; 1066 currentFreeRun.start = currentFreeRun.end = i + 1; 1067 1068 if (pageState == PAGE_STATE_CACHED) { 1069 currentCachedRun.end = i + 1; 1070 } else { 1071 if (currentCachedRun.Length() > longestCachedRun.Length()) 1072 longestCachedRun = currentCachedRun; 1073 currentCachedRun.start = currentCachedRun.end 
= i + 1; 1074 } 1075 } 1076 } 1077 1078 kprintf("page stats:\n"); 1079 kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages); 1080 1081 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1082 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 1083 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1084 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 1085 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1086 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 1087 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1088 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 1089 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1090 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 1091 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1092 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 1093 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 1094 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 1095 1096 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 1097 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 1098 sUnsatisfiedPageReservations); 1099 kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount); 1100 kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %" 1101 B_PRIuPHYSADDR ")\n", longestFreeRun.Length(), 1102 sPages[longestFreeRun.start].physical_page_number); 1103 kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %" 1104 B_PRIuPHYSADDR ")\n", longestCachedRun.Length(), 1105 sPages[longestCachedRun.start].physical_page_number); 1106 1107 kprintf("waiting threads:\n"); 1108 for (PageReservationWaiterList::Iterator it 1109 = sPageReservationWaiters.GetIterator(); 1110 PageReservationWaiter* waiter = it.Next();) { 1111 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 1112 ", don't touch: %6" B_PRIu32 "\n", waiter->thread->id, 1113 waiter->missing, waiter->dontTouch); 1114 } 1115 1116 kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue, 1117 sFreePageQueue.Count()); 1118 kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue, 1119 sClearPageQueue.Count()); 1120 kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32 1121 " temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %" 1122 B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 1123 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 1124 kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n", 1125 &sActivePageQueue, sActivePageQueue.Count()); 1126 kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n", 1127 &sInactivePageQueue, sInactivePageQueue.Count()); 1128 kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n", 1129 &sCachedPageQueue, sCachedPageQueue.Count()); 1130 return 0; 1131 } 1132 1133 1134 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1135 1136 static caller_info* 1137 get_caller_info(addr_t caller) 1138 { 1139 // find the caller info 1140 for (int32 i = 0; i < sCallerInfoCount; i++) { 1141 if (caller == sCallerInfoTable[i].caller) 1142 return &sCallerInfoTable[i]; 1143 } 1144 1145 // not found, add a new entry, if there are free slots 1146 if (sCallerInfoCount >= kCallerInfoTableSize) 1147 return NULL; 1148 1149 caller_info* info = &sCallerInfoTable[sCallerInfoCount++]; 1150 info->caller = caller; 1151 info->count = 0; 1152 1153 return info; 1154 } 1155 1156 1157 static int 1158 
caller_info_compare_count(const void* _a, const void* _b) 1159 { 1160 const caller_info* a = (const caller_info*)_a; 1161 const caller_info* b = (const caller_info*)_b; 1162 return (int)(b->count - a->count); 1163 } 1164 1165 1166 static int 1167 dump_page_allocations_per_caller(int argc, char** argv) 1168 { 1169 bool resetAllocationInfos = false; 1170 bool printDetails = false; 1171 addr_t caller = 0; 1172 1173 for (int32 i = 1; i < argc; i++) { 1174 if (strcmp(argv[i], "-d") == 0) { 1175 uint64 callerAddress; 1176 if (++i >= argc 1177 || !evaluate_debug_expression(argv[i], &callerAddress, true)) { 1178 print_debugger_command_usage(argv[0]); 1179 return 0; 1180 } 1181 1182 caller = callerAddress; 1183 printDetails = true; 1184 } else if (strcmp(argv[i], "-r") == 0) { 1185 resetAllocationInfos = true; 1186 } else { 1187 print_debugger_command_usage(argv[0]); 1188 return 0; 1189 } 1190 } 1191 1192 sCallerInfoCount = 0; 1193 1194 AllocationCollectorCallback collectorCallback(resetAllocationInfos); 1195 AllocationDetailPrinterCallback detailsCallback(caller); 1196 AllocationTrackingCallback& callback = printDetails 1197 ? (AllocationTrackingCallback&)detailsCallback 1198 : (AllocationTrackingCallback&)collectorCallback; 1199 1200 for (page_num_t i = 0; i < sNumPages; i++) 1201 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1202 1203 if (printDetails) 1204 return 0; 1205 1206 // sort the array 1207 qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info), 1208 &caller_info_compare_count); 1209 1210 kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount); 1211 1212 size_t totalAllocationCount = 0; 1213 1214 kprintf(" count caller\n"); 1215 kprintf("----------------------------------\n"); 1216 for (int32 i = 0; i < sCallerInfoCount; i++) { 1217 caller_info& info = sCallerInfoTable[i]; 1218 kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller); 1219 1220 const char* symbol; 1221 const char* imageName; 1222 bool exactMatch; 1223 addr_t baseAddress; 1224 1225 if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol, 1226 &imageName, &exactMatch) == B_OK) { 1227 kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol, 1228 info.caller - baseAddress, imageName, 1229 exactMatch ? 
"" : " (nearest)"); 1230 } else 1231 kprintf("\n"); 1232 1233 totalAllocationCount += info.count; 1234 } 1235 1236 kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n", 1237 totalAllocationCount); 1238 1239 return 0; 1240 } 1241 1242 1243 static int 1244 dump_page_allocation_infos(int argc, char** argv) 1245 { 1246 page_num_t pageFilter = 0; 1247 team_id teamFilter = -1; 1248 thread_id threadFilter = -1; 1249 bool printStackTraces = false; 1250 1251 for (int32 i = 1; i < argc; i++) { 1252 if (strcmp(argv[i], "--stacktrace") == 0) 1253 printStackTraces = true; 1254 else if (strcmp(argv[i], "-p") == 0) { 1255 uint64 pageNumber; 1256 if (++i >= argc 1257 || !evaluate_debug_expression(argv[i], &pageNumber, true)) { 1258 print_debugger_command_usage(argv[0]); 1259 return 0; 1260 } 1261 1262 pageFilter = pageNumber; 1263 } else if (strcmp(argv[i], "--team") == 0) { 1264 uint64 team; 1265 if (++i >= argc 1266 || !evaluate_debug_expression(argv[i], &team, true)) { 1267 print_debugger_command_usage(argv[0]); 1268 return 0; 1269 } 1270 1271 teamFilter = team; 1272 } else if (strcmp(argv[i], "--thread") == 0) { 1273 uint64 thread; 1274 if (++i >= argc 1275 || !evaluate_debug_expression(argv[i], &thread, true)) { 1276 print_debugger_command_usage(argv[0]); 1277 return 0; 1278 } 1279 1280 threadFilter = thread; 1281 } else { 1282 print_debugger_command_usage(argv[0]); 1283 return 0; 1284 } 1285 } 1286 1287 AllocationInfoPrinterCallback callback(printStackTraces, pageFilter, 1288 teamFilter, threadFilter); 1289 1290 for (page_num_t i = 0; i < sNumPages; i++) 1291 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1292 1293 return 0; 1294 } 1295 1296 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1297 1298 1299 #ifdef TRACK_PAGE_USAGE_STATS 1300 1301 static void 1302 track_page_usage(vm_page* page) 1303 { 1304 if (page->WiredCount() == 0) { 1305 sNextPageUsage[(int32)page->usage_count + 128]++; 1306 sNextPageUsagePageCount++; 1307 } 1308 } 1309 1310 1311 static void 1312 update_page_usage_stats() 1313 { 1314 std::swap(sPageUsage, sNextPageUsage); 1315 sPageUsagePageCount = sNextPageUsagePageCount; 1316 1317 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 1318 sNextPageUsagePageCount = 0; 1319 1320 // compute average 1321 if (sPageUsagePageCount > 0) { 1322 int64 sum = 0; 1323 for (int32 i = 0; i < 256; i++) 1324 sum += (int64)sPageUsage[i] * (i - 128); 1325 1326 TRACE_DAEMON("average page usage: %f (%lu pages)\n", 1327 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 1328 } 1329 } 1330 1331 1332 static int 1333 dump_page_usage_stats(int argc, char** argv) 1334 { 1335 kprintf("distribution of page usage counts (%lu pages):", 1336 sPageUsagePageCount); 1337 1338 int64 sum = 0; 1339 for (int32 i = 0; i < 256; i++) { 1340 if (i % 8 == 0) 1341 kprintf("\n%4ld:", i - 128); 1342 1343 int64 count = sPageUsage[i]; 1344 sum += count * (i - 128); 1345 1346 kprintf(" %9llu", count); 1347 } 1348 1349 kprintf("\n\n"); 1350 1351 kprintf("average usage count: %f\n", 1352 sPageUsagePageCount > 0 ? 
		(float)sum / sPageUsagePageCount : 0);

	return 0;
}

#endif	// TRACK_PAGE_USAGE_STATS


// #pragma mark - vm_page


inline void
vm_page::InitState(uint8 newState)
{
	state = newState;
}


inline void
vm_page::SetState(uint8 newState)
{
	TPS(SetPageState(this, newState));

	state = newState;
}


// #pragma mark -


static void
get_page_stats(page_stats& _pageStats)
{
	_pageStats.totalFreePages = sUnreservedFreePages;
	_pageStats.cachedPages = sCachedPageQueue.Count();
	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
	// TODO: We don't get an actual snapshot here!
}


static bool
do_active_paging(const page_stats& pageStats)
{
	return pageStats.totalFreePages + pageStats.cachedPages
		< pageStats.unsatisfiedReservations
			+ (int32)sFreeOrCachedPagesTarget;
}


/*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
	\a count. Doesn't touch the last \a dontTouch pages of
	\c sUnreservedFreePages, though.
	\return The number of actually reserved pages.
*/
static uint32
reserve_some_pages(uint32 count, uint32 dontTouch)
{
	while (true) {
		int32 freePages = atomic_get(&sUnreservedFreePages);
		if (freePages <= (int32)dontTouch)
			return 0;

		int32 toReserve = std::min(count, freePages - dontTouch);
		if (atomic_test_and_set(&sUnreservedFreePages,
				freePages - toReserve, freePages)
			== freePages) {
			return toReserve;
		}

		// the count changed in the meantime -- retry
	}
}


static void
wake_up_page_reservation_waiters()
{
	MutexLocker pageDeficitLocker(sPageDeficitLock);

	// TODO: If this is a low priority thread, we might want to disable
	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
	// high priority threads will be kept waiting while a medium priority
	// thread prevents us from running.
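
	// Illustrative note, not part of the original code: reserve_some_pages()
	// above is best-effort. With sUnreservedFreePages == 70 and
	// dontTouch == 64, a request for 10 pages yields only 6, and a concurrent
	// change between its atomic_get() and atomic_test_and_set() simply causes
	// another iteration of its CAS loop. The loop below may therefore satisfy
	// a waiter only partially and leave it queued with the remaining
	// 'missing' count.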

	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
		int32 reserved = reserve_some_pages(waiter->missing,
			waiter->dontTouch);
		if (reserved == 0)
			return;

		atomic_add(&sUnsatisfiedPageReservations, -reserved);
		waiter->missing -= reserved;

		if (waiter->missing > 0)
			return;

		sPageReservationWaiters.Remove(waiter);

		thread_unblock(waiter->thread, B_OK);
	}
}


static inline void
unreserve_pages(uint32 count)
{
	atomic_add(&sUnreservedFreePages, count);
	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
		wake_up_page_reservation_waiters();
}


static void
free_page(vm_page* page, bool clear)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	PAGE_ASSERT(page, !page->IsMapped());

	VMPageQueue* fromQueue;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			fromQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			fromQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			fromQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			fromQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("free_page(): page %p already free", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			fromQueue = NULL;
			break;
		default:
			panic("free_page(): page %p in invalid state %d",
				page, page->State());
			return;
	}

	if (page->CacheRef() != NULL)
		panic("to be freed page %p has cache", page);
	if (page->IsMapped())
		panic("to be freed page %p has mappings", page);

	if (fromQueue != NULL)
		fromQueue->RemoveUnlocked(page);

	TA(FreePage(page->physical_page_number));

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	page->allocation_tracking_info.Clear();
#endif

	ReadLocker locker(sFreePageQueuesLock);

	DEBUG_PAGE_ACCESS_END(page);

	if (clear) {
		page->SetState(PAGE_STATE_CLEAR);
		sClearPageQueue.PrependUnlocked(page);
	} else {
		page->SetState(PAGE_STATE_FREE);
		sFreePageQueue.PrependUnlocked(page);
		sFreePageCondition.NotifyAll();
	}

	locker.Unlock();
}


/*!	The caller must make sure that no-one else tries to change the page's state
	while the function is called. If the page has a cache, this can be done by
	locking the cache.
*/
static void
set_page_state(vm_page *page, int pageState)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	if (pageState == page->State())
		return;

	VMPageQueue* fromQueue;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			fromQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			fromQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			fromQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			fromQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("set_page_state(): page %p is free/clear", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			fromQueue = NULL;
			break;
		default:
			panic("set_page_state(): page %p in invalid state %d",
				page, page->State());
			return;
	}

	VMPageQueue* toQueue;

	switch (pageState) {
		case PAGE_STATE_ACTIVE:
			toQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			toQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			toQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			PAGE_ASSERT(page, !page->IsMapped());
			PAGE_ASSERT(page, !page->modified);
			toQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("set_page_state(): target state is free/clear");
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			toQueue = NULL;
			break;
		default:
			panic("set_page_state(): invalid target state %d", pageState);
			return;
	}

	VMCache* cache = page->Cache();
	if (cache != NULL && cache->temporary) {
		if (pageState == PAGE_STATE_MODIFIED)
			atomic_add(&sModifiedTemporaryPages, 1);
		else if (page->State() == PAGE_STATE_MODIFIED)
			atomic_add(&sModifiedTemporaryPages, -1);
	}

	// move the page
	if (toQueue == fromQueue) {
		// Note: Theoretically we are required to lock when changing the page
		// state, even if we don't change the queue. We actually don't have to
		// do this, though, since only for the active queue there are different
		// page states and active pages have a cache that must be locked at
		// this point. So we rely on the fact that everyone must lock the cache
		// before trying to change/interpret the page state.
		PAGE_ASSERT(page, cache != NULL);
		cache->AssertLocked();
		page->SetState(pageState);
	} else {
		if (fromQueue != NULL)
			fromQueue->RemoveUnlocked(page);

		page->SetState(pageState);

		if (toQueue != NULL)
			toQueue->AppendUnlocked(page);
	}
}


/*!	Moves a previously modified page into a now appropriate queue.
	The page queues must not be locked.
*/
static void
move_page_to_appropriate_queue(vm_page *page)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	// Note, this logic must be in sync with what the page daemon does.
	int32 state;
	if (page->IsMapped())
		state = PAGE_STATE_ACTIVE;
	else if (page->modified)
		state = PAGE_STATE_MODIFIED;
	else
		state = PAGE_STATE_CACHED;

	// TODO: If free + cached pages are low, we might directly want to free the
	// page.
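	// Illustrative note, not part of the original code: this classification
	// mirrors the page daemon. A mapped page stays active because its
	// mappings can touch or dirty it again at any time; an unmapped but
	// modified page still needs the page writer before its frame can be
	// reused; only an unmapped, clean page is cheap to reclaim and therefore
	// becomes cached.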
	set_page_state(page, state);
}


static void
clear_page(struct vm_page *page)
{
	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
		B_PAGE_SIZE);
}


static status_t
mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
{
	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
		B_PRIxPHYSADDR "\n", startPage, length));

	if (sPhysicalPageOffset > startPage) {
		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			"): start page is before free list\n", startPage, length);
		if (sPhysicalPageOffset - startPage >= length)
			return B_OK;
		length -= sPhysicalPageOffset - startPage;
		startPage = sPhysicalPageOffset;
	}

	startPage -= sPhysicalPageOffset;

	if (startPage + length > sNumPages) {
		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			"): range would extend past free list\n", startPage, length);
		if (startPage >= sNumPages)
			return B_OK;
		length = sNumPages - startPage;
	}

	WriteLocker locker(sFreePageQueuesLock);

	for (page_num_t i = 0; i < length; i++) {
		vm_page *page = &sPages[startPage + i];
		switch (page->State()) {
			case PAGE_STATE_FREE:
			case PAGE_STATE_CLEAR:
			{
				// TODO: This violates the page reservation policy, since we
				// remove pages from the free/clear queues without having
				// reserved them before. This should happen in the early boot
				// process only, though.
				DEBUG_PAGE_ACCESS_START(page);
				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
					? sFreePageQueue : sClearPageQueue;
				queue.Remove(page);
				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
				page->busy = false;
				atomic_add(&sUnreservedFreePages, -1);
				DEBUG_PAGE_ACCESS_END(page);
				break;
			}
			case PAGE_STATE_WIRED:
			case PAGE_STATE_UNUSED:
				break;
			case PAGE_STATE_ACTIVE:
			case PAGE_STATE_INACTIVE:
			case PAGE_STATE_MODIFIED:
			case PAGE_STATE_CACHED:
			default:
				// uh
				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
					" in non-free state %d!\n", startPage + i, page->State());
				break;
		}
	}

	return B_OK;
}


/*!	This is a background thread that wakes up when its condition is notified
	and moves some pages from the free queue over to the clear queue.
	Given enough time, it will clear out all pages from the free queue - we
	could probably slow it down after having reached a certain threshold.
*/
static int32
page_scrubber(void *unused)
{
	(void)(unused);

	TRACE(("page_scrubber starting...\n"));

	ConditionVariableEntry entry;
	for (;;) {
		while (sFreePageQueue.Count() == 0
				|| atomic_get(&sUnreservedFreePages)
					< (int32)sFreePagesTarget) {
			sFreePageCondition.Add(&entry);
			entry.Wait();
		}

		// Since we temporarily remove pages from the free pages reserve,
		// we must make sure we don't cause a violation of the page
		// reservation warranty. The following is usually stricter than
		// necessary, because we don't have information on how many of the
		// reserved pages have already been allocated.
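		// Illustrative note, not part of the original code: the reservation
		// below is best-effort. If, say, only 20 unreserved pages remain above
		// the user reserve, reserve_some_pages(SCRUB_SIZE, ...) returns 20
		// (or 0 when nothing is left above the reserve), and this iteration
		// simply scrubs fewer than SCRUB_SIZE pages before handing them back
		// via unreserve_pages().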
1756 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1757 kPageReserveForPriority[VM_PRIORITY_USER]); 1758 if (reserved == 0) 1759 continue; 1760 1761 // get some pages from the free queue 1762 ReadLocker locker(sFreePageQueuesLock); 1763 1764 vm_page *page[SCRUB_SIZE]; 1765 int32 scrubCount = 0; 1766 for (int32 i = 0; i < reserved; i++) { 1767 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1768 if (page[i] == NULL) 1769 break; 1770 1771 DEBUG_PAGE_ACCESS_START(page[i]); 1772 1773 page[i]->SetState(PAGE_STATE_ACTIVE); 1774 page[i]->busy = true; 1775 scrubCount++; 1776 } 1777 1778 locker.Unlock(); 1779 1780 if (scrubCount == 0) { 1781 unreserve_pages(reserved); 1782 continue; 1783 } 1784 1785 TA(ScrubbingPages(scrubCount)); 1786 1787 // clear them 1788 for (int32 i = 0; i < scrubCount; i++) 1789 clear_page(page[i]); 1790 1791 locker.Lock(); 1792 1793 // and put them into the clear queue 1794 for (int32 i = 0; i < scrubCount; i++) { 1795 page[i]->SetState(PAGE_STATE_CLEAR); 1796 page[i]->busy = false; 1797 DEBUG_PAGE_ACCESS_END(page[i]); 1798 sClearPageQueue.PrependUnlocked(page[i]); 1799 } 1800 1801 locker.Unlock(); 1802 1803 unreserve_pages(reserved); 1804 1805 TA(ScrubbedPages(scrubCount)); 1806 1807 // wait at least 100ms between runs 1808 snooze(100 * 1000); 1809 } 1810 1811 return 0; 1812 } 1813 1814 1815 static void 1816 init_page_marker(vm_page &marker) 1817 { 1818 marker.SetCacheRef(NULL); 1819 marker.InitState(PAGE_STATE_UNUSED); 1820 marker.busy = true; 1821 #if DEBUG_PAGE_QUEUE 1822 marker.queue = NULL; 1823 #endif 1824 #if DEBUG_PAGE_ACCESS 1825 marker.accessing_thread = thread_get_current_thread_id(); 1826 #endif 1827 } 1828 1829 1830 static void 1831 remove_page_marker(struct vm_page &marker) 1832 { 1833 DEBUG_PAGE_ACCESS_CHECK(&marker); 1834 1835 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1836 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1837 1838 marker.SetState(PAGE_STATE_UNUSED); 1839 } 1840 1841 1842 static vm_page* 1843 next_modified_page(page_num_t& maxPagesToSee) 1844 { 1845 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1846 1847 while (maxPagesToSee > 0) { 1848 vm_page* page = sModifiedPageQueue.Head(); 1849 if (page == NULL) 1850 return NULL; 1851 1852 sModifiedPageQueue.Requeue(page, true); 1853 1854 maxPagesToSee--; 1855 1856 if (!page->busy) 1857 return page; 1858 } 1859 1860 return NULL; 1861 } 1862 1863 1864 // #pragma mark - 1865 1866 1867 class PageWriteTransfer; 1868 class PageWriteWrapper; 1869 1870 1871 class PageWriterRun { 1872 public: 1873 status_t Init(uint32 maxPages); 1874 1875 void PrepareNextRun(); 1876 void AddPage(vm_page* page); 1877 uint32 Go(); 1878 1879 void PageWritten(PageWriteTransfer* transfer, status_t status, 1880 bool partialTransfer, size_t bytesTransferred); 1881 1882 private: 1883 uint32 fMaxPages; 1884 uint32 fWrapperCount; 1885 uint32 fTransferCount; 1886 int32 fPendingTransfers; 1887 PageWriteWrapper* fWrappers; 1888 PageWriteTransfer* fTransfers; 1889 ConditionVariable fAllFinishedCondition; 1890 }; 1891 1892 1893 class PageWriteTransfer : public AsyncIOCallback { 1894 public: 1895 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1896 bool AddPage(vm_page* page); 1897 1898 status_t Schedule(uint32 flags); 1899 1900 void SetStatus(status_t status, size_t transferred); 1901 1902 status_t Status() const { return fStatus; } 1903 struct VMCache* Cache() const { return fCache; } 1904 uint32 PageCount() const { return fPageCount; } 1905 1906 virtual void IOFinished(status_t status, bool 
partialTransfer, 1907 generic_size_t bytesTransferred); 1908 private: 1909 PageWriterRun* fRun; 1910 struct VMCache* fCache; 1911 off_t fOffset; 1912 uint32 fPageCount; 1913 int32 fMaxPages; 1914 status_t fStatus; 1915 uint32 fVecCount; 1916 generic_io_vec fVecs[32]; // TODO: make dynamic/configurable 1917 }; 1918 1919 1920 class PageWriteWrapper { 1921 public: 1922 PageWriteWrapper(); 1923 ~PageWriteWrapper(); 1924 void SetTo(vm_page* page); 1925 bool Done(status_t result); 1926 1927 private: 1928 vm_page* fPage; 1929 struct VMCache* fCache; 1930 bool fIsActive; 1931 }; 1932 1933 1934 PageWriteWrapper::PageWriteWrapper() 1935 : 1936 fIsActive(false) 1937 { 1938 } 1939 1940 1941 PageWriteWrapper::~PageWriteWrapper() 1942 { 1943 if (fIsActive) 1944 panic("page write wrapper going out of scope but isn't completed"); 1945 } 1946 1947 1948 /*! The page's cache must be locked. 1949 */ 1950 void 1951 PageWriteWrapper::SetTo(vm_page* page) 1952 { 1953 DEBUG_PAGE_ACCESS_CHECK(page); 1954 1955 if (page->busy) 1956 panic("setting page write wrapper to busy page"); 1957 1958 if (fIsActive) 1959 panic("re-setting page write wrapper that isn't completed"); 1960 1961 fPage = page; 1962 fCache = page->Cache(); 1963 fIsActive = true; 1964 1965 fPage->busy = true; 1966 fPage->busy_writing = true; 1967 1968 // We have a modified page -- however, while we're writing it back, 1969 // the page might still be mapped. In order not to lose any changes to the 1970 // page, we mark it clean before actually writing it back; if 1971 // writing the page fails for some reason, we'll just keep it in the 1972 // modified page list, but that should happen only rarely. 1973 1974 // If the page is changed after we cleared the dirty flag, but before we 1975 // had the chance to write it back, then we'll write it again later -- that 1976 // will probably not happen that often, though. 1977 1978 vm_clear_map_flags(fPage, PAGE_MODIFIED); 1979 } 1980 1981 1982 /*! The page's cache must be locked. 1983 The page queues must not be locked. 1984 \return \c true if the page was written successfully respectively could be 1985 handled somehow, \c false otherwise. 1986 */ 1987 bool 1988 PageWriteWrapper::Done(status_t result) 1989 { 1990 if (!fIsActive) 1991 panic("completing page write wrapper that is not active"); 1992 1993 DEBUG_PAGE_ACCESS_START(fPage); 1994 1995 fPage->busy = false; 1996 // Set unbusy and notify later by hand, since we might free the page. 1997 1998 bool success = true; 1999 2000 if (result == B_OK) { 2001 // put it into the active/inactive queue 2002 move_page_to_appropriate_queue(fPage); 2003 fPage->busy_writing = false; 2004 DEBUG_PAGE_ACCESS_END(fPage); 2005 } else { 2006 // Writing the page failed. One reason would be that the cache has been 2007 // shrunk and the page does no longer belong to the file. Otherwise the 2008 // actual I/O failed, in which case we'll simply keep the page modified. 2009 2010 if (!fPage->busy_writing) { 2011 // The busy_writing flag was cleared. That means the cache has been 2012 // shrunk while we were trying to write the page and we have to free 2013 // it now. 2014 vm_remove_all_page_mappings(fPage); 2015 // TODO: Unmapping should already happen when resizing the cache! 2016 fCache->RemovePage(fPage); 2017 free_page(fPage, false); 2018 unreserve_pages(1); 2019 } else { 2020 // Writing the page failed -- mark the page modified and move it to 2021 // an appropriate queue other than the modified queue, so we don't 2022 // keep trying to write it over and over again. 
We keep 2023 // non-temporary pages in the modified queue, though, so they don't 2024 // get lost in the inactive queue. 2025 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 2026 strerror(result)); 2027 2028 fPage->modified = true; 2029 if (!fCache->temporary) 2030 set_page_state(fPage, PAGE_STATE_MODIFIED); 2031 else if (fPage->IsMapped()) 2032 set_page_state(fPage, PAGE_STATE_ACTIVE); 2033 else 2034 set_page_state(fPage, PAGE_STATE_INACTIVE); 2035 2036 fPage->busy_writing = false; 2037 DEBUG_PAGE_ACCESS_END(fPage); 2038 2039 success = false; 2040 } 2041 } 2042 2043 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 2044 fIsActive = false; 2045 2046 return success; 2047 } 2048 2049 2050 /*! The page's cache must be locked. 2051 */ 2052 void 2053 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 2054 { 2055 fRun = run; 2056 fCache = page->Cache(); 2057 fOffset = page->cache_offset; 2058 fPageCount = 1; 2059 fMaxPages = maxPages; 2060 fStatus = B_OK; 2061 2062 fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2063 fVecs[0].length = B_PAGE_SIZE; 2064 fVecCount = 1; 2065 } 2066 2067 2068 /*! The page's cache must be locked. 2069 */ 2070 bool 2071 PageWriteTransfer::AddPage(vm_page* page) 2072 { 2073 if (page->Cache() != fCache 2074 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 2075 return false; 2076 2077 phys_addr_t nextBase = fVecs[fVecCount - 1].base 2078 + fVecs[fVecCount - 1].length; 2079 2080 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2081 && (off_t)page->cache_offset == fOffset + fPageCount) { 2082 // append to last iovec 2083 fVecs[fVecCount - 1].length += B_PAGE_SIZE; 2084 fPageCount++; 2085 return true; 2086 } 2087 2088 nextBase = fVecs[0].base - B_PAGE_SIZE; 2089 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2090 && (off_t)page->cache_offset == fOffset - 1) { 2091 // prepend to first iovec and adjust offset 2092 fVecs[0].base = nextBase; 2093 fVecs[0].length += B_PAGE_SIZE; 2094 fOffset = page->cache_offset; 2095 fPageCount++; 2096 return true; 2097 } 2098 2099 if (((off_t)page->cache_offset == fOffset + fPageCount 2100 || (off_t)page->cache_offset == fOffset - 1) 2101 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 2102 // not physically contiguous or not in the right order 2103 uint32 vectorIndex; 2104 if ((off_t)page->cache_offset < fOffset) { 2105 // we are pre-pending another vector, move the other vecs 2106 for (uint32 i = fVecCount; i > 0; i--) 2107 fVecs[i] = fVecs[i - 1]; 2108 2109 fOffset = page->cache_offset; 2110 vectorIndex = 0; 2111 } else 2112 vectorIndex = fVecCount; 2113 2114 fVecs[vectorIndex].base 2115 = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2116 fVecs[vectorIndex].length = B_PAGE_SIZE; 2117 2118 fVecCount++; 2119 fPageCount++; 2120 return true; 2121 } 2122 2123 return false; 2124 } 2125 2126 2127 status_t 2128 PageWriteTransfer::Schedule(uint32 flags) 2129 { 2130 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 2131 generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT; 2132 2133 if (fRun != NULL) { 2134 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 2135 flags | B_PHYSICAL_IO_REQUEST, this); 2136 } 2137 2138 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 2139 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 2140 2141 SetStatus(status, writeLength); 2142 return fStatus; 2143 } 2144 2145 2146 void 2147 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 
)
2148 { 2149 // only succeed if all pages up to the last one have been written fully 2150 // and the last page has at least been written partially
2151 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2152 status = B_ERROR; 2153 2154 fStatus = status; 2155 } 2156 2157
2158 void 2159 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2160 generic_size_t bytesTransferred) 2161 {
2162 SetStatus(status, bytesTransferred); 2163 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2164 } 2165 2166
2167 status_t 2168 PageWriterRun::Init(uint32 maxPages) 2169 { 2170 fMaxPages = maxPages; 2171 fWrapperCount = 0; 2172 fTransferCount = 0; 2173 fPendingTransfers = 0; 2174
2175 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2176 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2177 if (fWrappers == NULL || fTransfers == NULL) 2178 return B_NO_MEMORY; 2179 2180 return B_OK; 2181 } 2182 2183
2184 void 2185 PageWriterRun::PrepareNextRun() 2186 { 2187 fWrapperCount = 0; 2188 fTransferCount = 0; 2189 fPendingTransfers = 0; 2190 } 2191 2192
2193 /*! The page's cache must be locked. 2194 */ 2195 void 2196 PageWriterRun::AddPage(vm_page* page) 2197 { 2198 fWrappers[fWrapperCount++].SetTo(page); 2199
2200 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2201 fTransfers[fTransferCount++].SetTo(this, page, 2202 page->Cache()->MaxPagesPerAsyncWrite()); 2203 } 2204 } 2205 2206
2207 /*! Writes all pages previously added. 2208 \return The number of pages that could not be written or otherwise handled. 2209 */
2210 uint32 2211 PageWriterRun::Go() 2212 { 2213 atomic_set(&fPendingTransfers, fTransferCount); 2214
2215 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2216 ConditionVariableEntry waitEntry; 2217 fAllFinishedCondition.Add(&waitEntry); 2218
2219 // schedule writes 2220 for (uint32 i = 0; i < fTransferCount; i++) 2221 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2222
2223 // wait until all pages have been written 2224 waitEntry.Wait(); 2225
2226 // mark pages depending on whether they could be written or not 2227
2228 uint32 failedPages = 0; 2229 uint32 wrapperIndex = 0; 2230 for (uint32 i = 0; i < fTransferCount; i++) { 2231 PageWriteTransfer& transfer = fTransfers[i]; 2232 transfer.Cache()->Lock(); 2233
2234 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2235 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2236 failedPages++; 2237 } 2238
2239 transfer.Cache()->Unlock(); 2240 } 2241 2242 ASSERT(wrapperIndex == fWrapperCount); 2243
2244 for (uint32 i = 0; i < fTransferCount; i++) { 2245 PageWriteTransfer& transfer = fTransfers[i]; 2246 struct VMCache* cache = transfer.Cache(); 2247
2248 // We've acquired a store reference and a cache reference for each page
2249 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2250 // We release the cache references after all pages were made 2251 // unbusy again - otherwise releasing a vnode could deadlock.
2252 cache->ReleaseStoreRef(); 2253 cache->ReleaseRef(); 2254 } 2255 } 2256 2257 return failedPages; 2258 } 2259 2260
2261 void 2262 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2263 bool partialTransfer, size_t bytesTransferred) 2264 {
2265 if (atomic_add(&fPendingTransfers, -1) == 1) 2266 fAllFinishedCondition.NotifyAll(); 2267 } 2268 2269
2270 /*! The page writer continuously takes some pages from the modified 2271 queue, writes them back, and moves them back to the active queue.
2272 It runs in its own thread, and is only there to keep the number 2273 of modified pages low, so that more pages can be reused with 2274 fewer costs. 2275 */ 2276 status_t 2277 page_writer(void* /*unused*/) 2278 { 2279 const uint32 kNumPages = 256; 2280 #ifdef TRACE_VM_PAGE 2281 uint32 writtenPages = 0; 2282 bigtime_t lastWrittenTime = 0; 2283 bigtime_t pageCollectionTime = 0; 2284 bigtime_t pageWritingTime = 0; 2285 #endif 2286 2287 PageWriterRun run; 2288 if (run.Init(kNumPages) != B_OK) { 2289 panic("page writer: Failed to init PageWriterRun!"); 2290 return B_ERROR; 2291 } 2292 2293 page_num_t pagesSinceLastSuccessfulWrite = 0; 2294 2295 while (true) { 2296 // TODO: Maybe wait shorter when memory is low! 2297 if (sModifiedPageQueue.Count() < kNumPages) { 2298 sPageWriterCondition.Wait(3000000, true); 2299 // all 3 seconds when no one triggers us 2300 } 2301 2302 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2303 if (modifiedPages == 0) 2304 continue; 2305 2306 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2307 // We ran through the whole queue without being able to write a 2308 // single page. Take a break. 2309 snooze(500000); 2310 pagesSinceLastSuccessfulWrite = 0; 2311 } 2312 2313 #if ENABLE_SWAP_SUPPORT 2314 page_stats pageStats; 2315 get_page_stats(pageStats); 2316 bool activePaging = do_active_paging(pageStats); 2317 #endif 2318 2319 // depending on how urgent it becomes to get pages to disk, we adjust 2320 // our I/O priority 2321 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2322 int32 ioPriority = B_IDLE_PRIORITY; 2323 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2324 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2325 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2326 } else { 2327 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2328 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2329 } 2330 2331 thread_set_io_priority(ioPriority); 2332 2333 uint32 numPages = 0; 2334 run.PrepareNextRun(); 2335 2336 // TODO: make this laptop friendly, too (ie. only start doing 2337 // something if someone else did something or there is really 2338 // enough to do). 2339 2340 // collect pages to be written 2341 #ifdef TRACE_VM_PAGE 2342 pageCollectionTime -= system_time(); 2343 #endif 2344 2345 page_num_t maxPagesToSee = modifiedPages; 2346 2347 while (numPages < kNumPages && maxPagesToSee > 0) { 2348 vm_page *page = next_modified_page(maxPagesToSee); 2349 if (page == NULL) 2350 break; 2351 2352 PageCacheLocker cacheLocker(page, false); 2353 if (!cacheLocker.IsLocked()) 2354 continue; 2355 2356 VMCache *cache = page->Cache(); 2357 2358 // If the page is busy or its state has changed while we were 2359 // locking the cache, just ignore it. 2360 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2361 continue; 2362 2363 DEBUG_PAGE_ACCESS_START(page); 2364 2365 // Don't write back wired (locked) pages. 2366 if (page->WiredCount() > 0) { 2367 set_page_state(page, PAGE_STATE_ACTIVE); 2368 DEBUG_PAGE_ACCESS_END(page); 2369 continue; 2370 } 2371 2372 // Write back temporary pages only when we're actively paging. 2373 if (cache->temporary 2374 #if ENABLE_SWAP_SUPPORT 2375 && (!activePaging 2376 || !cache->CanWritePage( 2377 (off_t)page->cache_offset << PAGE_SHIFT)) 2378 #endif 2379 ) { 2380 // We can't/don't want to do anything with this page, so move it 2381 // to one of the other queues. 
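// Unmapped pages go to the inactive queue, mapped ones back to the
// active queue; either way they leave the modified queue, so the page
// writer won't keep revisiting them on every run.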
2382 if (page->mappings.IsEmpty()) 2383 set_page_state(page, PAGE_STATE_INACTIVE); 2384 else 2385 set_page_state(page, PAGE_STATE_ACTIVE); 2386 2387 DEBUG_PAGE_ACCESS_END(page); 2388 continue; 2389 } 2390 2391 // We need our own reference to the store, as it might currently be 2392 // destroyed. 2393 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2394 DEBUG_PAGE_ACCESS_END(page); 2395 cacheLocker.Unlock(); 2396 thread_yield(); 2397 continue; 2398 } 2399 2400 run.AddPage(page); 2401 // TODO: We're possibly adding pages of different caches and 2402 // thus maybe of different underlying file systems here. This 2403 // is a potential problem for loop file systems/devices, since 2404 // we could mark a page busy that would need to be accessed 2405 // when writing back another page, thus causing a deadlock. 2406 2407 DEBUG_PAGE_ACCESS_END(page); 2408 2409 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2410 TPW(WritePage(page)); 2411 2412 cache->AcquireRefLocked(); 2413 numPages++; 2414 } 2415 2416 #ifdef TRACE_VM_PAGE 2417 pageCollectionTime += system_time(); 2418 #endif 2419 if (numPages == 0) 2420 continue; 2421 2422 // write pages to disk and do all the cleanup 2423 #ifdef TRACE_VM_PAGE 2424 pageWritingTime -= system_time(); 2425 #endif 2426 uint32 failedPages = run.Go(); 2427 #ifdef TRACE_VM_PAGE 2428 pageWritingTime += system_time(); 2429 2430 // debug output only... 2431 writtenPages += numPages; 2432 if (writtenPages >= 1024) { 2433 bigtime_t now = system_time(); 2434 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2435 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2436 (now - lastWrittenTime) / 1000, 2437 pageCollectionTime / 1000, pageWritingTime / 1000)); 2438 lastWrittenTime = now; 2439 2440 writtenPages -= 1024; 2441 pageCollectionTime = 0; 2442 pageWritingTime = 0; 2443 } 2444 #endif 2445 2446 if (failedPages == numPages) 2447 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2448 else 2449 pagesSinceLastSuccessfulWrite = 0; 2450 } 2451 2452 return B_OK; 2453 } 2454 2455 2456 // #pragma mark - 2457 2458 2459 // TODO: This should be done in the page daemon! 2460 #if 0 2461 #if ENABLE_SWAP_SUPPORT 2462 static bool 2463 free_page_swap_space(int32 index) 2464 { 2465 vm_page *page = vm_page_at_index(index); 2466 PageCacheLocker locker(page); 2467 if (!locker.IsLocked()) 2468 return false; 2469 2470 DEBUG_PAGE_ACCESS_START(page); 2471 2472 VMCache* cache = page->Cache(); 2473 if (cache->temporary && page->WiredCount() == 0 2474 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2475 && page->usage_count > 0) { 2476 // TODO: how to judge a page is highly active? 2477 if (swap_free_page_swap_space(page)) { 2478 // We need to mark the page modified, since otherwise it could be 2479 // stolen and we'd lose its data. 
2480 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2481 TD(FreedPageSwap(page)); 2482 DEBUG_PAGE_ACCESS_END(page); 2483 return true; 2484 } 2485 }
2486 DEBUG_PAGE_ACCESS_END(page); 2487 return false; 2488 } 2489 #endif 2490 #endif // 0 2491 2492
2493 static vm_page * 2494 find_cached_page_candidate(struct vm_page &marker) 2495 { 2496 DEBUG_PAGE_ACCESS_CHECK(&marker); 2497
2498 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2499 vm_page *page; 2500
2501 if (marker.State() == PAGE_STATE_UNUSED) { 2502 // Get the first page of the cached queue 2503 page = sCachedPageQueue.Head(); 2504 } else {
2505 // Get the next page of the current queue 2506 if (marker.State() != PAGE_STATE_CACHED) { 2507 panic("invalid marker %p state", &marker); 2508 return NULL; 2509 } 2510
2511 page = sCachedPageQueue.Next(&marker); 2512 sCachedPageQueue.Remove(&marker); 2513 marker.SetState(PAGE_STATE_UNUSED); 2514 } 2515
2516 while (page != NULL) { 2517 if (!page->busy) { 2518 // we found a candidate, insert marker 2519 marker.SetState(PAGE_STATE_CACHED); 2520 sCachedPageQueue.InsertAfter(page, &marker); 2521 return page; 2522 } 2523
2524 page = sCachedPageQueue.Next(page); 2525 } 2526 2527 return NULL; 2528 } 2529 2530
2531 static bool 2532 free_cached_page(vm_page *page, bool dontWait) 2533 { 2534 // try to lock the page's cache
2535 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2536 return false; 2537 VMCache* cache = page->Cache(); 2538
2539 AutoLocker<VMCache> cacheLocker(cache, true); 2540 MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache); 2541
2542 // check again if that page is still a candidate 2543 if (page->busy || page->State() != PAGE_STATE_CACHED) 2544 return false; 2545
2546 DEBUG_PAGE_ACCESS_START(page); 2547 2548 PAGE_ASSERT(page, !page->IsMapped()); 2549 PAGE_ASSERT(page, !page->modified); 2550
2551 // we can now steal this page 2552
2553 cache->RemovePage(page); 2554 // Now the page doesn't have a cache anymore, so no one else (e.g. 2555 // vm_page_allocate_page_run()) can pick it up, since they would be 2556 // required to lock the cache first, which would fail. 2557
2558 sCachedPageQueue.RemoveUnlocked(page); 2559 return true; 2560 } 2561 2562
2563 static uint32 2564 free_cached_pages(uint32 pagesToFree, bool dontWait) 2565 { 2566 vm_page marker; 2567 init_page_marker(marker); 2568
2569 uint32 pagesFreed = 0; 2570
2571 while (pagesFreed < pagesToFree) { 2572 vm_page *page = find_cached_page_candidate(marker); 2573 if (page == NULL) 2574 break; 2575
2576 if (free_cached_page(page, dontWait)) { 2577 ReadLocker locker(sFreePageQueuesLock); 2578 page->SetState(PAGE_STATE_FREE); 2579 DEBUG_PAGE_ACCESS_END(page); 2580 sFreePageQueue.PrependUnlocked(page); 2581 locker.Unlock(); 2582
2583 TA(StolenPage()); 2584 2585 pagesFreed++; 2586 } 2587 } 2588
2589 remove_page_marker(marker); 2590 2591 sFreePageCondition.NotifyAll(); 2592 2593 return pagesFreed; 2594 } 2595 2596
2597 static void 2598 idle_scan_active_pages(page_stats& pageStats) 2599 { 2600 VMPageQueue& queue = sActivePageQueue; 2601
2602 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2603 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2604
2605 while (maxToScan > 0) { 2606 maxToScan--; 2607
2608 // Get the next page. Note that we don't bother to lock here. We go with 2609 // the assumption that on all architectures reading/writing pointers is 2610 // atomic. Beyond that it doesn't really matter.
We have to unlock the 2611 // queue anyway to lock the page's cache, and we'll recheck afterwards. 2612 vm_page* page = queue.Head(); 2613 if (page == NULL) 2614 break; 2615 2616 // lock the page's cache 2617 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2618 if (cache == NULL) 2619 continue; 2620 2621 if (page->State() != PAGE_STATE_ACTIVE) { 2622 // page is no longer in the cache or in this queue 2623 cache->ReleaseRefAndUnlock(); 2624 continue; 2625 } 2626 2627 if (page->busy) { 2628 // page is busy -- requeue at the end 2629 vm_page_requeue(page, true); 2630 cache->ReleaseRefAndUnlock(); 2631 continue; 2632 } 2633 2634 DEBUG_PAGE_ACCESS_START(page); 2635 2636 // Get the page active/modified flags and update the page's usage count. 2637 // We completely unmap inactive temporary pages. This saves us to 2638 // iterate through the inactive list as well, since we'll be notified 2639 // via page fault whenever such an inactive page is used again. 2640 // We don't remove the mappings of non-temporary pages, since we 2641 // wouldn't notice when those would become unused and could thus be 2642 // moved to the cached list. 2643 int32 usageCount; 2644 if (page->WiredCount() > 0 || page->usage_count > 0 2645 || !cache->temporary) { 2646 usageCount = vm_clear_page_mapping_accessed_flags(page); 2647 } else 2648 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2649 2650 if (usageCount > 0) { 2651 usageCount += page->usage_count + kPageUsageAdvance; 2652 if (usageCount > kPageUsageMax) 2653 usageCount = kPageUsageMax; 2654 // TODO: This would probably also be the place to reclaim swap space. 2655 } else { 2656 usageCount += page->usage_count - (int32)kPageUsageDecline; 2657 if (usageCount < 0) { 2658 usageCount = 0; 2659 set_page_state(page, PAGE_STATE_INACTIVE); 2660 } 2661 } 2662 2663 page->usage_count = usageCount; 2664 2665 DEBUG_PAGE_ACCESS_END(page); 2666 2667 cache->ReleaseRefAndUnlock(); 2668 } 2669 } 2670 2671 2672 static void 2673 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel) 2674 { 2675 int32 pagesToFree = pageStats.unsatisfiedReservations 2676 + sFreeOrCachedPagesTarget 2677 - (pageStats.totalFreePages + pageStats.cachedPages); 2678 if (pagesToFree <= 0) 2679 return; 2680 2681 bigtime_t time = system_time(); 2682 uint32 pagesScanned = 0; 2683 uint32 pagesToCached = 0; 2684 uint32 pagesToModified = 0; 2685 uint32 pagesToActive = 0; 2686 2687 // Determine how many pages at maximum to send to the modified queue. Since 2688 // it is relatively expensive to page out pages, we do that on a grander 2689 // scale only when things get desperate. 2690 uint32 maxToFlush = despairLevel <= 1 ? 
32 : 10000; 2691 2692 vm_page marker; 2693 init_page_marker(marker); 2694
2695 VMPageQueue& queue = sInactivePageQueue; 2696 InterruptsSpinLocker queueLocker(queue.GetLock()); 2697 uint32 maxToScan = queue.Count(); 2698
2699 vm_page* nextPage = queue.Head(); 2700
2701 while (pagesToFree > 0 && maxToScan > 0) { 2702 maxToScan--; 2703
2704 // get the next page 2705 vm_page* page = nextPage; 2706 if (page == NULL) 2707 break; 2708 nextPage = queue.Next(page); 2709
2710 if (page->busy) 2711 continue; 2712
2713 // mark the position 2714 queue.InsertAfter(page, &marker); 2715 queueLocker.Unlock(); 2716
2717 // lock the page's cache 2718 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2719 if (cache == NULL || page->busy 2720 || page->State() != PAGE_STATE_INACTIVE) {
2721 if (cache != NULL) 2722 cache->ReleaseRefAndUnlock(); 2723 queueLocker.Lock(); 2724 nextPage = queue.Next(&marker); 2725 queue.Remove(&marker); 2726 continue; 2727 } 2728
2729 pagesScanned++; 2730 2731 DEBUG_PAGE_ACCESS_START(page); 2732
2733 // Get the accessed count, clear the accessed/modified flags and 2734 // unmap the page, if it hasn't been accessed.
2735 int32 usageCount; 2736 if (page->WiredCount() > 0) 2737 usageCount = vm_clear_page_mapping_accessed_flags(page); 2738 else 2739 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2740
2741 // update usage count 2742 if (usageCount > 0) { 2743 usageCount += page->usage_count + kPageUsageAdvance; 2744 if (usageCount > kPageUsageMax) 2745 usageCount = kPageUsageMax; 2746 } else {
2747 usageCount += page->usage_count - (int32)kPageUsageDecline; 2748 if (usageCount < 0) 2749 usageCount = 0; 2750 } 2751
2752 page->usage_count = usageCount; 2753
2754 // Move to fitting queue or requeue: 2755 // * Active mapped pages go to the active queue. 2756 // * Inactive mapped (i.e. wired) pages are requeued.
2757 // * The remaining pages are cacheable. Thus, if unmodified they go to 2758 // the cached queue, otherwise to the modified queue (up to a limit).
2759 // Note that, unlike in the idle scanning, we don't exempt pages of 2760 // temporary caches. Apparently we really need memory, so we'd better 2761 // page out memory as well.
2762 bool isMapped = page->IsMapped(); 2763 if (usageCount > 0) { 2764 if (isMapped) { 2765 set_page_state(page, PAGE_STATE_ACTIVE); 2766 pagesToActive++; 2767 } else 2768 vm_page_requeue(page, true); 2769 } else if (isMapped) { 2770 vm_page_requeue(page, true); 2771 } else if (!page->modified) { 2772 set_page_state(page, PAGE_STATE_CACHED); 2773 pagesToFree--; 2774 pagesToCached++; 2775 } else if (maxToFlush > 0) { 2776 set_page_state(page, PAGE_STATE_MODIFIED); 2777 maxToFlush--; 2778 pagesToModified++; 2779 } else 2780 vm_page_requeue(page, true); 2781 2782 DEBUG_PAGE_ACCESS_END(page); 2783 2784 cache->ReleaseRefAndUnlock(); 2785 2786 // remove the marker 2787 queueLocker.Lock(); 2788 nextPage = queue.Next(&marker); 2789 queue.Remove(&marker); 2790 } 2791 2792 queueLocker.Unlock(); 2793 2794 time = system_time() - time; 2795 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2796 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2797 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2798 pagesToModified, pagesToActive); 2799 2800 // wake up the page writer, if we tossed it some pages 2801 if (pagesToModified > 0) 2802 sPageWriterCondition.WakeUp(); 2803 } 2804 2805 2806 static void 2807 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2808 { 2809 vm_page marker; 2810 init_page_marker(marker); 2811 2812 VMPageQueue& queue = sActivePageQueue; 2813 InterruptsSpinLocker queueLocker(queue.GetLock()); 2814 uint32 maxToScan = queue.Count(); 2815 2816 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2817 + sFreeOrCachedPagesTarget 2818 - (pageStats.totalFreePages + pageStats.cachedPages) 2819 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2820 if (pagesToDeactivate <= 0) 2821 return; 2822 2823 bigtime_t time = system_time(); 2824 uint32 pagesAccessed = 0; 2825 uint32 pagesToInactive = 0; 2826 uint32 pagesScanned = 0; 2827 2828 vm_page* nextPage = queue.Head(); 2829 2830 while (pagesToDeactivate > 0 && maxToScan > 0) { 2831 maxToScan--; 2832 2833 // get the next page 2834 vm_page* page = nextPage; 2835 if (page == NULL) 2836 break; 2837 nextPage = queue.Next(page); 2838 2839 if (page->busy) 2840 continue; 2841 2842 // mark the position 2843 queue.InsertAfter(page, &marker); 2844 queueLocker.Unlock(); 2845 2846 // lock the page's cache 2847 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2848 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2849 if (cache != NULL) 2850 cache->ReleaseRefAndUnlock(); 2851 queueLocker.Lock(); 2852 nextPage = queue.Next(&marker); 2853 queue.Remove(&marker); 2854 continue; 2855 } 2856 2857 pagesScanned++; 2858 2859 DEBUG_PAGE_ACCESS_START(page); 2860 2861 // Get the page active/modified flags and update the page's usage count. 2862 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2863 2864 if (usageCount > 0) { 2865 usageCount += page->usage_count + kPageUsageAdvance; 2866 if (usageCount > kPageUsageMax) 2867 usageCount = kPageUsageMax; 2868 pagesAccessed++; 2869 // TODO: This would probably also be the place to reclaim swap space. 
2870 } else { 2871 usageCount += page->usage_count - (int32)kPageUsageDecline; 2872 if (usageCount <= 0) { 2873 usageCount = 0; 2874 set_page_state(page, PAGE_STATE_INACTIVE); 2875 pagesToInactive++; 2876 } 2877 } 2878 2879 page->usage_count = usageCount; 2880 2881 DEBUG_PAGE_ACCESS_END(page); 2882 2883 cache->ReleaseRefAndUnlock(); 2884 2885 // remove the marker 2886 queueLocker.Lock(); 2887 nextPage = queue.Next(&marker); 2888 queue.Remove(&marker); 2889 } 2890 2891 time = system_time() - time; 2892 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2893 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed" 2894 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed); 2895 } 2896 2897 2898 static void 2899 page_daemon_idle_scan(page_stats& pageStats) 2900 { 2901 TRACE_DAEMON("page daemon: idle run\n"); 2902 2903 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2904 // We want more actually free pages, so free some from the cached 2905 // ones. 2906 uint32 freed = free_cached_pages( 2907 sFreePagesTarget - pageStats.totalFreePages, false); 2908 if (freed > 0) 2909 unreserve_pages(freed); 2910 get_page_stats(pageStats); 2911 } 2912 2913 // Walk the active list and move pages to the inactive queue. 2914 get_page_stats(pageStats); 2915 idle_scan_active_pages(pageStats); 2916 } 2917 2918 2919 static void 2920 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2921 { 2922 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %" 2923 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages, 2924 pageStats.cachedPages, pageStats.unsatisfiedReservations 2925 + sFreeOrCachedPagesTarget 2926 - (pageStats.totalFreePages + pageStats.cachedPages)); 2927 2928 // Walk the inactive list and transfer pages to the cached and modified 2929 // queues. 2930 full_scan_inactive_pages(pageStats, despairLevel); 2931 2932 // Free cached pages. Also wake up reservation waiters. 2933 get_page_stats(pageStats); 2934 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2935 - (pageStats.totalFreePages); 2936 if (pagesToFree > 0) { 2937 uint32 freed = free_cached_pages(pagesToFree, true); 2938 if (freed > 0) 2939 unreserve_pages(freed); 2940 } 2941 2942 // Walk the active list and move pages to the inactive queue. 2943 get_page_stats(pageStats); 2944 full_scan_active_pages(pageStats, despairLevel); 2945 } 2946 2947 2948 static status_t 2949 page_daemon(void* /*unused*/) 2950 { 2951 int32 despairLevel = 0; 2952 2953 while (true) { 2954 sPageDaemonCondition.ClearActivated(); 2955 2956 // evaluate the free pages situation 2957 page_stats pageStats; 2958 get_page_stats(pageStats); 2959 2960 if (!do_active_paging(pageStats)) { 2961 // Things look good -- just maintain statistics and keep the pool 2962 // of actually free pages full enough. 2963 despairLevel = 0; 2964 page_daemon_idle_scan(pageStats); 2965 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 2966 } else { 2967 // Not enough free pages. We need to do some real work. 2968 despairLevel = std::max(despairLevel + 1, (int32)3); 2969 page_daemon_full_scan(pageStats, despairLevel); 2970 2971 // Don't wait after the first full scan, but rather immediately 2972 // check whether we were successful in freeing enough pages and 2973 // re-run with increased despair level. The first scan is 2974 // conservative with respect to moving inactive modified pages to 2975 // the modified list to avoid thrashing. The second scan, however, 2976 // will not hold back. 
2977 if (despairLevel > 1) 2978 snooze(kBusyScanWaitInterval); 2979 } 2980 } 2981 2982 return B_OK; 2983 } 2984 2985 2986 /*! Returns how many pages could *not* be reserved. 2987 */ 2988 static uint32 2989 reserve_pages(uint32 count, int priority, bool dontWait) 2990 { 2991 int32 dontTouch = kPageReserveForPriority[priority]; 2992 2993 while (true) { 2994 count -= reserve_some_pages(count, dontTouch); 2995 if (count == 0) 2996 return 0; 2997 2998 if (sUnsatisfiedPageReservations == 0) { 2999 count -= free_cached_pages(count, dontWait); 3000 if (count == 0) 3001 return count; 3002 } 3003 3004 if (dontWait) 3005 return count; 3006 3007 // we need to wait for pages to become available 3008 3009 MutexLocker pageDeficitLocker(sPageDeficitLock); 3010 3011 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3012 sUnsatisfiedPageReservations += count; 3013 3014 if (atomic_get(&sUnreservedFreePages) > dontTouch) { 3015 // the situation changed 3016 sUnsatisfiedPageReservations -= count; 3017 continue; 3018 } 3019 3020 PageReservationWaiter waiter; 3021 waiter.dontTouch = dontTouch; 3022 waiter.missing = count; 3023 waiter.thread = thread_get_current_thread(); 3024 waiter.threadPriority = waiter.thread->priority; 3025 3026 // insert ordered (i.e. after all waiters with higher or equal priority) 3027 PageReservationWaiter* otherWaiter = NULL; 3028 for (PageReservationWaiterList::Iterator it 3029 = sPageReservationWaiters.GetIterator(); 3030 (otherWaiter = it.Next()) != NULL;) { 3031 if (waiter < *otherWaiter) 3032 break; 3033 } 3034 3035 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3036 3037 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3038 "waiting for pages"); 3039 3040 if (notifyDaemon) 3041 sPageDaemonCondition.WakeUp(); 3042 3043 pageDeficitLocker.Unlock(); 3044 3045 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3046 thread_block(); 3047 3048 pageDeficitLocker.Lock(); 3049 3050 return 0; 3051 } 3052 } 3053 3054 3055 // #pragma mark - private kernel API 3056 3057 3058 /*! Writes a range of modified pages of a cache to disk. 3059 You need to hold the VMCache lock when calling this function. 3060 Note that the cache lock is released in this function. 3061 \param cache The cache. 3062 \param firstPage Offset (in page size units) of the first page in the range. 3063 \param endPage End offset (in page size units) of the page range. The page 3064 at this offset is not included. 
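\return Currently always \c B_OK; failures while writing individual pages are
	handled per page via PageWriteWrapper::Done().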
3065 */ 3066 status_t 3067 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3068 uint32 endPage) 3069 { 3070 static const int32 kMaxPages = 256; 3071 int32 maxPages = cache->MaxPagesPerWrite(); 3072 if (maxPages < 0 || maxPages > kMaxPages) 3073 maxPages = kMaxPages; 3074 3075 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3076 | HEAP_DONT_LOCK_KERNEL_SPACE; 3077 3078 PageWriteWrapper stackWrappersPool[2]; 3079 PageWriteWrapper* stackWrappers[1]; 3080 PageWriteWrapper* wrapperPool 3081 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3082 PageWriteWrapper** wrappers 3083 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages]; 3084 if (wrapperPool == NULL || wrappers == NULL) { 3085 // don't fail, just limit our capabilities 3086 delete[] wrapperPool; 3087 delete[] wrappers; 3088 wrapperPool = stackWrappersPool; 3089 wrappers = stackWrappers; 3090 maxPages = 1; 3091 } 3092 3093 int32 nextWrapper = 0; 3094 int32 usedWrappers = 0; 3095 3096 PageWriteTransfer transfer; 3097 bool transferEmpty = true; 3098 3099 VMCachePagesTree::Iterator it 3100 = cache->pages.GetIterator(firstPage, true, true); 3101 3102 while (true) { 3103 vm_page* page = it.Next(); 3104 if (page == NULL || page->cache_offset >= endPage) { 3105 if (transferEmpty) 3106 break; 3107 3108 page = NULL; 3109 } 3110 3111 if (page != NULL) { 3112 if (page->busy 3113 || (page->State() != PAGE_STATE_MODIFIED 3114 && !vm_test_map_modification(page))) { 3115 page = NULL; 3116 } 3117 } 3118 3119 PageWriteWrapper* wrapper = NULL; 3120 if (page != NULL) { 3121 wrapper = &wrapperPool[nextWrapper++]; 3122 if (nextWrapper > maxPages) 3123 nextWrapper = 0; 3124 3125 DEBUG_PAGE_ACCESS_START(page); 3126 3127 wrapper->SetTo(page); 3128 3129 if (transferEmpty || transfer.AddPage(page)) { 3130 if (transferEmpty) { 3131 transfer.SetTo(NULL, page, maxPages); 3132 transferEmpty = false; 3133 } 3134 3135 DEBUG_PAGE_ACCESS_END(page); 3136 3137 wrappers[usedWrappers++] = wrapper; 3138 continue; 3139 } 3140 3141 DEBUG_PAGE_ACCESS_END(page); 3142 } 3143 3144 if (transferEmpty) 3145 continue; 3146 3147 cache->Unlock(); 3148 status_t status = transfer.Schedule(0); 3149 cache->Lock(); 3150 3151 for (int32 i = 0; i < usedWrappers; i++) 3152 wrappers[i]->Done(status); 3153 3154 usedWrappers = 0; 3155 3156 if (page != NULL) { 3157 transfer.SetTo(NULL, page, maxPages); 3158 wrappers[usedWrappers++] = wrapper; 3159 } else 3160 transferEmpty = true; 3161 } 3162 3163 if (wrapperPool != stackWrappersPool) { 3164 delete[] wrapperPool; 3165 delete[] wrappers; 3166 } 3167 3168 return B_OK; 3169 } 3170 3171 3172 /*! You need to hold the VMCache lock when calling this function. 3173 Note that the cache lock is released in this function. 3174 */ 3175 status_t 3176 vm_page_write_modified_pages(VMCache *cache) 3177 { 3178 return vm_page_write_modified_page_range(cache, 0, 3179 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3180 } 3181 3182 3183 /*! Schedules the page writer to write back the specified \a page. 3184 Note, however, that it might not do this immediately, and it can well 3185 take several seconds until the page is actually written out. 3186 */ 3187 void 3188 vm_page_schedule_write_page(vm_page *page) 3189 { 3190 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3191 3192 vm_page_requeue(page, false); 3193 3194 sPageWriterCondition.WakeUp(); 3195 } 3196 3197 3198 /*! Cache must be locked. 
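	Requeues all modified, non-busy pages in the given range (the page at
	\a endPage is not included) and wakes up the page writer if at least one
	page was scheduled.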
3199 */ 3200 void 3201 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3202 uint32 endPage) 3203 { 3204 uint32 modified = 0; 3205 for (VMCachePagesTree::Iterator it 3206 = cache->pages.GetIterator(firstPage, true, true); 3207 vm_page *page = it.Next();) { 3208 if (page->cache_offset >= endPage) 3209 break; 3210 3211 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3212 DEBUG_PAGE_ACCESS_START(page); 3213 vm_page_requeue(page, false); 3214 modified++; 3215 DEBUG_PAGE_ACCESS_END(page); 3216 } 3217 } 3218 3219 if (modified > 0) 3220 sPageWriterCondition.WakeUp(); 3221 } 3222 3223 3224 void 3225 vm_page_init_num_pages(kernel_args *args) 3226 { 3227 // calculate the size of memory by looking at the physical_memory_range array 3228 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3229 page_num_t physicalPagesEnd = sPhysicalPageOffset 3230 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3231 3232 sNonExistingPages = 0; 3233 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3234 3235 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3236 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3237 if (start > physicalPagesEnd) 3238 sNonExistingPages += start - physicalPagesEnd; 3239 physicalPagesEnd = start 3240 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3241 3242 #ifdef LIMIT_AVAILABLE_MEMORY 3243 page_num_t available 3244 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3245 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3246 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3247 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3248 break; 3249 } 3250 #endif 3251 } 3252 3253 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3254 sPhysicalPageOffset, physicalPagesEnd)); 3255 3256 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3257 } 3258 3259 3260 status_t 3261 vm_page_init(kernel_args *args) 3262 { 3263 TRACE(("vm_page_init: entry\n")); 3264 3265 // init page queues 3266 sModifiedPageQueue.Init("modified pages queue"); 3267 sInactivePageQueue.Init("inactive pages queue"); 3268 sActivePageQueue.Init("active pages queue"); 3269 sCachedPageQueue.Init("cached pages queue"); 3270 sFreePageQueue.Init("free pages queue"); 3271 sClearPageQueue.Init("clear pages queue"); 3272 3273 new (&sPageReservationWaiters) PageReservationWaiterList; 3274 3275 // map in the new free page table 3276 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3277 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3278 3279 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3280 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3281 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3282 3283 // initialize the free page table 3284 for (uint32 i = 0; i < sNumPages; i++) { 3285 sPages[i].Init(sPhysicalPageOffset + i); 3286 sFreePageQueue.Append(&sPages[i]); 3287 3288 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3289 sPages[i].allocation_tracking_info.Clear(); 3290 #endif 3291 } 3292 3293 sUnreservedFreePages = sNumPages; 3294 3295 TRACE(("initialized table\n")); 3296 3297 // mark the ranges between usable physical memory unused 3298 phys_addr_t previousEnd = 0; 3299 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3300 phys_addr_t base = args->physical_memory_range[i].start; 3301 phys_size_t size = args->physical_memory_range[i].size; 3302 if (base > previousEnd) { 3303 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3304 (base - previousEnd) / B_PAGE_SIZE, false); 3305 } 3306 previousEnd = base + size; 3307 } 3308 3309 // mark the allocated physical page ranges wired 3310 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3311 mark_page_range_in_use( 3312 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3313 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3314 } 3315 3316 // The target of actually free pages. This must be at least the system 3317 // reserve, but should be a few more pages, so we don't have to extract 3318 // a cached page with each allocation. 3319 sFreePagesTarget = VM_PAGE_RESERVE_USER 3320 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3321 3322 // The target of free + cached and inactive pages. On low-memory machines 3323 // keep things tight. free + cached is the pool of immediately allocatable 3324 // pages. We want a few inactive pages, so when we're actually paging, we 3325 // have a reasonably large set of pages to work with. 3326 if (sUnreservedFreePages < 16 * 1024) { 3327 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3328 sInactivePagesTarget = sFreePagesTarget / 3; 3329 } else { 3330 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3331 sInactivePagesTarget = sFreePagesTarget / 2; 3332 } 3333 3334 TRACE(("vm_page_init: exit\n")); 3335 3336 return B_OK; 3337 } 3338 3339 3340 status_t 3341 vm_page_init_post_area(kernel_args *args) 3342 { 3343 void *dummy; 3344 3345 dummy = sPages; 3346 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3347 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3348 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3349 3350 add_debugger_command("page_stats", &dump_page_stats, 3351 "Dump statistics about page usage"); 3352 add_debugger_command_etc("page", &dump_page, 3353 "Dump page info", 3354 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3355 "Prints information for the physical page. If neither \"-p\" nor\n" 3356 "\"-v\" are given, the provided address is interpreted as address of\n" 3357 "the vm_page data structure for the page in question. If \"-p\" is\n" 3358 "given, the address is the physical address of the page. If \"-v\" is\n" 3359 "given, the address is interpreted as virtual address in the current\n" 3360 "thread's address space and for the page it is mapped to (if any)\n" 3361 "information are printed. 
If \"-m\" is specified, the command will\n" 3362 "search all known address spaces for mappings to that page and print\n" 3363 "them.\n", 0); 3364 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3365 add_debugger_command("find_page", &find_page, 3366 "Find out which queue a page is actually in"); 3367 3368 #ifdef TRACK_PAGE_USAGE_STATS 3369 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3370 "Dumps statistics about page usage counts", 3371 "\n" 3372 "Dumps statistics about page usage counts.\n", 3373 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3374 #endif 3375 3376 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3377 add_debugger_command_etc("page_allocations_per_caller", 3378 &dump_page_allocations_per_caller, 3379 "Dump current page allocations summed up per caller", 3380 "[ -d <caller> ] [ -r ]\n" 3381 "The current allocations will by summed up by caller (their count)\n" 3382 "printed in decreasing order by count.\n" 3383 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3384 "including the respective stack trace.\n" 3385 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3386 "the information, so the next command invocation will only show the\n" 3387 "allocations made after the reset.\n", 0); 3388 add_debugger_command_etc("page_allocation_infos", 3389 &dump_page_allocation_infos, 3390 "Dump current page allocations", 3391 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3392 "[ --thread <thread ID> ]\n" 3393 "The current allocations filtered by optional values will be printed.\n" 3394 "The optional \"-p\" page number filters for a specific page,\n" 3395 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3396 "and/or threads can be filtered (these only work if a corresponding\n" 3397 "tracing entry is still available).\n" 3398 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3399 "callers are printed, where available\n", 0); 3400 #endif 3401 3402 return B_OK; 3403 } 3404 3405 3406 status_t 3407 vm_page_init_post_thread(kernel_args *args) 3408 { 3409 new (&sFreePageCondition) ConditionVariable; 3410 3411 // create a kernel thread to clear out pages 3412 3413 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3414 B_LOWEST_ACTIVE_PRIORITY, NULL); 3415 resume_thread(thread); 3416 3417 // start page writer 3418 3419 sPageWriterCondition.Init("page writer"); 3420 3421 thread = spawn_kernel_thread(&page_writer, "page writer", 3422 B_NORMAL_PRIORITY + 1, NULL); 3423 resume_thread(thread); 3424 3425 // start page daemon 3426 3427 sPageDaemonCondition.Init("page daemon"); 3428 3429 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3430 B_NORMAL_PRIORITY, NULL); 3431 resume_thread(thread); 3432 3433 return B_OK; 3434 } 3435 3436 3437 status_t 3438 vm_mark_page_inuse(page_num_t page) 3439 { 3440 return vm_mark_page_range_inuse(page, 1); 3441 } 3442 3443 3444 status_t 3445 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3446 { 3447 return mark_page_range_in_use(startPage, length, false); 3448 } 3449 3450 3451 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3452 */ 3453 void 3454 vm_page_unreserve_pages(vm_page_reservation* reservation) 3455 { 3456 uint32 count = reservation->count; 3457 reservation->count = 0; 3458 3459 if (count == 0) 3460 return; 3461 3462 TA(UnreservePages(count)); 3463 3464 unreserve_pages(count); 3465 } 3466 3467 3468 /*! With this call, you can reserve a number of free pages in the system. 
3469 They will only be handed out to someone who has actually reserved them. 3470 This call returns as soon as the number of requested pages has been 3471 reached. 3472 The caller must not hold any cache lock or the function might deadlock. 3473 */ 3474 void 3475 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3476 int priority) 3477 { 3478 reservation->count = count; 3479 3480 if (count == 0) 3481 return; 3482 3483 TA(ReservePages(count)); 3484 3485 reserve_pages(count, priority, false); 3486 } 3487 3488 3489 bool 3490 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3491 int priority) 3492 { 3493 if (count == 0) { 3494 reservation->count = count; 3495 return true; 3496 } 3497 3498 uint32 remaining = reserve_pages(count, priority, true); 3499 if (remaining == 0) { 3500 TA(ReservePages(count)); 3501 reservation->count = count; 3502 return true; 3503 } 3504 3505 unreserve_pages(count - remaining); 3506 3507 return false; 3508 } 3509 3510 3511 vm_page * 3512 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3513 { 3514 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3515 ASSERT(pageState != PAGE_STATE_FREE); 3516 ASSERT(pageState != PAGE_STATE_CLEAR); 3517 3518 ASSERT(reservation->count > 0); 3519 reservation->count--; 3520 3521 VMPageQueue* queue; 3522 VMPageQueue* otherQueue; 3523 3524 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3525 queue = &sClearPageQueue; 3526 otherQueue = &sFreePageQueue; 3527 } else { 3528 queue = &sFreePageQueue; 3529 otherQueue = &sClearPageQueue; 3530 } 3531 3532 ReadLocker locker(sFreePageQueuesLock); 3533 3534 vm_page* page = queue->RemoveHeadUnlocked(); 3535 if (page == NULL) { 3536 // if the primary queue was empty, grab the page from the 3537 // secondary queue 3538 page = otherQueue->RemoveHeadUnlocked(); 3539 3540 if (page == NULL) { 3541 // Unlikely, but possible: the page we have reserved has moved 3542 // between the queues after we checked the first queue. Grab the 3543 // write locker to make sure this doesn't happen again. 
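// (The read lock is released rather than upgraded in place; it is
// re-acquired below once a page has been taken under the write lock.)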
3544 locker.Unlock(); 3545 WriteLocker writeLocker(sFreePageQueuesLock); 3546
3547 page = queue->RemoveHead(); 3548 if (page == NULL) 3549 page = otherQueue->RemoveHead(); 3550
3551 if (page == NULL) { 3552 panic("Had reserved page, but there is none!"); 3553 return NULL; 3554 } 3555
3556 // downgrade to read lock 3557 locker.Lock(); 3558 } 3559 } 3560
3561 if (page->CacheRef() != NULL) 3562 panic("supposed to be free page %p has cache\n", page); 3563
3564 DEBUG_PAGE_ACCESS_START(page); 3565
3566 int oldPageState = page->State(); 3567 page->SetState(pageState); 3568 page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3569 page->usage_count = 0; 3570 page->accessed = false; 3571 page->modified = false; 3572
3573 locker.Unlock(); 3574
3575 if (pageState < PAGE_STATE_FIRST_UNQUEUED) 3576 sPageQueues[pageState].AppendUnlocked(page); 3577
3578 // clear the page, if we had to take it from the free queue and a clear 3579 // page was requested
3580 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR) 3581 clear_page(page); 3582
3583 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3584 page->allocation_tracking_info.Init( 3585 TA(AllocatePage(page->physical_page_number))); 3586 #else 3587 TA(AllocatePage(page->physical_page_number)); 3588 #endif 3589
3590 return page; 3591 } 3592 3593
3594 static void 3595 allocate_page_run_cleanup(VMPageQueue::PageList& freePages, 3596 VMPageQueue::PageList& clearPages) 3597 {
3598 while (vm_page* page = freePages.RemoveHead()) { 3599 page->busy = false; 3600 page->SetState(PAGE_STATE_FREE); 3601 DEBUG_PAGE_ACCESS_END(page); 3602 sFreePageQueue.PrependUnlocked(page); 3603 } 3604
3605 while (vm_page* page = clearPages.RemoveHead()) { 3606 page->busy = false; 3607 page->SetState(PAGE_STATE_CLEAR); 3608 DEBUG_PAGE_ACCESS_END(page); 3609 sClearPageQueue.PrependUnlocked(page); 3610 } 3611
3612 sFreePageCondition.NotifyAll(); 3613 } 3614 3615
3616 /*! Tries to allocate a contiguous run of \a length pages starting at 3617 index \a start. 3618
3619 The caller must have write-locked the free/clear page queues. The function 3620 will unlock them regardless of whether it succeeds or fails. 3621
3622 If the function fails, it cleans up after itself, i.e. it will free all 3623 pages it managed to allocate. 3624
3625 \param start The start index (into \c sPages) of the run. 3626 \param length The number of pages to allocate.
3627 \param flags Page allocation flags. Encodes the state the function shall 3628 set the allocated pages to, whether the pages shall be marked busy 3629 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3630 (VM_PAGE_ALLOC_CLEAR).
3631 \param freeClearQueueLocker WriteLocker for the free/clear page 3632 queues, in locked state. Will be unlocked by the function.
3633 \return The index of the first page that could not be allocated. \a length 3634 is returned when the function was successful. 3635 */
3636 static page_num_t 3637 allocate_page_run(page_num_t start, page_num_t length, uint32 flags, 3638 WriteLocker& freeClearQueueLocker) 3639 {
3640 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3641 ASSERT(pageState != PAGE_STATE_FREE); 3642 ASSERT(pageState != PAGE_STATE_CLEAR); 3643 ASSERT(start + length <= sNumPages); 3644
3645 // Pull the free/clear pages out of their respective queues. Cached pages 3646 // are allocated later.
3647 page_num_t cachedPages = 0; 3648 VMPageQueue::PageList freePages; 3649 VMPageQueue::PageList clearPages; 3650 page_num_t i = 0; 3651 for (; i < length; i++) { 3652 bool pageAllocated = true; 3653 bool noPage = false; 3654 vm_page& page = sPages[start + i]; 3655 switch (page.State()) { 3656 case PAGE_STATE_CLEAR: 3657 DEBUG_PAGE_ACCESS_START(&page); 3658 sClearPageQueue.Remove(&page); 3659 clearPages.Add(&page); 3660 break; 3661 case PAGE_STATE_FREE: 3662 DEBUG_PAGE_ACCESS_START(&page); 3663 sFreePageQueue.Remove(&page); 3664 freePages.Add(&page); 3665 break; 3666 case PAGE_STATE_CACHED: 3667 // We allocate cached pages later. 3668 cachedPages++; 3669 pageAllocated = false; 3670 break; 3671 3672 default: 3673 // Probably a page was cached when our caller checked. Now it's 3674 // gone and we have to abort. 3675 noPage = true; 3676 break; 3677 } 3678 3679 if (noPage) 3680 break; 3681 3682 if (pageAllocated) { 3683 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3684 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3685 page.usage_count = 0; 3686 page.accessed = false; 3687 page.modified = false; 3688 } 3689 } 3690 3691 if (i < length) { 3692 // failed to allocate a page -- free all that we've got 3693 allocate_page_run_cleanup(freePages, clearPages); 3694 return i; 3695 } 3696 3697 freeClearQueueLocker.Unlock(); 3698 3699 if (cachedPages > 0) { 3700 // allocate the pages that weren't free but cached 3701 page_num_t freedCachedPages = 0; 3702 page_num_t nextIndex = start; 3703 vm_page* freePage = freePages.Head(); 3704 vm_page* clearPage = clearPages.Head(); 3705 while (cachedPages > 0) { 3706 // skip, if we've already got the page 3707 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3708 freePage = freePages.GetNext(freePage); 3709 nextIndex++; 3710 continue; 3711 } 3712 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3713 clearPage = clearPages.GetNext(clearPage); 3714 nextIndex++; 3715 continue; 3716 } 3717 3718 // free the page, if it is still cached 3719 vm_page& page = sPages[nextIndex]; 3720 if (!free_cached_page(&page, false)) { 3721 // TODO: if the page turns out to have been freed already, 3722 // there would be no need to fail 3723 break; 3724 } 3725 3726 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3727 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3728 page.usage_count = 0; 3729 page.accessed = false; 3730 page.modified = false; 3731 3732 freePages.InsertBefore(freePage, &page); 3733 freedCachedPages++; 3734 cachedPages--; 3735 nextIndex++; 3736 } 3737 3738 // If we have freed cached pages, we need to balance things. 3739 if (freedCachedPages > 0) 3740 unreserve_pages(freedCachedPages); 3741 3742 if (nextIndex - start < length) { 3743 // failed to allocate all cached pages -- free all that we've got 3744 freeClearQueueLocker.Lock(); 3745 allocate_page_run_cleanup(freePages, clearPages); 3746 freeClearQueueLocker.Unlock(); 3747 3748 return nextIndex - start; 3749 } 3750 } 3751 3752 // clear pages, if requested 3753 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3754 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3755 vm_page* page = it.Next();) { 3756 clear_page(page); 3757 } 3758 } 3759 3760 // add pages to target queue 3761 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3762 freePages.MoveFrom(&clearPages); 3763 sPageQueues[pageState].AppendUnlocked(freePages, length); 3764 } 3765 3766 // Note: We don't unreserve the pages since we pulled them out of the 3767 // free/clear queues without adjusting sUnreservedFreePages. 
3768 3769 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3770 AbstractTraceEntryWithStackTrace* traceEntry 3771 = TA(AllocatePageRun(start, length)); 3772 3773 for (page_num_t i = start; i < start + length; i++) 3774 sPages[i].allocation_tracking_info.Init(traceEntry); 3775 #else 3776 TA(AllocatePageRun(start, length)); 3777 #endif 3778 3779 return length; 3780 } 3781 3782 3783 /*! Allocate a physically contiguous range of pages. 3784 3785 \param flags Page allocation flags. Encodes the state the function shall 3786 set the allocated pages to, whether the pages shall be marked busy 3787 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3788 (VM_PAGE_ALLOC_CLEAR). 3789 \param length The number of contiguous pages to allocate. 3790 \param restrictions Restrictions to the physical addresses of the page run 3791 to allocate, including \c low_address, the first acceptable physical 3792 address where the page run may start, \c high_address, the last 3793 acceptable physical address where the page run may end (i.e. it must 3794 hold \code runStartAddress + length <= high_address \endcode), 3795 \c alignment, the alignment of the page run start address, and 3796 \c boundary, multiples of which the page run must not cross. 3797 Values set to \c 0 are ignored. 3798 \param priority The page reservation priority (as passed to 3799 vm_page_reserve_pages()). 3800 \return The first page of the allocated page run on success; \c NULL 3801 when the allocation failed. 3802 */ 3803 vm_page* 3804 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3805 const physical_address_restrictions* restrictions, int priority) 3806 { 3807 // compute start and end page index 3808 page_num_t requestedStart 3809 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3810 - sPhysicalPageOffset; 3811 page_num_t start = requestedStart; 3812 page_num_t end; 3813 if (restrictions->high_address > 0) { 3814 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3815 sPhysicalPageOffset) 3816 - sPhysicalPageOffset; 3817 end = std::min(end, sNumPages); 3818 } else 3819 end = sNumPages; 3820 3821 // compute alignment mask 3822 page_num_t alignmentMask 3823 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3824 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3825 // alignment must be a power of 2 3826 3827 // compute the boundary mask 3828 uint32 boundaryMask = 0; 3829 if (restrictions->boundary != 0) { 3830 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3831 // boundary must be a power of two and not less than alignment and 3832 // length 3833 ASSERT(((boundary - 1) & boundary) == 0); 3834 ASSERT(boundary >= alignmentMask + 1); 3835 ASSERT(boundary >= length); 3836 3837 boundaryMask = -boundary; 3838 } 3839 3840 vm_page_reservation reservation; 3841 vm_page_reserve_pages(&reservation, length, priority); 3842 3843 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3844 3845 // First we try to get a run with free pages only. If that fails, we also 3846 // consider cached pages. If there are only few free pages and many cached 3847 // ones, the odds are that we won't find enough contiguous ones, so we skip 3848 // the first iteration in this case. 3849 int32 freePages = sUnreservedFreePages; 3850 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3851 3852 for (;;) { 3853 if (alignmentMask != 0 || boundaryMask != 0) { 3854 page_num_t offsetStart = start + sPhysicalPageOffset; 3855 3856 // enforce alignment 3857 if ((offsetStart & alignmentMask) != 0) 3858 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3859 3860 // enforce boundary 3861 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3862 + length - 1)) & boundaryMask) != 0) { 3863 offsetStart = (offsetStart + length - 1) & boundaryMask; 3864 } 3865 3866 start = offsetStart - sPhysicalPageOffset; 3867 } 3868 3869 if (start + length > end) { 3870 if (useCached == 0) { 3871 // The first iteration with free pages only was unsuccessful. 3872 // Try again also considering cached pages. 3873 useCached = 1; 3874 start = requestedStart; 3875 continue; 3876 } 3877 3878 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3879 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3880 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3881 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart, 3882 end, restrictions->alignment, restrictions->boundary); 3883 3884 freeClearQueueLocker.Unlock(); 3885 vm_page_unreserve_pages(&reservation); 3886 return NULL; 3887 } 3888 3889 bool foundRun = true; 3890 page_num_t i; 3891 for (i = 0; i < length; i++) { 3892 uint32 pageState = sPages[start + i].State(); 3893 if (pageState != PAGE_STATE_FREE 3894 && pageState != PAGE_STATE_CLEAR 3895 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3896 foundRun = false; 3897 break; 3898 } 3899 } 3900 3901 if (foundRun) { 3902 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3903 if (i == length) 3904 return &sPages[start]; 3905 3906 // apparently a cached page couldn't be allocated -- skip it and 3907 // continue 3908 freeClearQueueLocker.Lock(); 3909 } 3910 3911 start += i + 1; 3912 } 3913 } 3914 3915 3916 vm_page * 3917 vm_page_at_index(int32 index) 3918 { 3919 return &sPages[index]; 3920 } 3921 3922 3923 vm_page * 3924 vm_lookup_page(page_num_t pageNumber) 3925 { 3926 if (pageNumber < sPhysicalPageOffset) 3927 return NULL; 3928 3929 pageNumber -= sPhysicalPageOffset; 3930 if (pageNumber >= sNumPages) 3931 return NULL; 3932 3933 return &sPages[pageNumber]; 3934 } 3935 3936 3937 bool 3938 vm_page_is_dummy(struct vm_page *page) 3939 { 3940 return page < sPages || page >= sPages + sNumPages; 3941 } 3942 3943 3944 /*! Free the page that belonged to a certain cache. 3945 You can use vm_page_set_state() manually if you prefer, but only 3946 if the page does not equal PAGE_STATE_MODIFIED. 3947 3948 \param cache The cache the page was previously owned by or NULL. The page 3949 must have been removed from its cache before calling this method in 3950 either case. 3951 \param page The page to free. 3952 \param reservation If not NULL, the page count of the reservation will be 3953 incremented, thus allowing to allocate another page for the freed one at 3954 a later time. 
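	If \a reservation is \c NULL, the freed page is instead returned to the
	pool of generally available pages via unreserve_pages().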
3955 */ 3956 void 3957 vm_page_free_etc(VMCache* cache, vm_page* page, 3958 vm_page_reservation* reservation) 3959 {
3960 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3961 && page->State() != PAGE_STATE_CLEAR); 3962
3963 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 3964 atomic_add(&sModifiedTemporaryPages, -1); 3965
3966 free_page(page, false); 3967 if (reservation == NULL) 3968 unreserve_pages(1); 3969 } 3970 3971
3972 void 3973 vm_page_set_state(vm_page *page, int pageState) 3974 {
3975 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3976 && page->State() != PAGE_STATE_CLEAR); 3977
3978 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 3979 free_page(page, pageState == PAGE_STATE_CLEAR); 3980 unreserve_pages(1); 3981 } else 3982 set_page_state(page, pageState); 3983 } 3984 3985
3986 /*! Moves a page to either the tail or the head of its current queue, 3987 depending on \a tail. 3988 The page must have a cache and the cache must be locked! 3989 */
3990 void 3991 vm_page_requeue(struct vm_page *page, bool tail) 3992 {
3993 PAGE_ASSERT(page, page->Cache() != NULL); 3994 page->Cache()->AssertLocked();
3995 // DEBUG_PAGE_ACCESS_CHECK(page); 3996 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 3997 // when it requeues busy pages. The reason is that vm_soft_fault() 3998 // (respectively fault_get_page()) and the file cache keep newly 3999 // allocated pages accessed while they are reading them from disk. It 4000 // would probably be better to change that code and reenable this 4001 // check. 4002
4003 VMPageQueue *queue = NULL; 4004
4005 switch (page->State()) { 4006 case PAGE_STATE_ACTIVE: 4007 queue = &sActivePageQueue; 4008 break;
4009 case PAGE_STATE_INACTIVE: 4010 queue = &sInactivePageQueue; 4011 break;
4012 case PAGE_STATE_MODIFIED: 4013 queue = &sModifiedPageQueue; 4014 break;
4015 case PAGE_STATE_CACHED: 4016 queue = &sCachedPageQueue; 4017 break;
4018 case PAGE_STATE_FREE: 4019 case PAGE_STATE_CLEAR: 4020 panic("vm_page_requeue() called for free/clear page %p", page); 4021 return;
4022 case PAGE_STATE_WIRED: 4023 case PAGE_STATE_UNUSED: 4024 return;
4025 default: 4026 panic("vm_page_requeue: vm_page %p in invalid state %d\n", 4027 page, page->State()); 4028 break; 4029 } 4030
4031 queue->RequeueUnlocked(page, tail); 4032 } 4033 4034
4035 page_num_t 4036 vm_page_num_pages(void) 4037 { 4038 return sNumPages - sNonExistingPages; 4039 } 4040 4041
4042 /*! There is a subtle distinction between the page counts returned by 4043 this function and vm_page_num_free_pages():
4044 The latter returns the number of pages that are completely uncommitted, 4045 whereas this one returns the number of pages that are available for 4046 use by being reclaimed as well (in other words, it also counts things like 4047 cached pages as available). 4048 */
4049 page_num_t 4050 vm_page_num_available_pages(void) 4051 { 4052 return vm_available_memory() / B_PAGE_SIZE; 4053 } 4054 4055
4056 page_num_t 4057 vm_page_num_free_pages(void) 4058 { 4059 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 4060 return count > 0 ? count : 0; 4061 } 4062 4063
4064 page_num_t 4065 vm_page_num_unused_pages(void) 4066 { 4067 int32 count = sUnreservedFreePages; 4068 return count > 0 ? count : 0; 4069 } 4070 4071
4072 void 4073 vm_page_get_stats(system_info *info) 4074 {
4075 // Note: there's no locking protecting any of the queues or counters here, 4076 // so we run the risk of getting bogus values when evaluating them 4077 // throughout this function.
As these stats are for informational purposes 4078 // only, it is not really worth introducing such locking. Therefore we just 4079 // ensure that we don't under- or overflow any of the values. 4080
4081 // The pages used for the block cache buffers. Those should not be counted 4082 // as used but as cached pages.
4083 // TODO: We should subtract the blocks that are in use ATM, since those 4084 // can't really be freed in a low memory situation.
4085 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE; 4086 info->block_cache_pages = blockCachePages; 4087
4088 // Non-temporary modified pages are special as they represent pages that 4089 // can be written back, so they could be freed if necessary, which for our 4090 // purposes basically makes them cached pages with a higher overhead. The 4091 // modified queue count is therefore split into temporary and non-temporary 4092 // counts that are then added to the corresponding number.
4093 page_num_t modifiedNonTemporaryPages 4094 = (sModifiedPageQueue.Count() - sModifiedTemporaryPages); 4095
4096 info->max_pages = vm_page_num_pages(); 4097 info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages 4098 + blockCachePages; 4099
4100 // max_pages is composed of: 4101 // active + inactive + unused + wired + modified + cached + free + clear 4102 // So taking out the cached (including modified non-temporary), free and 4103 // clear ones leaves us with all used pages.
4104 uint32 subtractPages = info->cached_pages + sFreePageQueue.Count() 4105 + sClearPageQueue.Count();
4106 info->used_pages = subtractPages > info->max_pages 4107 ? 0 : info->max_pages - subtractPages; 4108
4109 if (info->used_pages + info->cached_pages > info->max_pages) { 4110 // Something was shuffled around while we were summing up the counts. 4111 // Make the values sane, preferring the worse case of more used pages. 4112 info->cached_pages = info->max_pages - info->used_pages; 4113 } 4114
4115 info->page_faults = vm_num_page_faults(); 4116 info->ignored_pages = sIgnoredPages; 4117
4118 // TODO: We don't consider pages used for page directories/tables yet. 4119 } 4120 4121
4122 /*! Returns the greatest address within the last page of accessible physical 4123 memory.
4124 The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff 4125 means that the last page ends at exactly 4 GB. 4126 */
4127 phys_addr_t 4128 vm_page_max_address() 4129 { 4130 return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1; 4131 } 4132 4133
4134 RANGE_MARKER_FUNCTION_END(vm_page) 4135
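// Editor's note: the following sketch is not part of the original file. It
// merely illustrates how the reservation/allocation API implemented above is
// typically used by kernel code; the chosen priority constant and page state
// are assumptions for the example, not requirements of the API.
#if 0
static void
example_allocate_one_clear_page()
{
	// reserve one page up front (may block until a page becomes available)
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);

	// take the reserved page; it is returned cleared and in the wired state
	vm_page* page = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);

	// ... map and use the page ...

	// give the page back and release the (now empty) reservation
	vm_page_set_state(page, PAGE_STATE_FREE);
	vm_page_unreserve_pages(&reservation);
}
#endif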