1 /* 2 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <string.h> 12 #include <stdlib.h> 13 14 #include <algorithm> 15 16 #include <KernelExport.h> 17 #include <OS.h> 18 19 #include <AutoDeleter.h> 20 21 #include <arch/cpu.h> 22 #include <arch/vm_translation_map.h> 23 #include <block_cache.h> 24 #include <boot/kernel_args.h> 25 #include <condition_variable.h> 26 #include <elf.h> 27 #include <heap.h> 28 #include <kernel.h> 29 #include <low_resource_manager.h> 30 #include <thread.h> 31 #include <tracing.h> 32 #include <util/AutoLock.h> 33 #include <vfs.h> 34 #include <vm/vm.h> 35 #include <vm/vm_priv.h> 36 #include <vm/vm_page.h> 37 #include <vm/VMAddressSpace.h> 38 #include <vm/VMArea.h> 39 #include <vm/VMCache.h> 40 41 #include "IORequest.h" 42 #include "PageCacheLocker.h" 43 #include "VMAnonymousCache.h" 44 #include "VMPageQueue.h" 45 46 47 //#define TRACE_VM_PAGE 48 #ifdef TRACE_VM_PAGE 49 # define TRACE(x) dprintf x 50 #else 51 # define TRACE(x) ; 52 #endif 53 54 //#define TRACE_VM_DAEMONS 55 #ifdef TRACE_VM_DAEMONS 56 #define TRACE_DAEMON(x...) dprintf(x) 57 #else 58 #define TRACE_DAEMON(x...) do {} while (false) 59 #endif 60 61 //#define TRACK_PAGE_USAGE_STATS 1 62 63 #define PAGE_ASSERT(page, condition) \ 64 ASSERT_PRINT((condition), "page: %p", (page)) 65 66 #define SCRUB_SIZE 16 67 // this many pages will be cleared at once in the page scrubber thread 68 69 #define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY 70 // maximum I/O priority of the page writer 71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000 72 // the maximum I/O priority shall be reached when this many pages need to 73 // be written 74 75 76 // The page reserve an allocation of the certain priority must not touch. 77 static const size_t kPageReserveForPriority[] = { 78 VM_PAGE_RESERVE_USER, // user 79 VM_PAGE_RESERVE_SYSTEM, // system 80 0 // VIP 81 }; 82 83 // Minimum number of free pages the page daemon will try to achieve. 84 static uint32 sFreePagesTarget; 85 static uint32 sFreeOrCachedPagesTarget; 86 static uint32 sInactivePagesTarget; 87 88 // Wait interval between page daemon runs. 89 static const bigtime_t kIdleScanWaitInterval = 1000000LL; // 1 sec 90 static const bigtime_t kBusyScanWaitInterval = 500000LL; // 0.5 sec 91 92 // Number of idle runs after which we want to have processed the full active 93 // queue. 94 static const uint32 kIdleRunsForFullQueue = 20; 95 96 // Maximum limit for the vm_page::usage_count. 97 static const int32 kPageUsageMax = 64; 98 // vm_page::usage_count buff an accessed page receives in a scan. 99 static const int32 kPageUsageAdvance = 3; 100 // vm_page::usage_count debuff an unaccessed page receives in a scan. 
101 static const int32 kPageUsageDecline = 1; 102 103 int32 gMappedPagesCount; 104 105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 106 107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 113 114 static vm_page *sPages; 115 static page_num_t sPhysicalPageOffset; 116 static page_num_t sNumPages; 117 static page_num_t sNonExistingPages; 118 // pages in the sPages array that aren't backed by physical memory 119 static uint64 sIgnoredPages; 120 // pages of physical memory ignored by the boot loader (and thus not 121 // available here) 122 static vint32 sUnreservedFreePages; 123 static vint32 sUnsatisfiedPageReservations; 124 static vint32 sModifiedTemporaryPages; 125 126 static ConditionVariable sFreePageCondition; 127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 128 129 // This lock must be used whenever the free or clear page queues are changed. 130 // If you need to work on both queues at the same time, you need to hold a write 131 // lock, otherwise, a read lock suffices (each queue still has a spinlock to 132 // guard against concurrent changes). 133 static rw_lock sFreePageQueuesLock 134 = RW_LOCK_INITIALIZER("free/clear page queues"); 135 136 #ifdef TRACK_PAGE_USAGE_STATS 137 static page_num_t sPageUsageArrays[512]; 138 static page_num_t* sPageUsage = sPageUsageArrays; 139 static page_num_t sPageUsagePageCount; 140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 141 static page_num_t sNextPageUsagePageCount; 142 #endif 143 144 145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 146 147 struct caller_info { 148 addr_t caller; 149 size_t count; 150 }; 151 152 static const int32 kCallerInfoTableSize = 1024; 153 static caller_info sCallerInfoTable[kCallerInfoTableSize]; 154 static int32 sCallerInfoCount = 0; 155 156 static caller_info* get_caller_info(addr_t caller); 157 158 159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page) 160 161 static const addr_t kVMPageCodeAddressRange[] = { 162 RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page) 163 }; 164 165 #endif 166 167 168 RANGE_MARKER_FUNCTION_BEGIN(vm_page) 169 170 171 struct page_stats { 172 int32 totalFreePages; 173 int32 unsatisfiedReservations; 174 int32 cachedPages; 175 }; 176 177 178 struct PageReservationWaiter 179 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 180 Thread* thread; 181 uint32 dontTouch; // reserve not to touch 182 uint32 missing; // pages missing for the reservation 183 int32 threadPriority; 184 185 bool operator<(const PageReservationWaiter& other) const 186 { 187 // Implies an order by descending VM priority (ascending dontTouch) 188 // and (secondarily) descending thread priority. 
189 if (dontTouch != other.dontTouch) 190 return dontTouch < other.dontTouch; 191 return threadPriority > other.threadPriority; 192 } 193 }; 194 195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 196 static PageReservationWaiterList sPageReservationWaiters; 197 198 199 struct DaemonCondition { 200 void Init(const char* name) 201 { 202 mutex_init(&fLock, "daemon condition"); 203 fCondition.Init(this, name); 204 fActivated = false; 205 } 206 207 bool Lock() 208 { 209 return mutex_lock(&fLock) == B_OK; 210 } 211 212 void Unlock() 213 { 214 mutex_unlock(&fLock); 215 } 216 217 bool Wait(bigtime_t timeout, bool clearActivated) 218 { 219 MutexLocker locker(fLock); 220 if (clearActivated) 221 fActivated = false; 222 else if (fActivated) 223 return true; 224 225 ConditionVariableEntry entry; 226 fCondition.Add(&entry); 227 228 locker.Unlock(); 229 230 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 231 } 232 233 void WakeUp() 234 { 235 if (fActivated) 236 return; 237 238 MutexLocker locker(fLock); 239 fActivated = true; 240 fCondition.NotifyOne(); 241 } 242 243 void ClearActivated() 244 { 245 MutexLocker locker(fLock); 246 fActivated = false; 247 } 248 249 private: 250 mutex fLock; 251 ConditionVariable fCondition; 252 bool fActivated; 253 }; 254 255 256 static DaemonCondition sPageWriterCondition; 257 static DaemonCondition sPageDaemonCondition; 258 259 260 #if PAGE_ALLOCATION_TRACING 261 262 namespace PageAllocationTracing { 263 264 class ReservePages : public AbstractTraceEntry { 265 public: 266 ReservePages(uint32 count) 267 : 268 fCount(count) 269 { 270 Initialized(); 271 } 272 273 virtual void AddDump(TraceOutput& out) 274 { 275 out.Print("page reserve: %" B_PRIu32, fCount); 276 } 277 278 private: 279 uint32 fCount; 280 }; 281 282 283 class UnreservePages : public AbstractTraceEntry { 284 public: 285 UnreservePages(uint32 count) 286 : 287 fCount(count) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("page unreserve: %" B_PRId32, fCount); 295 } 296 297 private: 298 uint32 fCount; 299 }; 300 301 302 class AllocatePage 303 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 304 public: 305 AllocatePage(page_num_t pageNumber) 306 : 307 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 308 fPageNumber(pageNumber) 309 { 310 Initialized(); 311 } 312 313 virtual void AddDump(TraceOutput& out) 314 { 315 out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber); 316 } 317 318 private: 319 page_num_t fPageNumber; 320 }; 321 322 323 class AllocatePageRun 324 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 325 public: 326 AllocatePageRun(page_num_t startPage, uint32 length) 327 : 328 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 329 fStartPage(startPage), 330 fLength(length) 331 { 332 Initialized(); 333 } 334 335 virtual void AddDump(TraceOutput& out) 336 { 337 out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %" 338 B_PRIu32, fStartPage, fLength); 339 } 340 341 private: 342 page_num_t fStartPage; 343 uint32 fLength; 344 }; 345 346 347 class FreePage 348 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 349 public: 350 FreePage(page_num_t pageNumber) 351 : 352 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 353 fPageNumber(pageNumber) 354 { 355 Initialized(); 356 } 357 358 virtual void AddDump(TraceOutput& out) 359 { 360 out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber); 361 } 362 363 private: 364 page_num_t 
fPageNumber; 365 }; 366 367 368 class ScrubbingPages : public AbstractTraceEntry { 369 public: 370 ScrubbingPages(uint32 count) 371 : 372 fCount(count) 373 { 374 Initialized(); 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("page scrubbing: %" B_PRId32, fCount); 380 } 381 382 private: 383 uint32 fCount; 384 }; 385 386 387 class ScrubbedPages : public AbstractTraceEntry { 388 public: 389 ScrubbedPages(uint32 count) 390 : 391 fCount(count) 392 { 393 Initialized(); 394 } 395 396 virtual void AddDump(TraceOutput& out) 397 { 398 out.Print("page scrubbed: %" B_PRId32, fCount); 399 } 400 401 private: 402 uint32 fCount; 403 }; 404 405 406 class StolenPage : public AbstractTraceEntry { 407 public: 408 StolenPage() 409 { 410 Initialized(); 411 } 412 413 virtual void AddDump(TraceOutput& out) 414 { 415 out.Print("page stolen"); 416 } 417 }; 418 419 } // namespace PageAllocationTracing 420 421 # define TA(x) new(std::nothrow) PageAllocationTracing::x 422 423 #else 424 # define TA(x) 425 #endif // PAGE_ALLOCATION_TRACING 426 427 428 #if PAGE_DAEMON_TRACING 429 430 namespace PageDaemonTracing { 431 432 class ActivatePage : public AbstractTraceEntry { 433 public: 434 ActivatePage(vm_page* page) 435 : 436 fCache(page->cache), 437 fPage(page) 438 { 439 Initialized(); 440 } 441 442 virtual void AddDump(TraceOutput& out) 443 { 444 out.Print("page activated: %p, cache: %p", fPage, fCache); 445 } 446 447 private: 448 VMCache* fCache; 449 vm_page* fPage; 450 }; 451 452 453 class DeactivatePage : public AbstractTraceEntry { 454 public: 455 DeactivatePage(vm_page* page) 456 : 457 fCache(page->cache), 458 fPage(page) 459 { 460 Initialized(); 461 } 462 463 virtual void AddDump(TraceOutput& out) 464 { 465 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 466 } 467 468 private: 469 VMCache* fCache; 470 vm_page* fPage; 471 }; 472 473 474 class FreedPageSwap : public AbstractTraceEntry { 475 public: 476 FreedPageSwap(vm_page* page) 477 : 478 fCache(page->cache), 479 fPage(page) 480 { 481 Initialized(); 482 } 483 484 virtual void AddDump(TraceOutput& out) 485 { 486 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 487 } 488 489 private: 490 VMCache* fCache; 491 vm_page* fPage; 492 }; 493 494 } // namespace PageDaemonTracing 495 496 # define TD(x) new(std::nothrow) PageDaemonTracing::x 497 498 #else 499 # define TD(x) 500 #endif // PAGE_DAEMON_TRACING 501 502 503 #if PAGE_WRITER_TRACING 504 505 namespace PageWriterTracing { 506 507 class WritePage : public AbstractTraceEntry { 508 public: 509 WritePage(vm_page* page) 510 : 511 fCache(page->Cache()), 512 fPage(page) 513 { 514 Initialized(); 515 } 516 517 virtual void AddDump(TraceOutput& out) 518 { 519 out.Print("page write: %p, cache: %p", fPage, fCache); 520 } 521 522 private: 523 VMCache* fCache; 524 vm_page* fPage; 525 }; 526 527 } // namespace PageWriterTracing 528 529 # define TPW(x) new(std::nothrow) PageWriterTracing::x 530 531 #else 532 # define TPW(x) 533 #endif // PAGE_WRITER_TRACING 534 535 536 #if PAGE_STATE_TRACING 537 538 namespace PageStateTracing { 539 540 class SetPageState : public AbstractTraceEntry { 541 public: 542 SetPageState(vm_page* page, uint8 newState) 543 : 544 fPage(page), 545 fOldState(page->State()), 546 fNewState(newState), 547 fBusy(page->busy), 548 fWired(page->WiredCount() > 0), 549 fMapped(!page->mappings.IsEmpty()), 550 fAccessed(page->accessed), 551 fModified(page->modified) 552 { 553 #if PAGE_STATE_TRACING_STACK_TRACE 554 fStackTrace = capture_tracing_stack_trace( 555 
PAGE_STATE_TRACING_STACK_TRACE, 0, true); 556 // Don't capture userland stack trace to avoid potential 557 // deadlocks. 558 #endif 559 Initialized(); 560 } 561 562 #if PAGE_STATE_TRACING_STACK_TRACE 563 virtual void DumpStackTrace(TraceOutput& out) 564 { 565 out.PrintStackTrace(fStackTrace); 566 } 567 #endif 568 569 virtual void AddDump(TraceOutput& out) 570 { 571 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 572 fBusy ? 'b' : '-', 573 fWired ? 'w' : '-', 574 fMapped ? 'm' : '-', 575 fAccessed ? 'a' : '-', 576 fModified ? 'm' : '-', 577 page_state_to_string(fOldState), 578 page_state_to_string(fNewState)); 579 } 580 581 private: 582 vm_page* fPage; 583 #if PAGE_STATE_TRACING_STACK_TRACE 584 tracing_stack_trace* fStackTrace; 585 #endif 586 uint8 fOldState; 587 uint8 fNewState; 588 bool fBusy : 1; 589 bool fWired : 1; 590 bool fMapped : 1; 591 bool fAccessed : 1; 592 bool fModified : 1; 593 }; 594 595 } // namespace PageStateTracing 596 597 # define TPS(x) new(std::nothrow) PageStateTracing::x 598 599 #else 600 # define TPS(x) 601 #endif // PAGE_STATE_TRACING 602 603 604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 605 606 namespace BKernel { 607 608 class AllocationTrackingCallback { 609 public: 610 virtual ~AllocationTrackingCallback(); 611 612 virtual bool ProcessTrackingInfo( 613 AllocationTrackingInfo* info, 614 page_num_t pageNumber) = 0; 615 }; 616 617 } 618 619 using BKernel::AllocationTrackingCallback; 620 621 622 class AllocationCollectorCallback : public AllocationTrackingCallback { 623 public: 624 AllocationCollectorCallback(bool resetInfos) 625 : 626 fResetInfos(resetInfos) 627 { 628 } 629 630 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 631 page_num_t pageNumber) 632 { 633 if (!info->IsInitialized()) 634 return true; 635 636 addr_t caller = 0; 637 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 638 639 if (traceEntry != NULL && info->IsTraceEntryValid()) { 640 caller = tracing_find_caller_in_stack_trace( 641 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 642 } 643 644 caller_info* callerInfo = get_caller_info(caller); 645 if (callerInfo == NULL) { 646 kprintf("out of space for caller infos\n"); 647 return false; 648 } 649 650 callerInfo->count++; 651 652 if (fResetInfos) 653 info->Clear(); 654 655 return true; 656 } 657 658 private: 659 bool fResetInfos; 660 }; 661 662 663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback { 664 public: 665 AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter, 666 team_id teamFilter, thread_id threadFilter) 667 : 668 fPrintStackTrace(printStackTrace), 669 fPageFilter(pageFilter), 670 fTeamFilter(teamFilter), 671 fThreadFilter(threadFilter) 672 { 673 } 674 675 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 676 page_num_t pageNumber) 677 { 678 if (!info->IsInitialized()) 679 return true; 680 681 if (fPageFilter != 0 && pageNumber != fPageFilter) 682 return true; 683 684 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 685 if (traceEntry != NULL && !info->IsTraceEntryValid()) 686 traceEntry = NULL; 687 688 if (traceEntry != NULL) { 689 if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter) 690 return true; 691 if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter) 692 return true; 693 } else { 694 // we need the info if we have filters set 695 if (fTeamFilter != -1 || fThreadFilter != -1) 696 return true; 697 } 698 699 kprintf("page number %#" B_PRIxPHYSADDR, pageNumber); 700 701 if (traceEntry 
!= NULL) { 702 kprintf(", team: %" B_PRId32 ", thread %" B_PRId32 703 ", time %" B_PRId64 "\n", traceEntry->TeamID(), 704 traceEntry->ThreadID(), traceEntry->Time()); 705 706 if (fPrintStackTrace) 707 tracing_print_stack_trace(traceEntry->StackTrace()); 708 } else 709 kprintf("\n"); 710 711 return true; 712 } 713 714 private: 715 bool fPrintStackTrace; 716 page_num_t fPageFilter; 717 team_id fTeamFilter; 718 thread_id fThreadFilter; 719 }; 720 721 722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback { 723 public: 724 AllocationDetailPrinterCallback(addr_t caller) 725 : 726 fCaller(caller) 727 { 728 } 729 730 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 731 page_num_t pageNumber) 732 { 733 if (!info->IsInitialized()) 734 return true; 735 736 addr_t caller = 0; 737 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 738 if (traceEntry != NULL && !info->IsTraceEntryValid()) 739 traceEntry = NULL; 740 741 if (traceEntry != NULL) { 742 caller = tracing_find_caller_in_stack_trace( 743 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 744 } 745 746 if (caller != fCaller) 747 return true; 748 749 kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber); 750 if (traceEntry != NULL) 751 tracing_print_stack_trace(traceEntry->StackTrace()); 752 753 return true; 754 } 755 756 private: 757 addr_t fCaller; 758 }; 759 760 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 761 762 763 static int 764 find_page(int argc, char **argv) 765 { 766 struct vm_page *page; 767 addr_t address; 768 int32 index = 1; 769 int i; 770 771 struct { 772 const char* name; 773 VMPageQueue* queue; 774 } pageQueueInfos[] = { 775 { "free", &sFreePageQueue }, 776 { "clear", &sClearPageQueue }, 777 { "modified", &sModifiedPageQueue }, 778 { "active", &sActivePageQueue }, 779 { "inactive", &sInactivePageQueue }, 780 { "cached", &sCachedPageQueue }, 781 { NULL, NULL } 782 }; 783 784 if (argc < 2 785 || strlen(argv[index]) <= 2 786 || argv[index][0] != '0' 787 || argv[index][1] != 'x') { 788 kprintf("usage: find_page <address>\n"); 789 return 0; 790 } 791 792 address = strtoul(argv[index], NULL, 0); 793 page = (vm_page*)address; 794 795 for (i = 0; pageQueueInfos[i].name; i++) { 796 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 797 while (vm_page* p = it.Next()) { 798 if (p == page) { 799 kprintf("found page %p in queue %p (%s)\n", page, 800 pageQueueInfos[i].queue, pageQueueInfos[i].name); 801 return 0; 802 } 803 } 804 } 805 806 kprintf("page %p isn't in any queue\n", page); 807 808 return 0; 809 } 810 811 812 const char * 813 page_state_to_string(int state) 814 { 815 switch(state) { 816 case PAGE_STATE_ACTIVE: 817 return "active"; 818 case PAGE_STATE_INACTIVE: 819 return "inactive"; 820 case PAGE_STATE_MODIFIED: 821 return "modified"; 822 case PAGE_STATE_CACHED: 823 return "cached"; 824 case PAGE_STATE_FREE: 825 return "free"; 826 case PAGE_STATE_CLEAR: 827 return "clear"; 828 case PAGE_STATE_WIRED: 829 return "wired"; 830 case PAGE_STATE_UNUSED: 831 return "unused"; 832 default: 833 return "unknown"; 834 } 835 } 836 837 838 static int 839 dump_page(int argc, char **argv) 840 { 841 bool addressIsPointer = true; 842 bool physical = false; 843 bool searchMappings = false; 844 int32 index = 1; 845 846 while (index < argc) { 847 if (argv[index][0] != '-') 848 break; 849 850 if (!strcmp(argv[index], "-p")) { 851 addressIsPointer = false; 852 physical = true; 853 } else if (!strcmp(argv[index], "-v")) { 854 addressIsPointer = false; 855 } else if 
(!strcmp(argv[index], "-m")) { 856 searchMappings = true; 857 } else { 858 print_debugger_command_usage(argv[0]); 859 return 0; 860 } 861 862 index++; 863 } 864 865 if (index + 1 != argc) { 866 print_debugger_command_usage(argv[0]); 867 return 0; 868 } 869 870 uint64 value; 871 if (!evaluate_debug_expression(argv[index], &value, false)) 872 return 0; 873 874 uint64 pageAddress = value; 875 struct vm_page* page; 876 877 if (addressIsPointer) { 878 page = (struct vm_page *)(addr_t)pageAddress; 879 } else { 880 if (!physical) { 881 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 882 883 if (debug_get_debugged_thread()->team->address_space != NULL) 884 addressSpace = debug_get_debugged_thread()->team->address_space; 885 886 uint32 flags = 0; 887 phys_addr_t physicalAddress; 888 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 889 &physicalAddress, &flags) != B_OK 890 || (flags & PAGE_PRESENT) == 0) { 891 kprintf("Virtual address not mapped to a physical page in this " 892 "address space.\n"); 893 return 0; 894 } 895 pageAddress = physicalAddress; 896 } 897 898 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 899 } 900 901 kprintf("PAGE: %p\n", page); 902 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 903 page->queue_link.previous); 904 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", 905 page->physical_page_number); 906 kprintf("cache: %p\n", page->Cache()); 907 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset); 908 kprintf("cache_next: %p\n", page->cache_next); 909 kprintf("state: %s\n", page_state_to_string(page->State())); 910 kprintf("wired_count: %d\n", page->WiredCount()); 911 kprintf("usage_count: %d\n", page->usage_count); 912 kprintf("busy: %d\n", page->busy); 913 kprintf("busy_writing: %d\n", page->busy_writing); 914 kprintf("accessed: %d\n", page->accessed); 915 kprintf("modified: %d\n", page->modified); 916 #if DEBUG_PAGE_QUEUE 917 kprintf("queue: %p\n", page->queue); 918 #endif 919 #if DEBUG_PAGE_ACCESS 920 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 921 #endif 922 kprintf("area mappings:\n"); 923 924 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 925 vm_page_mapping *mapping; 926 while ((mapping = iterator.Next()) != NULL) { 927 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 928 mapping = mapping->page_link.next; 929 } 930 931 if (searchMappings) { 932 kprintf("all mappings:\n"); 933 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 934 while (addressSpace != NULL) { 935 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 936 for (addr_t address = addressSpace->Base(); pageCount != 0; 937 address += B_PAGE_SIZE, pageCount--) { 938 phys_addr_t physicalAddress; 939 uint32 flags = 0; 940 if (addressSpace->TranslationMap()->QueryInterrupt(address, 941 &physicalAddress, &flags) == B_OK 942 && (flags & PAGE_PRESENT) != 0 943 && physicalAddress / B_PAGE_SIZE 944 == page->physical_page_number) { 945 VMArea* area = addressSpace->LookupArea(address); 946 kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#" 947 B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(), 948 area != NULL ? area->id : -1, address, 949 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 950 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 951 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 952 (flags & PAGE_ACCESSED) != 0 ? 
" accessed" : ""); 953 } 954 } 955 addressSpace = VMAddressSpace::DebugNext(addressSpace); 956 } 957 } 958 959 set_debug_variable("_cache", (addr_t)page->Cache()); 960 #if DEBUG_PAGE_ACCESS 961 set_debug_variable("_accessor", page->accessing_thread); 962 #endif 963 964 return 0; 965 } 966 967 968 static int 969 dump_page_queue(int argc, char **argv) 970 { 971 struct VMPageQueue *queue; 972 973 if (argc < 2) { 974 kprintf("usage: page_queue <address/name> [list]\n"); 975 return 0; 976 } 977 978 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 979 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 980 if (!strcmp(argv[1], "free")) 981 queue = &sFreePageQueue; 982 else if (!strcmp(argv[1], "clear")) 983 queue = &sClearPageQueue; 984 else if (!strcmp(argv[1], "modified")) 985 queue = &sModifiedPageQueue; 986 else if (!strcmp(argv[1], "active")) 987 queue = &sActivePageQueue; 988 else if (!strcmp(argv[1], "inactive")) 989 queue = &sInactivePageQueue; 990 else if (!strcmp(argv[1], "cached")) 991 queue = &sCachedPageQueue; 992 else { 993 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 994 return 0; 995 } 996 997 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %" 998 B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(), 999 queue->Count()); 1000 1001 if (argc == 3) { 1002 struct vm_page *page = queue->Head(); 1003 1004 kprintf("page cache type state wired usage\n"); 1005 for (page_num_t i = 0; page; i++, page = queue->Next(page)) { 1006 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 1007 vm_cache_type_to_string(page->Cache()->type), 1008 page_state_to_string(page->State()), 1009 page->WiredCount(), page->usage_count); 1010 } 1011 } 1012 return 0; 1013 } 1014 1015 1016 static int 1017 dump_page_stats(int argc, char **argv) 1018 { 1019 page_num_t swappableModified = 0; 1020 page_num_t swappableModifiedInactive = 0; 1021 1022 size_t counter[8]; 1023 size_t busyCounter[8]; 1024 memset(counter, 0, sizeof(counter)); 1025 memset(busyCounter, 0, sizeof(busyCounter)); 1026 1027 struct page_run { 1028 page_num_t start; 1029 page_num_t end; 1030 1031 page_num_t Length() const { return end - start; } 1032 }; 1033 1034 page_run currentFreeRun = { 0, 0 }; 1035 page_run currentCachedRun = { 0, 0 }; 1036 page_run longestFreeRun = { 0, 0 }; 1037 page_run longestCachedRun = { 0, 0 }; 1038 1039 for (page_num_t i = 0; i < sNumPages; i++) { 1040 if (sPages[i].State() > 7) { 1041 panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i, 1042 &sPages[i]); 1043 } 1044 1045 uint32 pageState = sPages[i].State(); 1046 1047 counter[pageState]++; 1048 if (sPages[i].busy) 1049 busyCounter[pageState]++; 1050 1051 if (pageState == PAGE_STATE_MODIFIED 1052 && sPages[i].Cache() != NULL 1053 && sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) { 1054 swappableModified++; 1055 if (sPages[i].usage_count == 0) 1056 swappableModifiedInactive++; 1057 } 1058 1059 // track free and cached pages runs 1060 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 1061 currentFreeRun.end = i + 1; 1062 currentCachedRun.end = i + 1; 1063 } else { 1064 if (currentFreeRun.Length() > longestFreeRun.Length()) 1065 longestFreeRun = currentFreeRun; 1066 currentFreeRun.start = currentFreeRun.end = i + 1; 1067 1068 if (pageState == PAGE_STATE_CACHED) { 1069 currentCachedRun.end = i + 1; 1070 } else { 1071 if (currentCachedRun.Length() > longestCachedRun.Length()) 1072 longestCachedRun = currentCachedRun; 1073 currentCachedRun.start = currentCachedRun.end = i + 
1; 1074 } 1075 } 1076 } 1077 1078 kprintf("page stats:\n"); 1079 kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages); 1080 1081 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1082 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 1083 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1084 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 1085 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1086 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 1087 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1088 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 1089 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1090 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 1091 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1092 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 1093 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 1094 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 1095 1096 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 1097 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 1098 sUnsatisfiedPageReservations); 1099 kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount); 1100 kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %" 1101 B_PRIuPHYSADDR ")\n", longestFreeRun.Length(), 1102 sPages[longestFreeRun.start].physical_page_number); 1103 kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %" 1104 B_PRIuPHYSADDR ")\n", longestCachedRun.Length(), 1105 sPages[longestCachedRun.start].physical_page_number); 1106 1107 kprintf("waiting threads:\n"); 1108 for (PageReservationWaiterList::Iterator it 1109 = sPageReservationWaiters.GetIterator(); 1110 PageReservationWaiter* waiter = it.Next();) { 1111 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 1112 ", don't touch: %6" B_PRIu32 "\n", waiter->thread->id, 1113 waiter->missing, waiter->dontTouch); 1114 } 1115 1116 kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue, 1117 sFreePageQueue.Count()); 1118 kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue, 1119 sClearPageQueue.Count()); 1120 kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32 1121 " temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %" 1122 B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 1123 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 1124 kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n", 1125 &sActivePageQueue, sActivePageQueue.Count()); 1126 kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n", 1127 &sInactivePageQueue, sInactivePageQueue.Count()); 1128 kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n", 1129 &sCachedPageQueue, sCachedPageQueue.Count()); 1130 return 0; 1131 } 1132 1133 1134 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1135 1136 static caller_info* 1137 get_caller_info(addr_t caller) 1138 { 1139 // find the caller info 1140 for (int32 i = 0; i < sCallerInfoCount; i++) { 1141 if (caller == sCallerInfoTable[i].caller) 1142 return &sCallerInfoTable[i]; 1143 } 1144 1145 // not found, add a new entry, if there are free slots 1146 if (sCallerInfoCount >= kCallerInfoTableSize) 1147 return NULL; 1148 1149 caller_info* info = &sCallerInfoTable[sCallerInfoCount++]; 1150 info->caller = caller; 1151 info->count = 0; 1152 1153 return info; 1154 } 1155 1156 1157 static int 1158 
caller_info_compare_count(const void* _a, const void* _b) 1159 { 1160 const caller_info* a = (const caller_info*)_a; 1161 const caller_info* b = (const caller_info*)_b; 1162 return (int)(b->count - a->count); 1163 } 1164 1165 1166 static int 1167 dump_page_allocations_per_caller(int argc, char** argv) 1168 { 1169 bool resetAllocationInfos = false; 1170 bool printDetails = false; 1171 addr_t caller = 0; 1172 1173 for (int32 i = 1; i < argc; i++) { 1174 if (strcmp(argv[i], "-d") == 0) { 1175 uint64 callerAddress; 1176 if (++i >= argc 1177 || !evaluate_debug_expression(argv[i], &callerAddress, true)) { 1178 print_debugger_command_usage(argv[0]); 1179 return 0; 1180 } 1181 1182 caller = callerAddress; 1183 printDetails = true; 1184 } else if (strcmp(argv[i], "-r") == 0) { 1185 resetAllocationInfos = true; 1186 } else { 1187 print_debugger_command_usage(argv[0]); 1188 return 0; 1189 } 1190 } 1191 1192 sCallerInfoCount = 0; 1193 1194 AllocationCollectorCallback collectorCallback(resetAllocationInfos); 1195 AllocationDetailPrinterCallback detailsCallback(caller); 1196 AllocationTrackingCallback& callback = printDetails 1197 ? (AllocationTrackingCallback&)detailsCallback 1198 : (AllocationTrackingCallback&)collectorCallback; 1199 1200 for (page_num_t i = 0; i < sNumPages; i++) 1201 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1202 1203 if (printDetails) 1204 return 0; 1205 1206 // sort the array 1207 qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info), 1208 &caller_info_compare_count); 1209 1210 kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount); 1211 1212 size_t totalAllocationCount = 0; 1213 1214 kprintf(" count caller\n"); 1215 kprintf("----------------------------------\n"); 1216 for (int32 i = 0; i < sCallerInfoCount; i++) { 1217 caller_info& info = sCallerInfoTable[i]; 1218 kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller); 1219 1220 const char* symbol; 1221 const char* imageName; 1222 bool exactMatch; 1223 addr_t baseAddress; 1224 1225 if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol, 1226 &imageName, &exactMatch) == B_OK) { 1227 kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol, 1228 info.caller - baseAddress, imageName, 1229 exactMatch ? 
"" : " (nearest)"); 1230 } else 1231 kprintf("\n"); 1232 1233 totalAllocationCount += info.count; 1234 } 1235 1236 kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n", 1237 totalAllocationCount); 1238 1239 return 0; 1240 } 1241 1242 1243 static int 1244 dump_page_allocation_infos(int argc, char** argv) 1245 { 1246 page_num_t pageFilter = 0; 1247 team_id teamFilter = -1; 1248 thread_id threadFilter = -1; 1249 bool printStackTraces = false; 1250 1251 for (int32 i = 1; i < argc; i++) { 1252 if (strcmp(argv[i], "--stacktrace") == 0) 1253 printStackTraces = true; 1254 else if (strcmp(argv[i], "-p") == 0) { 1255 uint64 pageNumber; 1256 if (++i >= argc 1257 || !evaluate_debug_expression(argv[i], &pageNumber, true)) { 1258 print_debugger_command_usage(argv[0]); 1259 return 0; 1260 } 1261 1262 pageFilter = pageNumber; 1263 } else if (strcmp(argv[i], "--team") == 0) { 1264 uint64 team; 1265 if (++i >= argc 1266 || !evaluate_debug_expression(argv[i], &team, true)) { 1267 print_debugger_command_usage(argv[0]); 1268 return 0; 1269 } 1270 1271 teamFilter = team; 1272 } else if (strcmp(argv[i], "--thread") == 0) { 1273 uint64 thread; 1274 if (++i >= argc 1275 || !evaluate_debug_expression(argv[i], &thread, true)) { 1276 print_debugger_command_usage(argv[0]); 1277 return 0; 1278 } 1279 1280 threadFilter = thread; 1281 } else { 1282 print_debugger_command_usage(argv[0]); 1283 return 0; 1284 } 1285 } 1286 1287 AllocationInfoPrinterCallback callback(printStackTraces, pageFilter, 1288 teamFilter, threadFilter); 1289 1290 for (page_num_t i = 0; i < sNumPages; i++) 1291 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1292 1293 return 0; 1294 } 1295 1296 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1297 1298 1299 #ifdef TRACK_PAGE_USAGE_STATS 1300 1301 static void 1302 track_page_usage(vm_page* page) 1303 { 1304 if (page->WiredCount() == 0) { 1305 sNextPageUsage[(int32)page->usage_count + 128]++; 1306 sNextPageUsagePageCount++; 1307 } 1308 } 1309 1310 1311 static void 1312 update_page_usage_stats() 1313 { 1314 std::swap(sPageUsage, sNextPageUsage); 1315 sPageUsagePageCount = sNextPageUsagePageCount; 1316 1317 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 1318 sNextPageUsagePageCount = 0; 1319 1320 // compute average 1321 if (sPageUsagePageCount > 0) { 1322 int64 sum = 0; 1323 for (int32 i = 0; i < 256; i++) 1324 sum += (int64)sPageUsage[i] * (i - 128); 1325 1326 TRACE_DAEMON("average page usage: %f (%lu pages)\n", 1327 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 1328 } 1329 } 1330 1331 1332 static int 1333 dump_page_usage_stats(int argc, char** argv) 1334 { 1335 kprintf("distribution of page usage counts (%lu pages):", 1336 sPageUsagePageCount); 1337 1338 int64 sum = 0; 1339 for (int32 i = 0; i < 256; i++) { 1340 if (i % 8 == 0) 1341 kprintf("\n%4ld:", i - 128); 1342 1343 int64 count = sPageUsage[i]; 1344 sum += count * (i - 128); 1345 1346 kprintf(" %9llu", count); 1347 } 1348 1349 kprintf("\n\n"); 1350 1351 kprintf("average usage count: %f\n", 1352 sPageUsagePageCount > 0 ? 
(float)sum / sPageUsagePageCount : 0); 1353 1354 return 0; 1355 } 1356 1357 #endif // TRACK_PAGE_USAGE_STATS 1358 1359 1360 // #pragma mark - vm_page 1361 1362 1363 inline void 1364 vm_page::InitState(uint8 newState) 1365 { 1366 state = newState; 1367 } 1368 1369 1370 inline void 1371 vm_page::SetState(uint8 newState) 1372 { 1373 TPS(SetPageState(this, newState)); 1374 1375 state = newState; 1376 } 1377 1378 1379 // #pragma mark - 1380 1381 1382 static void 1383 get_page_stats(page_stats& _pageStats) 1384 { 1385 _pageStats.totalFreePages = sUnreservedFreePages; 1386 _pageStats.cachedPages = sCachedPageQueue.Count(); 1387 _pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations; 1388 // TODO: We don't get an actual snapshot here! 1389 } 1390 1391 1392 static bool 1393 do_active_paging(const page_stats& pageStats) 1394 { 1395 return pageStats.totalFreePages + pageStats.cachedPages 1396 < pageStats.unsatisfiedReservations 1397 + (int32)sFreeOrCachedPagesTarget; 1398 } 1399 1400 1401 /*! Reserves as many pages as possible from \c sUnreservedFreePages up to 1402 \a count. Doesn't touch the last \a dontTouch pages of 1403 \c sUnreservedFreePages, though. 1404 \return The number of actually reserved pages. 1405 */ 1406 static uint32 1407 reserve_some_pages(uint32 count, uint32 dontTouch) 1408 { 1409 while (true) { 1410 int32 freePages = sUnreservedFreePages; 1411 if (freePages <= (int32)dontTouch) 1412 return 0; 1413 1414 int32 toReserve = std::min(count, freePages - dontTouch); 1415 if (atomic_test_and_set(&sUnreservedFreePages, 1416 freePages - toReserve, freePages) 1417 == freePages) { 1418 return toReserve; 1419 } 1420 1421 // the count changed in the meantime -- retry 1422 } 1423 } 1424 1425 1426 static void 1427 wake_up_page_reservation_waiters() 1428 { 1429 MutexLocker pageDeficitLocker(sPageDeficitLock); 1430 1431 // TODO: If this is a low priority thread, we might want to disable 1432 // interrupts or otherwise ensure that we aren't unscheduled. Otherwise 1433 // high priority threads wait be kept waiting while a medium priority thread 1434 // prevents us from running. 
1435 1436 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) { 1437 int32 reserved = reserve_some_pages(waiter->missing, 1438 waiter->dontTouch); 1439 if (reserved == 0) 1440 return; 1441 1442 atomic_add(&sUnsatisfiedPageReservations, -reserved); 1443 waiter->missing -= reserved; 1444 1445 if (waiter->missing > 0) 1446 return; 1447 1448 sPageReservationWaiters.Remove(waiter); 1449 1450 InterruptsSpinLocker schedulerLocker(gSchedulerLock); 1451 thread_unblock_locked(waiter->thread, B_OK); 1452 } 1453 } 1454 1455 1456 static inline void 1457 unreserve_pages(uint32 count) 1458 { 1459 atomic_add(&sUnreservedFreePages, count); 1460 if (sUnsatisfiedPageReservations != 0) 1461 wake_up_page_reservation_waiters(); 1462 } 1463 1464 1465 static void 1466 free_page(vm_page* page, bool clear) 1467 { 1468 DEBUG_PAGE_ACCESS_CHECK(page); 1469 1470 PAGE_ASSERT(page, !page->IsMapped()); 1471 1472 VMPageQueue* fromQueue; 1473 1474 switch (page->State()) { 1475 case PAGE_STATE_ACTIVE: 1476 fromQueue = &sActivePageQueue; 1477 break; 1478 case PAGE_STATE_INACTIVE: 1479 fromQueue = &sInactivePageQueue; 1480 break; 1481 case PAGE_STATE_MODIFIED: 1482 fromQueue = &sModifiedPageQueue; 1483 break; 1484 case PAGE_STATE_CACHED: 1485 fromQueue = &sCachedPageQueue; 1486 break; 1487 case PAGE_STATE_FREE: 1488 case PAGE_STATE_CLEAR: 1489 panic("free_page(): page %p already free", page); 1490 return; 1491 case PAGE_STATE_WIRED: 1492 case PAGE_STATE_UNUSED: 1493 fromQueue = NULL; 1494 break; 1495 default: 1496 panic("free_page(): page %p in invalid state %d", 1497 page, page->State()); 1498 return; 1499 } 1500 1501 if (page->CacheRef() != NULL) 1502 panic("to be freed page %p has cache", page); 1503 if (page->IsMapped()) 1504 panic("to be freed page %p has mappings", page); 1505 1506 if (fromQueue != NULL) 1507 fromQueue->RemoveUnlocked(page); 1508 1509 TA(FreePage(page->physical_page_number)); 1510 1511 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1512 page->allocation_tracking_info.Clear(); 1513 #endif 1514 1515 ReadLocker locker(sFreePageQueuesLock); 1516 1517 DEBUG_PAGE_ACCESS_END(page); 1518 1519 if (clear) { 1520 page->SetState(PAGE_STATE_CLEAR); 1521 sClearPageQueue.PrependUnlocked(page); 1522 } else { 1523 page->SetState(PAGE_STATE_FREE); 1524 sFreePageQueue.PrependUnlocked(page); 1525 } 1526 1527 locker.Unlock(); 1528 1529 unreserve_pages(1); 1530 } 1531 1532 1533 /*! The caller must make sure that no-one else tries to change the page's state 1534 while the function is called. If the page has a cache, this can be done by 1535 locking the cache. 
1536 */ 1537 static void 1538 set_page_state(vm_page *page, int pageState) 1539 { 1540 DEBUG_PAGE_ACCESS_CHECK(page); 1541 1542 if (pageState == page->State()) 1543 return; 1544 1545 VMPageQueue* fromQueue; 1546 1547 switch (page->State()) { 1548 case PAGE_STATE_ACTIVE: 1549 fromQueue = &sActivePageQueue; 1550 break; 1551 case PAGE_STATE_INACTIVE: 1552 fromQueue = &sInactivePageQueue; 1553 break; 1554 case PAGE_STATE_MODIFIED: 1555 fromQueue = &sModifiedPageQueue; 1556 break; 1557 case PAGE_STATE_CACHED: 1558 fromQueue = &sCachedPageQueue; 1559 break; 1560 case PAGE_STATE_FREE: 1561 case PAGE_STATE_CLEAR: 1562 panic("set_page_state(): page %p is free/clear", page); 1563 return; 1564 case PAGE_STATE_WIRED: 1565 case PAGE_STATE_UNUSED: 1566 fromQueue = NULL; 1567 break; 1568 default: 1569 panic("set_page_state(): page %p in invalid state %d", 1570 page, page->State()); 1571 return; 1572 } 1573 1574 VMPageQueue* toQueue; 1575 1576 switch (pageState) { 1577 case PAGE_STATE_ACTIVE: 1578 toQueue = &sActivePageQueue; 1579 break; 1580 case PAGE_STATE_INACTIVE: 1581 toQueue = &sInactivePageQueue; 1582 break; 1583 case PAGE_STATE_MODIFIED: 1584 toQueue = &sModifiedPageQueue; 1585 break; 1586 case PAGE_STATE_CACHED: 1587 PAGE_ASSERT(page, !page->IsMapped()); 1588 PAGE_ASSERT(page, !page->modified); 1589 toQueue = &sCachedPageQueue; 1590 break; 1591 case PAGE_STATE_FREE: 1592 case PAGE_STATE_CLEAR: 1593 panic("set_page_state(): target state is free/clear"); 1594 return; 1595 case PAGE_STATE_WIRED: 1596 case PAGE_STATE_UNUSED: 1597 toQueue = NULL; 1598 break; 1599 default: 1600 panic("set_page_state(): invalid target state %d", pageState); 1601 return; 1602 } 1603 1604 VMCache* cache = page->Cache(); 1605 if (cache != NULL && cache->temporary) { 1606 if (pageState == PAGE_STATE_MODIFIED) 1607 atomic_add(&sModifiedTemporaryPages, 1); 1608 else if (page->State() == PAGE_STATE_MODIFIED) 1609 atomic_add(&sModifiedTemporaryPages, -1); 1610 } 1611 1612 // move the page 1613 if (toQueue == fromQueue) { 1614 // Note: Theoretically we are required to lock when changing the page 1615 // state, even if we don't change the queue. We actually don't have to 1616 // do this, though, since only for the active queue there are different 1617 // page states and active pages have a cache that must be locked at 1618 // this point. So we rely on the fact that everyone must lock the cache 1619 // before trying to change/interpret the page state. 1620 PAGE_ASSERT(page, cache != NULL); 1621 cache->AssertLocked(); 1622 page->SetState(pageState); 1623 } else { 1624 if (fromQueue != NULL) 1625 fromQueue->RemoveUnlocked(page); 1626 1627 page->SetState(pageState); 1628 1629 if (toQueue != NULL) 1630 toQueue->AppendUnlocked(page); 1631 } 1632 } 1633 1634 1635 /*! Moves a previously modified page into a now appropriate queue. 1636 The page queues must not be locked. 1637 */ 1638 static void 1639 move_page_to_appropriate_queue(vm_page *page) 1640 { 1641 DEBUG_PAGE_ACCESS_CHECK(page); 1642 1643 // Note, this logic must be in sync with what the page daemon does. 1644 int32 state; 1645 if (page->IsMapped()) 1646 state = PAGE_STATE_ACTIVE; 1647 else if (page->modified) 1648 state = PAGE_STATE_MODIFIED; 1649 else 1650 state = PAGE_STATE_CACHED; 1651 1652 // TODO: If free + cached pages are low, we might directly want to free the 1653 // page. 
1654 set_page_state(page, state); 1655 } 1656 1657 1658 static void 1659 clear_page(struct vm_page *page) 1660 { 1661 vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0, 1662 B_PAGE_SIZE); 1663 } 1664 1665 1666 static status_t 1667 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired) 1668 { 1669 TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#" 1670 B_PRIxPHYSADDR "\n", startPage, length)); 1671 1672 if (sPhysicalPageOffset > startPage) { 1673 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1674 "): start page is before free list\n", startPage, length); 1675 if (sPhysicalPageOffset - startPage >= length) 1676 return B_OK; 1677 length -= sPhysicalPageOffset - startPage; 1678 startPage = sPhysicalPageOffset; 1679 } 1680 1681 startPage -= sPhysicalPageOffset; 1682 1683 if (startPage + length > sNumPages) { 1684 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1685 "): range would extend past free list\n", startPage, length); 1686 if (startPage >= sNumPages) 1687 return B_OK; 1688 length = sNumPages - startPage; 1689 } 1690 1691 WriteLocker locker(sFreePageQueuesLock); 1692 1693 for (page_num_t i = 0; i < length; i++) { 1694 vm_page *page = &sPages[startPage + i]; 1695 switch (page->State()) { 1696 case PAGE_STATE_FREE: 1697 case PAGE_STATE_CLEAR: 1698 { 1699 // TODO: This violates the page reservation policy, since we remove pages from 1700 // the free/clear queues without having reserved them before. This should happen 1701 // in the early boot process only, though. 1702 DEBUG_PAGE_ACCESS_START(page); 1703 VMPageQueue& queue = page->State() == PAGE_STATE_FREE 1704 ? sFreePageQueue : sClearPageQueue; 1705 queue.Remove(page); 1706 page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED); 1707 page->busy = false; 1708 atomic_add(&sUnreservedFreePages, -1); 1709 DEBUG_PAGE_ACCESS_END(page); 1710 break; 1711 } 1712 case PAGE_STATE_WIRED: 1713 case PAGE_STATE_UNUSED: 1714 break; 1715 case PAGE_STATE_ACTIVE: 1716 case PAGE_STATE_INACTIVE: 1717 case PAGE_STATE_MODIFIED: 1718 case PAGE_STATE_CACHED: 1719 default: 1720 // uh 1721 dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR 1722 " in non-free state %d!\n", startPage + i, page->State()); 1723 break; 1724 } 1725 } 1726 1727 return B_OK; 1728 } 1729 1730 1731 /*! 1732 This is a background thread that wakes up every now and then (every 100ms) 1733 and moves some pages from the free queue over to the clear queue. 1734 Given enough time, it will clear out all pages from the free queue - we 1735 could probably slow it down after having reached a certain threshold. 1736 */ 1737 static int32 1738 page_scrubber(void *unused) 1739 { 1740 (void)(unused); 1741 1742 TRACE(("page_scrubber starting...\n")); 1743 1744 for (;;) { 1745 snooze(100000); // 100ms 1746 1747 if (sFreePageQueue.Count() == 0 1748 || sUnreservedFreePages < (int32)sFreePagesTarget) { 1749 continue; 1750 } 1751 1752 // Since we temporarily remove pages from the free pages reserve, 1753 // we must make sure we don't cause a violation of the page 1754 // reservation warranty. The following is usually stricter than 1755 // necessary, because we don't have information on how many of the 1756 // reserved pages have already been allocated. 
1757 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1758 kPageReserveForPriority[VM_PRIORITY_USER]); 1759 if (reserved == 0) 1760 continue; 1761 1762 // get some pages from the free queue 1763 ReadLocker locker(sFreePageQueuesLock); 1764 1765 vm_page *page[SCRUB_SIZE]; 1766 int32 scrubCount = 0; 1767 for (int32 i = 0; i < reserved; i++) { 1768 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1769 if (page[i] == NULL) 1770 break; 1771 1772 DEBUG_PAGE_ACCESS_START(page[i]); 1773 1774 page[i]->SetState(PAGE_STATE_ACTIVE); 1775 page[i]->busy = true; 1776 scrubCount++; 1777 } 1778 1779 locker.Unlock(); 1780 1781 if (scrubCount == 0) { 1782 unreserve_pages(reserved); 1783 continue; 1784 } 1785 1786 TA(ScrubbingPages(scrubCount)); 1787 1788 // clear them 1789 for (int32 i = 0; i < scrubCount; i++) 1790 clear_page(page[i]); 1791 1792 locker.Lock(); 1793 1794 // and put them into the clear queue 1795 for (int32 i = 0; i < scrubCount; i++) { 1796 page[i]->SetState(PAGE_STATE_CLEAR); 1797 page[i]->busy = false; 1798 DEBUG_PAGE_ACCESS_END(page[i]); 1799 sClearPageQueue.PrependUnlocked(page[i]); 1800 } 1801 1802 locker.Unlock(); 1803 1804 unreserve_pages(reserved); 1805 1806 TA(ScrubbedPages(scrubCount)); 1807 } 1808 1809 return 0; 1810 } 1811 1812 1813 static void 1814 init_page_marker(vm_page &marker) 1815 { 1816 marker.SetCacheRef(NULL); 1817 marker.InitState(PAGE_STATE_UNUSED); 1818 marker.busy = true; 1819 #if DEBUG_PAGE_QUEUE 1820 marker.queue = NULL; 1821 #endif 1822 #if DEBUG_PAGE_ACCESS 1823 marker.accessing_thread = thread_get_current_thread_id(); 1824 #endif 1825 } 1826 1827 1828 static void 1829 remove_page_marker(struct vm_page &marker) 1830 { 1831 DEBUG_PAGE_ACCESS_CHECK(&marker); 1832 1833 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1834 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1835 1836 marker.SetState(PAGE_STATE_UNUSED); 1837 } 1838 1839 1840 static vm_page* 1841 next_modified_page(page_num_t& maxPagesToSee) 1842 { 1843 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1844 1845 while (maxPagesToSee > 0) { 1846 vm_page* page = sModifiedPageQueue.Head(); 1847 if (page == NULL) 1848 return NULL; 1849 1850 sModifiedPageQueue.Requeue(page, true); 1851 1852 maxPagesToSee--; 1853 1854 if (!page->busy) 1855 return page; 1856 } 1857 1858 return NULL; 1859 } 1860 1861 1862 // #pragma mark - 1863 1864 1865 class PageWriteTransfer; 1866 class PageWriteWrapper; 1867 1868 1869 class PageWriterRun { 1870 public: 1871 status_t Init(uint32 maxPages); 1872 1873 void PrepareNextRun(); 1874 void AddPage(vm_page* page); 1875 uint32 Go(); 1876 1877 void PageWritten(PageWriteTransfer* transfer, status_t status, 1878 bool partialTransfer, size_t bytesTransferred); 1879 1880 private: 1881 uint32 fMaxPages; 1882 uint32 fWrapperCount; 1883 uint32 fTransferCount; 1884 vint32 fPendingTransfers; 1885 PageWriteWrapper* fWrappers; 1886 PageWriteTransfer* fTransfers; 1887 ConditionVariable fAllFinishedCondition; 1888 }; 1889 1890 1891 class PageWriteTransfer : public AsyncIOCallback { 1892 public: 1893 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1894 bool AddPage(vm_page* page); 1895 1896 status_t Schedule(uint32 flags); 1897 1898 void SetStatus(status_t status, size_t transferred); 1899 1900 status_t Status() const { return fStatus; } 1901 struct VMCache* Cache() const { return fCache; } 1902 uint32 PageCount() const { return fPageCount; } 1903 1904 virtual void IOFinished(status_t status, bool partialTransfer, 1905 generic_size_t bytesTransferred); 1906 private: 1907 
PageWriterRun* fRun; 1908 struct VMCache* fCache; 1909 off_t fOffset; 1910 uint32 fPageCount; 1911 int32 fMaxPages; 1912 status_t fStatus; 1913 uint32 fVecCount; 1914 generic_io_vec fVecs[32]; // TODO: make dynamic/configurable 1915 }; 1916 1917 1918 class PageWriteWrapper { 1919 public: 1920 PageWriteWrapper(); 1921 ~PageWriteWrapper(); 1922 void SetTo(vm_page* page); 1923 bool Done(status_t result); 1924 1925 private: 1926 vm_page* fPage; 1927 struct VMCache* fCache; 1928 bool fIsActive; 1929 }; 1930 1931 1932 PageWriteWrapper::PageWriteWrapper() 1933 : 1934 fIsActive(false) 1935 { 1936 } 1937 1938 1939 PageWriteWrapper::~PageWriteWrapper() 1940 { 1941 if (fIsActive) 1942 panic("page write wrapper going out of scope but isn't completed"); 1943 } 1944 1945 1946 /*! The page's cache must be locked. 1947 */ 1948 void 1949 PageWriteWrapper::SetTo(vm_page* page) 1950 { 1951 DEBUG_PAGE_ACCESS_CHECK(page); 1952 1953 if (page->busy) 1954 panic("setting page write wrapper to busy page"); 1955 1956 if (fIsActive) 1957 panic("re-setting page write wrapper that isn't completed"); 1958 1959 fPage = page; 1960 fCache = page->Cache(); 1961 fIsActive = true; 1962 1963 fPage->busy = true; 1964 fPage->busy_writing = true; 1965 1966 // We have a modified page -- however, while we're writing it back, 1967 // the page might still be mapped. In order not to lose any changes to the 1968 // page, we mark it clean before actually writing it back; if 1969 // writing the page fails for some reason, we'll just keep it in the 1970 // modified page list, but that should happen only rarely. 1971 1972 // If the page is changed after we cleared the dirty flag, but before we 1973 // had the chance to write it back, then we'll write it again later -- that 1974 // will probably not happen that often, though. 1975 1976 vm_clear_map_flags(fPage, PAGE_MODIFIED); 1977 } 1978 1979 1980 /*! The page's cache must be locked. 1981 The page queues must not be locked. 1982 \return \c true if the page was written successfully respectively could be 1983 handled somehow, \c false otherwise. 1984 */ 1985 bool 1986 PageWriteWrapper::Done(status_t result) 1987 { 1988 if (!fIsActive) 1989 panic("completing page write wrapper that is not active"); 1990 1991 DEBUG_PAGE_ACCESS_START(fPage); 1992 1993 fPage->busy = false; 1994 // Set unbusy and notify later by hand, since we might free the page. 1995 1996 bool success = true; 1997 1998 if (result == B_OK) { 1999 // put it into the active/inactive queue 2000 move_page_to_appropriate_queue(fPage); 2001 fPage->busy_writing = false; 2002 DEBUG_PAGE_ACCESS_END(fPage); 2003 } else { 2004 // Writing the page failed. One reason would be that the cache has been 2005 // shrunk and the page does no longer belong to the file. Otherwise the 2006 // actual I/O failed, in which case we'll simply keep the page modified. 2007 2008 if (!fPage->busy_writing) { 2009 // The busy_writing flag was cleared. That means the cache has been 2010 // shrunk while we were trying to write the page and we have to free 2011 // it now. 2012 vm_remove_all_page_mappings(fPage); 2013 // TODO: Unmapping should already happen when resizing the cache! 2014 fCache->RemovePage(fPage); 2015 free_page(fPage, false); 2016 } else { 2017 // Writing the page failed -- mark the page modified and move it to 2018 // an appropriate queue other than the modified queue, so we don't 2019 // keep trying to write it over and over again. 
We keep 2020 // non-temporary pages in the modified queue, though, so they don't 2021 // get lost in the inactive queue. 2022 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 2023 strerror(result)); 2024 2025 fPage->modified = true; 2026 if (!fCache->temporary) 2027 set_page_state(fPage, PAGE_STATE_MODIFIED); 2028 else if (fPage->IsMapped()) 2029 set_page_state(fPage, PAGE_STATE_ACTIVE); 2030 else 2031 set_page_state(fPage, PAGE_STATE_INACTIVE); 2032 2033 fPage->busy_writing = false; 2034 DEBUG_PAGE_ACCESS_END(fPage); 2035 2036 success = false; 2037 } 2038 } 2039 2040 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 2041 fIsActive = false; 2042 2043 return success; 2044 } 2045 2046 2047 /*! The page's cache must be locked. 2048 */ 2049 void 2050 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 2051 { 2052 fRun = run; 2053 fCache = page->Cache(); 2054 fOffset = page->cache_offset; 2055 fPageCount = 1; 2056 fMaxPages = maxPages; 2057 fStatus = B_OK; 2058 2059 fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2060 fVecs[0].length = B_PAGE_SIZE; 2061 fVecCount = 1; 2062 } 2063 2064 2065 /*! The page's cache must be locked. 2066 */ 2067 bool 2068 PageWriteTransfer::AddPage(vm_page* page) 2069 { 2070 if (page->Cache() != fCache 2071 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 2072 return false; 2073 2074 phys_addr_t nextBase = fVecs[fVecCount - 1].base 2075 + fVecs[fVecCount - 1].length; 2076 2077 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2078 && (off_t)page->cache_offset == fOffset + fPageCount) { 2079 // append to last iovec 2080 fVecs[fVecCount - 1].length += B_PAGE_SIZE; 2081 fPageCount++; 2082 return true; 2083 } 2084 2085 nextBase = fVecs[0].base - B_PAGE_SIZE; 2086 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2087 && (off_t)page->cache_offset == fOffset - 1) { 2088 // prepend to first iovec and adjust offset 2089 fVecs[0].base = nextBase; 2090 fVecs[0].length += B_PAGE_SIZE; 2091 fOffset = page->cache_offset; 2092 fPageCount++; 2093 return true; 2094 } 2095 2096 if (((off_t)page->cache_offset == fOffset + fPageCount 2097 || (off_t)page->cache_offset == fOffset - 1) 2098 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 2099 // not physically contiguous or not in the right order 2100 uint32 vectorIndex; 2101 if ((off_t)page->cache_offset < fOffset) { 2102 // we are pre-pending another vector, move the other vecs 2103 for (uint32 i = fVecCount; i > 0; i--) 2104 fVecs[i] = fVecs[i - 1]; 2105 2106 fOffset = page->cache_offset; 2107 vectorIndex = 0; 2108 } else 2109 vectorIndex = fVecCount; 2110 2111 fVecs[vectorIndex].base 2112 = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2113 fVecs[vectorIndex].length = B_PAGE_SIZE; 2114 2115 fVecCount++; 2116 fPageCount++; 2117 return true; 2118 } 2119 2120 return false; 2121 } 2122 2123 2124 status_t 2125 PageWriteTransfer::Schedule(uint32 flags) 2126 { 2127 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 2128 generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT; 2129 2130 if (fRun != NULL) { 2131 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 2132 flags | B_PHYSICAL_IO_REQUEST, this); 2133 } 2134 2135 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 2136 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 2137 2138 SetStatus(status, writeLength); 2139 return fStatus; 2140 } 2141 2142 2143 void 2144 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 
2145 { 2146 // only succeed if all pages up to the last one have been written fully 2147 // and the last page has at least been written partially 2148 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2149 status = B_ERROR; 2150 2151 fStatus = status; 2152 } 2153 2154 2155 void 2156 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2157 generic_size_t bytesTransferred) 2158 { 2159 SetStatus(status, bytesTransferred); 2160 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2161 } 2162 2163 2164 status_t 2165 PageWriterRun::Init(uint32 maxPages) 2166 { 2167 fMaxPages = maxPages; 2168 fWrapperCount = 0; 2169 fTransferCount = 0; 2170 fPendingTransfers = 0; 2171 2172 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2173 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 2174 if (fWrappers == NULL || fTransfers == NULL) 2175 return B_NO_MEMORY; 2176 2177 return B_OK; 2178 } 2179 2180 2181 void 2182 PageWriterRun::PrepareNextRun() 2183 { 2184 fWrapperCount = 0; 2185 fTransferCount = 0; 2186 fPendingTransfers = 0; 2187 } 2188 2189 2190 /*! The page's cache must be locked. 2191 */ 2192 void 2193 PageWriterRun::AddPage(vm_page* page) 2194 { 2195 fWrappers[fWrapperCount++].SetTo(page); 2196 2197 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2198 fTransfers[fTransferCount++].SetTo(this, page, 2199 page->Cache()->MaxPagesPerAsyncWrite()); 2200 } 2201 } 2202 2203 2204 /*! Writes all pages previously added. 2205 \return The number of pages that could not be written or otherwise handled. 2206 */ 2207 uint32 2208 PageWriterRun::Go() 2209 { 2210 fPendingTransfers = fTransferCount; 2211 2212 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2213 ConditionVariableEntry waitEntry; 2214 fAllFinishedCondition.Add(&waitEntry); 2215 2216 // schedule writes 2217 for (uint32 i = 0; i < fTransferCount; i++) 2218 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2219 2220 // wait until all pages have been written 2221 waitEntry.Wait(); 2222 2223 // mark pages depending on whether they could be written or not 2224 2225 uint32 failedPages = 0; 2226 uint32 wrapperIndex = 0; 2227 for (uint32 i = 0; i < fTransferCount; i++) { 2228 PageWriteTransfer& transfer = fTransfers[i]; 2229 transfer.Cache()->Lock(); 2230 2231 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2232 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2233 failedPages++; 2234 } 2235 2236 transfer.Cache()->Unlock(); 2237 } 2238 2239 ASSERT(wrapperIndex == fWrapperCount); 2240 2241 for (uint32 i = 0; i < fTransferCount; i++) { 2242 PageWriteTransfer& transfer = fTransfers[i]; 2243 struct VMCache* cache = transfer.Cache(); 2244 2245 // We've acquired a references for each page 2246 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2247 // We release the cache references after all pages were made 2248 // unbusy again - otherwise releasing a vnode could deadlock. 2249 cache->ReleaseStoreRef(); 2250 cache->ReleaseRef(); 2251 } 2252 } 2253 2254 return failedPages; 2255 } 2256 2257 2258 void 2259 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2260 bool partialTransfer, size_t bytesTransferred) 2261 { 2262 if (atomic_add(&fPendingTransfers, -1) == 1) 2263 fAllFinishedCondition.NotifyAll(); 2264 } 2265 2266 2267 /*! The page writer continuously takes some pages from the modified 2268 queue, writes them back, and moves them back to the active queue. 
2269 It runs in its own thread, and is only there to keep the number 2270 of modified pages low, so that more pages can be reused with 2271 fewer costs. 2272 */ 2273 status_t 2274 page_writer(void* /*unused*/) 2275 { 2276 const uint32 kNumPages = 256; 2277 #ifdef TRACE_VM_PAGE 2278 uint32 writtenPages = 0; 2279 bigtime_t lastWrittenTime = 0; 2280 bigtime_t pageCollectionTime = 0; 2281 bigtime_t pageWritingTime = 0; 2282 #endif 2283 2284 PageWriterRun run; 2285 if (run.Init(kNumPages) != B_OK) { 2286 panic("page writer: Failed to init PageWriterRun!"); 2287 return B_ERROR; 2288 } 2289 2290 page_num_t pagesSinceLastSuccessfulWrite = 0; 2291 2292 while (true) { 2293 // TODO: Maybe wait shorter when memory is low! 2294 if (sModifiedPageQueue.Count() < kNumPages) { 2295 sPageWriterCondition.Wait(3000000, true); 2296 // all 3 seconds when no one triggers us 2297 } 2298 2299 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2300 if (modifiedPages == 0) 2301 continue; 2302 2303 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2304 // We ran through the whole queue without being able to write a 2305 // single page. Take a break. 2306 snooze(500000); 2307 pagesSinceLastSuccessfulWrite = 0; 2308 } 2309 2310 #if ENABLE_SWAP_SUPPORT 2311 page_stats pageStats; 2312 get_page_stats(pageStats); 2313 bool activePaging = do_active_paging(pageStats); 2314 #endif 2315 2316 // depending on how urgent it becomes to get pages to disk, we adjust 2317 // our I/O priority 2318 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2319 int32 ioPriority = B_IDLE_PRIORITY; 2320 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2321 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2322 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2323 } else { 2324 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2325 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2326 } 2327 2328 thread_set_io_priority(ioPriority); 2329 2330 uint32 numPages = 0; 2331 run.PrepareNextRun(); 2332 2333 // TODO: make this laptop friendly, too (ie. only start doing 2334 // something if someone else did something or there is really 2335 // enough to do). 2336 2337 // collect pages to be written 2338 #ifdef TRACE_VM_PAGE 2339 pageCollectionTime -= system_time(); 2340 #endif 2341 2342 page_num_t maxPagesToSee = modifiedPages; 2343 2344 while (numPages < kNumPages && maxPagesToSee > 0) { 2345 vm_page *page = next_modified_page(maxPagesToSee); 2346 if (page == NULL) 2347 break; 2348 2349 PageCacheLocker cacheLocker(page, false); 2350 if (!cacheLocker.IsLocked()) 2351 continue; 2352 2353 VMCache *cache = page->Cache(); 2354 2355 // If the page is busy or its state has changed while we were 2356 // locking the cache, just ignore it. 2357 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2358 continue; 2359 2360 DEBUG_PAGE_ACCESS_START(page); 2361 2362 // Don't write back wired (locked) pages. 2363 if (page->WiredCount() > 0) { 2364 set_page_state(page, PAGE_STATE_ACTIVE); 2365 DEBUG_PAGE_ACCESS_END(page); 2366 continue; 2367 } 2368 2369 // Write back temporary pages only when we're actively paging. 2370 if (cache->temporary 2371 #if ENABLE_SWAP_SUPPORT 2372 && (!activePaging 2373 || !cache->CanWritePage( 2374 (off_t)page->cache_offset << PAGE_SHIFT)) 2375 #endif 2376 ) { 2377 // We can't/don't want to do anything with this page, so move it 2378 // to one of the other queues. 
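				// Unmapped pages go to the inactive queue, mapped ones to the
				// active queue, so the page daemon will look at them again.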
2379 if (page->mappings.IsEmpty()) 2380 set_page_state(page, PAGE_STATE_INACTIVE); 2381 else 2382 set_page_state(page, PAGE_STATE_ACTIVE); 2383 2384 DEBUG_PAGE_ACCESS_END(page); 2385 continue; 2386 } 2387 2388 // We need our own reference to the store, as it might currently be 2389 // destroyed. 2390 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2391 DEBUG_PAGE_ACCESS_END(page); 2392 cacheLocker.Unlock(); 2393 thread_yield(true); 2394 continue; 2395 } 2396 2397 run.AddPage(page); 2398 // TODO: We're possibly adding pages of different caches and 2399 // thus maybe of different underlying file systems here. This 2400 // is a potential problem for loop file systems/devices, since 2401 // we could mark a page busy that would need to be accessed 2402 // when writing back another page, thus causing a deadlock. 2403 2404 DEBUG_PAGE_ACCESS_END(page); 2405 2406 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2407 TPW(WritePage(page)); 2408 2409 cache->AcquireRefLocked(); 2410 numPages++; 2411 } 2412 2413 #ifdef TRACE_VM_PAGE 2414 pageCollectionTime += system_time(); 2415 #endif 2416 if (numPages == 0) 2417 continue; 2418 2419 // write pages to disk and do all the cleanup 2420 #ifdef TRACE_VM_PAGE 2421 pageWritingTime -= system_time(); 2422 #endif 2423 uint32 failedPages = run.Go(); 2424 #ifdef TRACE_VM_PAGE 2425 pageWritingTime += system_time(); 2426 2427 // debug output only... 2428 writtenPages += numPages; 2429 if (writtenPages >= 1024) { 2430 bigtime_t now = system_time(); 2431 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2432 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2433 (now - lastWrittenTime) / 1000, 2434 pageCollectionTime / 1000, pageWritingTime / 1000)); 2435 lastWrittenTime = now; 2436 2437 writtenPages -= 1024; 2438 pageCollectionTime = 0; 2439 pageWritingTime = 0; 2440 } 2441 #endif 2442 2443 if (failedPages == numPages) 2444 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2445 else 2446 pagesSinceLastSuccessfulWrite = 0; 2447 } 2448 2449 return B_OK; 2450 } 2451 2452 2453 // #pragma mark - 2454 2455 2456 // TODO: This should be done in the page daemon! 2457 #if 0 2458 #if ENABLE_SWAP_SUPPORT 2459 static bool 2460 free_page_swap_space(int32 index) 2461 { 2462 vm_page *page = vm_page_at_index(index); 2463 PageCacheLocker locker(page); 2464 if (!locker.IsLocked()) 2465 return false; 2466 2467 DEBUG_PAGE_ACCESS_START(page); 2468 2469 VMCache* cache = page->Cache(); 2470 if (cache->temporary && page->WiredCount() == 0 2471 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2472 && page->usage_count > 0) { 2473 // TODO: how to judge a page is highly active? 2474 if (swap_free_page_swap_space(page)) { 2475 // We need to mark the page modified, since otherwise it could be 2476 // stolen and we'd lose its data. 
2477 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2478 TD(FreedPageSwap(page)); 2479 DEBUG_PAGE_ACCESS_END(page); 2480 return true; 2481 } 2482 } 2483 DEBUG_PAGE_ACCESS_END(page); 2484 return false; 2485 } 2486 #endif 2487 #endif // 0 2488 2489 2490 static vm_page * 2491 find_cached_page_candidate(struct vm_page &marker) 2492 { 2493 DEBUG_PAGE_ACCESS_CHECK(&marker); 2494 2495 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2496 vm_page *page; 2497 2498 if (marker.State() == PAGE_STATE_UNUSED) { 2499 // Get the first free pages of the (in)active queue 2500 page = sCachedPageQueue.Head(); 2501 } else { 2502 // Get the next page of the current queue 2503 if (marker.State() != PAGE_STATE_CACHED) { 2504 panic("invalid marker %p state", &marker); 2505 return NULL; 2506 } 2507 2508 page = sCachedPageQueue.Next(&marker); 2509 sCachedPageQueue.Remove(&marker); 2510 marker.SetState(PAGE_STATE_UNUSED); 2511 } 2512 2513 while (page != NULL) { 2514 if (!page->busy) { 2515 // we found a candidate, insert marker 2516 marker.SetState(PAGE_STATE_CACHED); 2517 sCachedPageQueue.InsertAfter(page, &marker); 2518 return page; 2519 } 2520 2521 page = sCachedPageQueue.Next(page); 2522 } 2523 2524 return NULL; 2525 } 2526 2527 2528 static bool 2529 free_cached_page(vm_page *page, bool dontWait) 2530 { 2531 // try to lock the page's cache 2532 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2533 return false; 2534 VMCache* cache = page->Cache(); 2535 2536 AutoLocker<VMCache> cacheLocker(cache, true); 2537 MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked); 2538 2539 // check again if that page is still a candidate 2540 if (page->busy || page->State() != PAGE_STATE_CACHED) 2541 return false; 2542 2543 DEBUG_PAGE_ACCESS_START(page); 2544 2545 PAGE_ASSERT(page, !page->IsMapped()); 2546 PAGE_ASSERT(page, !page->modified); 2547 2548 // we can now steal this page 2549 2550 cache->RemovePage(page); 2551 // Now the page doesn't have cache anymore, so no one else (e.g. 2552 // vm_page_allocate_page_run() can pick it up), since they would be 2553 // required to lock the cache first, which would fail. 2554 2555 sCachedPageQueue.RemoveUnlocked(page); 2556 return true; 2557 } 2558 2559 2560 static uint32 2561 free_cached_pages(uint32 pagesToFree, bool dontWait) 2562 { 2563 vm_page marker; 2564 init_page_marker(marker); 2565 2566 uint32 pagesFreed = 0; 2567 2568 while (pagesFreed < pagesToFree) { 2569 vm_page *page = find_cached_page_candidate(marker); 2570 if (page == NULL) 2571 break; 2572 2573 if (free_cached_page(page, dontWait)) { 2574 ReadLocker locker(sFreePageQueuesLock); 2575 page->SetState(PAGE_STATE_FREE); 2576 DEBUG_PAGE_ACCESS_END(page); 2577 sFreePageQueue.PrependUnlocked(page); 2578 locker.Unlock(); 2579 2580 TA(StolenPage()); 2581 2582 pagesFreed++; 2583 } 2584 } 2585 2586 remove_page_marker(marker); 2587 2588 return pagesFreed; 2589 } 2590 2591 2592 static void 2593 idle_scan_active_pages(page_stats& pageStats) 2594 { 2595 VMPageQueue& queue = sActivePageQueue; 2596 2597 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2598 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2599 2600 while (maxToScan > 0) { 2601 maxToScan--; 2602 2603 // Get the next page. Note that we don't bother to lock here. We go with 2604 // the assumption that on all architectures reading/writing pointers is 2605 // atomic. Beyond that it doesn't really matter. We have to unlock the 2606 // queue anyway to lock the page's cache, and we'll recheck afterwards. 
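		// The page read here may be requeued or change state concurrently;
		// that is fine, because locking its cache below and rechecking the
		// page's state afterwards makes the result safe to act on.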
2607 vm_page* page = queue.Head(); 2608 if (page == NULL) 2609 break; 2610 2611 // lock the page's cache 2612 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2613 if (cache == NULL) 2614 continue; 2615 2616 if (page->State() != PAGE_STATE_ACTIVE) { 2617 // page is no longer in the cache or in this queue 2618 cache->ReleaseRefAndUnlock(); 2619 continue; 2620 } 2621 2622 if (page->busy) { 2623 // page is busy -- requeue at the end 2624 vm_page_requeue(page, true); 2625 cache->ReleaseRefAndUnlock(); 2626 continue; 2627 } 2628 2629 DEBUG_PAGE_ACCESS_START(page); 2630 2631 // Get the page active/modified flags and update the page's usage count. 2632 // We completely unmap inactive temporary pages. This saves us to 2633 // iterate through the inactive list as well, since we'll be notified 2634 // via page fault whenever such an inactive page is used again. 2635 // We don't remove the mappings of non-temporary pages, since we 2636 // wouldn't notice when those would become unused and could thus be 2637 // moved to the cached list. 2638 int32 usageCount; 2639 if (page->WiredCount() > 0 || page->usage_count > 0 2640 || !cache->temporary) { 2641 usageCount = vm_clear_page_mapping_accessed_flags(page); 2642 } else 2643 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2644 2645 if (usageCount > 0) { 2646 usageCount += page->usage_count + kPageUsageAdvance; 2647 if (usageCount > kPageUsageMax) 2648 usageCount = kPageUsageMax; 2649 // TODO: This would probably also be the place to reclaim swap space. 2650 } else { 2651 usageCount += page->usage_count - (int32)kPageUsageDecline; 2652 if (usageCount < 0) { 2653 usageCount = 0; 2654 set_page_state(page, PAGE_STATE_INACTIVE); 2655 } 2656 } 2657 2658 page->usage_count = usageCount; 2659 2660 DEBUG_PAGE_ACCESS_END(page); 2661 2662 cache->ReleaseRefAndUnlock(); 2663 } 2664 } 2665 2666 2667 static void 2668 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel) 2669 { 2670 int32 pagesToFree = pageStats.unsatisfiedReservations 2671 + sFreeOrCachedPagesTarget 2672 - (pageStats.totalFreePages + pageStats.cachedPages); 2673 if (pagesToFree <= 0) 2674 return; 2675 2676 bigtime_t time = system_time(); 2677 uint32 pagesScanned = 0; 2678 uint32 pagesToCached = 0; 2679 uint32 pagesToModified = 0; 2680 uint32 pagesToActive = 0; 2681 2682 // Determine how many pages at maximum to send to the modified queue. Since 2683 // it is relatively expensive to page out pages, we do that on a grander 2684 // scale only when things get desperate. 2685 uint32 maxToFlush = despairLevel <= 1 ? 
32 : 10000; 2686 2687 vm_page marker; 2688 init_page_marker(marker); 2689 2690 VMPageQueue& queue = sInactivePageQueue; 2691 InterruptsSpinLocker queueLocker(queue.GetLock()); 2692 uint32 maxToScan = queue.Count(); 2693 2694 vm_page* nextPage = queue.Head(); 2695 2696 while (pagesToFree > 0 && maxToScan > 0) { 2697 maxToScan--; 2698 2699 // get the next page 2700 vm_page* page = nextPage; 2701 if (page == NULL) 2702 break; 2703 nextPage = queue.Next(page); 2704 2705 if (page->busy) 2706 continue; 2707 2708 // mark the position 2709 queue.InsertAfter(page, &marker); 2710 queueLocker.Unlock(); 2711 2712 // lock the page's cache 2713 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2714 if (cache == NULL || page->busy 2715 || page->State() != PAGE_STATE_INACTIVE) { 2716 if (cache != NULL) 2717 cache->ReleaseRefAndUnlock(); 2718 queueLocker.Lock(); 2719 nextPage = queue.Next(&marker); 2720 queue.Remove(&marker); 2721 continue; 2722 } 2723 2724 pagesScanned++; 2725 2726 DEBUG_PAGE_ACCESS_START(page); 2727 2728 // Get the accessed count, clear the accessed/modified flags and 2729 // unmap the page, if it hasn't been accessed. 2730 int32 usageCount; 2731 if (page->WiredCount() > 0) 2732 usageCount = vm_clear_page_mapping_accessed_flags(page); 2733 else 2734 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2735 2736 // update usage count 2737 if (usageCount > 0) { 2738 usageCount += page->usage_count + kPageUsageAdvance; 2739 if (usageCount > kPageUsageMax) 2740 usageCount = kPageUsageMax; 2741 } else { 2742 usageCount += page->usage_count - (int32)kPageUsageDecline; 2743 if (usageCount < 0) 2744 usageCount = 0; 2745 } 2746 2747 page->usage_count = usageCount; 2748 2749 // Move to fitting queue or requeue: 2750 // * Active mapped pages go to the active queue. 2751 // * Inactive mapped (i.e. wired) pages are requeued. 2752 // * The remaining pages are cachable. Thus, if unmodified they go to 2753 // the cached queue, otherwise to the modified queue (up to a limit). 2754 // Note that until in the idle scanning we don't exempt pages of 2755 // temporary caches. Apparently we really need memory, so we better 2756 // page out memory as well. 
2757 bool isMapped = page->IsMapped(); 2758 if (usageCount > 0) { 2759 if (isMapped) { 2760 set_page_state(page, PAGE_STATE_ACTIVE); 2761 pagesToActive++; 2762 } else 2763 vm_page_requeue(page, true); 2764 } else if (isMapped) { 2765 vm_page_requeue(page, true); 2766 } else if (!page->modified) { 2767 set_page_state(page, PAGE_STATE_CACHED); 2768 pagesToFree--; 2769 pagesToCached++; 2770 } else if (maxToFlush > 0) { 2771 set_page_state(page, PAGE_STATE_MODIFIED); 2772 maxToFlush--; 2773 pagesToModified++; 2774 } else 2775 vm_page_requeue(page, true); 2776 2777 DEBUG_PAGE_ACCESS_END(page); 2778 2779 cache->ReleaseRefAndUnlock(); 2780 2781 // remove the marker 2782 queueLocker.Lock(); 2783 nextPage = queue.Next(&marker); 2784 queue.Remove(&marker); 2785 } 2786 2787 queueLocker.Unlock(); 2788 2789 time = system_time() - time; 2790 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2791 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2792 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2793 pagesToModified, pagesToActive); 2794 2795 // wake up the page writer, if we tossed it some pages 2796 if (pagesToModified > 0) 2797 sPageWriterCondition.WakeUp(); 2798 } 2799 2800 2801 static void 2802 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2803 { 2804 vm_page marker; 2805 init_page_marker(marker); 2806 2807 VMPageQueue& queue = sActivePageQueue; 2808 InterruptsSpinLocker queueLocker(queue.GetLock()); 2809 uint32 maxToScan = queue.Count(); 2810 2811 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2812 + sFreeOrCachedPagesTarget 2813 - (pageStats.totalFreePages + pageStats.cachedPages) 2814 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2815 if (pagesToDeactivate <= 0) 2816 return; 2817 2818 bigtime_t time = system_time(); 2819 uint32 pagesAccessed = 0; 2820 uint32 pagesToInactive = 0; 2821 uint32 pagesScanned = 0; 2822 2823 vm_page* nextPage = queue.Head(); 2824 2825 while (pagesToDeactivate > 0 && maxToScan > 0) { 2826 maxToScan--; 2827 2828 // get the next page 2829 vm_page* page = nextPage; 2830 if (page == NULL) 2831 break; 2832 nextPage = queue.Next(page); 2833 2834 if (page->busy) 2835 continue; 2836 2837 // mark the position 2838 queue.InsertAfter(page, &marker); 2839 queueLocker.Unlock(); 2840 2841 // lock the page's cache 2842 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2843 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2844 if (cache != NULL) 2845 cache->ReleaseRefAndUnlock(); 2846 queueLocker.Lock(); 2847 nextPage = queue.Next(&marker); 2848 queue.Remove(&marker); 2849 continue; 2850 } 2851 2852 pagesScanned++; 2853 2854 DEBUG_PAGE_ACCESS_START(page); 2855 2856 // Get the page active/modified flags and update the page's usage count. 2857 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2858 2859 if (usageCount > 0) { 2860 usageCount += page->usage_count + kPageUsageAdvance; 2861 if (usageCount > kPageUsageMax) 2862 usageCount = kPageUsageMax; 2863 pagesAccessed++; 2864 // TODO: This would probably also be the place to reclaim swap space. 
2865 } else { 2866 usageCount += page->usage_count - (int32)kPageUsageDecline; 2867 if (usageCount <= 0) { 2868 usageCount = 0; 2869 set_page_state(page, PAGE_STATE_INACTIVE); 2870 pagesToInactive++; 2871 } 2872 } 2873 2874 page->usage_count = usageCount; 2875 2876 DEBUG_PAGE_ACCESS_END(page); 2877 2878 cache->ReleaseRefAndUnlock(); 2879 2880 // remove the marker 2881 queueLocker.Lock(); 2882 nextPage = queue.Next(&marker); 2883 queue.Remove(&marker); 2884 } 2885 2886 time = system_time() - time; 2887 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2888 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed" 2889 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed); 2890 } 2891 2892 2893 static void 2894 page_daemon_idle_scan(page_stats& pageStats) 2895 { 2896 TRACE_DAEMON("page daemon: idle run\n"); 2897 2898 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2899 // We want more actually free pages, so free some from the cached 2900 // ones. 2901 uint32 freed = free_cached_pages( 2902 sFreePagesTarget - pageStats.totalFreePages, false); 2903 if (freed > 0) 2904 unreserve_pages(freed); 2905 get_page_stats(pageStats); 2906 } 2907 2908 // Walk the active list and move pages to the inactive queue. 2909 get_page_stats(pageStats); 2910 idle_scan_active_pages(pageStats); 2911 } 2912 2913 2914 static void 2915 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2916 { 2917 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %" 2918 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages, 2919 pageStats.cachedPages, pageStats.unsatisfiedReservations 2920 + sFreeOrCachedPagesTarget 2921 - (pageStats.totalFreePages + pageStats.cachedPages)); 2922 2923 // Walk the inactive list and transfer pages to the cached and modified 2924 // queues. 2925 full_scan_inactive_pages(pageStats, despairLevel); 2926 2927 // Free cached pages. Also wake up reservation waiters. 2928 get_page_stats(pageStats); 2929 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2930 - (pageStats.totalFreePages); 2931 if (pagesToFree > 0) { 2932 uint32 freed = free_cached_pages(pagesToFree, true); 2933 if (freed > 0) 2934 unreserve_pages(freed); 2935 } 2936 2937 // Walk the active list and move pages to the inactive queue. 2938 get_page_stats(pageStats); 2939 full_scan_active_pages(pageStats, despairLevel); 2940 } 2941 2942 2943 static status_t 2944 page_daemon(void* /*unused*/) 2945 { 2946 int32 despairLevel = 0; 2947 2948 while (true) { 2949 sPageDaemonCondition.ClearActivated(); 2950 2951 // evaluate the free pages situation 2952 page_stats pageStats; 2953 get_page_stats(pageStats); 2954 2955 if (!do_active_paging(pageStats)) { 2956 // Things look good -- just maintain statistics and keep the pool 2957 // of actually free pages full enough. 2958 despairLevel = 0; 2959 page_daemon_idle_scan(pageStats); 2960 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 2961 } else { 2962 // Not enough free pages. We need to do some real work. 2963 despairLevel = std::max(despairLevel + 1, (int32)3); 2964 page_daemon_full_scan(pageStats, despairLevel); 2965 2966 // Don't wait after the first full scan, but rather immediately 2967 // check whether we were successful in freeing enough pages and 2968 // re-run with increased despair level. The first scan is 2969 // conservative with respect to moving inactive modified pages to 2970 // the modified list to avoid thrashing. The second scan, however, 2971 // will not hold back. 
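			// The despair level is what makes the difference: in
			// full_scan_inactive_pages() a level <= 1 flushes at most 32
			// pages to the modified queue, while higher levels allow up to
			// 10000 pages per run.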
2972 if (despairLevel > 1) 2973 snooze(kBusyScanWaitInterval); 2974 } 2975 } 2976 2977 return B_OK; 2978 } 2979 2980 2981 /*! Returns how many pages could *not* be reserved. 2982 */ 2983 static uint32 2984 reserve_pages(uint32 count, int priority, bool dontWait) 2985 { 2986 int32 dontTouch = kPageReserveForPriority[priority]; 2987 2988 while (true) { 2989 count -= reserve_some_pages(count, dontTouch); 2990 if (count == 0) 2991 return 0; 2992 2993 if (sUnsatisfiedPageReservations == 0) { 2994 count -= free_cached_pages(count, dontWait); 2995 if (count == 0) 2996 return count; 2997 } 2998 2999 if (dontWait) 3000 return count; 3001 3002 // we need to wait for pages to become available 3003 3004 MutexLocker pageDeficitLocker(sPageDeficitLock); 3005 3006 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3007 sUnsatisfiedPageReservations += count; 3008 3009 if (sUnreservedFreePages > dontTouch) { 3010 // the situation changed 3011 sUnsatisfiedPageReservations -= count; 3012 continue; 3013 } 3014 3015 PageReservationWaiter waiter; 3016 waiter.dontTouch = dontTouch; 3017 waiter.missing = count; 3018 waiter.thread = thread_get_current_thread(); 3019 waiter.threadPriority = waiter.thread->priority; 3020 3021 // insert ordered (i.e. after all waiters with higher or equal priority) 3022 PageReservationWaiter* otherWaiter = NULL; 3023 for (PageReservationWaiterList::Iterator it 3024 = sPageReservationWaiters.GetIterator(); 3025 (otherWaiter = it.Next()) != NULL;) { 3026 if (waiter < *otherWaiter) 3027 break; 3028 } 3029 3030 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3031 3032 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3033 "waiting for pages"); 3034 3035 if (notifyDaemon) 3036 sPageDaemonCondition.WakeUp(); 3037 3038 pageDeficitLocker.Unlock(); 3039 3040 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3041 thread_block(); 3042 3043 pageDeficitLocker.Lock(); 3044 3045 return 0; 3046 } 3047 } 3048 3049 3050 // #pragma mark - private kernel API 3051 3052 3053 /*! Writes a range of modified pages of a cache to disk. 3054 You need to hold the VMCache lock when calling this function. 3055 Note that the cache lock is released in this function. 3056 \param cache The cache. 3057 \param firstPage Offset (in page size units) of the first page in the range. 3058 \param endPage End offset (in page size units) of the page range. The page 3059 at this offset is not included. 
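
	As an illustration only (a sketch, not code from this file; \c someCache
	stands for a hypothetical, already locked VMCache):
	\code
	someCache->Lock();
	vm_page_write_modified_page_range(someCache, 0, 16);
		// writes back the modified pages with cache_offset 0 to 15; the
		// cache lock is temporarily released and reacquired internally
	someCache->Unlock();
	\endcode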
3060 */ 3061 status_t 3062 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3063 uint32 endPage) 3064 { 3065 static const int32 kMaxPages = 256; 3066 int32 maxPages = cache->MaxPagesPerWrite(); 3067 if (maxPages < 0 || maxPages > kMaxPages) 3068 maxPages = kMaxPages; 3069 3070 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3071 | HEAP_DONT_LOCK_KERNEL_SPACE; 3072 3073 PageWriteWrapper stackWrappersPool[2]; 3074 PageWriteWrapper* stackWrappers[1]; 3075 PageWriteWrapper* wrapperPool 3076 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3077 PageWriteWrapper** wrappers 3078 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages]; 3079 if (wrapperPool == NULL || wrappers == NULL) { 3080 // don't fail, just limit our capabilities 3081 free(wrapperPool); 3082 free(wrappers); 3083 wrapperPool = stackWrappersPool; 3084 wrappers = stackWrappers; 3085 maxPages = 1; 3086 } 3087 3088 int32 nextWrapper = 0; 3089 int32 usedWrappers = 0; 3090 3091 PageWriteTransfer transfer; 3092 bool transferEmpty = true; 3093 3094 VMCachePagesTree::Iterator it 3095 = cache->pages.GetIterator(firstPage, true, true); 3096 3097 while (true) { 3098 vm_page* page = it.Next(); 3099 if (page == NULL || page->cache_offset >= endPage) { 3100 if (transferEmpty) 3101 break; 3102 3103 page = NULL; 3104 } 3105 3106 if (page != NULL) { 3107 if (page->busy 3108 || (page->State() != PAGE_STATE_MODIFIED 3109 && !vm_test_map_modification(page))) { 3110 page = NULL; 3111 } 3112 } 3113 3114 PageWriteWrapper* wrapper = NULL; 3115 if (page != NULL) { 3116 wrapper = &wrapperPool[nextWrapper++]; 3117 if (nextWrapper > maxPages) 3118 nextWrapper = 0; 3119 3120 DEBUG_PAGE_ACCESS_START(page); 3121 3122 wrapper->SetTo(page); 3123 3124 if (transferEmpty || transfer.AddPage(page)) { 3125 if (transferEmpty) { 3126 transfer.SetTo(NULL, page, maxPages); 3127 transferEmpty = false; 3128 } 3129 3130 DEBUG_PAGE_ACCESS_END(page); 3131 3132 wrappers[usedWrappers++] = wrapper; 3133 continue; 3134 } 3135 3136 DEBUG_PAGE_ACCESS_END(page); 3137 } 3138 3139 if (transferEmpty) 3140 continue; 3141 3142 cache->Unlock(); 3143 status_t status = transfer.Schedule(0); 3144 cache->Lock(); 3145 3146 for (int32 i = 0; i < usedWrappers; i++) 3147 wrappers[i]->Done(status); 3148 3149 usedWrappers = 0; 3150 3151 if (page != NULL) { 3152 transfer.SetTo(NULL, page, maxPages); 3153 wrappers[usedWrappers++] = wrapper; 3154 } else 3155 transferEmpty = true; 3156 } 3157 3158 if (wrapperPool != stackWrappersPool) { 3159 delete[] wrapperPool; 3160 delete[] wrappers; 3161 } 3162 3163 return B_OK; 3164 } 3165 3166 3167 /*! You need to hold the VMCache lock when calling this function. 3168 Note that the cache lock is released in this function. 3169 */ 3170 status_t 3171 vm_page_write_modified_pages(VMCache *cache) 3172 { 3173 return vm_page_write_modified_page_range(cache, 0, 3174 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3175 } 3176 3177 3178 /*! Schedules the page writer to write back the specified \a page. 3179 Note, however, that it might not do this immediately, and it can well 3180 take several seconds until the page is actually written out. 3181 */ 3182 void 3183 vm_page_schedule_write_page(vm_page *page) 3184 { 3185 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3186 3187 vm_page_requeue(page, false); 3188 3189 sPageWriterCondition.WakeUp(); 3190 } 3191 3192 3193 /*! Cache must be locked. 
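	All non-busy modified pages in the range [\a firstPage, \a endPage) are
	requeued in the modified queue, and the page writer is woken up if at
	least one such page was found.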
3194 */ 3195 void 3196 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3197 uint32 endPage) 3198 { 3199 uint32 modified = 0; 3200 for (VMCachePagesTree::Iterator it 3201 = cache->pages.GetIterator(firstPage, true, true); 3202 vm_page *page = it.Next();) { 3203 if (page->cache_offset >= endPage) 3204 break; 3205 3206 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3207 DEBUG_PAGE_ACCESS_START(page); 3208 vm_page_requeue(page, false); 3209 modified++; 3210 DEBUG_PAGE_ACCESS_END(page); 3211 } 3212 } 3213 3214 if (modified > 0) 3215 sPageWriterCondition.WakeUp(); 3216 } 3217 3218 3219 void 3220 vm_page_init_num_pages(kernel_args *args) 3221 { 3222 // calculate the size of memory by looking at the physical_memory_range array 3223 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3224 page_num_t physicalPagesEnd = sPhysicalPageOffset 3225 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3226 3227 sNonExistingPages = 0; 3228 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3229 3230 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3231 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3232 if (start > physicalPagesEnd) 3233 sNonExistingPages += start - physicalPagesEnd; 3234 physicalPagesEnd = start 3235 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3236 3237 #ifdef LIMIT_AVAILABLE_MEMORY 3238 page_num_t available 3239 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3240 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3241 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3242 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3243 break; 3244 } 3245 #endif 3246 } 3247 3248 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3249 sPhysicalPageOffset, physicalPagesEnd)); 3250 3251 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3252 } 3253 3254 3255 status_t 3256 vm_page_init(kernel_args *args) 3257 { 3258 TRACE(("vm_page_init: entry\n")); 3259 3260 // init page queues 3261 sModifiedPageQueue.Init("modified pages queue"); 3262 sInactivePageQueue.Init("inactive pages queue"); 3263 sActivePageQueue.Init("active pages queue"); 3264 sCachedPageQueue.Init("cached pages queue"); 3265 sFreePageQueue.Init("free pages queue"); 3266 sClearPageQueue.Init("clear pages queue"); 3267 3268 new (&sPageReservationWaiters) PageReservationWaiterList; 3269 3270 // map in the new free page table 3271 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3272 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3273 3274 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3275 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3276 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3277 3278 // initialize the free page table 3279 for (uint32 i = 0; i < sNumPages; i++) { 3280 sPages[i].Init(sPhysicalPageOffset + i); 3281 sFreePageQueue.Append(&sPages[i]); 3282 3283 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3284 sPages[i].allocation_tracking_info.Clear(); 3285 #endif 3286 } 3287 3288 sUnreservedFreePages = sNumPages; 3289 3290 TRACE(("initialized table\n")); 3291 3292 // mark the ranges between usable physical memory unused 3293 phys_addr_t previousEnd = 0; 3294 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3295 phys_addr_t base = args->physical_memory_range[i].start; 3296 phys_size_t size = args->physical_memory_range[i].size; 3297 if (base > previousEnd) { 3298 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3299 (base - previousEnd) / B_PAGE_SIZE, false); 3300 } 3301 previousEnd = base + size; 3302 } 3303 3304 // mark the allocated physical page ranges wired 3305 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3306 mark_page_range_in_use( 3307 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3308 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3309 } 3310 3311 // The target of actually free pages. This must be at least the system 3312 // reserve, but should be a few more pages, so we don't have to extract 3313 // a cached page with each allocation. 3314 sFreePagesTarget = VM_PAGE_RESERVE_USER 3315 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3316 3317 // The target of free + cached and inactive pages. On low-memory machines 3318 // keep things tight. free + cached is the pool of immediately allocatable 3319 // pages. We want a few inactive pages, so when we're actually paging, we 3320 // have a reasonably large set of pages to work with. 3321 if (sUnreservedFreePages < 16 * 1024) { 3322 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3323 sInactivePagesTarget = sFreePagesTarget / 3; 3324 } else { 3325 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3326 sInactivePagesTarget = sFreePagesTarget / 2; 3327 } 3328 3329 TRACE(("vm_page_init: exit\n")); 3330 3331 return B_OK; 3332 } 3333 3334 3335 status_t 3336 vm_page_init_post_area(kernel_args *args) 3337 { 3338 void *dummy; 3339 3340 dummy = sPages; 3341 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3342 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3343 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3344 3345 add_debugger_command("page_stats", &dump_page_stats, 3346 "Dump statistics about page usage"); 3347 add_debugger_command_etc("page", &dump_page, 3348 "Dump page info", 3349 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3350 "Prints information for the physical page. If neither \"-p\" nor\n" 3351 "\"-v\" are given, the provided address is interpreted as address of\n" 3352 "the vm_page data structure for the page in question. If \"-p\" is\n" 3353 "given, the address is the physical address of the page. If \"-v\" is\n" 3354 "given, the address is interpreted as virtual address in the current\n" 3355 "thread's address space and for the page it is mapped to (if any)\n" 3356 "information are printed. 
If \"-m\" is specified, the command will\n" 3357 "search all known address spaces for mappings to that page and print\n" 3358 "them.\n", 0); 3359 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3360 add_debugger_command("find_page", &find_page, 3361 "Find out which queue a page is actually in"); 3362 3363 #ifdef TRACK_PAGE_USAGE_STATS 3364 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3365 "Dumps statistics about page usage counts", 3366 "\n" 3367 "Dumps statistics about page usage counts.\n", 3368 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3369 #endif 3370 3371 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3372 add_debugger_command_etc("page_allocations_per_caller", 3373 &dump_page_allocations_per_caller, 3374 "Dump current page allocations summed up per caller", 3375 "[ -d <caller> ] [ -r ]\n" 3376 "The current allocations will by summed up by caller (their count)\n" 3377 "printed in decreasing order by count.\n" 3378 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3379 "including the respective stack trace.\n" 3380 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3381 "the information, so the next command invocation will only show the\n" 3382 "allocations made after the reset.\n", 0); 3383 add_debugger_command_etc("page_allocation_infos", 3384 &dump_page_allocation_infos, 3385 "Dump current page allocations", 3386 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3387 "[ --thread <thread ID> ]\n" 3388 "The current allocations filtered by optional values will be printed.\n" 3389 "The optional \"-p\" page number filters for a specific page,\n" 3390 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3391 "and/or threads can be filtered (these only work if a corresponding\n" 3392 "tracing entry is still available).\n" 3393 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3394 "callers are printed, where available\n", 0); 3395 #endif 3396 3397 return B_OK; 3398 } 3399 3400 3401 status_t 3402 vm_page_init_post_thread(kernel_args *args) 3403 { 3404 new (&sFreePageCondition) ConditionVariable; 3405 sFreePageCondition.Publish(&sFreePageQueue, "free page"); 3406 3407 // create a kernel thread to clear out pages 3408 3409 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3410 B_LOWEST_ACTIVE_PRIORITY, NULL); 3411 resume_thread(thread); 3412 3413 // start page writer 3414 3415 sPageWriterCondition.Init("page writer"); 3416 3417 thread = spawn_kernel_thread(&page_writer, "page writer", 3418 B_NORMAL_PRIORITY + 1, NULL); 3419 resume_thread(thread); 3420 3421 // start page daemon 3422 3423 sPageDaemonCondition.Init("page daemon"); 3424 3425 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3426 B_NORMAL_PRIORITY, NULL); 3427 resume_thread(thread); 3428 3429 return B_OK; 3430 } 3431 3432 3433 status_t 3434 vm_mark_page_inuse(page_num_t page) 3435 { 3436 return vm_mark_page_range_inuse(page, 1); 3437 } 3438 3439 3440 status_t 3441 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3442 { 3443 return mark_page_range_in_use(startPage, length, false); 3444 } 3445 3446 3447 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3448 */ 3449 void 3450 vm_page_unreserve_pages(vm_page_reservation* reservation) 3451 { 3452 uint32 count = reservation->count; 3453 reservation->count = 0; 3454 3455 if (count == 0) 3456 return; 3457 3458 TA(UnreservePages(count)); 3459 3460 unreserve_pages(count); 3461 } 3462 3463 3464 /*! 
With this call, you can reserve a number of free pages in the system. 3465 They will only be handed out to someone who has actually reserved them. 3466 This call returns as soon as the number of requested pages has been 3467 reached. 3468 The caller must not hold any cache lock or the function might deadlock. 3469 */ 3470 void 3471 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3472 int priority) 3473 { 3474 reservation->count = count; 3475 3476 if (count == 0) 3477 return; 3478 3479 TA(ReservePages(count)); 3480 3481 reserve_pages(count, priority, false); 3482 } 3483 3484 3485 bool 3486 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3487 int priority) 3488 { 3489 if (count == 0) { 3490 reservation->count = count; 3491 return true; 3492 } 3493 3494 uint32 remaining = reserve_pages(count, priority, true); 3495 if (remaining == 0) { 3496 TA(ReservePages(count)); 3497 reservation->count = count; 3498 return true; 3499 } 3500 3501 unreserve_pages(count - remaining); 3502 3503 return false; 3504 } 3505 3506 3507 vm_page * 3508 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3509 { 3510 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3511 ASSERT(pageState != PAGE_STATE_FREE); 3512 ASSERT(pageState != PAGE_STATE_CLEAR); 3513 3514 ASSERT(reservation->count > 0); 3515 reservation->count--; 3516 3517 VMPageQueue* queue; 3518 VMPageQueue* otherQueue; 3519 3520 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3521 queue = &sClearPageQueue; 3522 otherQueue = &sFreePageQueue; 3523 } else { 3524 queue = &sFreePageQueue; 3525 otherQueue = &sClearPageQueue; 3526 } 3527 3528 ReadLocker locker(sFreePageQueuesLock); 3529 3530 vm_page* page = queue->RemoveHeadUnlocked(); 3531 if (page == NULL) { 3532 // if the primary queue was empty, grab the page from the 3533 // secondary queue 3534 page = otherQueue->RemoveHeadUnlocked(); 3535 3536 if (page == NULL) { 3537 // Unlikely, but possible: the page we have reserved has moved 3538 // between the queues after we checked the first queue. Grab the 3539 // write locker to make sure this doesn't happen again. 
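			// With the write lock held, no pages can move between the free
			// and clear queues, so the page reserved for us must show up in
			// one of them.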
			locker.Unlock();
			WriteLocker writeLocker(sFreePageQueuesLock);

			page = queue->RemoveHead();
			if (page == NULL)
				page = otherQueue->RemoveHead();

			if (page == NULL) {
				panic("Had reserved page, but there is none!");
				return NULL;
			}

			// downgrade to read lock
			locker.Lock();
		}
	}

	if (page->CacheRef() != NULL)
		panic("supposed to be free page %p has cache\n", page);

	DEBUG_PAGE_ACCESS_START(page);

	int oldPageState = page->State();
	page->SetState(pageState);
	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
	page->usage_count = 0;
	page->accessed = false;
	page->modified = false;

	locker.Unlock();

	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
		sPageQueues[pageState].AppendUnlocked(page);

	// clear the page, if we had to take it from the free queue and a clear
	// page was requested
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
		clear_page(page);

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	page->allocation_tracking_info.Init(
		TA(AllocatePage(page->physical_page_number)));
#else
	TA(AllocatePage(page->physical_page_number));
#endif

	return page;
}


static void
allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
	VMPageQueue::PageList& clearPages)
{
	while (vm_page* page = freePages.RemoveHead()) {
		page->busy = false;
		page->SetState(PAGE_STATE_FREE);
		DEBUG_PAGE_ACCESS_END(page);
		sFreePageQueue.PrependUnlocked(page);
	}

	while (vm_page* page = clearPages.RemoveHead()) {
		page->busy = false;
		page->SetState(PAGE_STATE_CLEAR);
		DEBUG_PAGE_ACCESS_END(page);
		sClearPageQueue.PrependUnlocked(page);
	}
}


/*!	Tries to allocate a contiguous run of \a length pages starting at
	index \a start.

	The caller must have write-locked the free/clear page queues. The function
	will unlock regardless of whether it succeeds or fails.

	If the function fails, it cleans up after itself, i.e. it will free all
	pages it managed to allocate.

	\param start The start index (into \c sPages) of the run.
	\param length The number of pages to allocate.
	\param flags Page allocation flags. Encodes the state the function shall
		set the allocated pages to, whether the pages shall be marked busy
		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
		(VM_PAGE_ALLOC_CLEAR).
	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
		passed in locked state. Will be unlocked by the function.
	\return The index of the first page that could not be allocated. \a length
		is returned when the function was successful.
*/
static page_num_t
allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
	WriteLocker& freeClearQueueLocker)
{
	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
	ASSERT(pageState != PAGE_STATE_FREE);
	ASSERT(pageState != PAGE_STATE_CLEAR);
	ASSERT(start + length <= sNumPages);

	// Pull the free/clear pages out of their respective queues. Cached pages
	// are allocated later.
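	// Free pages are collected in freePages, clear pages in clearPages, and
	// cached pages are merely counted for now; any page in another state
	// aborts the attempt (everything grabbed so far is freed again).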
3641 page_num_t cachedPages = 0; 3642 VMPageQueue::PageList freePages; 3643 VMPageQueue::PageList clearPages; 3644 page_num_t i = 0; 3645 for (; i < length; i++) { 3646 bool pageAllocated = true; 3647 bool noPage = false; 3648 vm_page& page = sPages[start + i]; 3649 switch (page.State()) { 3650 case PAGE_STATE_CLEAR: 3651 DEBUG_PAGE_ACCESS_START(&page); 3652 sClearPageQueue.Remove(&page); 3653 clearPages.Add(&page); 3654 break; 3655 case PAGE_STATE_FREE: 3656 DEBUG_PAGE_ACCESS_START(&page); 3657 sFreePageQueue.Remove(&page); 3658 freePages.Add(&page); 3659 break; 3660 case PAGE_STATE_CACHED: 3661 // We allocate cached pages later. 3662 cachedPages++; 3663 pageAllocated = false; 3664 break; 3665 3666 default: 3667 // Probably a page was cached when our caller checked. Now it's 3668 // gone and we have to abort. 3669 noPage = true; 3670 break; 3671 } 3672 3673 if (noPage) 3674 break; 3675 3676 if (pageAllocated) { 3677 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3678 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3679 page.usage_count = 0; 3680 page.accessed = false; 3681 page.modified = false; 3682 } 3683 } 3684 3685 if (i < length) { 3686 // failed to allocate a page -- free all that we've got 3687 allocate_page_run_cleanup(freePages, clearPages); 3688 return i; 3689 } 3690 3691 freeClearQueueLocker.Unlock(); 3692 3693 if (cachedPages > 0) { 3694 // allocate the pages that weren't free but cached 3695 page_num_t freedCachedPages = 0; 3696 page_num_t nextIndex = start; 3697 vm_page* freePage = freePages.Head(); 3698 vm_page* clearPage = clearPages.Head(); 3699 while (cachedPages > 0) { 3700 // skip, if we've already got the page 3701 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3702 freePage = freePages.GetNext(freePage); 3703 nextIndex++; 3704 continue; 3705 } 3706 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3707 clearPage = clearPages.GetNext(clearPage); 3708 nextIndex++; 3709 continue; 3710 } 3711 3712 // free the page, if it is still cached 3713 vm_page& page = sPages[nextIndex]; 3714 if (!free_cached_page(&page, false)) { 3715 // TODO: if the page turns out to have been freed already, 3716 // there would be no need to fail 3717 break; 3718 } 3719 3720 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3721 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3722 page.usage_count = 0; 3723 page.accessed = false; 3724 page.modified = false; 3725 3726 freePages.InsertBefore(freePage, &page); 3727 freedCachedPages++; 3728 cachedPages--; 3729 nextIndex++; 3730 } 3731 3732 // If we have freed cached pages, we need to balance things. 3733 if (freedCachedPages > 0) 3734 unreserve_pages(freedCachedPages); 3735 3736 if (nextIndex - start < length) { 3737 // failed to allocate all cached pages -- free all that we've got 3738 freeClearQueueLocker.Lock(); 3739 allocate_page_run_cleanup(freePages, clearPages); 3740 freeClearQueueLocker.Unlock(); 3741 3742 return nextIndex - start; 3743 } 3744 } 3745 3746 // clear pages, if requested 3747 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3748 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3749 vm_page* page = it.Next();) { 3750 clear_page(page); 3751 } 3752 } 3753 3754 // add pages to target queue 3755 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3756 freePages.MoveFrom(&clearPages); 3757 sPageQueues[pageState].AppendUnlocked(freePages, length); 3758 } 3759 3760 // Note: We don't unreserve the pages since we pulled them out of the 3761 // free/clear queues without adjusting sUnreservedFreePages. 
3762 3763 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3764 AbstractTraceEntryWithStackTrace* traceEntry 3765 = TA(AllocatePageRun(start, length)); 3766 3767 for (page_num_t i = start; i < start + length; i++) 3768 sPages[i].allocation_tracking_info.Init(traceEntry); 3769 #else 3770 TA(AllocatePageRun(start, length)); 3771 #endif 3772 3773 return length; 3774 } 3775 3776 3777 /*! Allocate a physically contiguous range of pages. 3778 3779 \param flags Page allocation flags. Encodes the state the function shall 3780 set the allocated pages to, whether the pages shall be marked busy 3781 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3782 (VM_PAGE_ALLOC_CLEAR). 3783 \param length The number of contiguous pages to allocate. 3784 \param restrictions Restrictions to the physical addresses of the page run 3785 to allocate, including \c low_address, the first acceptable physical 3786 address where the page run may start, \c high_address, the last 3787 acceptable physical address where the page run may end (i.e. it must 3788 hold \code runStartAddress + length <= high_address \endcode), 3789 \c alignment, the alignment of the page run start address, and 3790 \c boundary, multiples of which the page run must not cross. 3791 Values set to \c 0 are ignored. 3792 \param priority The page reservation priority (as passed to 3793 vm_page_reserve_pages()). 3794 \return The first page of the allocated page run on success; \c NULL 3795 when the allocation failed. 3796 */ 3797 vm_page* 3798 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3799 const physical_address_restrictions* restrictions, int priority) 3800 { 3801 // compute start and end page index 3802 page_num_t requestedStart 3803 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3804 - sPhysicalPageOffset; 3805 page_num_t start = requestedStart; 3806 page_num_t end; 3807 if (restrictions->high_address > 0) { 3808 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3809 sPhysicalPageOffset) 3810 - sPhysicalPageOffset; 3811 end = std::min(end, sNumPages); 3812 } else 3813 end = sNumPages; 3814 3815 // compute alignment mask 3816 page_num_t alignmentMask 3817 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3818 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3819 // alignment must be a power of 2 3820 3821 // compute the boundary mask 3822 uint32 boundaryMask = 0; 3823 if (restrictions->boundary != 0) { 3824 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3825 // boundary must be a power of two and not less than alignment and 3826 // length 3827 ASSERT(((boundary - 1) & boundary) == 0); 3828 ASSERT(boundary >= alignmentMask + 1); 3829 ASSERT(boundary >= length); 3830 3831 boundaryMask = -boundary; 3832 } 3833 3834 vm_page_reservation reservation; 3835 vm_page_reserve_pages(&reservation, length, priority); 3836 3837 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3838 3839 // First we try to get a run with free pages only. If that fails, we also 3840 // consider cached pages. If there are only few free pages and many cached 3841 // ones, the odds are that we won't find enough contiguous ones, so we skip 3842 // the first iteration in this case. 3843 int32 freePages = sUnreservedFreePages; 3844 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3845 3846 for (;;) { 3847 if (alignmentMask != 0 || boundaryMask != 0) { 3848 page_num_t offsetStart = start + sPhysicalPageOffset; 3849 3850 // enforce alignment 3851 if ((offsetStart & alignmentMask) != 0) 3852 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3853 3854 // enforce boundary 3855 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3856 + length - 1)) & boundaryMask) != 0) { 3857 offsetStart = (offsetStart + length - 1) & boundaryMask; 3858 } 3859 3860 start = offsetStart - sPhysicalPageOffset; 3861 } 3862 3863 if (start + length > end) { 3864 if (useCached == 0) { 3865 // The first iteration with free pages only was unsuccessful. 3866 // Try again also considering cached pages. 3867 useCached = 1; 3868 start = requestedStart; 3869 continue; 3870 } 3871 3872 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3873 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3874 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3875 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart, 3876 end, restrictions->alignment, restrictions->boundary); 3877 3878 freeClearQueueLocker.Unlock(); 3879 vm_page_unreserve_pages(&reservation); 3880 return NULL; 3881 } 3882 3883 bool foundRun = true; 3884 page_num_t i; 3885 for (i = 0; i < length; i++) { 3886 uint32 pageState = sPages[start + i].State(); 3887 if (pageState != PAGE_STATE_FREE 3888 && pageState != PAGE_STATE_CLEAR 3889 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3890 foundRun = false; 3891 break; 3892 } 3893 } 3894 3895 if (foundRun) { 3896 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3897 if (i == length) 3898 return &sPages[start]; 3899 3900 // apparently a cached page couldn't be allocated -- skip it and 3901 // continue 3902 freeClearQueueLocker.Lock(); 3903 } 3904 3905 start += i + 1; 3906 } 3907 } 3908 3909 3910 vm_page * 3911 vm_page_at_index(int32 index) 3912 { 3913 return &sPages[index]; 3914 } 3915 3916 3917 vm_page * 3918 vm_lookup_page(page_num_t pageNumber) 3919 { 3920 if (pageNumber < sPhysicalPageOffset) 3921 return NULL; 3922 3923 pageNumber -= sPhysicalPageOffset; 3924 if (pageNumber >= sNumPages) 3925 return NULL; 3926 3927 return &sPages[pageNumber]; 3928 } 3929 3930 3931 bool 3932 vm_page_is_dummy(struct vm_page *page) 3933 { 3934 return page < sPages || page >= sPages + sNumPages; 3935 } 3936 3937 3938 /*! Free the page that belonged to a certain cache. 3939 You can use vm_page_set_state() manually if you prefer, but only 3940 if the page does not equal PAGE_STATE_MODIFIED. 3941 */ 3942 void 3943 vm_page_free(VMCache *cache, vm_page *page) 3944 { 3945 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3946 && page->State() != PAGE_STATE_CLEAR); 3947 3948 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 3949 atomic_add(&sModifiedTemporaryPages, -1); 3950 3951 free_page(page, false); 3952 } 3953 3954 3955 void 3956 vm_page_set_state(vm_page *page, int pageState) 3957 { 3958 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3959 && page->State() != PAGE_STATE_CLEAR); 3960 3961 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) 3962 free_page(page, pageState == PAGE_STATE_CLEAR); 3963 else 3964 set_page_state(page, pageState); 3965 } 3966 3967 3968 /*! Moves a page to either the tail of the head of its current queue, 3969 depending on \a tail. 3970 The page must have a cache and the cache must be locked! 
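	For example, idle_scan_active_pages() requeues busy pages at the tail of
	their queue so they are looked at again later without stalling the scan.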
3971 */ 3972 void 3973 vm_page_requeue(struct vm_page *page, bool tail) 3974 { 3975 PAGE_ASSERT(page, page->Cache() != NULL); 3976 page->Cache()->AssertLocked(); 3977 // DEBUG_PAGE_ACCESS_CHECK(page); 3978 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 3979 // when it requeues busy pages. The reason is that vm_soft_fault() 3980 // (respectively fault_get_page()) and the file cache keep newly 3981 // allocated pages accessed while they are reading them from disk. It 3982 // would probably be better to change that code and reenable this 3983 // check. 3984 3985 VMPageQueue *queue = NULL; 3986 3987 switch (page->State()) { 3988 case PAGE_STATE_ACTIVE: 3989 queue = &sActivePageQueue; 3990 break; 3991 case PAGE_STATE_INACTIVE: 3992 queue = &sInactivePageQueue; 3993 break; 3994 case PAGE_STATE_MODIFIED: 3995 queue = &sModifiedPageQueue; 3996 break; 3997 case PAGE_STATE_CACHED: 3998 queue = &sCachedPageQueue; 3999 break; 4000 case PAGE_STATE_FREE: 4001 case PAGE_STATE_CLEAR: 4002 panic("vm_page_requeue() called for free/clear page %p", page); 4003 return; 4004 case PAGE_STATE_WIRED: 4005 case PAGE_STATE_UNUSED: 4006 return; 4007 default: 4008 panic("vm_page_touch: vm_page %p in invalid state %d\n", 4009 page, page->State()); 4010 break; 4011 } 4012 4013 queue->RequeueUnlocked(page, tail); 4014 } 4015 4016 4017 page_num_t 4018 vm_page_num_pages(void) 4019 { 4020 return sNumPages - sNonExistingPages; 4021 } 4022 4023 4024 /*! There is a subtle distinction between the page counts returned by 4025 this function and vm_page_num_free_pages(): 4026 The latter returns the number of pages that are completely uncommitted, 4027 whereas this one returns the number of pages that are available for 4028 use by being reclaimed as well (IOW it factors in things like cache pages 4029 as available). 4030 */ 4031 page_num_t 4032 vm_page_num_available_pages(void) 4033 { 4034 return vm_available_memory() / B_PAGE_SIZE; 4035 } 4036 4037 4038 page_num_t 4039 vm_page_num_free_pages(void) 4040 { 4041 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 4042 return count > 0 ? count : 0; 4043 } 4044 4045 4046 page_num_t 4047 vm_page_num_unused_pages(void) 4048 { 4049 int32 count = sUnreservedFreePages; 4050 return count > 0 ? count : 0; 4051 } 4052 4053 4054 void 4055 vm_page_get_stats(system_info *info) 4056 { 4057 // Note: there's no locking protecting any of the queues or counters here, 4058 // so we run the risk of getting bogus values when evaluating them 4059 // throughout this function. As these stats are for informational purposes 4060 // only, it is not really worth introducing such locking. Therefore we just 4061 // ensure that we don't under- or overflow any of the values. 4062 4063 // The pages used for the block cache buffers. Those should not be counted 4064 // as used but as cached pages. 4065 // TODO: We should subtract the blocks that are in use ATM, since those 4066 // can't really be freed in a low memory situation. 4067 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE; 4068 4069 // Non-temporary modified pages are special as they represent pages that 4070 // can be written back, so they could be freed if necessary, for us 4071 // basically making them into cached pages with a higher overhead. The 4072 // modified queue count is therefore split into temporary and non-temporary 4073 // counts that are then added to the corresponding number. 
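	// In other words: cached_pages = cached queue + non-temporary modified
	// pages + block cache pages, and used_pages = max_pages - (cached_pages
	// + free + clear), with both values clamped below so they stay
	// consistent.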
	page_num_t modifiedNonTemporaryPages
		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);

	info->max_pages = vm_page_num_pages();
	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
		+ blockCachePages;

	// max_pages is composed of:
	//	active + inactive + unused + wired + modified + cached + free + clear
	// So taking out the cached (including modified non-temporary), free and
	// clear ones leaves us with all used pages.
	int32 subtractPages = info->cached_pages + sFreePageQueue.Count()
		+ sClearPageQueue.Count();
	info->used_pages = subtractPages > info->max_pages
		? 0 : info->max_pages - subtractPages;

	if (info->used_pages + info->cached_pages > info->max_pages) {
		// Something was shuffled around while we were summing up the counts.
		// Make the values sane, preferring the worse case of more used pages.
		info->cached_pages = info->max_pages - info->used_pages;
	}

	info->page_faults = vm_num_page_faults();
	info->ignored_pages = sIgnoredPages;

	// TODO: We don't consider pages used for page directories/tables yet.
}


/*!	Returns the greatest address within the last page of accessible physical
	memory.
	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
	means that the last page ends at exactly 4 GB.
*/
phys_addr_t
vm_page_max_address()
{
	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
}


RANGE_MARKER_FUNCTION_END(vm_page)
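
// The following is an illustrative usage sketch of the reservation API above,
// not a definitive example taken from a real caller: pages are reserved
// first, then allocated against the reservation, and whatever part of the
// reservation was not consumed is returned afterwards. "kSomePriority" is a
// placeholder for a valid page reservation priority (as passed to
// vm_page_reserve_pages()).
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, 4, kSomePriority);
//
//	for (int i = 0; i < 4; i++) {
//		vm_page* page = vm_page_allocate_page(&reservation,
//			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
//		// ... use the freshly allocated, cleared page ...
//	}
//
//	vm_page_unreserve_pages(&reservation);
//		// returns the unconsumed remainder of the reservation (here nothing,
//		// since all four reserved pages were allocated); always safe to call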