/*
 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <string.h>
#include <stdlib.h>

#include <algorithm>

#include <KernelExport.h>
#include <OS.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm_translation_map.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <condition_variable.h>
#include <elf.h>
#include <heap.h>
#include <kernel.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_priv.h>
#include <vm/vm_page.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "IORequest.h"
#include "PageCacheLocker.h"
#include "VMAnonymousCache.h"
#include "VMPageQueue.h"


//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

//#define TRACE_VM_DAEMONS
#ifdef TRACE_VM_DAEMONS
#define TRACE_DAEMON(x...) dprintf(x)
#else
#define TRACE_DAEMON(x...) do {} while (false)
#endif

//#define TRACK_PAGE_USAGE_STATS 1

#define PAGE_ASSERT(page, condition)	\
	ASSERT_PRINT((condition), "page: %p", (page))

#define SCRUB_SIZE 16
	// this many pages will be cleared at once in the page scrubber thread

#define MAX_PAGE_WRITER_IO_PRIORITY	B_URGENT_DISPLAY_PRIORITY
	// maximum I/O priority of the page writer
#define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
	// the maximum I/O priority shall be reached when this many pages need to
	// be written


// The page reserve that an allocation of a certain priority must not touch.
static const size_t kPageReserveForPriority[] = {
	VM_PAGE_RESERVE_USER,		// user
	VM_PAGE_RESERVE_SYSTEM,	// system
	0							// VIP
};

// Minimum number of free pages the page daemon will try to achieve.
static uint32 sFreePagesTarget;
static uint32 sFreeOrCachedPagesTarget;
static uint32 sInactivePagesTarget;

// Wait interval between page daemon runs.
static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec

// Number of idle runs after which we want to have processed the full active
// queue.
static const uint32 kIdleRunsForFullQueue = 20;

// Maximum limit for the vm_page::usage_count.
static const int32 kPageUsageMax = 64;
// vm_page::usage_count buff an accessed page receives in a scan.
static const int32 kPageUsageAdvance = 3;
// vm_page::usage_count debuff an unaccessed page receives in a scan.
static const int32 kPageUsageDecline = 1;

int32 gMappedPagesCount;

static VMPageQueue sPageQueues[PAGE_STATE_COUNT];

static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];

static vm_page *sPages;
static page_num_t sPhysicalPageOffset;
static page_num_t sNumPages;
static page_num_t sNonExistingPages;
	// pages in the sPages array that aren't backed by physical memory
static uint64 sIgnoredPages;
	// pages of physical memory ignored by the boot loader (and thus not
	// available here)
static int32 sUnreservedFreePages;
static int32 sUnsatisfiedPageReservations;
static int32 sModifiedTemporaryPages;

static ConditionVariable sFreePageCondition;
static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");

// This lock must be used whenever the free or clear page queues are changed.
// If you need to work on both queues at the same time, you need to hold a write
// lock, otherwise, a read lock suffices (each queue still has a spinlock to
// guard against concurrent changes).
static rw_lock sFreePageQueuesLock
	= RW_LOCK_INITIALIZER("free/clear page queues");

#ifdef TRACK_PAGE_USAGE_STATS
static page_num_t sPageUsageArrays[512];
static page_num_t* sPageUsage = sPageUsageArrays;
static page_num_t sPageUsagePageCount;
static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
static page_num_t sNextPageUsagePageCount;
#endif


#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

struct caller_info {
	addr_t	caller;
	size_t	count;
};

static const int32 kCallerInfoTableSize = 1024;
static caller_info sCallerInfoTable[kCallerInfoTableSize];
static int32 sCallerInfoCount = 0;

static caller_info* get_caller_info(addr_t caller);


RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)

static const addr_t kVMPageCodeAddressRange[] = {
	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
};

#endif


RANGE_MARKER_FUNCTION_BEGIN(vm_page)


struct page_stats {
	int32	totalFreePages;
	int32	unsatisfiedReservations;
	int32	cachedPages;
};


struct PageReservationWaiter
		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
	Thread*	thread;
	uint32	dontTouch;		// reserve not to touch
	uint32	missing;		// pages missing for the reservation
	int32	threadPriority;

	bool operator<(const PageReservationWaiter& other) const
	{
		// Implies an order by descending VM priority (ascending dontTouch)
		// and (secondarily) descending thread priority.
		if (dontTouch != other.dontTouch)
			return dontTouch < other.dontTouch;
		return threadPriority > other.threadPriority;
	}
};

typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
static PageReservationWaiterList sPageReservationWaiters;


struct DaemonCondition {
	void Init(const char* name)
	{
		mutex_init(&fLock, "daemon condition");
		fCondition.Init(this, name);
		fActivated = false;
	}

	bool Lock()
	{
		return mutex_lock(&fLock) == B_OK;
	}

	void Unlock()
	{
		mutex_unlock(&fLock);
	}

	bool Wait(bigtime_t timeout, bool clearActivated)
	{
		MutexLocker locker(fLock);
		if (clearActivated)
			fActivated = false;
		else if (fActivated)
			return true;

		ConditionVariableEntry entry;
		fCondition.Add(&entry);

		locker.Unlock();

		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
	}

	void WakeUp()
	{
		if (fActivated)
			return;

		MutexLocker locker(fLock);
		fActivated = true;
		fCondition.NotifyOne();
	}

	void ClearActivated()
	{
		MutexLocker locker(fLock);
		fActivated = false;
	}

private:
	mutex				fLock;
	ConditionVariable	fCondition;
	bool				fActivated;
};


static DaemonCondition sPageWriterCondition;
static DaemonCondition sPageDaemonCondition;


#if PAGE_ALLOCATION_TRACING

namespace PageAllocationTracing {

class ReservePages : public AbstractTraceEntry {
public:
	ReservePages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page reserve: %" B_PRIu32, fCount);
	}

private:
	uint32 fCount;
};


class UnreservePages : public AbstractTraceEntry {
public:
	UnreservePages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page unreserve: %" B_PRId32, fCount);
	}

private:
	uint32 fCount;
};


class AllocatePage
	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
public:
	AllocatePage(page_num_t pageNumber)
		:
		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
		fPageNumber(pageNumber)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
	}

private:
	page_num_t	fPageNumber;
};


class AllocatePageRun
	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
public:
	AllocatePageRun(page_num_t startPage, uint32 length)
		:
		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
		fStartPage(startPage),
		fLength(length)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
			B_PRIu32, fStartPage, fLength);
	}

private:
	page_num_t	fStartPage;
	uint32		fLength;
};


class FreePage
	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
public:
	FreePage(page_num_t pageNumber)
		:
		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
		fPageNumber(pageNumber)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
	}

private:
	page_num_t	fPageNumber;
};


class ScrubbingPages : public AbstractTraceEntry {
public:
	ScrubbingPages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page scrubbing: %" B_PRId32, fCount);
	}

private:
	uint32 fCount;
};


class ScrubbedPages : public AbstractTraceEntry {
public:
	ScrubbedPages(uint32 count)
		:
		fCount(count)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page scrubbed: %" B_PRId32, fCount);
	}

private:
	uint32 fCount;
};


class StolenPage : public AbstractTraceEntry {
public:
	StolenPage()
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page stolen");
	}
};

} // namespace PageAllocationTracing

#	define TA(x) new(std::nothrow) PageAllocationTracing::x

#else
#	define TA(x)
#endif	// PAGE_ALLOCATION_TRACING


#if PAGE_DAEMON_TRACING

namespace PageDaemonTracing {

class ActivatePage : public AbstractTraceEntry {
public:
	ActivatePage(vm_page* page)
		:
		fCache(page->cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page activated: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};


class DeactivatePage : public AbstractTraceEntry {
public:
	DeactivatePage(vm_page* page)
		:
		fCache(page->cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page deactivated: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};


class FreedPageSwap : public AbstractTraceEntry {
public:
	FreedPageSwap(vm_page* page)
		:
		fCache(page->cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page swap freed: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};

} // namespace PageDaemonTracing

#	define TD(x) new(std::nothrow) PageDaemonTracing::x

#else
#	define TD(x)
#endif	// PAGE_DAEMON_TRACING


#if PAGE_WRITER_TRACING

namespace PageWriterTracing {

class WritePage : public AbstractTraceEntry {
public:
	WritePage(vm_page* page)
		:
		fCache(page->Cache()),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page write: %p, cache: %p", fPage, fCache);
	}

private:
	VMCache*	fCache;
	vm_page*	fPage;
};

} // namespace PageWriterTracing

#	define TPW(x) new(std::nothrow) PageWriterTracing::x

#else
#	define TPW(x)
#endif	// PAGE_WRITER_TRACING


#if PAGE_STATE_TRACING

namespace PageStateTracing {

class SetPageState : public AbstractTraceEntry {
public:
	SetPageState(vm_page* page, uint8 newState)
		:
		fPage(page),
		fOldState(page->State()),
		fNewState(newState),
		fBusy(page->busy),
		fWired(page->WiredCount() > 0),
		fMapped(!page->mappings.IsEmpty()),
		fAccessed(page->accessed),
		fModified(page->modified)
	{
#if PAGE_STATE_TRACING_STACK_TRACE
		fStackTrace = capture_tracing_stack_trace(
			PAGE_STATE_TRACING_STACK_TRACE, 0, true);
			// Don't capture userland stack trace to avoid potential
			// deadlocks.
#endif
		Initialized();
	}

#if PAGE_STATE_TRACING_STACK_TRACE
	virtual void DumpStackTrace(TraceOutput& out)
	{
		out.PrintStackTrace(fStackTrace);
	}
#endif

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
			fBusy ? 'b' : '-',
			fWired ? 'w' : '-',
			fMapped ? 'm' : '-',
			fAccessed ? 'a' : '-',
			fModified ? 'm' : '-',
			page_state_to_string(fOldState),
			page_state_to_string(fNewState));
	}

private:
	vm_page*	fPage;
#if PAGE_STATE_TRACING_STACK_TRACE
	tracing_stack_trace*	fStackTrace;
#endif
	uint8		fOldState;
	uint8		fNewState;
	bool		fBusy : 1;
	bool		fWired : 1;
	bool		fMapped : 1;
	bool		fAccessed : 1;
	bool		fModified : 1;
};

} // namespace PageStateTracing

#	define TPS(x) new(std::nothrow) PageStateTracing::x

#else
#	define TPS(x)
#endif	// PAGE_STATE_TRACING


#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

namespace BKernel {

class AllocationTrackingCallback {
public:
	virtual ~AllocationTrackingCallback();

	virtual bool ProcessTrackingInfo(
		AllocationTrackingInfo* info,
		page_num_t pageNumber) = 0;
};

}

using BKernel::AllocationTrackingCallback;


class AllocationCollectorCallback : public AllocationTrackingCallback {
public:
	AllocationCollectorCallback(bool resetInfos)
		:
		fResetInfos(resetInfos)
	{
	}

	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
		page_num_t pageNumber)
	{
		if (!info->IsInitialized())
			return true;

		addr_t caller = 0;
		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();

		if (traceEntry != NULL && info->IsTraceEntryValid()) {
			caller = tracing_find_caller_in_stack_trace(
				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
		}

		caller_info* callerInfo = get_caller_info(caller);
		if (callerInfo == NULL) {
			kprintf("out of space for caller infos\n");
			return false;
		}

		callerInfo->count++;

		if (fResetInfos)
			info->Clear();

		return true;
	}

private:
	bool	fResetInfos;
};


class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
public:
	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
		team_id teamFilter, thread_id threadFilter)
		:
		fPrintStackTrace(printStackTrace),
		fPageFilter(pageFilter),
		fTeamFilter(teamFilter),
		fThreadFilter(threadFilter)
	{
	}

	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
		page_num_t pageNumber)
	{
		if (!info->IsInitialized())
			return true;

		if (fPageFilter != 0 && pageNumber != fPageFilter)
			return true;

		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
		if (traceEntry != NULL && !info->IsTraceEntryValid())
			traceEntry = NULL;

		if (traceEntry != NULL) {
			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
				return true;
			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
				return true;
		} else {
			// we need the info if we have filters set
			if (fTeamFilter != -1 || fThreadFilter != -1)
				return true;
		}

		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);

		if (traceEntry != NULL) {
			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
				", time %" B_PRId64 "\n", traceEntry->TeamID(),
				traceEntry->ThreadID(), traceEntry->Time());

			if (fPrintStackTrace)
				tracing_print_stack_trace(traceEntry->StackTrace());
		} else
			kprintf("\n");

		return true;
	}

private:
	bool		fPrintStackTrace;
	page_num_t	fPageFilter;
	team_id		fTeamFilter;
	thread_id	fThreadFilter;
};


class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
public:
	AllocationDetailPrinterCallback(addr_t caller)
		:
		fCaller(caller)
	{
	}

	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
		page_num_t pageNumber)
	{
		if (!info->IsInitialized())
			return true;

		addr_t caller = 0;
		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
		if (traceEntry != NULL && !info->IsTraceEntryValid())
			traceEntry = NULL;

		if (traceEntry != NULL) {
			caller = tracing_find_caller_in_stack_trace(
				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
		}

		if (caller != fCaller)
			return true;

		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
		if (traceEntry != NULL)
			tracing_print_stack_trace(traceEntry->StackTrace());

		return true;
	}

private:
	addr_t	fCaller;
};

#endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE


static int
find_page(int argc, char **argv)
{
	struct vm_page *page;
	addr_t address;
	int32 index = 1;
	int i;

	struct {
		const char*		name;
		VMPageQueue*	queue;
	} pageQueueInfos[] = {
		{ "free",		&sFreePageQueue },
		{ "clear",		&sClearPageQueue },
		{ "modified",	&sModifiedPageQueue },
		{ "active",		&sActivePageQueue },
		{ "inactive",	&sInactivePageQueue },
		{ "cached",		&sCachedPageQueue },
		{ NULL, NULL }
	};

	if (argc < 2
		|| strlen(argv[index]) <= 2
		|| argv[index][0] != '0'
		|| argv[index][1] != 'x') {
		kprintf("usage: find_page <address>\n");
		return 0;
	}

	address = strtoul(argv[index], NULL, 0);
	page = (vm_page*)address;

	for (i = 0; pageQueueInfos[i].name; i++) {
		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
		while (vm_page* p = it.Next()) {
			if (p == page) {
				kprintf("found page %p in queue %p (%s)\n", page,
					pageQueueInfos[i].queue, pageQueueInfos[i].name);
				return 0;
			}
		}
	}

	kprintf("page %p isn't in any queue\n", page);

	return 0;
}


const char *
page_state_to_string(int state)
{
	switch(state) {
		case PAGE_STATE_ACTIVE:
			return "active";
		case PAGE_STATE_INACTIVE:
			return "inactive";
		case PAGE_STATE_MODIFIED:
			return "modified";
		case PAGE_STATE_CACHED:
			return "cached";
		case PAGE_STATE_FREE:
			return "free";
		case PAGE_STATE_CLEAR:
			return "clear";
		case PAGE_STATE_WIRED:
			return "wired";
		case PAGE_STATE_UNUSED:
			return "unused";
		default:
			return "unknown";
	}
}


static int
dump_page(int argc, char **argv)
{
	bool addressIsPointer = true;
	bool physical = false;
	bool searchMappings = false;
	int32 index = 1;

	while (index < argc) {
		if (argv[index][0] != '-')
			break;

		if (!strcmp(argv[index], "-p")) {
			addressIsPointer = false;
			physical = true;
		} else if (!strcmp(argv[index], "-v")) {
			addressIsPointer = false;
		} else if (!strcmp(argv[index], "-m")) {
			searchMappings = true;
		} else {
			print_debugger_command_usage(argv[0]);
			return 0;
		}

		index++;
	}

	if (index + 1 != argc) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}

	uint64 value;
	if (!evaluate_debug_expression(argv[index], &value, false))
		return 0;

	uint64 pageAddress = value;
	struct vm_page* page;

	if (addressIsPointer) {
		page = (struct vm_page *)(addr_t)pageAddress;
	} else {
		if (!physical) {
			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();

			if (debug_get_debugged_thread()->team->address_space != NULL)
				addressSpace = debug_get_debugged_thread()->team->address_space;

			uint32 flags = 0;
			phys_addr_t physicalAddress;
			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
					&physicalAddress, &flags) != B_OK
				|| (flags & PAGE_PRESENT) == 0) {
				kprintf("Virtual address not mapped to a physical page in this "
					"address space.\n");
				return 0;
			}
			pageAddress = physicalAddress;
		}

		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
	}

	kprintf("PAGE: %p\n", page);
	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
		page->queue_link.previous);
	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
		page->physical_page_number);
	kprintf("cache: %p\n", page->Cache());
	kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset);
	kprintf("cache_next: %p\n", page->cache_next);
	kprintf("state: %s\n", page_state_to_string(page->State()));
	kprintf("wired_count: %d\n", page->WiredCount());
	kprintf("usage_count: %d\n", page->usage_count);
	kprintf("busy: %d\n", page->busy);
	kprintf("busy_writing: %d\n", page->busy_writing);
	kprintf("accessed: %d\n", page->accessed);
	kprintf("modified: %d\n", page->modified);
#if DEBUG_PAGE_QUEUE
	kprintf("queue: %p\n", page->queue);
#endif
#if DEBUG_PAGE_ACCESS
	kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread);
#endif
	kprintf("area mappings:\n");

	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
	vm_page_mapping *mapping;
	while ((mapping = iterator.Next()) != NULL) {
		kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
		mapping = mapping->page_link.next;
	}

	if (searchMappings) {
		kprintf("all mappings:\n");
		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
		while (addressSpace != NULL) {
			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
			for (addr_t address = addressSpace->Base(); pageCount != 0;
					address += B_PAGE_SIZE, pageCount--) {
				phys_addr_t physicalAddress;
				uint32 flags = 0;
				if (addressSpace->TranslationMap()->QueryInterrupt(address,
						&physicalAddress, &flags) == B_OK
					&& (flags & PAGE_PRESENT) != 0
					&& physicalAddress / B_PAGE_SIZE
						== page->physical_page_number) {
					VMArea* area = addressSpace->LookupArea(address);
					kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
						area != NULL ? area->id : -1, address,
						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
				}
			}
			addressSpace = VMAddressSpace::DebugNext(addressSpace);
		}
	}

	set_debug_variable("_cache", (addr_t)page->Cache());
#if DEBUG_PAGE_ACCESS
	set_debug_variable("_accessor", page->accessing_thread);
#endif

	return 0;
}


static int
dump_page_queue(int argc, char **argv)
{
	struct VMPageQueue *queue;

	if (argc < 2) {
		kprintf("usage: page_queue <address/name> [list]\n");
		return 0;
	}

	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
	else if (!strcmp(argv[1], "free"))
		queue = &sFreePageQueue;
	else if (!strcmp(argv[1], "clear"))
		queue = &sClearPageQueue;
	else if (!strcmp(argv[1], "modified"))
		queue = &sModifiedPageQueue;
	else if (!strcmp(argv[1], "active"))
		queue = &sActivePageQueue;
	else if (!strcmp(argv[1], "inactive"))
		queue = &sInactivePageQueue;
	else if (!strcmp(argv[1], "cached"))
		queue = &sCachedPageQueue;
	else {
		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
		return 0;
	}

	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
		queue->Count());

	if (argc == 3) {
		struct vm_page *page = queue->Head();

		kprintf("page cache type state wired usage\n");
		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
			kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(),
				vm_cache_type_to_string(page->Cache()->type),
				page_state_to_string(page->State()),
				page->WiredCount(), page->usage_count);
		}
	}
	return 0;
}


static int
dump_page_stats(int argc, char **argv)
{
	page_num_t swappableModified = 0;
	page_num_t swappableModifiedInactive = 0;

	size_t counter[8];
	size_t busyCounter[8];
	memset(counter, 0, sizeof(counter));
	memset(busyCounter, 0, sizeof(busyCounter));

	struct page_run {
		page_num_t	start;
		page_num_t	end;

		page_num_t Length() const { return end - start; }
	};

	page_run currentFreeRun = { 0, 0 };
	page_run currentCachedRun = { 0, 0 };
	page_run longestFreeRun = { 0, 0 };
	page_run longestCachedRun = { 0, 0 };

	for (page_num_t i = 0; i < sNumPages; i++) {
		if (sPages[i].State() > 7) {
			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
				&sPages[i]);
		}

		uint32 pageState = sPages[i].State();

		counter[pageState]++;
		if (sPages[i].busy)
			busyCounter[pageState]++;

		if (pageState == PAGE_STATE_MODIFIED
			&& sPages[i].Cache() != NULL
			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
			swappableModified++;
			if (sPages[i].usage_count == 0)
				swappableModifiedInactive++;
		}

		// track free and cached pages runs
		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
			currentFreeRun.end = i + 1;
			currentCachedRun.end = i + 1;
		} else {
			if (currentFreeRun.Length() > longestFreeRun.Length())
				longestFreeRun = currentFreeRun;
			currentFreeRun.start = currentFreeRun.end = i + 1;

			if (pageState == PAGE_STATE_CACHED) {
				currentCachedRun.end = i + 1;
			} else {
				if (currentCachedRun.Length() > longestCachedRun.Length())
					longestCachedRun = currentCachedRun;
				currentCachedRun.start = currentCachedRun.end = i + 1;
			}
		}
	}

	kprintf("page stats:\n");
	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);

	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);

	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
		sUnsatisfiedPageReservations);
	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
		sPages[longestFreeRun.start].physical_page_number);
	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
		sPages[longestCachedRun.start].physical_page_number);

	kprintf("waiting threads:\n");
	for (PageReservationWaiterList::Iterator it
			= sPageReservationWaiters.GetIterator();
		PageReservationWaiter* waiter = it.Next();) {
		kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32
			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
			waiter->missing, waiter->dontTouch);
	}

	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
		sFreePageQueue.Count());
	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
		sClearPageQueue.Count());
	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
		&sActivePageQueue, sActivePageQueue.Count());
	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
		&sInactivePageQueue, sInactivePageQueue.Count());
	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
		&sCachedPageQueue, sCachedPageQueue.Count());
	return 0;
}


#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

static caller_info*
get_caller_info(addr_t caller)
{
	// find the caller info
	for (int32 i = 0; i < sCallerInfoCount; i++) {
		if (caller == sCallerInfoTable[i].caller)
			return &sCallerInfoTable[i];
	}

	// not found, add a new entry, if there are free slots
	if (sCallerInfoCount >= kCallerInfoTableSize)
		return NULL;

	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
	info->caller = caller;
	info->count = 0;

	return info;
}


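/*!	qsort() comparator for caller_info entries: sorts by allocation count,
	descending.
*/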
static int
caller_info_compare_count(const void* _a, const void* _b)
{
	const caller_info* a = (const caller_info*)_a;
	const caller_info* b = (const caller_info*)_b;
	return (int)(b->count - a->count);
}


static int
dump_page_allocations_per_caller(int argc, char** argv)
{
	bool resetAllocationInfos = false;
	bool printDetails = false;
	addr_t caller = 0;

	for (int32 i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-d") == 0) {
			uint64 callerAddress;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			caller = callerAddress;
			printDetails = true;
		} else if (strcmp(argv[i], "-r") == 0) {
			resetAllocationInfos = true;
		} else {
			print_debugger_command_usage(argv[0]);
			return 0;
		}
	}

	sCallerInfoCount = 0;

	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
	AllocationDetailPrinterCallback detailsCallback(caller);
	AllocationTrackingCallback& callback = printDetails
		? (AllocationTrackingCallback&)detailsCallback
		: (AllocationTrackingCallback&)collectorCallback;

	for (page_num_t i = 0; i < sNumPages; i++)
		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);

	if (printDetails)
		return 0;

	// sort the array
	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
		&caller_info_compare_count);

	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);

	size_t totalAllocationCount = 0;

	kprintf(" count caller\n");
	kprintf("----------------------------------\n");
	for (int32 i = 0; i < sCallerInfoCount; i++) {
		caller_info& info = sCallerInfoTable[i];
		kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller);

		const char* symbol;
		const char* imageName;
		bool exactMatch;
		addr_t baseAddress;

		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
				&imageName, &exactMatch) == B_OK) {
			kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
				info.caller - baseAddress, imageName,
				exactMatch ? "" : " (nearest)");
		} else
			kprintf("\n");

		totalAllocationCount += info.count;
	}

	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
		totalAllocationCount);

	return 0;
}


static int
dump_page_allocation_infos(int argc, char** argv)
{
	page_num_t pageFilter = 0;
	team_id teamFilter = -1;
	thread_id threadFilter = -1;
	bool printStackTraces = false;

	for (int32 i = 1; i < argc; i++) {
		if (strcmp(argv[i], "--stacktrace") == 0)
			printStackTraces = true;
		else if (strcmp(argv[i], "-p") == 0) {
			uint64 pageNumber;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			pageFilter = pageNumber;
		} else if (strcmp(argv[i], "--team") == 0) {
			uint64 team;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &team, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			teamFilter = team;
		} else if (strcmp(argv[i], "--thread") == 0) {
			uint64 thread;
			if (++i >= argc
				|| !evaluate_debug_expression(argv[i], &thread, true)) {
				print_debugger_command_usage(argv[0]);
				return 0;
			}

			threadFilter = thread;
		} else {
			print_debugger_command_usage(argv[0]);
			return 0;
		}
	}

	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
		teamFilter, threadFilter);

	for (page_num_t i = 0; i < sNumPages; i++)
		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);

	return 0;
}

#endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE


#ifdef TRACK_PAGE_USAGE_STATS

static void
track_page_usage(vm_page* page)
{
	if (page->WiredCount() == 0) {
		sNextPageUsage[(int32)page->usage_count + 128]++;
		sNextPageUsagePageCount++;
	}
}


static void
update_page_usage_stats()
{
	std::swap(sPageUsage, sNextPageUsage);
	sPageUsagePageCount = sNextPageUsagePageCount;

	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
	sNextPageUsagePageCount = 0;

	// compute average
	if (sPageUsagePageCount > 0) {
		int64 sum = 0;
		for (int32 i = 0; i < 256; i++)
			sum += (int64)sPageUsage[i] * (i - 128);

		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
	}
}


static int
dump_page_usage_stats(int argc, char** argv)
{
	kprintf("distribution of page usage counts (%lu pages):",
		sPageUsagePageCount);

	int64 sum = 0;
	for (int32 i = 0; i < 256; i++) {
		if (i % 8 == 0)
			kprintf("\n%4ld:", i - 128);

		int64 count = sPageUsage[i];
		sum += count * (i - 128);

		kprintf(" %9llu", count);
	}

	kprintf("\n\n");

	kprintf("average usage count: %f\n",
		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);

	return 0;
}

#endif	// TRACK_PAGE_USAGE_STATS


// #pragma mark - vm_page


inline void
vm_page::InitState(uint8 newState)
{
	state = newState;
}


inline void
vm_page::SetState(uint8 newState)
{
	TPS(SetPageState(this, newState));

	state = newState;
}


// #pragma mark -


static void
get_page_stats(page_stats& _pageStats)
{
	_pageStats.totalFreePages = sUnreservedFreePages;
	_pageStats.cachedPages = sCachedPageQueue.Count();
	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
		// TODO: We don't get an actual snapshot here!
}


static bool
do_active_paging(const page_stats& pageStats)
{
	return pageStats.totalFreePages + pageStats.cachedPages
		< pageStats.unsatisfiedReservations
			+ (int32)sFreeOrCachedPagesTarget;
}


/*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
	\a count. Doesn't touch the last \a dontTouch pages of
	\c sUnreservedFreePages, though.
	\return The number of actually reserved pages.
*/
static uint32
reserve_some_pages(uint32 count, uint32 dontTouch)
{
	while (true) {
		int32 freePages = atomic_get(&sUnreservedFreePages);
		if (freePages <= (int32)dontTouch)
			return 0;

		int32 toReserve = std::min(count, freePages - dontTouch);
		if (atomic_test_and_set(&sUnreservedFreePages,
				freePages - toReserve, freePages)
					== freePages) {
			return toReserve;
		}

		// the count changed in the meantime -- retry
	}
}


static void
wake_up_page_reservation_waiters()
{
	MutexLocker pageDeficitLocker(sPageDeficitLock);

	// TODO: If this is a low priority thread, we might want to disable
	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
	// high priority threads will be kept waiting while a medium priority
	// thread prevents us from running.

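	// Hand the newly available pages to the waiters at the head of the list.
	// A waiter is only unblocked once all of its missing pages have been
	// reserved; if we run out of pages, we stop.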
	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
		int32 reserved = reserve_some_pages(waiter->missing,
			waiter->dontTouch);
		if (reserved == 0)
			return;

		atomic_add(&sUnsatisfiedPageReservations, -reserved);
		waiter->missing -= reserved;

		if (waiter->missing > 0)
			return;

		sPageReservationWaiters.Remove(waiter);

		thread_unblock(waiter->thread, B_OK);
	}
}


static inline void
unreserve_pages(uint32 count)
{
	atomic_add(&sUnreservedFreePages, count);
	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
		wake_up_page_reservation_waiters();
}


static void
free_page(vm_page* page, bool clear)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	PAGE_ASSERT(page, !page->IsMapped());

	VMPageQueue* fromQueue;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			fromQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			fromQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			fromQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			fromQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("free_page(): page %p already free", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			fromQueue = NULL;
			break;
		default:
			panic("free_page(): page %p in invalid state %d",
				page, page->State());
			return;
	}

	if (page->CacheRef() != NULL)
		panic("to be freed page %p has cache", page);
	if (page->IsMapped())
		panic("to be freed page %p has mappings", page);

	if (fromQueue != NULL)
		fromQueue->RemoveUnlocked(page);

	TA(FreePage(page->physical_page_number));

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	page->allocation_tracking_info.Clear();
#endif

	ReadLocker locker(sFreePageQueuesLock);

	DEBUG_PAGE_ACCESS_END(page);

	if (clear) {
		page->SetState(PAGE_STATE_CLEAR);
		sClearPageQueue.PrependUnlocked(page);
	} else {
		page->SetState(PAGE_STATE_FREE);
		sFreePageQueue.PrependUnlocked(page);
	}

	locker.Unlock();
}


/*!	The caller must make sure that no-one else tries to change the page's state
	while the function is called. If the page has a cache, this can be done by
	locking the cache.
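	The page queues must not be locked.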
*/
static void
set_page_state(vm_page *page, int pageState)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	if (pageState == page->State())
		return;

	VMPageQueue* fromQueue;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			fromQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			fromQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			fromQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			fromQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("set_page_state(): page %p is free/clear", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			fromQueue = NULL;
			break;
		default:
			panic("set_page_state(): page %p in invalid state %d",
				page, page->State());
			return;
	}

	VMPageQueue* toQueue;

	switch (pageState) {
		case PAGE_STATE_ACTIVE:
			toQueue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			toQueue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			toQueue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			PAGE_ASSERT(page, !page->IsMapped());
			PAGE_ASSERT(page, !page->modified);
			toQueue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("set_page_state(): target state is free/clear");
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			toQueue = NULL;
			break;
		default:
			panic("set_page_state(): invalid target state %d", pageState);
			return;
	}

	VMCache* cache = page->Cache();
	if (cache != NULL && cache->temporary) {
		if (pageState == PAGE_STATE_MODIFIED)
			atomic_add(&sModifiedTemporaryPages, 1);
		else if (page->State() == PAGE_STATE_MODIFIED)
			atomic_add(&sModifiedTemporaryPages, -1);
	}

	// move the page
	if (toQueue == fromQueue) {
		// Note: Theoretically we are required to lock when changing the page
		// state, even if we don't change the queue. We actually don't have to
		// do this, though, since only for the active queue there are different
		// page states and active pages have a cache that must be locked at
		// this point. So we rely on the fact that everyone must lock the cache
		// before trying to change/interpret the page state.
		PAGE_ASSERT(page, cache != NULL);
		cache->AssertLocked();
		page->SetState(pageState);
	} else {
		if (fromQueue != NULL)
			fromQueue->RemoveUnlocked(page);

		page->SetState(pageState);

		if (toQueue != NULL)
			toQueue->AppendUnlocked(page);
	}
}


/*!	Moves a previously modified page into a now appropriate queue.
	The page queues must not be locked.
*/
static void
move_page_to_appropriate_queue(vm_page *page)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	// Note, this logic must be in sync with what the page daemon does.
	int32 state;
	if (page->IsMapped())
		state = PAGE_STATE_ACTIVE;
	else if (page->modified)
		state = PAGE_STATE_MODIFIED;
	else
		state = PAGE_STATE_CACHED;

	// TODO: If free + cached pages are low, we might directly want to free the
	// page.
	set_page_state(page, state);
}


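/*!	Zeroes out the page's physical memory.
*/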
static void
clear_page(struct vm_page *page)
{
	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
		B_PAGE_SIZE);
}


static status_t
mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
{
	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
		B_PRIxPHYSADDR "\n", startPage, length));

	if (sPhysicalPageOffset > startPage) {
		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			"): start page is before free list\n", startPage, length);
		if (sPhysicalPageOffset - startPage >= length)
			return B_OK;
		length -= sPhysicalPageOffset - startPage;
		startPage = sPhysicalPageOffset;
	}

	startPage -= sPhysicalPageOffset;

	if (startPage + length > sNumPages) {
		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			"): range would extend past free list\n", startPage, length);
		if (startPage >= sNumPages)
			return B_OK;
		length = sNumPages - startPage;
	}

	WriteLocker locker(sFreePageQueuesLock);

	for (page_num_t i = 0; i < length; i++) {
		vm_page *page = &sPages[startPage + i];
		switch (page->State()) {
			case PAGE_STATE_FREE:
			case PAGE_STATE_CLEAR:
			{
				// TODO: This violates the page reservation policy, since we remove pages from
				// the free/clear queues without having reserved them before. This should happen
				// in the early boot process only, though.
				DEBUG_PAGE_ACCESS_START(page);
				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
					? sFreePageQueue : sClearPageQueue;
				queue.Remove(page);
				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
				page->busy = false;
				atomic_add(&sUnreservedFreePages, -1);
				DEBUG_PAGE_ACCESS_END(page);
				break;
			}
			case PAGE_STATE_WIRED:
			case PAGE_STATE_UNUSED:
				break;
			case PAGE_STATE_ACTIVE:
			case PAGE_STATE_INACTIVE:
			case PAGE_STATE_MODIFIED:
			case PAGE_STATE_CACHED:
			default:
				// uh
				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
					" in non-free state %d!\n", startPage + i, page->State());
				break;
		}
	}

	return B_OK;
}


/*!	This is a background thread that wakes up every now and then (every 100ms)
	and moves some pages from the free queue over to the clear queue.
	Given enough time, it will clear out all pages from the free queue - we
	could probably slow it down after having reached a certain threshold.
*/
static int32
page_scrubber(void *unused)
{
	(void)(unused);

	TRACE(("page_scrubber starting...\n"));

	for (;;) {
		snooze(100000); // 100ms

		if (sFreePageQueue.Count() == 0
			|| atomic_get(&sUnreservedFreePages)
				< (int32)sFreePagesTarget) {
			continue;
		}

		// Since we temporarily remove pages from the free pages reserve,
		// we must make sure we don't cause a violation of the page
		// reservation guarantee. The following is usually stricter than
		// necessary, because we don't have information on how many of the
		// reserved pages have already been allocated.
		int32 reserved = reserve_some_pages(SCRUB_SIZE,
			kPageReserveForPriority[VM_PRIORITY_USER]);
		if (reserved == 0)
			continue;

		// get some pages from the free queue
		ReadLocker locker(sFreePageQueuesLock);

		vm_page *page[SCRUB_SIZE];
		int32 scrubCount = 0;
		for (int32 i = 0; i < reserved; i++) {
			page[i] = sFreePageQueue.RemoveHeadUnlocked();
			if (page[i] == NULL)
				break;

			DEBUG_PAGE_ACCESS_START(page[i]);

			page[i]->SetState(PAGE_STATE_ACTIVE);
			page[i]->busy = true;
			scrubCount++;
		}

		locker.Unlock();

		if (scrubCount == 0) {
			unreserve_pages(reserved);
			continue;
		}

		TA(ScrubbingPages(scrubCount));

		// clear them
		for (int32 i = 0; i < scrubCount; i++)
			clear_page(page[i]);

		locker.Lock();

		// and put them into the clear queue
		for (int32 i = 0; i < scrubCount; i++) {
			page[i]->SetState(PAGE_STATE_CLEAR);
			page[i]->busy = false;
			DEBUG_PAGE_ACCESS_END(page[i]);
			sClearPageQueue.PrependUnlocked(page[i]);
		}

		locker.Unlock();

		unreserve_pages(reserved);

		TA(ScrubbedPages(scrubCount));
	}

	return 0;
}


static void
init_page_marker(vm_page &marker)
{
	marker.SetCacheRef(NULL);
	marker.InitState(PAGE_STATE_UNUSED);
	marker.busy = true;
#if DEBUG_PAGE_QUEUE
	marker.queue = NULL;
#endif
#if DEBUG_PAGE_ACCESS
	marker.accessing_thread = thread_get_current_thread_id();
#endif
}


static void
remove_page_marker(struct vm_page &marker)
{
	DEBUG_PAGE_ACCESS_CHECK(&marker);

	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
		sPageQueues[marker.State()].RemoveUnlocked(&marker);

	marker.SetState(PAGE_STATE_UNUSED);
}


static vm_page*
next_modified_page(page_num_t& maxPagesToSee)
{
	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());

	while (maxPagesToSee > 0) {
		vm_page* page = sModifiedPageQueue.Head();
		if (page == NULL)
			return NULL;

		sModifiedPageQueue.Requeue(page, true);

		maxPagesToSee--;

		if (!page->busy)
			return page;
	}

	return NULL;
}


// #pragma mark -


class PageWriteTransfer;
class PageWriteWrapper;


class PageWriterRun {
public:
	status_t Init(uint32 maxPages);

	void PrepareNextRun();
	void AddPage(vm_page* page);
	uint32 Go();

	void PageWritten(PageWriteTransfer* transfer, status_t status,
		bool partialTransfer, size_t bytesTransferred);

private:
	uint32				fMaxPages;
	uint32				fWrapperCount;
	uint32				fTransferCount;
	int32				fPendingTransfers;
	PageWriteWrapper*	fWrappers;
	PageWriteTransfer*	fTransfers;
	ConditionVariable	fAllFinishedCondition;
};


class PageWriteTransfer : public AsyncIOCallback {
public:
	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
	bool AddPage(vm_page* page);

	status_t Schedule(uint32 flags);

	void SetStatus(status_t status, size_t transferred);

	status_t Status() const { return fStatus; }
	struct VMCache* Cache() const { return fCache; }
	uint32 PageCount() const { return fPageCount; }

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred);
private:
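	// fRun may be NULL, in which case Schedule() performs the write
	// synchronously instead of via WriteAsync().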
	PageWriterRun*		fRun;
	struct VMCache*		fCache;
	off_t				fOffset;
	uint32				fPageCount;
	int32				fMaxPages;
	status_t			fStatus;
	uint32				fVecCount;
	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
};


class PageWriteWrapper {
public:
	PageWriteWrapper();
	~PageWriteWrapper();
	void SetTo(vm_page* page);
	bool Done(status_t result);

private:
	vm_page*			fPage;
	struct VMCache*		fCache;
	bool				fIsActive;
};


PageWriteWrapper::PageWriteWrapper()
	:
	fIsActive(false)
{
}


PageWriteWrapper::~PageWriteWrapper()
{
	if (fIsActive)
		panic("page write wrapper going out of scope but isn't completed");
}


/*!	The page's cache must be locked.
*/
void
PageWriteWrapper::SetTo(vm_page* page)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	if (page->busy)
		panic("setting page write wrapper to busy page");

	if (fIsActive)
		panic("re-setting page write wrapper that isn't completed");

	fPage = page;
	fCache = page->Cache();
	fIsActive = true;

	fPage->busy = true;
	fPage->busy_writing = true;

	// We have a modified page -- however, while we're writing it back,
	// the page might still be mapped. In order not to lose any changes to the
	// page, we mark it clean before actually writing it back; if
	// writing the page fails for some reason, we'll just keep it in the
	// modified page list, but that should happen only rarely.

	// If the page is changed after we cleared the dirty flag, but before we
	// had the chance to write it back, then we'll write it again later -- that
	// will probably not happen that often, though.

	vm_clear_map_flags(fPage, PAGE_MODIFIED);
}


/*!	The page's cache must be locked.
	The page queues must not be locked.
	\return \c true if the page was written successfully or could otherwise be
		handled, \c false otherwise.
*/
bool
PageWriteWrapper::Done(status_t result)
{
	if (!fIsActive)
		panic("completing page write wrapper that is not active");

	DEBUG_PAGE_ACCESS_START(fPage);

	fPage->busy = false;
		// Set unbusy and notify later by hand, since we might free the page.

	bool success = true;

	if (result == B_OK) {
		// put it into the active/inactive queue
		move_page_to_appropriate_queue(fPage);
		fPage->busy_writing = false;
		DEBUG_PAGE_ACCESS_END(fPage);
	} else {
		// Writing the page failed. One reason would be that the cache has been
		// shrunk and the page no longer belongs to the file. Otherwise the
		// actual I/O failed, in which case we'll simply keep the page modified.

		if (!fPage->busy_writing) {
			// The busy_writing flag was cleared. That means the cache has been
			// shrunk while we were trying to write the page and we have to free
			// it now.
			vm_remove_all_page_mappings(fPage);
				// TODO: Unmapping should already happen when resizing the cache!
			fCache->RemovePage(fPage);
			free_page(fPage, false);
			unreserve_pages(1);
		} else {
			// Writing the page failed -- mark the page modified and move it to
			// an appropriate queue other than the modified queue, so we don't
			// keep trying to write it over and over again. We keep
			// non-temporary pages in the modified queue, though, so they don't
			// get lost in the inactive queue.
			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
				strerror(result));

			fPage->modified = true;
			if (!fCache->temporary)
				set_page_state(fPage, PAGE_STATE_MODIFIED);
			else if (fPage->IsMapped())
				set_page_state(fPage, PAGE_STATE_ACTIVE);
			else
				set_page_state(fPage, PAGE_STATE_INACTIVE);

			fPage->busy_writing = false;
			DEBUG_PAGE_ACCESS_END(fPage);

			success = false;
		}
	}

	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
	fIsActive = false;

	return success;
}


/*!	The page's cache must be locked.
*/
void
PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
{
	fRun = run;
	fCache = page->Cache();
	fOffset = page->cache_offset;
	fPageCount = 1;
	fMaxPages = maxPages;
	fStatus = B_OK;

	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
	fVecs[0].length = B_PAGE_SIZE;
	fVecCount = 1;
}


/*!	The page's cache must be locked.
*/
bool
PageWriteTransfer::AddPage(vm_page* page)
{
	if (page->Cache() != fCache
		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
		return false;

	phys_addr_t nextBase = fVecs[fVecCount - 1].base
		+ fVecs[fVecCount - 1].length;

	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
		&& (off_t)page->cache_offset == fOffset + fPageCount) {
		// append to last iovec
		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
		fPageCount++;
		return true;
	}

	nextBase = fVecs[0].base - B_PAGE_SIZE;
	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
		&& (off_t)page->cache_offset == fOffset - 1) {
		// prepend to first iovec and adjust offset
		fVecs[0].base = nextBase;
		fVecs[0].length += B_PAGE_SIZE;
		fOffset = page->cache_offset;
		fPageCount++;
		return true;
	}

	if (((off_t)page->cache_offset == fOffset + fPageCount
			|| (off_t)page->cache_offset == fOffset - 1)
		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
		// not physically contiguous or not in the right order
		uint32 vectorIndex;
		if ((off_t)page->cache_offset < fOffset) {
			// we are pre-pending another vector, move the other vecs
			for (uint32 i = fVecCount; i > 0; i--)
				fVecs[i] = fVecs[i - 1];

			fOffset = page->cache_offset;
			vectorIndex = 0;
		} else
			vectorIndex = fVecCount;

		fVecs[vectorIndex].base
			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
		fVecs[vectorIndex].length = B_PAGE_SIZE;

		fVecCount++;
		fPageCount++;
		return true;
	}

	return false;
}


status_t
PageWriteTransfer::Schedule(uint32 flags)
{
	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;

	if (fRun != NULL) {
		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
			flags | B_PHYSICAL_IO_REQUEST, this);
	}

	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
		flags | B_PHYSICAL_IO_REQUEST, &writeLength);

	SetStatus(status, writeLength);
	return fStatus;
}


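/*!	Evaluates the result of the I/O; called by the synchronous path in
	Schedule() as well as by IOFinished() for asynchronous transfers.
*/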
2144 { 2145 // only succeed if all pages up to the last one have been written fully 2146 // and the last page has at least been written partially 2147 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2148 status = B_ERROR; 2149 2150 fStatus = status; 2151 } 2152 2153 2154 void 2155 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2156 generic_size_t bytesTransferred) 2157 { 2158 SetStatus(status, bytesTransferred); 2159 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2160 } 2161 2162 2163 status_t 2164 PageWriterRun::Init(uint32 maxPages) 2165 { 2166 fMaxPages = maxPages; 2167 fWrapperCount = 0; 2168 fTransferCount = 0; 2169 fPendingTransfers = 0; 2170 2171 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2172 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 2173 if (fWrappers == NULL || fTransfers == NULL) 2174 return B_NO_MEMORY; 2175 2176 return B_OK; 2177 } 2178 2179 2180 void 2181 PageWriterRun::PrepareNextRun() 2182 { 2183 fWrapperCount = 0; 2184 fTransferCount = 0; 2185 fPendingTransfers = 0; 2186 } 2187 2188 2189 /*! The page's cache must be locked. 2190 */ 2191 void 2192 PageWriterRun::AddPage(vm_page* page) 2193 { 2194 fWrappers[fWrapperCount++].SetTo(page); 2195 2196 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2197 fTransfers[fTransferCount++].SetTo(this, page, 2198 page->Cache()->MaxPagesPerAsyncWrite()); 2199 } 2200 } 2201 2202 2203 /*! Writes all pages previously added. 2204 \return The number of pages that could not be written or otherwise handled. 2205 */ 2206 uint32 2207 PageWriterRun::Go() 2208 { 2209 atomic_set(&fPendingTransfers, fTransferCount); 2210 2211 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2212 ConditionVariableEntry waitEntry; 2213 fAllFinishedCondition.Add(&waitEntry); 2214 2215 // schedule writes 2216 for (uint32 i = 0; i < fTransferCount; i++) 2217 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2218 2219 // wait until all pages have been written 2220 waitEntry.Wait(); 2221 2222 // mark pages depending on whether they could be written or not 2223 2224 uint32 failedPages = 0; 2225 uint32 wrapperIndex = 0; 2226 for (uint32 i = 0; i < fTransferCount; i++) { 2227 PageWriteTransfer& transfer = fTransfers[i]; 2228 transfer.Cache()->Lock(); 2229 2230 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2231 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2232 failedPages++; 2233 } 2234 2235 transfer.Cache()->Unlock(); 2236 } 2237 2238 ASSERT(wrapperIndex == fWrapperCount); 2239 2240 for (uint32 i = 0; i < fTransferCount; i++) { 2241 PageWriteTransfer& transfer = fTransfers[i]; 2242 struct VMCache* cache = transfer.Cache(); 2243 2244 // We've acquired a references for each page 2245 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2246 // We release the cache references after all pages were made 2247 // unbusy again - otherwise releasing a vnode could deadlock. 2248 cache->ReleaseStoreRef(); 2249 cache->ReleaseRef(); 2250 } 2251 } 2252 2253 return failedPages; 2254 } 2255 2256 2257 void 2258 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2259 bool partialTransfer, size_t bytesTransferred) 2260 { 2261 if (atomic_add(&fPendingTransfers, -1) == 1) 2262 fAllFinishedCondition.NotifyAll(); 2263 } 2264 2265 2266 /*! The page writer continuously takes some pages from the modified 2267 queue, writes them back, and moves them back to the active queue. 
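	Pages whose write-back fails are put back into the modified queue (for
	non-temporary caches) or into the active/inactive queue (for temporary
	caches); see PageWriteWrapper::Done().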
2268 It runs in its own thread, and is only there to keep the number 2269 of modified pages low, so that more pages can be reused with 2270 fewer costs. 2271 */ 2272 status_t 2273 page_writer(void* /*unused*/) 2274 { 2275 const uint32 kNumPages = 256; 2276 #ifdef TRACE_VM_PAGE 2277 uint32 writtenPages = 0; 2278 bigtime_t lastWrittenTime = 0; 2279 bigtime_t pageCollectionTime = 0; 2280 bigtime_t pageWritingTime = 0; 2281 #endif 2282 2283 PageWriterRun run; 2284 if (run.Init(kNumPages) != B_OK) { 2285 panic("page writer: Failed to init PageWriterRun!"); 2286 return B_ERROR; 2287 } 2288 2289 page_num_t pagesSinceLastSuccessfulWrite = 0; 2290 2291 while (true) { 2292 // TODO: Maybe wait shorter when memory is low! 2293 if (sModifiedPageQueue.Count() < kNumPages) { 2294 sPageWriterCondition.Wait(3000000, true); 2295 // all 3 seconds when no one triggers us 2296 } 2297 2298 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2299 if (modifiedPages == 0) 2300 continue; 2301 2302 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2303 // We ran through the whole queue without being able to write a 2304 // single page. Take a break. 2305 snooze(500000); 2306 pagesSinceLastSuccessfulWrite = 0; 2307 } 2308 2309 #if ENABLE_SWAP_SUPPORT 2310 page_stats pageStats; 2311 get_page_stats(pageStats); 2312 bool activePaging = do_active_paging(pageStats); 2313 #endif 2314 2315 // depending on how urgent it becomes to get pages to disk, we adjust 2316 // our I/O priority 2317 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2318 int32 ioPriority = B_IDLE_PRIORITY; 2319 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2320 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2321 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2322 } else { 2323 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2324 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2325 } 2326 2327 thread_set_io_priority(ioPriority); 2328 2329 uint32 numPages = 0; 2330 run.PrepareNextRun(); 2331 2332 // TODO: make this laptop friendly, too (ie. only start doing 2333 // something if someone else did something or there is really 2334 // enough to do). 2335 2336 // collect pages to be written 2337 #ifdef TRACE_VM_PAGE 2338 pageCollectionTime -= system_time(); 2339 #endif 2340 2341 page_num_t maxPagesToSee = modifiedPages; 2342 2343 while (numPages < kNumPages && maxPagesToSee > 0) { 2344 vm_page *page = next_modified_page(maxPagesToSee); 2345 if (page == NULL) 2346 break; 2347 2348 PageCacheLocker cacheLocker(page, false); 2349 if (!cacheLocker.IsLocked()) 2350 continue; 2351 2352 VMCache *cache = page->Cache(); 2353 2354 // If the page is busy or its state has changed while we were 2355 // locking the cache, just ignore it. 2356 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2357 continue; 2358 2359 DEBUG_PAGE_ACCESS_START(page); 2360 2361 // Don't write back wired (locked) pages. 2362 if (page->WiredCount() > 0) { 2363 set_page_state(page, PAGE_STATE_ACTIVE); 2364 DEBUG_PAGE_ACCESS_END(page); 2365 continue; 2366 } 2367 2368 // Write back temporary pages only when we're actively paging. 2369 if (cache->temporary 2370 #if ENABLE_SWAP_SUPPORT 2371 && (!activePaging 2372 || !cache->CanWritePage( 2373 (off_t)page->cache_offset << PAGE_SHIFT)) 2374 #endif 2375 ) { 2376 // We can't/don't want to do anything with this page, so move it 2377 // to one of the other queues. 
2378 if (page->mappings.IsEmpty()) 2379 set_page_state(page, PAGE_STATE_INACTIVE); 2380 else 2381 set_page_state(page, PAGE_STATE_ACTIVE); 2382 2383 DEBUG_PAGE_ACCESS_END(page); 2384 continue; 2385 } 2386 2387 // We need our own reference to the store, as it might currently be 2388 // destroyed. 2389 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2390 DEBUG_PAGE_ACCESS_END(page); 2391 cacheLocker.Unlock(); 2392 thread_yield(); 2393 continue; 2394 } 2395 2396 run.AddPage(page); 2397 // TODO: We're possibly adding pages of different caches and 2398 // thus maybe of different underlying file systems here. This 2399 // is a potential problem for loop file systems/devices, since 2400 // we could mark a page busy that would need to be accessed 2401 // when writing back another page, thus causing a deadlock. 2402 2403 DEBUG_PAGE_ACCESS_END(page); 2404 2405 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2406 TPW(WritePage(page)); 2407 2408 cache->AcquireRefLocked(); 2409 numPages++; 2410 } 2411 2412 #ifdef TRACE_VM_PAGE 2413 pageCollectionTime += system_time(); 2414 #endif 2415 if (numPages == 0) 2416 continue; 2417 2418 // write pages to disk and do all the cleanup 2419 #ifdef TRACE_VM_PAGE 2420 pageWritingTime -= system_time(); 2421 #endif 2422 uint32 failedPages = run.Go(); 2423 #ifdef TRACE_VM_PAGE 2424 pageWritingTime += system_time(); 2425 2426 // debug output only... 2427 writtenPages += numPages; 2428 if (writtenPages >= 1024) { 2429 bigtime_t now = system_time(); 2430 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2431 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2432 (now - lastWrittenTime) / 1000, 2433 pageCollectionTime / 1000, pageWritingTime / 1000)); 2434 lastWrittenTime = now; 2435 2436 writtenPages -= 1024; 2437 pageCollectionTime = 0; 2438 pageWritingTime = 0; 2439 } 2440 #endif 2441 2442 if (failedPages == numPages) 2443 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2444 else 2445 pagesSinceLastSuccessfulWrite = 0; 2446 } 2447 2448 return B_OK; 2449 } 2450 2451 2452 // #pragma mark - 2453 2454 2455 // TODO: This should be done in the page daemon! 2456 #if 0 2457 #if ENABLE_SWAP_SUPPORT 2458 static bool 2459 free_page_swap_space(int32 index) 2460 { 2461 vm_page *page = vm_page_at_index(index); 2462 PageCacheLocker locker(page); 2463 if (!locker.IsLocked()) 2464 return false; 2465 2466 DEBUG_PAGE_ACCESS_START(page); 2467 2468 VMCache* cache = page->Cache(); 2469 if (cache->temporary && page->WiredCount() == 0 2470 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2471 && page->usage_count > 0) { 2472 // TODO: how to judge a page is highly active? 2473 if (swap_free_page_swap_space(page)) { 2474 // We need to mark the page modified, since otherwise it could be 2475 // stolen and we'd lose its data. 
			vm_page_set_state(page, PAGE_STATE_MODIFIED);
			TD(FreedPageSwap(page));
			DEBUG_PAGE_ACCESS_END(page);
			return true;
		}
	}
	DEBUG_PAGE_ACCESS_END(page);
	return false;
}
#endif
#endif	// 0


static vm_page *
find_cached_page_candidate(struct vm_page &marker)
{
	DEBUG_PAGE_ACCESS_CHECK(&marker);

	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
	vm_page *page;

	if (marker.State() == PAGE_STATE_UNUSED) {
		// Get the first page of the cached queue
		page = sCachedPageQueue.Head();
	} else {
		// Get the next page of the current queue
		if (marker.State() != PAGE_STATE_CACHED) {
			panic("invalid marker %p state", &marker);
			return NULL;
		}

		page = sCachedPageQueue.Next(&marker);
		sCachedPageQueue.Remove(&marker);
		marker.SetState(PAGE_STATE_UNUSED);
	}

	while (page != NULL) {
		if (!page->busy) {
			// we found a candidate, insert marker
			marker.SetState(PAGE_STATE_CACHED);
			sCachedPageQueue.InsertAfter(page, &marker);
			return page;
		}

		page = sCachedPageQueue.Next(page);
	}

	return NULL;
}


static bool
free_cached_page(vm_page *page, bool dontWait)
{
	// try to lock the page's cache
	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
		return false;
	VMCache* cache = page->Cache();

	AutoLocker<VMCache> cacheLocker(cache, true);
	MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked);

	// check again if that page is still a candidate
	if (page->busy || page->State() != PAGE_STATE_CACHED)
		return false;

	DEBUG_PAGE_ACCESS_START(page);

	PAGE_ASSERT(page, !page->IsMapped());
	PAGE_ASSERT(page, !page->modified);

	// we can now steal this page

	cache->RemovePage(page);
		// Now the page doesn't have a cache anymore, so no one else (e.g.
		// vm_page_allocate_page_run()) can pick it up, since they would be
		// required to lock the cache first, which would fail.

	sCachedPageQueue.RemoveUnlocked(page);
	return true;
}


static uint32
free_cached_pages(uint32 pagesToFree, bool dontWait)
{
	vm_page marker;
	init_page_marker(marker);

	uint32 pagesFreed = 0;

	while (pagesFreed < pagesToFree) {
		vm_page *page = find_cached_page_candidate(marker);
		if (page == NULL)
			break;

		if (free_cached_page(page, dontWait)) {
			ReadLocker locker(sFreePageQueuesLock);
			page->SetState(PAGE_STATE_FREE);
			DEBUG_PAGE_ACCESS_END(page);
			sFreePageQueue.PrependUnlocked(page);
			locker.Unlock();

			TA(StolenPage());

			pagesFreed++;
		}
	}

	remove_page_marker(marker);

	return pagesFreed;
}


static void
idle_scan_active_pages(page_stats& pageStats)
{
	VMPageQueue& queue = sActivePageQueue;

	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;

	while (maxToScan > 0) {
		maxToScan--;

		// Get the next page. Note that we don't bother to lock here. We go
		// with the assumption that on all architectures reading/writing
		// pointers is atomic. Beyond that it doesn't really matter. We have
		// to unlock the queue anyway to lock the page's cache, and we'll
		// recheck afterwards.
		vm_page* page = queue.Head();
		if (page == NULL)
			break;

		// lock the page's cache
		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
		if (cache == NULL)
			continue;

		if (page->State() != PAGE_STATE_ACTIVE) {
			// page is no longer in the cache or in this queue
			cache->ReleaseRefAndUnlock();
			continue;
		}

		if (page->busy) {
			// page is busy -- requeue at the end
			vm_page_requeue(page, true);
			cache->ReleaseRefAndUnlock();
			continue;
		}

		DEBUG_PAGE_ACCESS_START(page);

		// Get the page active/modified flags and update the page's usage
		// count. We completely unmap inactive temporary pages. This saves us
		// from having to iterate through the inactive list as well, since
		// we'll be notified via page fault whenever such an inactive page is
		// used again. We don't remove the mappings of non-temporary pages,
		// since we wouldn't notice when those would become unused and could
		// thus be moved to the cached list.
		int32 usageCount;
		if (page->WiredCount() > 0 || page->usage_count > 0
			|| !cache->temporary) {
			usageCount = vm_clear_page_mapping_accessed_flags(page);
		} else
			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);

		if (usageCount > 0) {
			usageCount += page->usage_count + kPageUsageAdvance;
			if (usageCount > kPageUsageMax)
				usageCount = kPageUsageMax;
			// TODO: This would probably also be the place to reclaim swap
			// space.
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount < 0) {
				usageCount = 0;
				set_page_state(page, PAGE_STATE_INACTIVE);
			}
		}

		page->usage_count = usageCount;

		DEBUG_PAGE_ACCESS_END(page);

		cache->ReleaseRefAndUnlock();
	}
}


static void
full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
{
	int32 pagesToFree = pageStats.unsatisfiedReservations
		+ sFreeOrCachedPagesTarget
		- (pageStats.totalFreePages + pageStats.cachedPages);
	if (pagesToFree <= 0)
		return;

	bigtime_t time = system_time();
	uint32 pagesScanned = 0;
	uint32 pagesToCached = 0;
	uint32 pagesToModified = 0;
	uint32 pagesToActive = 0;

	// Determine how many pages at maximum to send to the modified queue.
	// Since it is relatively expensive to page out pages, we do that on a
	// grander scale only when things get desperate.
	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;

	vm_page marker;
	init_page_marker(marker);

	VMPageQueue& queue = sInactivePageQueue;
	InterruptsSpinLocker queueLocker(queue.GetLock());
	uint32 maxToScan = queue.Count();

	vm_page* nextPage = queue.Head();

	while (pagesToFree > 0 && maxToScan > 0) {
		maxToScan--;

		// get the next page
		vm_page* page = nextPage;
		if (page == NULL)
			break;
		nextPage = queue.Next(page);

		if (page->busy)
			continue;

		// mark the position
		queue.InsertAfter(page, &marker);
		queueLocker.Unlock();

		// lock the page's cache
		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
		if (cache == NULL || page->busy
				|| page->State() != PAGE_STATE_INACTIVE) {
			if (cache != NULL)
				cache->ReleaseRefAndUnlock();
			queueLocker.Lock();
			nextPage = queue.Next(&marker);
			queue.Remove(&marker);
			continue;
		}

		pagesScanned++;

		DEBUG_PAGE_ACCESS_START(page);

		// Get the accessed count, clear the accessed/modified flags and
		// unmap the page, if it hasn't been accessed.
		int32 usageCount;
		if (page->WiredCount() > 0)
			usageCount = vm_clear_page_mapping_accessed_flags(page);
		else
			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);

		// update usage count
		if (usageCount > 0) {
			usageCount += page->usage_count + kPageUsageAdvance;
			if (usageCount > kPageUsageMax)
				usageCount = kPageUsageMax;
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount < 0)
				usageCount = 0;
		}

		page->usage_count = usageCount;

		// Move to fitting queue or requeue:
		// * Active mapped pages go to the active queue.
		// * Inactive mapped (i.e. wired) pages are requeued.
		// * The remaining pages are cachable. Thus, if unmodified they go to
		//   the cached queue, otherwise to the modified queue (up to a limit).
		//   Note that, unlike in the idle scanning, we don't exempt pages of
		//   temporary caches here. Apparently we really need memory, so we
		//   better page out memory as well.
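		//   For example: an unmapped, unmodified page whose usage count has
		//   dropped to zero is moved straight to the cached queue (and counts
		//   against pagesToFree), while a recently accessed page is either
		//   promoted back to the active queue (if still mapped) or just
		//   requeued.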
2756 bool isMapped = page->IsMapped(); 2757 if (usageCount > 0) { 2758 if (isMapped) { 2759 set_page_state(page, PAGE_STATE_ACTIVE); 2760 pagesToActive++; 2761 } else 2762 vm_page_requeue(page, true); 2763 } else if (isMapped) { 2764 vm_page_requeue(page, true); 2765 } else if (!page->modified) { 2766 set_page_state(page, PAGE_STATE_CACHED); 2767 pagesToFree--; 2768 pagesToCached++; 2769 } else if (maxToFlush > 0) { 2770 set_page_state(page, PAGE_STATE_MODIFIED); 2771 maxToFlush--; 2772 pagesToModified++; 2773 } else 2774 vm_page_requeue(page, true); 2775 2776 DEBUG_PAGE_ACCESS_END(page); 2777 2778 cache->ReleaseRefAndUnlock(); 2779 2780 // remove the marker 2781 queueLocker.Lock(); 2782 nextPage = queue.Next(&marker); 2783 queue.Remove(&marker); 2784 } 2785 2786 queueLocker.Unlock(); 2787 2788 time = system_time() - time; 2789 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2790 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2791 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2792 pagesToModified, pagesToActive); 2793 2794 // wake up the page writer, if we tossed it some pages 2795 if (pagesToModified > 0) 2796 sPageWriterCondition.WakeUp(); 2797 } 2798 2799 2800 static void 2801 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2802 { 2803 vm_page marker; 2804 init_page_marker(marker); 2805 2806 VMPageQueue& queue = sActivePageQueue; 2807 InterruptsSpinLocker queueLocker(queue.GetLock()); 2808 uint32 maxToScan = queue.Count(); 2809 2810 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2811 + sFreeOrCachedPagesTarget 2812 - (pageStats.totalFreePages + pageStats.cachedPages) 2813 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2814 if (pagesToDeactivate <= 0) 2815 return; 2816 2817 bigtime_t time = system_time(); 2818 uint32 pagesAccessed = 0; 2819 uint32 pagesToInactive = 0; 2820 uint32 pagesScanned = 0; 2821 2822 vm_page* nextPage = queue.Head(); 2823 2824 while (pagesToDeactivate > 0 && maxToScan > 0) { 2825 maxToScan--; 2826 2827 // get the next page 2828 vm_page* page = nextPage; 2829 if (page == NULL) 2830 break; 2831 nextPage = queue.Next(page); 2832 2833 if (page->busy) 2834 continue; 2835 2836 // mark the position 2837 queue.InsertAfter(page, &marker); 2838 queueLocker.Unlock(); 2839 2840 // lock the page's cache 2841 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2842 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2843 if (cache != NULL) 2844 cache->ReleaseRefAndUnlock(); 2845 queueLocker.Lock(); 2846 nextPage = queue.Next(&marker); 2847 queue.Remove(&marker); 2848 continue; 2849 } 2850 2851 pagesScanned++; 2852 2853 DEBUG_PAGE_ACCESS_START(page); 2854 2855 // Get the page active/modified flags and update the page's usage count. 2856 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2857 2858 if (usageCount > 0) { 2859 usageCount += page->usage_count + kPageUsageAdvance; 2860 if (usageCount > kPageUsageMax) 2861 usageCount = kPageUsageMax; 2862 pagesAccessed++; 2863 // TODO: This would probably also be the place to reclaim swap space. 
2864 } else { 2865 usageCount += page->usage_count - (int32)kPageUsageDecline; 2866 if (usageCount <= 0) { 2867 usageCount = 0; 2868 set_page_state(page, PAGE_STATE_INACTIVE); 2869 pagesToInactive++; 2870 } 2871 } 2872 2873 page->usage_count = usageCount; 2874 2875 DEBUG_PAGE_ACCESS_END(page); 2876 2877 cache->ReleaseRefAndUnlock(); 2878 2879 // remove the marker 2880 queueLocker.Lock(); 2881 nextPage = queue.Next(&marker); 2882 queue.Remove(&marker); 2883 } 2884 2885 time = system_time() - time; 2886 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2887 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed" 2888 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed); 2889 } 2890 2891 2892 static void 2893 page_daemon_idle_scan(page_stats& pageStats) 2894 { 2895 TRACE_DAEMON("page daemon: idle run\n"); 2896 2897 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2898 // We want more actually free pages, so free some from the cached 2899 // ones. 2900 uint32 freed = free_cached_pages( 2901 sFreePagesTarget - pageStats.totalFreePages, false); 2902 if (freed > 0) 2903 unreserve_pages(freed); 2904 get_page_stats(pageStats); 2905 } 2906 2907 // Walk the active list and move pages to the inactive queue. 2908 get_page_stats(pageStats); 2909 idle_scan_active_pages(pageStats); 2910 } 2911 2912 2913 static void 2914 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2915 { 2916 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %" 2917 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages, 2918 pageStats.cachedPages, pageStats.unsatisfiedReservations 2919 + sFreeOrCachedPagesTarget 2920 - (pageStats.totalFreePages + pageStats.cachedPages)); 2921 2922 // Walk the inactive list and transfer pages to the cached and modified 2923 // queues. 2924 full_scan_inactive_pages(pageStats, despairLevel); 2925 2926 // Free cached pages. Also wake up reservation waiters. 2927 get_page_stats(pageStats); 2928 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2929 - (pageStats.totalFreePages); 2930 if (pagesToFree > 0) { 2931 uint32 freed = free_cached_pages(pagesToFree, true); 2932 if (freed > 0) 2933 unreserve_pages(freed); 2934 } 2935 2936 // Walk the active list and move pages to the inactive queue. 2937 get_page_stats(pageStats); 2938 full_scan_active_pages(pageStats, despairLevel); 2939 } 2940 2941 2942 static status_t 2943 page_daemon(void* /*unused*/) 2944 { 2945 int32 despairLevel = 0; 2946 2947 while (true) { 2948 sPageDaemonCondition.ClearActivated(); 2949 2950 // evaluate the free pages situation 2951 page_stats pageStats; 2952 get_page_stats(pageStats); 2953 2954 if (!do_active_paging(pageStats)) { 2955 // Things look good -- just maintain statistics and keep the pool 2956 // of actually free pages full enough. 2957 despairLevel = 0; 2958 page_daemon_idle_scan(pageStats); 2959 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 2960 } else { 2961 // Not enough free pages. We need to do some real work. 2962 despairLevel = std::max(despairLevel + 1, (int32)3); 2963 page_daemon_full_scan(pageStats, despairLevel); 2964 2965 // Don't wait after the first full scan, but rather immediately 2966 // check whether we were successful in freeing enough pages and 2967 // re-run with increased despair level. The first scan is 2968 // conservative with respect to moving inactive modified pages to 2969 // the modified list to avoid thrashing. The second scan, however, 2970 // will not hold back. 
2971 if (despairLevel > 1) 2972 snooze(kBusyScanWaitInterval); 2973 } 2974 } 2975 2976 return B_OK; 2977 } 2978 2979 2980 /*! Returns how many pages could *not* be reserved. 2981 */ 2982 static uint32 2983 reserve_pages(uint32 count, int priority, bool dontWait) 2984 { 2985 int32 dontTouch = kPageReserveForPriority[priority]; 2986 2987 while (true) { 2988 count -= reserve_some_pages(count, dontTouch); 2989 if (count == 0) 2990 return 0; 2991 2992 if (sUnsatisfiedPageReservations == 0) { 2993 count -= free_cached_pages(count, dontWait); 2994 if (count == 0) 2995 return count; 2996 } 2997 2998 if (dontWait) 2999 return count; 3000 3001 // we need to wait for pages to become available 3002 3003 MutexLocker pageDeficitLocker(sPageDeficitLock); 3004 3005 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3006 sUnsatisfiedPageReservations += count; 3007 3008 if (atomic_get(&sUnreservedFreePages) > dontTouch) { 3009 // the situation changed 3010 sUnsatisfiedPageReservations -= count; 3011 continue; 3012 } 3013 3014 PageReservationWaiter waiter; 3015 waiter.dontTouch = dontTouch; 3016 waiter.missing = count; 3017 waiter.thread = thread_get_current_thread(); 3018 waiter.threadPriority = waiter.thread->priority; 3019 3020 // insert ordered (i.e. after all waiters with higher or equal priority) 3021 PageReservationWaiter* otherWaiter = NULL; 3022 for (PageReservationWaiterList::Iterator it 3023 = sPageReservationWaiters.GetIterator(); 3024 (otherWaiter = it.Next()) != NULL;) { 3025 if (waiter < *otherWaiter) 3026 break; 3027 } 3028 3029 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3030 3031 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3032 "waiting for pages"); 3033 3034 if (notifyDaemon) 3035 sPageDaemonCondition.WakeUp(); 3036 3037 pageDeficitLocker.Unlock(); 3038 3039 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3040 thread_block(); 3041 3042 pageDeficitLocker.Lock(); 3043 3044 return 0; 3045 } 3046 } 3047 3048 3049 // #pragma mark - private kernel API 3050 3051 3052 /*! Writes a range of modified pages of a cache to disk. 3053 You need to hold the VMCache lock when calling this function. 3054 Note that the cache lock is released in this function. 3055 \param cache The cache. 3056 \param firstPage Offset (in page size units) of the first page in the range. 3057 \param endPage End offset (in page size units) of the page range. The page 3058 at this offset is not included. 
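
	A rough usage sketch (for illustration only; \c cache must be locked by
	the caller, and the lock may be dropped temporarily while the pages are
	written):
	\code
	cache->Lock();
	vm_page_write_modified_page_range(cache, 0, 16);
	cache->Unlock();
	\endcode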
3059 */ 3060 status_t 3061 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3062 uint32 endPage) 3063 { 3064 static const int32 kMaxPages = 256; 3065 int32 maxPages = cache->MaxPagesPerWrite(); 3066 if (maxPages < 0 || maxPages > kMaxPages) 3067 maxPages = kMaxPages; 3068 3069 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3070 | HEAP_DONT_LOCK_KERNEL_SPACE; 3071 3072 PageWriteWrapper stackWrappersPool[2]; 3073 PageWriteWrapper* stackWrappers[1]; 3074 PageWriteWrapper* wrapperPool 3075 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3076 PageWriteWrapper** wrappers 3077 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages]; 3078 if (wrapperPool == NULL || wrappers == NULL) { 3079 // don't fail, just limit our capabilities 3080 free(wrapperPool); 3081 free(wrappers); 3082 wrapperPool = stackWrappersPool; 3083 wrappers = stackWrappers; 3084 maxPages = 1; 3085 } 3086 3087 int32 nextWrapper = 0; 3088 int32 usedWrappers = 0; 3089 3090 PageWriteTransfer transfer; 3091 bool transferEmpty = true; 3092 3093 VMCachePagesTree::Iterator it 3094 = cache->pages.GetIterator(firstPage, true, true); 3095 3096 while (true) { 3097 vm_page* page = it.Next(); 3098 if (page == NULL || page->cache_offset >= endPage) { 3099 if (transferEmpty) 3100 break; 3101 3102 page = NULL; 3103 } 3104 3105 if (page != NULL) { 3106 if (page->busy 3107 || (page->State() != PAGE_STATE_MODIFIED 3108 && !vm_test_map_modification(page))) { 3109 page = NULL; 3110 } 3111 } 3112 3113 PageWriteWrapper* wrapper = NULL; 3114 if (page != NULL) { 3115 wrapper = &wrapperPool[nextWrapper++]; 3116 if (nextWrapper > maxPages) 3117 nextWrapper = 0; 3118 3119 DEBUG_PAGE_ACCESS_START(page); 3120 3121 wrapper->SetTo(page); 3122 3123 if (transferEmpty || transfer.AddPage(page)) { 3124 if (transferEmpty) { 3125 transfer.SetTo(NULL, page, maxPages); 3126 transferEmpty = false; 3127 } 3128 3129 DEBUG_PAGE_ACCESS_END(page); 3130 3131 wrappers[usedWrappers++] = wrapper; 3132 continue; 3133 } 3134 3135 DEBUG_PAGE_ACCESS_END(page); 3136 } 3137 3138 if (transferEmpty) 3139 continue; 3140 3141 cache->Unlock(); 3142 status_t status = transfer.Schedule(0); 3143 cache->Lock(); 3144 3145 for (int32 i = 0; i < usedWrappers; i++) 3146 wrappers[i]->Done(status); 3147 3148 usedWrappers = 0; 3149 3150 if (page != NULL) { 3151 transfer.SetTo(NULL, page, maxPages); 3152 wrappers[usedWrappers++] = wrapper; 3153 } else 3154 transferEmpty = true; 3155 } 3156 3157 if (wrapperPool != stackWrappersPool) { 3158 delete[] wrapperPool; 3159 delete[] wrappers; 3160 } 3161 3162 return B_OK; 3163 } 3164 3165 3166 /*! You need to hold the VMCache lock when calling this function. 3167 Note that the cache lock is released in this function. 3168 */ 3169 status_t 3170 vm_page_write_modified_pages(VMCache *cache) 3171 { 3172 return vm_page_write_modified_page_range(cache, 0, 3173 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3174 } 3175 3176 3177 /*! Schedules the page writer to write back the specified \a page. 3178 Note, however, that it might not do this immediately, and it can well 3179 take several seconds until the page is actually written out. 3180 */ 3181 void 3182 vm_page_schedule_write_page(vm_page *page) 3183 { 3184 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3185 3186 vm_page_requeue(page, false); 3187 3188 sPageWriterCondition.WakeUp(); 3189 } 3190 3191 3192 /*! Cache must be locked. 
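	All non-busy pages in the given range that are in the modified state are
	requeued, and the page writer is woken up if at least one such page was
	found; the actual write-back happens asynchronously.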
3193 */ 3194 void 3195 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3196 uint32 endPage) 3197 { 3198 uint32 modified = 0; 3199 for (VMCachePagesTree::Iterator it 3200 = cache->pages.GetIterator(firstPage, true, true); 3201 vm_page *page = it.Next();) { 3202 if (page->cache_offset >= endPage) 3203 break; 3204 3205 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3206 DEBUG_PAGE_ACCESS_START(page); 3207 vm_page_requeue(page, false); 3208 modified++; 3209 DEBUG_PAGE_ACCESS_END(page); 3210 } 3211 } 3212 3213 if (modified > 0) 3214 sPageWriterCondition.WakeUp(); 3215 } 3216 3217 3218 void 3219 vm_page_init_num_pages(kernel_args *args) 3220 { 3221 // calculate the size of memory by looking at the physical_memory_range array 3222 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3223 page_num_t physicalPagesEnd = sPhysicalPageOffset 3224 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3225 3226 sNonExistingPages = 0; 3227 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3228 3229 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3230 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3231 if (start > physicalPagesEnd) 3232 sNonExistingPages += start - physicalPagesEnd; 3233 physicalPagesEnd = start 3234 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3235 3236 #ifdef LIMIT_AVAILABLE_MEMORY 3237 page_num_t available 3238 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3239 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3240 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3241 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3242 break; 3243 } 3244 #endif 3245 } 3246 3247 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3248 sPhysicalPageOffset, physicalPagesEnd)); 3249 3250 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3251 } 3252 3253 3254 status_t 3255 vm_page_init(kernel_args *args) 3256 { 3257 TRACE(("vm_page_init: entry\n")); 3258 3259 // init page queues 3260 sModifiedPageQueue.Init("modified pages queue"); 3261 sInactivePageQueue.Init("inactive pages queue"); 3262 sActivePageQueue.Init("active pages queue"); 3263 sCachedPageQueue.Init("cached pages queue"); 3264 sFreePageQueue.Init("free pages queue"); 3265 sClearPageQueue.Init("clear pages queue"); 3266 3267 new (&sPageReservationWaiters) PageReservationWaiterList; 3268 3269 // map in the new free page table 3270 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3271 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3272 3273 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3274 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3275 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3276 3277 // initialize the free page table 3278 for (uint32 i = 0; i < sNumPages; i++) { 3279 sPages[i].Init(sPhysicalPageOffset + i); 3280 sFreePageQueue.Append(&sPages[i]); 3281 3282 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3283 sPages[i].allocation_tracking_info.Clear(); 3284 #endif 3285 } 3286 3287 sUnreservedFreePages = sNumPages; 3288 3289 TRACE(("initialized table\n")); 3290 3291 // mark the ranges between usable physical memory unused 3292 phys_addr_t previousEnd = 0; 3293 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3294 phys_addr_t base = args->physical_memory_range[i].start; 3295 phys_size_t size = args->physical_memory_range[i].size; 3296 if (base > previousEnd) { 3297 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3298 (base - previousEnd) / B_PAGE_SIZE, false); 3299 } 3300 previousEnd = base + size; 3301 } 3302 3303 // mark the allocated physical page ranges wired 3304 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3305 mark_page_range_in_use( 3306 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3307 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3308 } 3309 3310 // The target of actually free pages. This must be at least the system 3311 // reserve, but should be a few more pages, so we don't have to extract 3312 // a cached page with each allocation. 3313 sFreePagesTarget = VM_PAGE_RESERVE_USER 3314 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3315 3316 // The target of free + cached and inactive pages. On low-memory machines 3317 // keep things tight. free + cached is the pool of immediately allocatable 3318 // pages. We want a few inactive pages, so when we're actually paging, we 3319 // have a reasonably large set of pages to work with. 3320 if (sUnreservedFreePages < 16 * 1024) { 3321 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3322 sInactivePagesTarget = sFreePagesTarget / 3; 3323 } else { 3324 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3325 sInactivePagesTarget = sFreePagesTarget / 2; 3326 } 3327 3328 TRACE(("vm_page_init: exit\n")); 3329 3330 return B_OK; 3331 } 3332 3333 3334 status_t 3335 vm_page_init_post_area(kernel_args *args) 3336 { 3337 void *dummy; 3338 3339 dummy = sPages; 3340 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3341 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3342 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3343 3344 add_debugger_command("page_stats", &dump_page_stats, 3345 "Dump statistics about page usage"); 3346 add_debugger_command_etc("page", &dump_page, 3347 "Dump page info", 3348 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3349 "Prints information for the physical page. If neither \"-p\" nor\n" 3350 "\"-v\" are given, the provided address is interpreted as address of\n" 3351 "the vm_page data structure for the page in question. If \"-p\" is\n" 3352 "given, the address is the physical address of the page. If \"-v\" is\n" 3353 "given, the address is interpreted as virtual address in the current\n" 3354 "thread's address space and for the page it is mapped to (if any)\n" 3355 "information are printed. 
If \"-m\" is specified, the command will\n" 3356 "search all known address spaces for mappings to that page and print\n" 3357 "them.\n", 0); 3358 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3359 add_debugger_command("find_page", &find_page, 3360 "Find out which queue a page is actually in"); 3361 3362 #ifdef TRACK_PAGE_USAGE_STATS 3363 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3364 "Dumps statistics about page usage counts", 3365 "\n" 3366 "Dumps statistics about page usage counts.\n", 3367 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3368 #endif 3369 3370 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3371 add_debugger_command_etc("page_allocations_per_caller", 3372 &dump_page_allocations_per_caller, 3373 "Dump current page allocations summed up per caller", 3374 "[ -d <caller> ] [ -r ]\n" 3375 "The current allocations will by summed up by caller (their count)\n" 3376 "printed in decreasing order by count.\n" 3377 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3378 "including the respective stack trace.\n" 3379 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3380 "the information, so the next command invocation will only show the\n" 3381 "allocations made after the reset.\n", 0); 3382 add_debugger_command_etc("page_allocation_infos", 3383 &dump_page_allocation_infos, 3384 "Dump current page allocations", 3385 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3386 "[ --thread <thread ID> ]\n" 3387 "The current allocations filtered by optional values will be printed.\n" 3388 "The optional \"-p\" page number filters for a specific page,\n" 3389 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3390 "and/or threads can be filtered (these only work if a corresponding\n" 3391 "tracing entry is still available).\n" 3392 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3393 "callers are printed, where available\n", 0); 3394 #endif 3395 3396 return B_OK; 3397 } 3398 3399 3400 status_t 3401 vm_page_init_post_thread(kernel_args *args) 3402 { 3403 new (&sFreePageCondition) ConditionVariable; 3404 sFreePageCondition.Publish(&sFreePageQueue, "free page"); 3405 3406 // create a kernel thread to clear out pages 3407 3408 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3409 B_LOWEST_ACTIVE_PRIORITY, NULL); 3410 resume_thread(thread); 3411 3412 // start page writer 3413 3414 sPageWriterCondition.Init("page writer"); 3415 3416 thread = spawn_kernel_thread(&page_writer, "page writer", 3417 B_NORMAL_PRIORITY + 1, NULL); 3418 resume_thread(thread); 3419 3420 // start page daemon 3421 3422 sPageDaemonCondition.Init("page daemon"); 3423 3424 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3425 B_NORMAL_PRIORITY, NULL); 3426 resume_thread(thread); 3427 3428 return B_OK; 3429 } 3430 3431 3432 status_t 3433 vm_mark_page_inuse(page_num_t page) 3434 { 3435 return vm_mark_page_range_inuse(page, 1); 3436 } 3437 3438 3439 status_t 3440 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3441 { 3442 return mark_page_range_in_use(startPage, length, false); 3443 } 3444 3445 3446 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3447 */ 3448 void 3449 vm_page_unreserve_pages(vm_page_reservation* reservation) 3450 { 3451 uint32 count = reservation->count; 3452 reservation->count = 0; 3453 3454 if (count == 0) 3455 return; 3456 3457 TA(UnreservePages(count)); 3458 3459 unreserve_pages(count); 3460 } 3461 3462 3463 /*! 
With this call, you can reserve a number of free pages in the system. 3464 They will only be handed out to someone who has actually reserved them. 3465 This call returns as soon as the number of requested pages has been 3466 reached. 3467 The caller must not hold any cache lock or the function might deadlock. 3468 */ 3469 void 3470 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3471 int priority) 3472 { 3473 reservation->count = count; 3474 3475 if (count == 0) 3476 return; 3477 3478 TA(ReservePages(count)); 3479 3480 reserve_pages(count, priority, false); 3481 } 3482 3483 3484 bool 3485 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3486 int priority) 3487 { 3488 if (count == 0) { 3489 reservation->count = count; 3490 return true; 3491 } 3492 3493 uint32 remaining = reserve_pages(count, priority, true); 3494 if (remaining == 0) { 3495 TA(ReservePages(count)); 3496 reservation->count = count; 3497 return true; 3498 } 3499 3500 unreserve_pages(count - remaining); 3501 3502 return false; 3503 } 3504 3505 3506 vm_page * 3507 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3508 { 3509 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3510 ASSERT(pageState != PAGE_STATE_FREE); 3511 ASSERT(pageState != PAGE_STATE_CLEAR); 3512 3513 ASSERT(reservation->count > 0); 3514 reservation->count--; 3515 3516 VMPageQueue* queue; 3517 VMPageQueue* otherQueue; 3518 3519 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3520 queue = &sClearPageQueue; 3521 otherQueue = &sFreePageQueue; 3522 } else { 3523 queue = &sFreePageQueue; 3524 otherQueue = &sClearPageQueue; 3525 } 3526 3527 ReadLocker locker(sFreePageQueuesLock); 3528 3529 vm_page* page = queue->RemoveHeadUnlocked(); 3530 if (page == NULL) { 3531 // if the primary queue was empty, grab the page from the 3532 // secondary queue 3533 page = otherQueue->RemoveHeadUnlocked(); 3534 3535 if (page == NULL) { 3536 // Unlikely, but possible: the page we have reserved has moved 3537 // between the queues after we checked the first queue. Grab the 3538 // write locker to make sure this doesn't happen again. 
			locker.Unlock();
			WriteLocker writeLocker(sFreePageQueuesLock);

			page = queue->RemoveHead();
			if (page == NULL)
				page = otherQueue->RemoveHead();

			if (page == NULL) {
				panic("Had reserved page, but there is none!");
				return NULL;
			}

			// downgrade to read lock
			locker.Lock();
		}
	}

	if (page->CacheRef() != NULL)
		panic("supposed to be free page %p has cache\n", page);

	DEBUG_PAGE_ACCESS_START(page);

	int oldPageState = page->State();
	page->SetState(pageState);
	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
	page->usage_count = 0;
	page->accessed = false;
	page->modified = false;

	locker.Unlock();

	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
		sPageQueues[pageState].AppendUnlocked(page);

	// clear the page, if we had to take it from the free queue and a clear
	// page was requested
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
		clear_page(page);

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	page->allocation_tracking_info.Init(
		TA(AllocatePage(page->physical_page_number)));
#else
	TA(AllocatePage(page->physical_page_number));
#endif

	return page;
}


static void
allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
	VMPageQueue::PageList& clearPages)
{
	while (vm_page* page = freePages.RemoveHead()) {
		page->busy = false;
		page->SetState(PAGE_STATE_FREE);
		DEBUG_PAGE_ACCESS_END(page);
		sFreePageQueue.PrependUnlocked(page);
	}

	while (vm_page* page = clearPages.RemoveHead()) {
		page->busy = false;
		page->SetState(PAGE_STATE_CLEAR);
		DEBUG_PAGE_ACCESS_END(page);
		sClearPageQueue.PrependUnlocked(page);
	}
}


/*!	Tries to allocate a contiguous run of \a length pages starting at
	index \a start.

	The caller must have write-locked the free/clear page queues. The function
	will unlock regardless of whether it succeeds or fails.

	If the function fails, it cleans up after itself, i.e. it will free all
	pages it managed to allocate.

	\param start The start index (into \c sPages) of the run.
	\param length The number of pages to allocate.
	\param flags Page allocation flags. Encodes the state the function shall
		set the allocated pages to, whether the pages shall be marked busy
		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
		(VM_PAGE_ALLOC_CLEAR).
	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
		passed in locked state. Will be unlocked by the function.
	\return The index of the first page that could not be allocated. \a length
		is returned when the function was successful.
*/
static page_num_t
allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
	WriteLocker& freeClearQueueLocker)
{
	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
	ASSERT(pageState != PAGE_STATE_FREE);
	ASSERT(pageState != PAGE_STATE_CLEAR);
	ASSERT(start + length <= sNumPages);

	// Pull the free/clear pages out of their respective queues. Cached pages
	// are allocated later.
3640 page_num_t cachedPages = 0; 3641 VMPageQueue::PageList freePages; 3642 VMPageQueue::PageList clearPages; 3643 page_num_t i = 0; 3644 for (; i < length; i++) { 3645 bool pageAllocated = true; 3646 bool noPage = false; 3647 vm_page& page = sPages[start + i]; 3648 switch (page.State()) { 3649 case PAGE_STATE_CLEAR: 3650 DEBUG_PAGE_ACCESS_START(&page); 3651 sClearPageQueue.Remove(&page); 3652 clearPages.Add(&page); 3653 break; 3654 case PAGE_STATE_FREE: 3655 DEBUG_PAGE_ACCESS_START(&page); 3656 sFreePageQueue.Remove(&page); 3657 freePages.Add(&page); 3658 break; 3659 case PAGE_STATE_CACHED: 3660 // We allocate cached pages later. 3661 cachedPages++; 3662 pageAllocated = false; 3663 break; 3664 3665 default: 3666 // Probably a page was cached when our caller checked. Now it's 3667 // gone and we have to abort. 3668 noPage = true; 3669 break; 3670 } 3671 3672 if (noPage) 3673 break; 3674 3675 if (pageAllocated) { 3676 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3677 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3678 page.usage_count = 0; 3679 page.accessed = false; 3680 page.modified = false; 3681 } 3682 } 3683 3684 if (i < length) { 3685 // failed to allocate a page -- free all that we've got 3686 allocate_page_run_cleanup(freePages, clearPages); 3687 return i; 3688 } 3689 3690 freeClearQueueLocker.Unlock(); 3691 3692 if (cachedPages > 0) { 3693 // allocate the pages that weren't free but cached 3694 page_num_t freedCachedPages = 0; 3695 page_num_t nextIndex = start; 3696 vm_page* freePage = freePages.Head(); 3697 vm_page* clearPage = clearPages.Head(); 3698 while (cachedPages > 0) { 3699 // skip, if we've already got the page 3700 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3701 freePage = freePages.GetNext(freePage); 3702 nextIndex++; 3703 continue; 3704 } 3705 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3706 clearPage = clearPages.GetNext(clearPage); 3707 nextIndex++; 3708 continue; 3709 } 3710 3711 // free the page, if it is still cached 3712 vm_page& page = sPages[nextIndex]; 3713 if (!free_cached_page(&page, false)) { 3714 // TODO: if the page turns out to have been freed already, 3715 // there would be no need to fail 3716 break; 3717 } 3718 3719 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3720 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3721 page.usage_count = 0; 3722 page.accessed = false; 3723 page.modified = false; 3724 3725 freePages.InsertBefore(freePage, &page); 3726 freedCachedPages++; 3727 cachedPages--; 3728 nextIndex++; 3729 } 3730 3731 // If we have freed cached pages, we need to balance things. 3732 if (freedCachedPages > 0) 3733 unreserve_pages(freedCachedPages); 3734 3735 if (nextIndex - start < length) { 3736 // failed to allocate all cached pages -- free all that we've got 3737 freeClearQueueLocker.Lock(); 3738 allocate_page_run_cleanup(freePages, clearPages); 3739 freeClearQueueLocker.Unlock(); 3740 3741 return nextIndex - start; 3742 } 3743 } 3744 3745 // clear pages, if requested 3746 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3747 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3748 vm_page* page = it.Next();) { 3749 clear_page(page); 3750 } 3751 } 3752 3753 // add pages to target queue 3754 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3755 freePages.MoveFrom(&clearPages); 3756 sPageQueues[pageState].AppendUnlocked(freePages, length); 3757 } 3758 3759 // Note: We don't unreserve the pages since we pulled them out of the 3760 // free/clear queues without adjusting sUnreservedFreePages. 
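	// They are covered by the reservation the caller (currently only
	// vm_page_allocate_page_run()) made before taking the queues' write lock.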
3761 3762 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3763 AbstractTraceEntryWithStackTrace* traceEntry 3764 = TA(AllocatePageRun(start, length)); 3765 3766 for (page_num_t i = start; i < start + length; i++) 3767 sPages[i].allocation_tracking_info.Init(traceEntry); 3768 #else 3769 TA(AllocatePageRun(start, length)); 3770 #endif 3771 3772 return length; 3773 } 3774 3775 3776 /*! Allocate a physically contiguous range of pages. 3777 3778 \param flags Page allocation flags. Encodes the state the function shall 3779 set the allocated pages to, whether the pages shall be marked busy 3780 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3781 (VM_PAGE_ALLOC_CLEAR). 3782 \param length The number of contiguous pages to allocate. 3783 \param restrictions Restrictions to the physical addresses of the page run 3784 to allocate, including \c low_address, the first acceptable physical 3785 address where the page run may start, \c high_address, the last 3786 acceptable physical address where the page run may end (i.e. it must 3787 hold \code runStartAddress + length <= high_address \endcode), 3788 \c alignment, the alignment of the page run start address, and 3789 \c boundary, multiples of which the page run must not cross. 3790 Values set to \c 0 are ignored. 3791 \param priority The page reservation priority (as passed to 3792 vm_page_reserve_pages()). 3793 \return The first page of the allocated page run on success; \c NULL 3794 when the allocation failed. 3795 */ 3796 vm_page* 3797 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3798 const physical_address_restrictions* restrictions, int priority) 3799 { 3800 // compute start and end page index 3801 page_num_t requestedStart 3802 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3803 - sPhysicalPageOffset; 3804 page_num_t start = requestedStart; 3805 page_num_t end; 3806 if (restrictions->high_address > 0) { 3807 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3808 sPhysicalPageOffset) 3809 - sPhysicalPageOffset; 3810 end = std::min(end, sNumPages); 3811 } else 3812 end = sNumPages; 3813 3814 // compute alignment mask 3815 page_num_t alignmentMask 3816 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3817 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3818 // alignment must be a power of 2 3819 3820 // compute the boundary mask 3821 uint32 boundaryMask = 0; 3822 if (restrictions->boundary != 0) { 3823 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3824 // boundary must be a power of two and not less than alignment and 3825 // length 3826 ASSERT(((boundary - 1) & boundary) == 0); 3827 ASSERT(boundary >= alignmentMask + 1); 3828 ASSERT(boundary >= length); 3829 3830 boundaryMask = -boundary; 3831 } 3832 3833 vm_page_reservation reservation; 3834 vm_page_reserve_pages(&reservation, length, priority); 3835 3836 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3837 3838 // First we try to get a run with free pages only. If that fails, we also 3839 // consider cached pages. If there are only few free pages and many cached 3840 // ones, the odds are that we won't find enough contiguous ones, so we skip 3841 // the first iteration in this case. 3842 int32 freePages = sUnreservedFreePages; 3843 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3844 3845 for (;;) { 3846 if (alignmentMask != 0 || boundaryMask != 0) { 3847 page_num_t offsetStart = start + sPhysicalPageOffset; 3848 3849 // enforce alignment 3850 if ((offsetStart & alignmentMask) != 0) 3851 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3852 3853 // enforce boundary 3854 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3855 + length - 1)) & boundaryMask) != 0) { 3856 offsetStart = (offsetStart + length - 1) & boundaryMask; 3857 } 3858 3859 start = offsetStart - sPhysicalPageOffset; 3860 } 3861 3862 if (start + length > end) { 3863 if (useCached == 0) { 3864 // The first iteration with free pages only was unsuccessful. 3865 // Try again also considering cached pages. 3866 useCached = 1; 3867 start = requestedStart; 3868 continue; 3869 } 3870 3871 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3872 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3873 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3874 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart, 3875 end, restrictions->alignment, restrictions->boundary); 3876 3877 freeClearQueueLocker.Unlock(); 3878 vm_page_unreserve_pages(&reservation); 3879 return NULL; 3880 } 3881 3882 bool foundRun = true; 3883 page_num_t i; 3884 for (i = 0; i < length; i++) { 3885 uint32 pageState = sPages[start + i].State(); 3886 if (pageState != PAGE_STATE_FREE 3887 && pageState != PAGE_STATE_CLEAR 3888 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3889 foundRun = false; 3890 break; 3891 } 3892 } 3893 3894 if (foundRun) { 3895 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3896 if (i == length) 3897 return &sPages[start]; 3898 3899 // apparently a cached page couldn't be allocated -- skip it and 3900 // continue 3901 freeClearQueueLocker.Lock(); 3902 } 3903 3904 start += i + 1; 3905 } 3906 } 3907 3908 3909 vm_page * 3910 vm_page_at_index(int32 index) 3911 { 3912 return &sPages[index]; 3913 } 3914 3915 3916 vm_page * 3917 vm_lookup_page(page_num_t pageNumber) 3918 { 3919 if (pageNumber < sPhysicalPageOffset) 3920 return NULL; 3921 3922 pageNumber -= sPhysicalPageOffset; 3923 if (pageNumber >= sNumPages) 3924 return NULL; 3925 3926 return &sPages[pageNumber]; 3927 } 3928 3929 3930 bool 3931 vm_page_is_dummy(struct vm_page *page) 3932 { 3933 return page < sPages || page >= sPages + sNumPages; 3934 } 3935 3936 3937 /*! Free the page that belonged to a certain cache. 3938 You can use vm_page_set_state() manually if you prefer, but only 3939 if the page does not equal PAGE_STATE_MODIFIED. 3940 3941 \param cache The cache the page was previously owned by or NULL. The page 3942 must have been removed from its cache before calling this method in 3943 either case. 3944 \param page The page to free. 3945 \param reservation If not NULL, the page count of the reservation will be 3946 incremented, thus allowing to allocate another page for the freed one at 3947 a later time. 
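
	A minimal usage sketch (\a page must already have been removed from
	\a cache, which requires the cache to be locked):
	\code
	cache->RemovePage(page);
	vm_page_free_etc(cache, page, NULL);
	\endcode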
3948 */ 3949 void 3950 vm_page_free_etc(VMCache* cache, vm_page* page, 3951 vm_page_reservation* reservation) 3952 { 3953 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3954 && page->State() != PAGE_STATE_CLEAR); 3955 3956 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 3957 atomic_add(&sModifiedTemporaryPages, -1); 3958 3959 free_page(page, false); 3960 if (reservation == NULL) 3961 unreserve_pages(1); 3962 } 3963 3964 3965 void 3966 vm_page_set_state(vm_page *page, int pageState) 3967 { 3968 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3969 && page->State() != PAGE_STATE_CLEAR); 3970 3971 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 3972 free_page(page, pageState == PAGE_STATE_CLEAR); 3973 unreserve_pages(1); 3974 } else 3975 set_page_state(page, pageState); 3976 } 3977 3978 3979 /*! Moves a page to either the tail of the head of its current queue, 3980 depending on \a tail. 3981 The page must have a cache and the cache must be locked! 3982 */ 3983 void 3984 vm_page_requeue(struct vm_page *page, bool tail) 3985 { 3986 PAGE_ASSERT(page, page->Cache() != NULL); 3987 page->Cache()->AssertLocked(); 3988 // DEBUG_PAGE_ACCESS_CHECK(page); 3989 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 3990 // when it requeues busy pages. The reason is that vm_soft_fault() 3991 // (respectively fault_get_page()) and the file cache keep newly 3992 // allocated pages accessed while they are reading them from disk. It 3993 // would probably be better to change that code and reenable this 3994 // check. 3995 3996 VMPageQueue *queue = NULL; 3997 3998 switch (page->State()) { 3999 case PAGE_STATE_ACTIVE: 4000 queue = &sActivePageQueue; 4001 break; 4002 case PAGE_STATE_INACTIVE: 4003 queue = &sInactivePageQueue; 4004 break; 4005 case PAGE_STATE_MODIFIED: 4006 queue = &sModifiedPageQueue; 4007 break; 4008 case PAGE_STATE_CACHED: 4009 queue = &sCachedPageQueue; 4010 break; 4011 case PAGE_STATE_FREE: 4012 case PAGE_STATE_CLEAR: 4013 panic("vm_page_requeue() called for free/clear page %p", page); 4014 return; 4015 case PAGE_STATE_WIRED: 4016 case PAGE_STATE_UNUSED: 4017 return; 4018 default: 4019 panic("vm_page_touch: vm_page %p in invalid state %d\n", 4020 page, page->State()); 4021 break; 4022 } 4023 4024 queue->RequeueUnlocked(page, tail); 4025 } 4026 4027 4028 page_num_t 4029 vm_page_num_pages(void) 4030 { 4031 return sNumPages - sNonExistingPages; 4032 } 4033 4034 4035 /*! There is a subtle distinction between the page counts returned by 4036 this function and vm_page_num_free_pages(): 4037 The latter returns the number of pages that are completely uncommitted, 4038 whereas this one returns the number of pages that are available for 4039 use by being reclaimed as well (IOW it factors in things like cache pages 4040 as available). 4041 */ 4042 page_num_t 4043 vm_page_num_available_pages(void) 4044 { 4045 return vm_available_memory() / B_PAGE_SIZE; 4046 } 4047 4048 4049 page_num_t 4050 vm_page_num_free_pages(void) 4051 { 4052 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 4053 return count > 0 ? count : 0; 4054 } 4055 4056 4057 page_num_t 4058 vm_page_num_unused_pages(void) 4059 { 4060 int32 count = sUnreservedFreePages; 4061 return count > 0 ? count : 0; 4062 } 4063 4064 4065 void 4066 vm_page_get_stats(system_info *info) 4067 { 4068 // Note: there's no locking protecting any of the queues or counters here, 4069 // so we run the risk of getting bogus values when evaluating them 4070 // throughout this function. 
	// As these stats are for informational purposes only, it is not really
	// worth introducing such locking. Therefore we just ensure that we don't
	// under- or overflow any of the values.

	// The pages used for the block cache buffers. Those should not be counted
	// as used but as cached pages.
	// TODO: We should subtract the blocks that are in use ATM, since those
	// can't really be freed in a low memory situation.
	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
	info->block_cache_pages = blockCachePages;

	// Non-temporary modified pages are special as they represent pages that
	// can be written back, so they could be freed if necessary, for us
	// basically making them into cached pages with a higher overhead. The
	// modified queue count is therefore split into temporary and non-temporary
	// counts that are then added to the corresponding number.
	page_num_t modifiedNonTemporaryPages
		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);

	info->max_pages = vm_page_num_pages();
	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
		+ blockCachePages;

	// max_pages is composed of:
	//	active + inactive + unused + wired + modified + cached + free + clear
	// So taking out the cached (including modified non-temporary), free and
	// clear ones leaves us with all used pages.
	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
		+ sClearPageQueue.Count();
	info->used_pages = subtractPages > info->max_pages
		? 0 : info->max_pages - subtractPages;

	if (info->used_pages + info->cached_pages > info->max_pages) {
		// Something was shuffled around while we were summing up the counts.
		// Make the values sane, preferring the worse case of more used pages.
		info->cached_pages = info->max_pages - info->used_pages;
	}

	info->page_faults = vm_num_page_faults();
	info->ignored_pages = sIgnoredPages;

	// TODO: We don't consider pages used for page directories/tables yet.
}


/*!	Returns the greatest address within the last page of accessible physical
	memory.
	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
	means that the last page ends at exactly 4 GB.
*/
phys_addr_t
vm_page_max_address()
{
	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
}


RANGE_MARKER_FUNCTION_END(vm_page)
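
/*
	Illustrative sketch of the typical reserve/allocate pattern built on the
	functions above (VM_PRIORITY_SYSTEM stands in for whatever priority fits
	the caller):

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, count, VM_PRIORITY_SYSTEM);

	for (uint32 i = 0; i < count; i++) {
		vm_page* page = vm_page_allocate_page(&reservation,
			PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		// ... map and use the page ...
	}

	vm_page_unreserve_pages(&reservation);
		// returns whatever part of the reservation was not consumed
*/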