/*
 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <string.h>
#include <stdlib.h>

#include <algorithm>

#include <KernelExport.h>
#include <OS.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm_translation_map.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <condition_variable.h>
#include <elf.h>
#include <heap.h>
#include <kernel.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_priv.h>
#include <vm/vm_page.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "IORequest.h"
#include "PageCacheLocker.h"
#include "VMAnonymousCache.h"
#include "VMPageQueue.h"


//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

//#define TRACE_VM_DAEMONS
#ifdef TRACE_VM_DAEMONS
#define TRACE_DAEMON(x...) dprintf(x)
#else
#define TRACE_DAEMON(x...) do {} while (false)
#endif

//#define TRACK_PAGE_USAGE_STATS 1

#define PAGE_ASSERT(page, condition) \
	ASSERT_PRINT((condition), "page: %p", (page))

#define SCRUB_SIZE 16
	// this many pages will be cleared at once in the page scrubber thread

#define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY
	// maximum I/O priority of the page writer
#define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000
	// the maximum I/O priority shall be reached when this many pages need to
	// be written


// The page reserve that an allocation of the given priority must not touch.
static const size_t kPageReserveForPriority[] = {
	VM_PAGE_RESERVE_USER,		// user
	VM_PAGE_RESERVE_SYSTEM,		// system
	0							// VIP
};

// Minimum number of free pages the page daemon will try to achieve.
static uint32 sFreePagesTarget;
static uint32 sFreeOrCachedPagesTarget;
static uint32 sInactivePagesTarget;

// Wait interval between page daemon runs.
static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec

// Number of idle runs after which we want to have processed the full active
// queue.
static const uint32 kIdleRunsForFullQueue = 20;

// Maximum limit for the vm_page::usage_count.
static const int32 kPageUsageMax = 64;
// The vm_page::usage_count bonus an accessed page receives in a scan.
static const int32 kPageUsageAdvance = 3;
// The vm_page::usage_count penalty an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1; 102 103 int32 gMappedPagesCount; 104 105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 106 107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 113 114 static vm_page *sPages; 115 static page_num_t sPhysicalPageOffset; 116 static page_num_t sNumPages; 117 static page_num_t sNonExistingPages; 118 // pages in the sPages array that aren't backed by physical memory 119 static uint64 sIgnoredPages; 120 // pages of physical memory ignored by the boot loader (and thus not 121 // available here) 122 static vint32 sUnreservedFreePages; 123 static vint32 sUnsatisfiedPageReservations; 124 static vint32 sModifiedTemporaryPages; 125 126 static ConditionVariable sFreePageCondition; 127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 128 129 // This lock must be used whenever the free or clear page queues are changed. 130 // If you need to work on both queues at the same time, you need to hold a write 131 // lock, otherwise, a read lock suffices (each queue still has a spinlock to 132 // guard against concurrent changes). 133 static rw_lock sFreePageQueuesLock 134 = RW_LOCK_INITIALIZER("free/clear page queues"); 135 136 #ifdef TRACK_PAGE_USAGE_STATS 137 static page_num_t sPageUsageArrays[512]; 138 static page_num_t* sPageUsage = sPageUsageArrays; 139 static page_num_t sPageUsagePageCount; 140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 141 static page_num_t sNextPageUsagePageCount; 142 #endif 143 144 145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 146 147 struct caller_info { 148 addr_t caller; 149 size_t count; 150 }; 151 152 static const int32 kCallerInfoTableSize = 1024; 153 static caller_info sCallerInfoTable[kCallerInfoTableSize]; 154 static int32 sCallerInfoCount = 0; 155 156 static caller_info* get_caller_info(addr_t caller); 157 158 159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page) 160 161 static const addr_t kVMPageCodeAddressRange[] = { 162 RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page) 163 }; 164 165 #endif 166 167 168 RANGE_MARKER_FUNCTION_BEGIN(vm_page) 169 170 171 struct page_stats { 172 int32 totalFreePages; 173 int32 unsatisfiedReservations; 174 int32 cachedPages; 175 }; 176 177 178 struct PageReservationWaiter 179 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 180 Thread* thread; 181 uint32 dontTouch; // reserve not to touch 182 uint32 missing; // pages missing for the reservation 183 int32 threadPriority; 184 185 bool operator<(const PageReservationWaiter& other) const 186 { 187 // Implies an order by descending VM priority (ascending dontTouch) 188 // and (secondarily) descending thread priority. 
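		// For example: a VIP waiter (dontTouch == 0) sorts before a system
		// waiter (dontTouch == VM_PAGE_RESERVE_SYSTEM), which in turn sorts
		// before a user waiter (dontTouch == VM_PAGE_RESERVE_USER) -- assuming,
		// as the kPageReserveForPriority table suggests, that the system
		// reserve is smaller than the user reserve. Waiters with the same
		// dontTouch are ordered by thread priority, highest first.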
189 if (dontTouch != other.dontTouch) 190 return dontTouch < other.dontTouch; 191 return threadPriority > other.threadPriority; 192 } 193 }; 194 195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 196 static PageReservationWaiterList sPageReservationWaiters; 197 198 199 struct DaemonCondition { 200 void Init(const char* name) 201 { 202 mutex_init(&fLock, "daemon condition"); 203 fCondition.Init(this, name); 204 fActivated = false; 205 } 206 207 bool Lock() 208 { 209 return mutex_lock(&fLock) == B_OK; 210 } 211 212 void Unlock() 213 { 214 mutex_unlock(&fLock); 215 } 216 217 bool Wait(bigtime_t timeout, bool clearActivated) 218 { 219 MutexLocker locker(fLock); 220 if (clearActivated) 221 fActivated = false; 222 else if (fActivated) 223 return true; 224 225 ConditionVariableEntry entry; 226 fCondition.Add(&entry); 227 228 locker.Unlock(); 229 230 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 231 } 232 233 void WakeUp() 234 { 235 if (fActivated) 236 return; 237 238 MutexLocker locker(fLock); 239 fActivated = true; 240 fCondition.NotifyOne(); 241 } 242 243 void ClearActivated() 244 { 245 MutexLocker locker(fLock); 246 fActivated = false; 247 } 248 249 private: 250 mutex fLock; 251 ConditionVariable fCondition; 252 bool fActivated; 253 }; 254 255 256 static DaemonCondition sPageWriterCondition; 257 static DaemonCondition sPageDaemonCondition; 258 259 260 #if PAGE_ALLOCATION_TRACING 261 262 namespace PageAllocationTracing { 263 264 class ReservePages : public AbstractTraceEntry { 265 public: 266 ReservePages(uint32 count) 267 : 268 fCount(count) 269 { 270 Initialized(); 271 } 272 273 virtual void AddDump(TraceOutput& out) 274 { 275 out.Print("page reserve: %lu", fCount); 276 } 277 278 private: 279 uint32 fCount; 280 }; 281 282 283 class UnreservePages : public AbstractTraceEntry { 284 public: 285 UnreservePages(uint32 count) 286 : 287 fCount(count) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("page unreserve: %lu", fCount); 295 } 296 297 private: 298 uint32 fCount; 299 }; 300 301 302 class AllocatePage 303 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 304 public: 305 AllocatePage(page_num_t pageNumber) 306 : 307 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 308 fPageNumber(pageNumber) 309 { 310 Initialized(); 311 } 312 313 virtual void AddDump(TraceOutput& out) 314 { 315 out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber); 316 } 317 318 private: 319 page_num_t fPageNumber; 320 }; 321 322 323 class AllocatePageRun 324 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 325 public: 326 AllocatePageRun(page_num_t startPage, uint32 length) 327 : 328 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 329 fStartPage(startPage), 330 fLength(length) 331 { 332 Initialized(); 333 } 334 335 virtual void AddDump(TraceOutput& out) 336 { 337 out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %" 338 B_PRIu32, fStartPage, fLength); 339 } 340 341 private: 342 page_num_t fStartPage; 343 uint32 fLength; 344 }; 345 346 347 class FreePage 348 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 349 public: 350 FreePage(page_num_t pageNumber) 351 : 352 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 353 fPageNumber(pageNumber) 354 { 355 Initialized(); 356 } 357 358 virtual void AddDump(TraceOutput& out) 359 { 360 out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber); 361 } 362 363 private: 364 page_num_t fPageNumber; 
365 }; 366 367 368 class ScrubbingPages : public AbstractTraceEntry { 369 public: 370 ScrubbingPages(uint32 count) 371 : 372 fCount(count) 373 { 374 Initialized(); 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("page scrubbing: %lu", fCount); 380 } 381 382 private: 383 uint32 fCount; 384 }; 385 386 387 class ScrubbedPages : public AbstractTraceEntry { 388 public: 389 ScrubbedPages(uint32 count) 390 : 391 fCount(count) 392 { 393 Initialized(); 394 } 395 396 virtual void AddDump(TraceOutput& out) 397 { 398 out.Print("page scrubbed: %lu", fCount); 399 } 400 401 private: 402 uint32 fCount; 403 }; 404 405 406 class StolenPage : public AbstractTraceEntry { 407 public: 408 StolenPage() 409 { 410 Initialized(); 411 } 412 413 virtual void AddDump(TraceOutput& out) 414 { 415 out.Print("page stolen"); 416 } 417 }; 418 419 } // namespace PageAllocationTracing 420 421 # define TA(x) new(std::nothrow) PageAllocationTracing::x 422 423 #else 424 # define TA(x) 425 #endif // PAGE_ALLOCATION_TRACING 426 427 428 #if PAGE_DAEMON_TRACING 429 430 namespace PageDaemonTracing { 431 432 class ActivatePage : public AbstractTraceEntry { 433 public: 434 ActivatePage(vm_page* page) 435 : 436 fCache(page->cache), 437 fPage(page) 438 { 439 Initialized(); 440 } 441 442 virtual void AddDump(TraceOutput& out) 443 { 444 out.Print("page activated: %p, cache: %p", fPage, fCache); 445 } 446 447 private: 448 VMCache* fCache; 449 vm_page* fPage; 450 }; 451 452 453 class DeactivatePage : public AbstractTraceEntry { 454 public: 455 DeactivatePage(vm_page* page) 456 : 457 fCache(page->cache), 458 fPage(page) 459 { 460 Initialized(); 461 } 462 463 virtual void AddDump(TraceOutput& out) 464 { 465 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 466 } 467 468 private: 469 VMCache* fCache; 470 vm_page* fPage; 471 }; 472 473 474 class FreedPageSwap : public AbstractTraceEntry { 475 public: 476 FreedPageSwap(vm_page* page) 477 : 478 fCache(page->cache), 479 fPage(page) 480 { 481 Initialized(); 482 } 483 484 virtual void AddDump(TraceOutput& out) 485 { 486 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 487 } 488 489 private: 490 VMCache* fCache; 491 vm_page* fPage; 492 }; 493 494 } // namespace PageDaemonTracing 495 496 # define TD(x) new(std::nothrow) PageDaemonTracing::x 497 498 #else 499 # define TD(x) 500 #endif // PAGE_DAEMON_TRACING 501 502 503 #if PAGE_WRITER_TRACING 504 505 namespace PageWriterTracing { 506 507 class WritePage : public AbstractTraceEntry { 508 public: 509 WritePage(vm_page* page) 510 : 511 fCache(page->Cache()), 512 fPage(page) 513 { 514 Initialized(); 515 } 516 517 virtual void AddDump(TraceOutput& out) 518 { 519 out.Print("page write: %p, cache: %p", fPage, fCache); 520 } 521 522 private: 523 VMCache* fCache; 524 vm_page* fPage; 525 }; 526 527 } // namespace PageWriterTracing 528 529 # define TPW(x) new(std::nothrow) PageWriterTracing::x 530 531 #else 532 # define TPW(x) 533 #endif // PAGE_WRITER_TRACING 534 535 536 #if PAGE_STATE_TRACING 537 538 namespace PageStateTracing { 539 540 class SetPageState : public AbstractTraceEntry { 541 public: 542 SetPageState(vm_page* page, uint8 newState) 543 : 544 fPage(page), 545 fOldState(page->State()), 546 fNewState(newState), 547 fBusy(page->busy), 548 fWired(page->WiredCount() > 0), 549 fMapped(!page->mappings.IsEmpty()), 550 fAccessed(page->accessed), 551 fModified(page->modified) 552 { 553 #if PAGE_STATE_TRACING_STACK_TRACE 554 fStackTrace = capture_tracing_stack_trace( 555 PAGE_STATE_TRACING_STACK_TRACE, 0, true); 556 
// Don't capture userland stack trace to avoid potential 557 // deadlocks. 558 #endif 559 Initialized(); 560 } 561 562 #if PAGE_STATE_TRACING_STACK_TRACE 563 virtual void DumpStackTrace(TraceOutput& out) 564 { 565 out.PrintStackTrace(fStackTrace); 566 } 567 #endif 568 569 virtual void AddDump(TraceOutput& out) 570 { 571 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 572 fBusy ? 'b' : '-', 573 fWired ? 'w' : '-', 574 fMapped ? 'm' : '-', 575 fAccessed ? 'a' : '-', 576 fModified ? 'm' : '-', 577 page_state_to_string(fOldState), 578 page_state_to_string(fNewState)); 579 } 580 581 private: 582 vm_page* fPage; 583 #if PAGE_STATE_TRACING_STACK_TRACE 584 tracing_stack_trace* fStackTrace; 585 #endif 586 uint8 fOldState; 587 uint8 fNewState; 588 bool fBusy : 1; 589 bool fWired : 1; 590 bool fMapped : 1; 591 bool fAccessed : 1; 592 bool fModified : 1; 593 }; 594 595 } // namespace PageStateTracing 596 597 # define TPS(x) new(std::nothrow) PageStateTracing::x 598 599 #else 600 # define TPS(x) 601 #endif // PAGE_STATE_TRACING 602 603 604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 605 606 namespace BKernel { 607 608 class AllocationTrackingCallback { 609 public: 610 virtual ~AllocationTrackingCallback(); 611 612 virtual bool ProcessTrackingInfo( 613 AllocationTrackingInfo* info, 614 page_num_t pageNumber) = 0; 615 }; 616 617 } 618 619 using BKernel::AllocationTrackingCallback; 620 621 622 class AllocationCollectorCallback : public AllocationTrackingCallback { 623 public: 624 AllocationCollectorCallback(bool resetInfos) 625 : 626 fResetInfos(resetInfos) 627 { 628 } 629 630 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 631 page_num_t pageNumber) 632 { 633 if (!info->IsInitialized()) 634 return true; 635 636 addr_t caller = 0; 637 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 638 639 if (traceEntry != NULL && info->IsTraceEntryValid()) { 640 caller = tracing_find_caller_in_stack_trace( 641 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 642 } 643 644 caller_info* callerInfo = get_caller_info(caller); 645 if (callerInfo == NULL) { 646 kprintf("out of space for caller infos\n"); 647 return false; 648 } 649 650 callerInfo->count++; 651 652 if (fResetInfos) 653 info->Clear(); 654 655 return true; 656 } 657 658 private: 659 bool fResetInfos; 660 }; 661 662 663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback { 664 public: 665 AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter, 666 team_id teamFilter, thread_id threadFilter) 667 : 668 fPrintStackTrace(printStackTrace), 669 fPageFilter(pageFilter), 670 fTeamFilter(teamFilter), 671 fThreadFilter(threadFilter) 672 { 673 } 674 675 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 676 page_num_t pageNumber) 677 { 678 if (!info->IsInitialized()) 679 return true; 680 681 if (fPageFilter != 0 && pageNumber != fPageFilter) 682 return true; 683 684 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 685 if (traceEntry != NULL && !info->IsTraceEntryValid()) 686 traceEntry = NULL; 687 688 if (traceEntry != NULL) { 689 if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter) 690 return true; 691 if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter) 692 return true; 693 } else { 694 // we need the info if we have filters set 695 if (fTeamFilter != -1 || fThreadFilter != -1) 696 return true; 697 } 698 699 kprintf("page number %#" B_PRIxPHYSADDR, pageNumber); 700 701 if (traceEntry != NULL) { 702 kprintf(", team: %" B_PRId32 ", 
thread %" B_PRId32 703 ", time %" B_PRId64 "\n", traceEntry->TeamID(), 704 traceEntry->ThreadID(), traceEntry->Time()); 705 706 if (fPrintStackTrace) 707 tracing_print_stack_trace(traceEntry->StackTrace()); 708 } else 709 kprintf("\n"); 710 711 return true; 712 } 713 714 private: 715 bool fPrintStackTrace; 716 page_num_t fPageFilter; 717 team_id fTeamFilter; 718 thread_id fThreadFilter; 719 }; 720 721 722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback { 723 public: 724 AllocationDetailPrinterCallback(addr_t caller) 725 : 726 fCaller(caller) 727 { 728 } 729 730 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 731 page_num_t pageNumber) 732 { 733 if (!info->IsInitialized()) 734 return true; 735 736 addr_t caller = 0; 737 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 738 if (traceEntry != NULL && !info->IsTraceEntryValid()) 739 traceEntry = NULL; 740 741 if (traceEntry != NULL) { 742 caller = tracing_find_caller_in_stack_trace( 743 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 744 } 745 746 if (caller != fCaller) 747 return true; 748 749 kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber); 750 if (traceEntry != NULL) 751 tracing_print_stack_trace(traceEntry->StackTrace()); 752 753 return true; 754 } 755 756 private: 757 addr_t fCaller; 758 }; 759 760 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 761 762 763 static int 764 find_page(int argc, char **argv) 765 { 766 struct vm_page *page; 767 addr_t address; 768 int32 index = 1; 769 int i; 770 771 struct { 772 const char* name; 773 VMPageQueue* queue; 774 } pageQueueInfos[] = { 775 { "free", &sFreePageQueue }, 776 { "clear", &sClearPageQueue }, 777 { "modified", &sModifiedPageQueue }, 778 { "active", &sActivePageQueue }, 779 { "inactive", &sInactivePageQueue }, 780 { "cached", &sCachedPageQueue }, 781 { NULL, NULL } 782 }; 783 784 if (argc < 2 785 || strlen(argv[index]) <= 2 786 || argv[index][0] != '0' 787 || argv[index][1] != 'x') { 788 kprintf("usage: find_page <address>\n"); 789 return 0; 790 } 791 792 address = strtoul(argv[index], NULL, 0); 793 page = (vm_page*)address; 794 795 for (i = 0; pageQueueInfos[i].name; i++) { 796 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 797 while (vm_page* p = it.Next()) { 798 if (p == page) { 799 kprintf("found page %p in queue %p (%s)\n", page, 800 pageQueueInfos[i].queue, pageQueueInfos[i].name); 801 return 0; 802 } 803 } 804 } 805 806 kprintf("page %p isn't in any queue\n", page); 807 808 return 0; 809 } 810 811 812 const char * 813 page_state_to_string(int state) 814 { 815 switch(state) { 816 case PAGE_STATE_ACTIVE: 817 return "active"; 818 case PAGE_STATE_INACTIVE: 819 return "inactive"; 820 case PAGE_STATE_MODIFIED: 821 return "modified"; 822 case PAGE_STATE_CACHED: 823 return "cached"; 824 case PAGE_STATE_FREE: 825 return "free"; 826 case PAGE_STATE_CLEAR: 827 return "clear"; 828 case PAGE_STATE_WIRED: 829 return "wired"; 830 case PAGE_STATE_UNUSED: 831 return "unused"; 832 default: 833 return "unknown"; 834 } 835 } 836 837 838 static int 839 dump_page(int argc, char **argv) 840 { 841 bool addressIsPointer = true; 842 bool physical = false; 843 bool searchMappings = false; 844 int32 index = 1; 845 846 while (index < argc) { 847 if (argv[index][0] != '-') 848 break; 849 850 if (!strcmp(argv[index], "-p")) { 851 addressIsPointer = false; 852 physical = true; 853 } else if (!strcmp(argv[index], "-v")) { 854 addressIsPointer = false; 855 } else if (!strcmp(argv[index], "-m")) { 856 searchMappings = true; 857 } 
else { 858 print_debugger_command_usage(argv[0]); 859 return 0; 860 } 861 862 index++; 863 } 864 865 if (index + 1 != argc) { 866 print_debugger_command_usage(argv[0]); 867 return 0; 868 } 869 870 uint64 value; 871 if (!evaluate_debug_expression(argv[index], &value, false)) 872 return 0; 873 874 uint64 pageAddress = value; 875 struct vm_page* page; 876 877 if (addressIsPointer) { 878 page = (struct vm_page *)(addr_t)pageAddress; 879 } else { 880 if (!physical) { 881 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 882 883 if (debug_get_debugged_thread()->team->address_space != NULL) 884 addressSpace = debug_get_debugged_thread()->team->address_space; 885 886 uint32 flags = 0; 887 phys_addr_t physicalAddress; 888 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 889 &physicalAddress, &flags) != B_OK 890 || (flags & PAGE_PRESENT) == 0) { 891 kprintf("Virtual address not mapped to a physical page in this " 892 "address space.\n"); 893 return 0; 894 } 895 pageAddress = physicalAddress; 896 } 897 898 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 899 } 900 901 kprintf("PAGE: %p\n", page); 902 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 903 page->queue_link.previous); 904 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", 905 page->physical_page_number); 906 kprintf("cache: %p\n", page->Cache()); 907 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset); 908 kprintf("cache_next: %p\n", page->cache_next); 909 kprintf("state: %s\n", page_state_to_string(page->State())); 910 kprintf("wired_count: %d\n", page->WiredCount()); 911 kprintf("usage_count: %d\n", page->usage_count); 912 kprintf("busy: %d\n", page->busy); 913 kprintf("busy_writing: %d\n", page->busy_writing); 914 kprintf("accessed: %d\n", page->accessed); 915 kprintf("modified: %d\n", page->modified); 916 #if DEBUG_PAGE_QUEUE 917 kprintf("queue: %p\n", page->queue); 918 #endif 919 #if DEBUG_PAGE_ACCESS 920 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 921 #endif 922 kprintf("area mappings:\n"); 923 924 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 925 vm_page_mapping *mapping; 926 while ((mapping = iterator.Next()) != NULL) { 927 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 928 mapping = mapping->page_link.next; 929 } 930 931 if (searchMappings) { 932 kprintf("all mappings:\n"); 933 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 934 while (addressSpace != NULL) { 935 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 936 for (addr_t address = addressSpace->Base(); pageCount != 0; 937 address += B_PAGE_SIZE, pageCount--) { 938 phys_addr_t physicalAddress; 939 uint32 flags = 0; 940 if (addressSpace->TranslationMap()->QueryInterrupt(address, 941 &physicalAddress, &flags) == B_OK 942 && (flags & PAGE_PRESENT) != 0 943 && physicalAddress / B_PAGE_SIZE 944 == page->physical_page_number) { 945 VMArea* area = addressSpace->LookupArea(address); 946 kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#" 947 B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(), 948 area != NULL ? area->id : -1, address, 949 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 950 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 951 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 952 (flags & PAGE_ACCESSED) != 0 ? 
" accessed" : ""); 953 } 954 } 955 addressSpace = VMAddressSpace::DebugNext(addressSpace); 956 } 957 } 958 959 set_debug_variable("_cache", (addr_t)page->Cache()); 960 #if DEBUG_PAGE_ACCESS 961 set_debug_variable("_accessor", page->accessing_thread); 962 #endif 963 964 return 0; 965 } 966 967 968 static int 969 dump_page_queue(int argc, char **argv) 970 { 971 struct VMPageQueue *queue; 972 973 if (argc < 2) { 974 kprintf("usage: page_queue <address/name> [list]\n"); 975 return 0; 976 } 977 978 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 979 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 980 if (!strcmp(argv[1], "free")) 981 queue = &sFreePageQueue; 982 else if (!strcmp(argv[1], "clear")) 983 queue = &sClearPageQueue; 984 else if (!strcmp(argv[1], "modified")) 985 queue = &sModifiedPageQueue; 986 else if (!strcmp(argv[1], "active")) 987 queue = &sActivePageQueue; 988 else if (!strcmp(argv[1], "inactive")) 989 queue = &sInactivePageQueue; 990 else if (!strcmp(argv[1], "cached")) 991 queue = &sCachedPageQueue; 992 else { 993 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 994 return 0; 995 } 996 997 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %" 998 B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(), 999 queue->Count()); 1000 1001 if (argc == 3) { 1002 struct vm_page *page = queue->Head(); 1003 const char *type = "none"; 1004 int i; 1005 1006 if (page->Cache() != NULL) { 1007 switch (page->Cache()->type) { 1008 case CACHE_TYPE_RAM: 1009 type = "RAM"; 1010 break; 1011 case CACHE_TYPE_DEVICE: 1012 type = "device"; 1013 break; 1014 case CACHE_TYPE_VNODE: 1015 type = "vnode"; 1016 break; 1017 case CACHE_TYPE_NULL: 1018 type = "null"; 1019 break; 1020 default: 1021 type = "???"; 1022 break; 1023 } 1024 } 1025 1026 kprintf("page cache type state wired usage\n"); 1027 for (i = 0; page; i++, page = queue->Next(page)) { 1028 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 1029 type, page_state_to_string(page->State()), 1030 page->WiredCount(), page->usage_count); 1031 } 1032 } 1033 return 0; 1034 } 1035 1036 1037 static int 1038 dump_page_stats(int argc, char **argv) 1039 { 1040 page_num_t swappableModified = 0; 1041 page_num_t swappableModifiedInactive = 0; 1042 1043 size_t counter[8]; 1044 size_t busyCounter[8]; 1045 memset(counter, 0, sizeof(counter)); 1046 memset(busyCounter, 0, sizeof(busyCounter)); 1047 1048 struct page_run { 1049 page_num_t start; 1050 page_num_t end; 1051 1052 page_num_t Length() const { return end - start; } 1053 }; 1054 1055 page_run currentFreeRun = { 0, 0 }; 1056 page_run currentCachedRun = { 0, 0 }; 1057 page_run longestFreeRun = { 0, 0 }; 1058 page_run longestCachedRun = { 0, 0 }; 1059 1060 for (page_num_t i = 0; i < sNumPages; i++) { 1061 if (sPages[i].State() > 7) { 1062 panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i, 1063 &sPages[i]); 1064 } 1065 1066 uint32 pageState = sPages[i].State(); 1067 1068 counter[pageState]++; 1069 if (sPages[i].busy) 1070 busyCounter[pageState]++; 1071 1072 if (pageState == PAGE_STATE_MODIFIED 1073 && sPages[i].Cache() != NULL 1074 && sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) { 1075 swappableModified++; 1076 if (sPages[i].usage_count == 0) 1077 swappableModifiedInactive++; 1078 } 1079 1080 // track free and cached pages runs 1081 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 1082 currentFreeRun.end = i + 1; 1083 currentCachedRun.end = i + 1; 1084 } else { 1085 if (currentFreeRun.Length() > 
longestFreeRun.Length()) 1086 longestFreeRun = currentFreeRun; 1087 currentFreeRun.start = currentFreeRun.end = i + 1; 1088 1089 if (pageState == PAGE_STATE_CACHED) { 1090 currentCachedRun.end = i + 1; 1091 } else { 1092 if (currentCachedRun.Length() > longestCachedRun.Length()) 1093 longestCachedRun = currentCachedRun; 1094 currentCachedRun.start = currentCachedRun.end = i + 1; 1095 } 1096 } 1097 } 1098 1099 kprintf("page stats:\n"); 1100 kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages); 1101 1102 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1103 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 1104 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1105 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 1106 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1107 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 1108 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1109 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 1110 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1111 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 1112 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1113 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 1114 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 1115 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 1116 1117 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 1118 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 1119 sUnsatisfiedPageReservations); 1120 kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount); 1121 kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %" 1122 B_PRIuPHYSADDR ")\n", longestFreeRun.Length(), 1123 sPages[longestFreeRun.start].physical_page_number); 1124 kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %" 1125 B_PRIuPHYSADDR ")\n", longestCachedRun.Length(), 1126 sPages[longestCachedRun.start].physical_page_number); 1127 1128 kprintf("waiting threads:\n"); 1129 for (PageReservationWaiterList::Iterator it 1130 = sPageReservationWaiters.GetIterator(); 1131 PageReservationWaiter* waiter = it.Next();) { 1132 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 1133 ", don't touch: %6" B_PRIu32 "\n", waiter->thread->id, 1134 waiter->missing, waiter->dontTouch); 1135 } 1136 1137 kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue, 1138 sFreePageQueue.Count()); 1139 kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue, 1140 sClearPageQueue.Count()); 1141 kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32 1142 " temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %" 1143 B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 1144 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 1145 kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n", 1146 &sActivePageQueue, sActivePageQueue.Count()); 1147 kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n", 1148 &sInactivePageQueue, sInactivePageQueue.Count()); 1149 kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n", 1150 &sCachedPageQueue, sCachedPageQueue.Count()); 1151 return 0; 1152 } 1153 1154 1155 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1156 1157 static caller_info* 1158 get_caller_info(addr_t caller) 1159 { 1160 // find the caller info 1161 for (int32 i = 0; i < sCallerInfoCount; i++) { 1162 if (caller == 
sCallerInfoTable[i].caller) 1163 return &sCallerInfoTable[i]; 1164 } 1165 1166 // not found, add a new entry, if there are free slots 1167 if (sCallerInfoCount >= kCallerInfoTableSize) 1168 return NULL; 1169 1170 caller_info* info = &sCallerInfoTable[sCallerInfoCount++]; 1171 info->caller = caller; 1172 info->count = 0; 1173 1174 return info; 1175 } 1176 1177 1178 static int 1179 caller_info_compare_count(const void* _a, const void* _b) 1180 { 1181 const caller_info* a = (const caller_info*)_a; 1182 const caller_info* b = (const caller_info*)_b; 1183 return (int)(b->count - a->count); 1184 } 1185 1186 1187 static int 1188 dump_page_allocations_per_caller(int argc, char** argv) 1189 { 1190 bool resetAllocationInfos = false; 1191 bool printDetails = false; 1192 addr_t caller = 0; 1193 1194 for (int32 i = 1; i < argc; i++) { 1195 if (strcmp(argv[i], "-d") == 0) { 1196 uint64 callerAddress; 1197 if (++i >= argc 1198 || !evaluate_debug_expression(argv[i], &callerAddress, true)) { 1199 print_debugger_command_usage(argv[0]); 1200 return 0; 1201 } 1202 1203 caller = callerAddress; 1204 printDetails = true; 1205 } else if (strcmp(argv[i], "-r") == 0) { 1206 resetAllocationInfos = true; 1207 } else { 1208 print_debugger_command_usage(argv[0]); 1209 return 0; 1210 } 1211 } 1212 1213 sCallerInfoCount = 0; 1214 1215 AllocationCollectorCallback collectorCallback(resetAllocationInfos); 1216 AllocationDetailPrinterCallback detailsCallback(caller); 1217 AllocationTrackingCallback& callback = printDetails 1218 ? (AllocationTrackingCallback&)detailsCallback 1219 : (AllocationTrackingCallback&)collectorCallback; 1220 1221 for (page_num_t i = 0; i < sNumPages; i++) 1222 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1223 1224 if (printDetails) 1225 return 0; 1226 1227 // sort the array 1228 qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info), 1229 &caller_info_compare_count); 1230 1231 kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount); 1232 1233 size_t totalAllocationCount = 0; 1234 1235 kprintf(" count caller\n"); 1236 kprintf("----------------------------------\n"); 1237 for (int32 i = 0; i < sCallerInfoCount; i++) { 1238 caller_info& info = sCallerInfoTable[i]; 1239 kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller); 1240 1241 const char* symbol; 1242 const char* imageName; 1243 bool exactMatch; 1244 addr_t baseAddress; 1245 1246 if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol, 1247 &imageName, &exactMatch) == B_OK) { 1248 kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol, 1249 info.caller - baseAddress, imageName, 1250 exactMatch ? 
"" : " (nearest)"); 1251 } else 1252 kprintf("\n"); 1253 1254 totalAllocationCount += info.count; 1255 } 1256 1257 kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n", 1258 totalAllocationCount); 1259 1260 return 0; 1261 } 1262 1263 1264 static int 1265 dump_page_allocation_infos(int argc, char** argv) 1266 { 1267 page_num_t pageFilter = 0; 1268 team_id teamFilter = -1; 1269 thread_id threadFilter = -1; 1270 bool printStackTraces = false; 1271 1272 for (int32 i = 1; i < argc; i++) { 1273 if (strcmp(argv[i], "--stacktrace") == 0) 1274 printStackTraces = true; 1275 else if (strcmp(argv[i], "-p") == 0) { 1276 uint64 pageNumber; 1277 if (++i >= argc 1278 || !evaluate_debug_expression(argv[i], &pageNumber, true)) { 1279 print_debugger_command_usage(argv[0]); 1280 return 0; 1281 } 1282 1283 pageFilter = pageNumber; 1284 } else if (strcmp(argv[i], "--team") == 0) { 1285 uint64 team; 1286 if (++i >= argc 1287 || !evaluate_debug_expression(argv[i], &team, true)) { 1288 print_debugger_command_usage(argv[0]); 1289 return 0; 1290 } 1291 1292 teamFilter = team; 1293 } else if (strcmp(argv[i], "--thread") == 0) { 1294 uint64 thread; 1295 if (++i >= argc 1296 || !evaluate_debug_expression(argv[i], &thread, true)) { 1297 print_debugger_command_usage(argv[0]); 1298 return 0; 1299 } 1300 1301 threadFilter = thread; 1302 } else { 1303 print_debugger_command_usage(argv[0]); 1304 return 0; 1305 } 1306 } 1307 1308 AllocationInfoPrinterCallback callback(printStackTraces, pageFilter, 1309 teamFilter, threadFilter); 1310 1311 for (page_num_t i = 0; i < sNumPages; i++) 1312 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1313 1314 return 0; 1315 } 1316 1317 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1318 1319 1320 #ifdef TRACK_PAGE_USAGE_STATS 1321 1322 static void 1323 track_page_usage(vm_page* page) 1324 { 1325 if (page->WiredCount() == 0) { 1326 sNextPageUsage[(int32)page->usage_count + 128]++; 1327 sNextPageUsagePageCount++; 1328 } 1329 } 1330 1331 1332 static void 1333 update_page_usage_stats() 1334 { 1335 std::swap(sPageUsage, sNextPageUsage); 1336 sPageUsagePageCount = sNextPageUsagePageCount; 1337 1338 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 1339 sNextPageUsagePageCount = 0; 1340 1341 // compute average 1342 if (sPageUsagePageCount > 0) { 1343 int64 sum = 0; 1344 for (int32 i = 0; i < 256; i++) 1345 sum += (int64)sPageUsage[i] * (i - 128); 1346 1347 TRACE_DAEMON("average page usage: %f (%lu pages)\n", 1348 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 1349 } 1350 } 1351 1352 1353 static int 1354 dump_page_usage_stats(int argc, char** argv) 1355 { 1356 kprintf("distribution of page usage counts (%lu pages):", 1357 sPageUsagePageCount); 1358 1359 int64 sum = 0; 1360 for (int32 i = 0; i < 256; i++) { 1361 if (i % 8 == 0) 1362 kprintf("\n%4ld:", i - 128); 1363 1364 int64 count = sPageUsage[i]; 1365 sum += count * (i - 128); 1366 1367 kprintf(" %9llu", count); 1368 } 1369 1370 kprintf("\n\n"); 1371 1372 kprintf("average usage count: %f\n", 1373 sPageUsagePageCount > 0 ? 
			(float)sum / sPageUsagePageCount : 0);

	return 0;
}

#endif	// TRACK_PAGE_USAGE_STATS


// #pragma mark - vm_page


inline void
vm_page::InitState(uint8 newState)
{
	state = newState;
}


inline void
vm_page::SetState(uint8 newState)
{
	TPS(SetPageState(this, newState));

	state = newState;
}


// #pragma mark -


static void
get_page_stats(page_stats& _pageStats)
{
	_pageStats.totalFreePages = sUnreservedFreePages;
	_pageStats.cachedPages = sCachedPageQueue.Count();
	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
		// TODO: We don't get an actual snapshot here!
}


static bool
do_active_paging(const page_stats& pageStats)
{
	return pageStats.totalFreePages + pageStats.cachedPages
		< pageStats.unsatisfiedReservations
			+ (int32)sFreeOrCachedPagesTarget;
}


/*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
	\a count. Doesn't touch the last \a dontTouch pages of
	\c sUnreservedFreePages, though.
	\return The number of actually reserved pages.
*/
static uint32
reserve_some_pages(uint32 count, uint32 dontTouch)
{
	while (true) {
		int32 freePages = sUnreservedFreePages;
		if (freePages <= (int32)dontTouch)
			return 0;

		int32 toReserve = std::min(count, freePages - dontTouch);
		if (atomic_test_and_set(&sUnreservedFreePages,
				freePages - toReserve, freePages)
					== freePages) {
			return toReserve;
		}

		// the count changed in the meantime -- retry
	}
}


static void
wake_up_page_reservation_waiters()
{
	MutexLocker pageDeficitLocker(sPageDeficitLock);

	// TODO: If this is a low priority thread, we might want to disable
	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
	// high priority threads will be kept waiting while a medium priority
	// thread prevents us from running.
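	// The waiter list is sorted by PageReservationWaiter::operator<, i.e. VIP
	// and system waiters are served before user waiters. reserve_some_pages()
	// may hand out fewer pages than a waiter is missing; the partial amount is
	// credited to the waiter and we stop, leaving it queued. A waiter is only
	// removed and unblocked once its reservation is completely satisfied.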
1456 1457 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) { 1458 int32 reserved = reserve_some_pages(waiter->missing, 1459 waiter->dontTouch); 1460 if (reserved == 0) 1461 return; 1462 1463 atomic_add(&sUnsatisfiedPageReservations, -reserved); 1464 waiter->missing -= reserved; 1465 1466 if (waiter->missing > 0) 1467 return; 1468 1469 sPageReservationWaiters.Remove(waiter); 1470 1471 InterruptsSpinLocker schedulerLocker(gSchedulerLock); 1472 thread_unblock_locked(waiter->thread, B_OK); 1473 } 1474 } 1475 1476 1477 static inline void 1478 unreserve_pages(uint32 count) 1479 { 1480 atomic_add(&sUnreservedFreePages, count); 1481 if (sUnsatisfiedPageReservations != 0) 1482 wake_up_page_reservation_waiters(); 1483 } 1484 1485 1486 static void 1487 free_page(vm_page* page, bool clear) 1488 { 1489 DEBUG_PAGE_ACCESS_CHECK(page); 1490 1491 PAGE_ASSERT(page, !page->IsMapped()); 1492 1493 VMPageQueue* fromQueue; 1494 1495 switch (page->State()) { 1496 case PAGE_STATE_ACTIVE: 1497 fromQueue = &sActivePageQueue; 1498 break; 1499 case PAGE_STATE_INACTIVE: 1500 fromQueue = &sInactivePageQueue; 1501 break; 1502 case PAGE_STATE_MODIFIED: 1503 fromQueue = &sModifiedPageQueue; 1504 break; 1505 case PAGE_STATE_CACHED: 1506 fromQueue = &sCachedPageQueue; 1507 break; 1508 case PAGE_STATE_FREE: 1509 case PAGE_STATE_CLEAR: 1510 panic("free_page(): page %p already free", page); 1511 return; 1512 case PAGE_STATE_WIRED: 1513 case PAGE_STATE_UNUSED: 1514 fromQueue = NULL; 1515 break; 1516 default: 1517 panic("free_page(): page %p in invalid state %d", 1518 page, page->State()); 1519 return; 1520 } 1521 1522 if (page->CacheRef() != NULL) 1523 panic("to be freed page %p has cache", page); 1524 if (page->IsMapped()) 1525 panic("to be freed page %p has mappings", page); 1526 1527 if (fromQueue != NULL) 1528 fromQueue->RemoveUnlocked(page); 1529 1530 TA(FreePage(page->physical_page_number)); 1531 1532 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1533 page->allocation_tracking_info.Clear(); 1534 #endif 1535 1536 ReadLocker locker(sFreePageQueuesLock); 1537 1538 DEBUG_PAGE_ACCESS_END(page); 1539 1540 if (clear) { 1541 page->SetState(PAGE_STATE_CLEAR); 1542 sClearPageQueue.PrependUnlocked(page); 1543 } else { 1544 page->SetState(PAGE_STATE_FREE); 1545 sFreePageQueue.PrependUnlocked(page); 1546 } 1547 1548 locker.Unlock(); 1549 1550 unreserve_pages(1); 1551 } 1552 1553 1554 /*! The caller must make sure that no-one else tries to change the page's state 1555 while the function is called. If the page has a cache, this can be done by 1556 locking the cache. 
1557 */ 1558 static void 1559 set_page_state(vm_page *page, int pageState) 1560 { 1561 DEBUG_PAGE_ACCESS_CHECK(page); 1562 1563 if (pageState == page->State()) 1564 return; 1565 1566 VMPageQueue* fromQueue; 1567 1568 switch (page->State()) { 1569 case PAGE_STATE_ACTIVE: 1570 fromQueue = &sActivePageQueue; 1571 break; 1572 case PAGE_STATE_INACTIVE: 1573 fromQueue = &sInactivePageQueue; 1574 break; 1575 case PAGE_STATE_MODIFIED: 1576 fromQueue = &sModifiedPageQueue; 1577 break; 1578 case PAGE_STATE_CACHED: 1579 fromQueue = &sCachedPageQueue; 1580 break; 1581 case PAGE_STATE_FREE: 1582 case PAGE_STATE_CLEAR: 1583 panic("set_page_state(): page %p is free/clear", page); 1584 return; 1585 case PAGE_STATE_WIRED: 1586 case PAGE_STATE_UNUSED: 1587 fromQueue = NULL; 1588 break; 1589 default: 1590 panic("set_page_state(): page %p in invalid state %d", 1591 page, page->State()); 1592 return; 1593 } 1594 1595 VMPageQueue* toQueue; 1596 1597 switch (pageState) { 1598 case PAGE_STATE_ACTIVE: 1599 toQueue = &sActivePageQueue; 1600 break; 1601 case PAGE_STATE_INACTIVE: 1602 toQueue = &sInactivePageQueue; 1603 break; 1604 case PAGE_STATE_MODIFIED: 1605 toQueue = &sModifiedPageQueue; 1606 break; 1607 case PAGE_STATE_CACHED: 1608 PAGE_ASSERT(page, !page->IsMapped()); 1609 PAGE_ASSERT(page, !page->modified); 1610 toQueue = &sCachedPageQueue; 1611 break; 1612 case PAGE_STATE_FREE: 1613 case PAGE_STATE_CLEAR: 1614 panic("set_page_state(): target state is free/clear"); 1615 return; 1616 case PAGE_STATE_WIRED: 1617 case PAGE_STATE_UNUSED: 1618 toQueue = NULL; 1619 break; 1620 default: 1621 panic("set_page_state(): invalid target state %d", pageState); 1622 return; 1623 } 1624 1625 VMCache* cache = page->Cache(); 1626 if (cache != NULL && cache->temporary) { 1627 if (pageState == PAGE_STATE_MODIFIED) 1628 atomic_add(&sModifiedTemporaryPages, 1); 1629 else if (page->State() == PAGE_STATE_MODIFIED) 1630 atomic_add(&sModifiedTemporaryPages, -1); 1631 } 1632 1633 // move the page 1634 if (toQueue == fromQueue) { 1635 // Note: Theoretically we are required to lock when changing the page 1636 // state, even if we don't change the queue. We actually don't have to 1637 // do this, though, since only for the active queue there are different 1638 // page states and active pages have a cache that must be locked at 1639 // this point. So we rely on the fact that everyone must lock the cache 1640 // before trying to change/interpret the page state. 1641 PAGE_ASSERT(page, cache != NULL); 1642 cache->AssertLocked(); 1643 page->SetState(pageState); 1644 } else { 1645 if (fromQueue != NULL) 1646 fromQueue->RemoveUnlocked(page); 1647 1648 page->SetState(pageState); 1649 1650 if (toQueue != NULL) 1651 toQueue->AppendUnlocked(page); 1652 } 1653 } 1654 1655 1656 /*! Moves a previously modified page into a now appropriate queue. 1657 The page queues must not be locked. 1658 */ 1659 static void 1660 move_page_to_appropriate_queue(vm_page *page) 1661 { 1662 DEBUG_PAGE_ACCESS_CHECK(page); 1663 1664 // Note, this logic must be in sync with what the page daemon does. 1665 int32 state; 1666 if (page->IsMapped()) 1667 state = PAGE_STATE_ACTIVE; 1668 else if (page->modified) 1669 state = PAGE_STATE_MODIFIED; 1670 else 1671 state = PAGE_STATE_CACHED; 1672 1673 // TODO: If free + cached pages are low, we might directly want to free the 1674 // page. 
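	// Note that a page that is both mapped and modified goes to the active
	// queue; its modified flag stays set, so it can be moved to the modified
	// queue again later (e.g. by the page daemon, once it is unmapped) and be
	// written back then.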
1675 set_page_state(page, state); 1676 } 1677 1678 1679 static void 1680 clear_page(struct vm_page *page) 1681 { 1682 vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0, 1683 B_PAGE_SIZE); 1684 } 1685 1686 1687 static status_t 1688 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired) 1689 { 1690 TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#" 1691 B_PRIxPHYSADDR "\n", startPage, length)); 1692 1693 if (sPhysicalPageOffset > startPage) { 1694 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1695 "): start page is before free list\n", startPage, length); 1696 if (sPhysicalPageOffset - startPage >= length) 1697 return B_OK; 1698 length -= sPhysicalPageOffset - startPage; 1699 startPage = sPhysicalPageOffset; 1700 } 1701 1702 startPage -= sPhysicalPageOffset; 1703 1704 if (startPage + length > sNumPages) { 1705 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1706 "): range would extend past free list\n", startPage, length); 1707 if (startPage >= sNumPages) 1708 return B_OK; 1709 length = sNumPages - startPage; 1710 } 1711 1712 WriteLocker locker(sFreePageQueuesLock); 1713 1714 for (page_num_t i = 0; i < length; i++) { 1715 vm_page *page = &sPages[startPage + i]; 1716 switch (page->State()) { 1717 case PAGE_STATE_FREE: 1718 case PAGE_STATE_CLEAR: 1719 { 1720 // TODO: This violates the page reservation policy, since we remove pages from 1721 // the free/clear queues without having reserved them before. This should happen 1722 // in the early boot process only, though. 1723 DEBUG_PAGE_ACCESS_START(page); 1724 VMPageQueue& queue = page->State() == PAGE_STATE_FREE 1725 ? sFreePageQueue : sClearPageQueue; 1726 queue.Remove(page); 1727 page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED); 1728 page->busy = false; 1729 atomic_add(&sUnreservedFreePages, -1); 1730 DEBUG_PAGE_ACCESS_END(page); 1731 break; 1732 } 1733 case PAGE_STATE_WIRED: 1734 case PAGE_STATE_UNUSED: 1735 break; 1736 case PAGE_STATE_ACTIVE: 1737 case PAGE_STATE_INACTIVE: 1738 case PAGE_STATE_MODIFIED: 1739 case PAGE_STATE_CACHED: 1740 default: 1741 // uh 1742 dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR 1743 " in non-free state %d!\n", startPage + i, page->State()); 1744 break; 1745 } 1746 } 1747 1748 return B_OK; 1749 } 1750 1751 1752 /*! 1753 This is a background thread that wakes up every now and then (every 100ms) 1754 and moves some pages from the free queue over to the clear queue. 1755 Given enough time, it will clear out all pages from the free queue - we 1756 could probably slow it down after having reached a certain threshold. 1757 */ 1758 static int32 1759 page_scrubber(void *unused) 1760 { 1761 (void)(unused); 1762 1763 TRACE(("page_scrubber starting...\n")); 1764 1765 for (;;) { 1766 snooze(100000); // 100ms 1767 1768 if (sFreePageQueue.Count() == 0 1769 || sUnreservedFreePages < (int32)sFreePagesTarget) { 1770 continue; 1771 } 1772 1773 // Since we temporarily remove pages from the free pages reserve, 1774 // we must make sure we don't cause a violation of the page 1775 // reservation warranty. The following is usually stricter than 1776 // necessary, because we don't have information on how many of the 1777 // reserved pages have already been allocated. 
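		// In short: temporarily reserve up to SCRUB_SIZE pages, like a
		// user-priority allocation would, pull them off the free queue
		// (marking them busy and active so nobody else touches them), clear
		// them without holding the queue lock, then re-queue them as
		// PAGE_STATE_CLEAR and return the reservation.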
1778 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1779 kPageReserveForPriority[VM_PRIORITY_USER]); 1780 if (reserved == 0) 1781 continue; 1782 1783 // get some pages from the free queue 1784 ReadLocker locker(sFreePageQueuesLock); 1785 1786 vm_page *page[SCRUB_SIZE]; 1787 int32 scrubCount = 0; 1788 for (int32 i = 0; i < reserved; i++) { 1789 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1790 if (page[i] == NULL) 1791 break; 1792 1793 DEBUG_PAGE_ACCESS_START(page[i]); 1794 1795 page[i]->SetState(PAGE_STATE_ACTIVE); 1796 page[i]->busy = true; 1797 scrubCount++; 1798 } 1799 1800 locker.Unlock(); 1801 1802 if (scrubCount == 0) { 1803 unreserve_pages(reserved); 1804 continue; 1805 } 1806 1807 TA(ScrubbingPages(scrubCount)); 1808 1809 // clear them 1810 for (int32 i = 0; i < scrubCount; i++) 1811 clear_page(page[i]); 1812 1813 locker.Lock(); 1814 1815 // and put them into the clear queue 1816 for (int32 i = 0; i < scrubCount; i++) { 1817 page[i]->SetState(PAGE_STATE_CLEAR); 1818 page[i]->busy = false; 1819 DEBUG_PAGE_ACCESS_END(page[i]); 1820 sClearPageQueue.PrependUnlocked(page[i]); 1821 } 1822 1823 locker.Unlock(); 1824 1825 unreserve_pages(reserved); 1826 1827 TA(ScrubbedPages(scrubCount)); 1828 } 1829 1830 return 0; 1831 } 1832 1833 1834 static void 1835 init_page_marker(vm_page &marker) 1836 { 1837 marker.SetCacheRef(NULL); 1838 marker.InitState(PAGE_STATE_UNUSED); 1839 marker.busy = true; 1840 #if DEBUG_PAGE_QUEUE 1841 marker.queue = NULL; 1842 #endif 1843 #if DEBUG_PAGE_ACCESS 1844 marker.accessing_thread = thread_get_current_thread_id(); 1845 #endif 1846 } 1847 1848 1849 static void 1850 remove_page_marker(struct vm_page &marker) 1851 { 1852 DEBUG_PAGE_ACCESS_CHECK(&marker); 1853 1854 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1855 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1856 1857 marker.SetState(PAGE_STATE_UNUSED); 1858 } 1859 1860 1861 static vm_page* 1862 next_modified_page(page_num_t& maxPagesToSee) 1863 { 1864 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1865 1866 while (maxPagesToSee > 0) { 1867 vm_page* page = sModifiedPageQueue.Head(); 1868 if (page == NULL) 1869 return NULL; 1870 1871 sModifiedPageQueue.Requeue(page, true); 1872 1873 maxPagesToSee--; 1874 1875 if (!page->busy) 1876 return page; 1877 } 1878 1879 return NULL; 1880 } 1881 1882 1883 // #pragma mark - 1884 1885 1886 class PageWriteTransfer; 1887 class PageWriteWrapper; 1888 1889 1890 class PageWriterRun { 1891 public: 1892 status_t Init(uint32 maxPages); 1893 1894 void PrepareNextRun(); 1895 void AddPage(vm_page* page); 1896 uint32 Go(); 1897 1898 void PageWritten(PageWriteTransfer* transfer, status_t status, 1899 bool partialTransfer, size_t bytesTransferred); 1900 1901 private: 1902 uint32 fMaxPages; 1903 uint32 fWrapperCount; 1904 uint32 fTransferCount; 1905 vint32 fPendingTransfers; 1906 PageWriteWrapper* fWrappers; 1907 PageWriteTransfer* fTransfers; 1908 ConditionVariable fAllFinishedCondition; 1909 }; 1910 1911 1912 class PageWriteTransfer : public AsyncIOCallback { 1913 public: 1914 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1915 bool AddPage(vm_page* page); 1916 1917 status_t Schedule(uint32 flags); 1918 1919 void SetStatus(status_t status, size_t transferred); 1920 1921 status_t Status() const { return fStatus; } 1922 struct VMCache* Cache() const { return fCache; } 1923 uint32 PageCount() const { return fPageCount; } 1924 1925 virtual void IOFinished(status_t status, bool partialTransfer, 1926 generic_size_t bytesTransferred); 1927 private: 1928 
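	// The vecs below describe the physical memory of the pages added so far.
	// AddPage() only accepts pages from the same cache whose cache_offset
	// immediately precedes or follows [fOffset, fOffset + fPageCount);
	// physically contiguous neighbors are merged into an existing vec, all
	// others get a vec of their own, limited by fMaxPages and the fixed
	// number of vecs.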
PageWriterRun* fRun; 1929 struct VMCache* fCache; 1930 off_t fOffset; 1931 uint32 fPageCount; 1932 int32 fMaxPages; 1933 status_t fStatus; 1934 uint32 fVecCount; 1935 generic_io_vec fVecs[32]; // TODO: make dynamic/configurable 1936 }; 1937 1938 1939 class PageWriteWrapper { 1940 public: 1941 PageWriteWrapper(); 1942 ~PageWriteWrapper(); 1943 void SetTo(vm_page* page); 1944 bool Done(status_t result); 1945 1946 private: 1947 vm_page* fPage; 1948 struct VMCache* fCache; 1949 bool fIsActive; 1950 }; 1951 1952 1953 PageWriteWrapper::PageWriteWrapper() 1954 : 1955 fIsActive(false) 1956 { 1957 } 1958 1959 1960 PageWriteWrapper::~PageWriteWrapper() 1961 { 1962 if (fIsActive) 1963 panic("page write wrapper going out of scope but isn't completed"); 1964 } 1965 1966 1967 /*! The page's cache must be locked. 1968 */ 1969 void 1970 PageWriteWrapper::SetTo(vm_page* page) 1971 { 1972 DEBUG_PAGE_ACCESS_CHECK(page); 1973 1974 if (page->busy) 1975 panic("setting page write wrapper to busy page"); 1976 1977 if (fIsActive) 1978 panic("re-setting page write wrapper that isn't completed"); 1979 1980 fPage = page; 1981 fCache = page->Cache(); 1982 fIsActive = true; 1983 1984 fPage->busy = true; 1985 fPage->busy_writing = true; 1986 1987 // We have a modified page -- however, while we're writing it back, 1988 // the page might still be mapped. In order not to lose any changes to the 1989 // page, we mark it clean before actually writing it back; if 1990 // writing the page fails for some reason, we'll just keep it in the 1991 // modified page list, but that should happen only rarely. 1992 1993 // If the page is changed after we cleared the dirty flag, but before we 1994 // had the chance to write it back, then we'll write it again later -- that 1995 // will probably not happen that often, though. 1996 1997 vm_clear_map_flags(fPage, PAGE_MODIFIED); 1998 } 1999 2000 2001 /*! The page's cache must be locked. 2002 The page queues must not be locked. 2003 \return \c true if the page was written successfully respectively could be 2004 handled somehow, \c false otherwise. 2005 */ 2006 bool 2007 PageWriteWrapper::Done(status_t result) 2008 { 2009 if (!fIsActive) 2010 panic("completing page write wrapper that is not active"); 2011 2012 DEBUG_PAGE_ACCESS_START(fPage); 2013 2014 fPage->busy = false; 2015 // Set unbusy and notify later by hand, since we might free the page. 2016 2017 bool success = true; 2018 2019 if (result == B_OK) { 2020 // put it into the active/inactive queue 2021 move_page_to_appropriate_queue(fPage); 2022 fPage->busy_writing = false; 2023 DEBUG_PAGE_ACCESS_END(fPage); 2024 } else { 2025 // Writing the page failed. One reason would be that the cache has been 2026 // shrunk and the page does no longer belong to the file. Otherwise the 2027 // actual I/O failed, in which case we'll simply keep the page modified. 2028 2029 if (!fPage->busy_writing) { 2030 // The busy_writing flag was cleared. That means the cache has been 2031 // shrunk while we were trying to write the page and we have to free 2032 // it now. 2033 vm_remove_all_page_mappings(fPage); 2034 // TODO: Unmapping should already happen when resizing the cache! 2035 fCache->RemovePage(fPage); 2036 free_page(fPage, false); 2037 } else { 2038 // Writing the page failed -- mark the page modified and move it to 2039 // an appropriate queue other than the modified queue, so we don't 2040 // keep trying to write it over and over again. 
We keep 2041 // non-temporary pages in the modified queue, though, so they don't 2042 // get lost in the inactive queue. 2043 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 2044 strerror(result)); 2045 2046 fPage->modified = true; 2047 if (!fCache->temporary) 2048 set_page_state(fPage, PAGE_STATE_MODIFIED); 2049 else if (fPage->IsMapped()) 2050 set_page_state(fPage, PAGE_STATE_ACTIVE); 2051 else 2052 set_page_state(fPage, PAGE_STATE_INACTIVE); 2053 2054 fPage->busy_writing = false; 2055 DEBUG_PAGE_ACCESS_END(fPage); 2056 2057 success = false; 2058 } 2059 } 2060 2061 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 2062 fIsActive = false; 2063 2064 return success; 2065 } 2066 2067 2068 /*! The page's cache must be locked. 2069 */ 2070 void 2071 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 2072 { 2073 fRun = run; 2074 fCache = page->Cache(); 2075 fOffset = page->cache_offset; 2076 fPageCount = 1; 2077 fMaxPages = maxPages; 2078 fStatus = B_OK; 2079 2080 fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2081 fVecs[0].length = B_PAGE_SIZE; 2082 fVecCount = 1; 2083 } 2084 2085 2086 /*! The page's cache must be locked. 2087 */ 2088 bool 2089 PageWriteTransfer::AddPage(vm_page* page) 2090 { 2091 if (page->Cache() != fCache 2092 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 2093 return false; 2094 2095 phys_addr_t nextBase = fVecs[fVecCount - 1].base 2096 + fVecs[fVecCount - 1].length; 2097 2098 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2099 && (off_t)page->cache_offset == fOffset + fPageCount) { 2100 // append to last iovec 2101 fVecs[fVecCount - 1].length += B_PAGE_SIZE; 2102 fPageCount++; 2103 return true; 2104 } 2105 2106 nextBase = fVecs[0].base - B_PAGE_SIZE; 2107 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2108 && (off_t)page->cache_offset == fOffset - 1) { 2109 // prepend to first iovec and adjust offset 2110 fVecs[0].base = nextBase; 2111 fVecs[0].length += B_PAGE_SIZE; 2112 fOffset = page->cache_offset; 2113 fPageCount++; 2114 return true; 2115 } 2116 2117 if (((off_t)page->cache_offset == fOffset + fPageCount 2118 || (off_t)page->cache_offset == fOffset - 1) 2119 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 2120 // not physically contiguous or not in the right order 2121 uint32 vectorIndex; 2122 if ((off_t)page->cache_offset < fOffset) { 2123 // we are pre-pending another vector, move the other vecs 2124 for (uint32 i = fVecCount; i > 0; i--) 2125 fVecs[i] = fVecs[i - 1]; 2126 2127 fOffset = page->cache_offset; 2128 vectorIndex = 0; 2129 } else 2130 vectorIndex = fVecCount; 2131 2132 fVecs[vectorIndex].base 2133 = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2134 fVecs[vectorIndex].length = B_PAGE_SIZE; 2135 2136 fVecCount++; 2137 fPageCount++; 2138 return true; 2139 } 2140 2141 return false; 2142 } 2143 2144 2145 status_t 2146 PageWriteTransfer::Schedule(uint32 flags) 2147 { 2148 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 2149 generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT; 2150 2151 if (fRun != NULL) { 2152 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 2153 flags | B_PHYSICAL_IO_REQUEST, this); 2154 } 2155 2156 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 2157 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 2158 2159 SetStatus(status, writeLength); 2160 return fStatus; 2161 } 2162 2163 2164 void 2165 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 
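// (Illustrative example: with fPageCount == 3 the transfer only counts as
// successful if more than 2 * B_PAGE_SIZE bytes were transferred, i.e. the
// first two pages were written completely and the third at least partially.)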
2166 { 2167 // only succeed if all pages up to the last one have been written fully 2168 // and the last page has at least been written partially 2169 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2170 status = B_ERROR; 2171 2172 fStatus = status; 2173 } 2174 2175 2176 void 2177 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2178 generic_size_t bytesTransferred) 2179 { 2180 SetStatus(status, bytesTransferred); 2181 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2182 } 2183 2184 2185 status_t 2186 PageWriterRun::Init(uint32 maxPages) 2187 { 2188 fMaxPages = maxPages; 2189 fWrapperCount = 0; 2190 fTransferCount = 0; 2191 fPendingTransfers = 0; 2192 2193 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2194 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 2195 if (fWrappers == NULL || fTransfers == NULL) 2196 return B_NO_MEMORY; 2197 2198 return B_OK; 2199 } 2200 2201 2202 void 2203 PageWriterRun::PrepareNextRun() 2204 { 2205 fWrapperCount = 0; 2206 fTransferCount = 0; 2207 fPendingTransfers = 0; 2208 } 2209 2210 2211 /*! The page's cache must be locked. 2212 */ 2213 void 2214 PageWriterRun::AddPage(vm_page* page) 2215 { 2216 fWrappers[fWrapperCount++].SetTo(page); 2217 2218 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2219 fTransfers[fTransferCount++].SetTo(this, page, 2220 page->Cache()->MaxPagesPerAsyncWrite()); 2221 } 2222 } 2223 2224 2225 /*! Writes all pages previously added. 2226 \return The number of pages that could not be written or otherwise handled. 2227 */ 2228 uint32 2229 PageWriterRun::Go() 2230 { 2231 fPendingTransfers = fTransferCount; 2232 2233 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2234 ConditionVariableEntry waitEntry; 2235 fAllFinishedCondition.Add(&waitEntry); 2236 2237 // schedule writes 2238 for (uint32 i = 0; i < fTransferCount; i++) 2239 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2240 2241 // wait until all pages have been written 2242 waitEntry.Wait(); 2243 2244 // mark pages depending on whether they could be written or not 2245 2246 uint32 failedPages = 0; 2247 uint32 wrapperIndex = 0; 2248 for (uint32 i = 0; i < fTransferCount; i++) { 2249 PageWriteTransfer& transfer = fTransfers[i]; 2250 transfer.Cache()->Lock(); 2251 2252 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2253 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2254 failedPages++; 2255 } 2256 2257 transfer.Cache()->Unlock(); 2258 } 2259 2260 ASSERT(wrapperIndex == fWrapperCount); 2261 2262 for (uint32 i = 0; i < fTransferCount; i++) { 2263 PageWriteTransfer& transfer = fTransfers[i]; 2264 struct VMCache* cache = transfer.Cache(); 2265 2266 // We've acquired a references for each page 2267 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2268 // We release the cache references after all pages were made 2269 // unbusy again - otherwise releasing a vnode could deadlock. 2270 cache->ReleaseStoreRef(); 2271 cache->ReleaseRef(); 2272 } 2273 } 2274 2275 return failedPages; 2276 } 2277 2278 2279 void 2280 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2281 bool partialTransfer, size_t bytesTransferred) 2282 { 2283 if (atomic_add(&fPendingTransfers, -1) == 1) 2284 fAllFinishedCondition.NotifyAll(); 2285 } 2286 2287 2288 /*! The page writer continuously takes some pages from the modified 2289 queue, writes them back, and moves them back to the active queue. 
2290 It runs in its own thread, and is only there to keep the number 2291 of modified pages low, so that more pages can be reused with 2292 fewer costs. 2293 */ 2294 status_t 2295 page_writer(void* /*unused*/) 2296 { 2297 const uint32 kNumPages = 256; 2298 #ifdef TRACE_VM_PAGE 2299 uint32 writtenPages = 0; 2300 bigtime_t lastWrittenTime = 0; 2301 bigtime_t pageCollectionTime = 0; 2302 bigtime_t pageWritingTime = 0; 2303 #endif 2304 2305 PageWriterRun run; 2306 if (run.Init(kNumPages) != B_OK) { 2307 panic("page writer: Failed to init PageWriterRun!"); 2308 return B_ERROR; 2309 } 2310 2311 page_num_t pagesSinceLastSuccessfulWrite = 0; 2312 2313 while (true) { 2314 // TODO: Maybe wait shorter when memory is low! 2315 if (sModifiedPageQueue.Count() < kNumPages) { 2316 sPageWriterCondition.Wait(3000000, true); 2317 // all 3 seconds when no one triggers us 2318 } 2319 2320 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2321 if (modifiedPages == 0) 2322 continue; 2323 2324 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2325 // We ran through the whole queue without being able to write a 2326 // single page. Take a break. 2327 snooze(500000); 2328 pagesSinceLastSuccessfulWrite = 0; 2329 } 2330 2331 #if ENABLE_SWAP_SUPPORT 2332 page_stats pageStats; 2333 get_page_stats(pageStats); 2334 bool activePaging = do_active_paging(pageStats); 2335 #endif 2336 2337 // depending on how urgent it becomes to get pages to disk, we adjust 2338 // our I/O priority 2339 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2340 int32 ioPriority = B_IDLE_PRIORITY; 2341 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2342 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2343 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2344 } else { 2345 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2346 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2347 } 2348 2349 thread_set_io_priority(ioPriority); 2350 2351 uint32 numPages = 0; 2352 run.PrepareNextRun(); 2353 2354 // TODO: make this laptop friendly, too (ie. only start doing 2355 // something if someone else did something or there is really 2356 // enough to do). 2357 2358 // collect pages to be written 2359 #ifdef TRACE_VM_PAGE 2360 pageCollectionTime -= system_time(); 2361 #endif 2362 2363 page_num_t maxPagesToSee = modifiedPages; 2364 2365 while (numPages < kNumPages && maxPagesToSee > 0) { 2366 vm_page *page = next_modified_page(maxPagesToSee); 2367 if (page == NULL) 2368 break; 2369 2370 PageCacheLocker cacheLocker(page, false); 2371 if (!cacheLocker.IsLocked()) 2372 continue; 2373 2374 VMCache *cache = page->Cache(); 2375 2376 // If the page is busy or its state has changed while we were 2377 // locking the cache, just ignore it. 2378 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2379 continue; 2380 2381 DEBUG_PAGE_ACCESS_START(page); 2382 2383 // Don't write back wired (locked) pages. 2384 if (page->WiredCount() > 0) { 2385 set_page_state(page, PAGE_STATE_ACTIVE); 2386 DEBUG_PAGE_ACCESS_END(page); 2387 continue; 2388 } 2389 2390 // Write back temporary pages only when we're actively paging. 2391 if (cache->temporary 2392 #if ENABLE_SWAP_SUPPORT 2393 && (!activePaging 2394 || !cache->CanWritePage( 2395 (off_t)page->cache_offset << PAGE_SHIFT)) 2396 #endif 2397 ) { 2398 // We can't/don't want to do anything with this page, so move it 2399 // to one of the other queues. 
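// A page that is no longer mapped anywhere can simply age in the
// inactive queue until the page daemon reclaims it; a page that is
// still mapped belongs to some working set and therefore goes back
// to the active queue.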
2400 if (page->mappings.IsEmpty()) 2401 set_page_state(page, PAGE_STATE_INACTIVE); 2402 else 2403 set_page_state(page, PAGE_STATE_ACTIVE); 2404 2405 DEBUG_PAGE_ACCESS_END(page); 2406 continue; 2407 } 2408 2409 // We need our own reference to the store, as it might currently be 2410 // destroyed. 2411 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2412 DEBUG_PAGE_ACCESS_END(page); 2413 cacheLocker.Unlock(); 2414 thread_yield(true); 2415 continue; 2416 } 2417 2418 run.AddPage(page); 2419 // TODO: We're possibly adding pages of different caches and 2420 // thus maybe of different underlying file systems here. This 2421 // is a potential problem for loop file systems/devices, since 2422 // we could mark a page busy that would need to be accessed 2423 // when writing back another page, thus causing a deadlock. 2424 2425 DEBUG_PAGE_ACCESS_END(page); 2426 2427 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2428 TPW(WritePage(page)); 2429 2430 cache->AcquireRefLocked(); 2431 numPages++; 2432 } 2433 2434 #ifdef TRACE_VM_PAGE 2435 pageCollectionTime += system_time(); 2436 #endif 2437 if (numPages == 0) 2438 continue; 2439 2440 // write pages to disk and do all the cleanup 2441 #ifdef TRACE_VM_PAGE 2442 pageWritingTime -= system_time(); 2443 #endif 2444 uint32 failedPages = run.Go(); 2445 #ifdef TRACE_VM_PAGE 2446 pageWritingTime += system_time(); 2447 2448 // debug output only... 2449 writtenPages += numPages; 2450 if (writtenPages >= 1024) { 2451 bigtime_t now = system_time(); 2452 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2453 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2454 (now - lastWrittenTime) / 1000, 2455 pageCollectionTime / 1000, pageWritingTime / 1000)); 2456 lastWrittenTime = now; 2457 2458 writtenPages -= 1024; 2459 pageCollectionTime = 0; 2460 pageWritingTime = 0; 2461 } 2462 #endif 2463 2464 if (failedPages == numPages) 2465 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2466 else 2467 pagesSinceLastSuccessfulWrite = 0; 2468 } 2469 2470 return B_OK; 2471 } 2472 2473 2474 // #pragma mark - 2475 2476 2477 // TODO: This should be done in the page daemon! 2478 #if 0 2479 #if ENABLE_SWAP_SUPPORT 2480 static bool 2481 free_page_swap_space(int32 index) 2482 { 2483 vm_page *page = vm_page_at_index(index); 2484 PageCacheLocker locker(page); 2485 if (!locker.IsLocked()) 2486 return false; 2487 2488 DEBUG_PAGE_ACCESS_START(page); 2489 2490 VMCache* cache = page->Cache(); 2491 if (cache->temporary && page->WiredCount() == 0 2492 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2493 && page->usage_count > 0) { 2494 // TODO: how to judge a page is highly active? 2495 if (swap_free_page_swap_space(page)) { 2496 // We need to mark the page modified, since otherwise it could be 2497 // stolen and we'd lose its data. 
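// Once its swap slot is gone, the in-memory copy is the only copy of
// the data, so the page must pass through the modified queue (and be
// written out again by the page writer) before it may be reclaimed.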
2498 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2499 TD(FreedPageSwap(page)); 2500 DEBUG_PAGE_ACCESS_END(page); 2501 return true; 2502 } 2503 } 2504 DEBUG_PAGE_ACCESS_END(page); 2505 return false; 2506 } 2507 #endif 2508 #endif // 0 2509 2510 2511 static vm_page * 2512 find_cached_page_candidate(struct vm_page &marker) 2513 { 2514 DEBUG_PAGE_ACCESS_CHECK(&marker); 2515 2516 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2517 vm_page *page; 2518 2519 if (marker.State() == PAGE_STATE_UNUSED) { 2520 // Get the first free pages of the (in)active queue 2521 page = sCachedPageQueue.Head(); 2522 } else { 2523 // Get the next page of the current queue 2524 if (marker.State() != PAGE_STATE_CACHED) { 2525 panic("invalid marker %p state", &marker); 2526 return NULL; 2527 } 2528 2529 page = sCachedPageQueue.Next(&marker); 2530 sCachedPageQueue.Remove(&marker); 2531 marker.SetState(PAGE_STATE_UNUSED); 2532 } 2533 2534 while (page != NULL) { 2535 if (!page->busy) { 2536 // we found a candidate, insert marker 2537 marker.SetState(PAGE_STATE_CACHED); 2538 sCachedPageQueue.InsertAfter(page, &marker); 2539 return page; 2540 } 2541 2542 page = sCachedPageQueue.Next(page); 2543 } 2544 2545 return NULL; 2546 } 2547 2548 2549 static bool 2550 free_cached_page(vm_page *page, bool dontWait) 2551 { 2552 // try to lock the page's cache 2553 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2554 return false; 2555 VMCache* cache = page->Cache(); 2556 2557 AutoLocker<VMCache> cacheLocker(cache, true); 2558 MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked); 2559 2560 // check again if that page is still a candidate 2561 if (page->busy || page->State() != PAGE_STATE_CACHED) 2562 return false; 2563 2564 DEBUG_PAGE_ACCESS_START(page); 2565 2566 PAGE_ASSERT(page, !page->IsMapped()); 2567 PAGE_ASSERT(page, !page->modified); 2568 2569 // we can now steal this page 2570 2571 cache->RemovePage(page); 2572 // Now the page doesn't have cache anymore, so no one else (e.g. 2573 // vm_page_allocate_page_run() can pick it up), since they would be 2574 // required to lock the cache first, which would fail. 2575 2576 sCachedPageQueue.RemoveUnlocked(page); 2577 return true; 2578 } 2579 2580 2581 static uint32 2582 free_cached_pages(uint32 pagesToFree, bool dontWait) 2583 { 2584 vm_page marker; 2585 init_page_marker(marker); 2586 2587 uint32 pagesFreed = 0; 2588 2589 while (pagesFreed < pagesToFree) { 2590 vm_page *page = find_cached_page_candidate(marker); 2591 if (page == NULL) 2592 break; 2593 2594 if (free_cached_page(page, dontWait)) { 2595 ReadLocker locker(sFreePageQueuesLock); 2596 page->SetState(PAGE_STATE_FREE); 2597 DEBUG_PAGE_ACCESS_END(page); 2598 sFreePageQueue.PrependUnlocked(page); 2599 locker.Unlock(); 2600 2601 TA(StolenPage()); 2602 2603 pagesFreed++; 2604 } 2605 } 2606 2607 remove_page_marker(marker); 2608 2609 return pagesFreed; 2610 } 2611 2612 2613 static void 2614 idle_scan_active_pages(page_stats& pageStats) 2615 { 2616 VMPageQueue& queue = sActivePageQueue; 2617 2618 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2619 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2620 2621 while (maxToScan > 0) { 2622 maxToScan--; 2623 2624 // Get the next page. Note that we don't bother to lock here. We go with 2625 // the assumption that on all architectures reading/writing pointers is 2626 // atomic. Beyond that it doesn't really matter. We have to unlock the 2627 // queue anyway to lock the page's cache, and we'll recheck afterwards. 
2628 vm_page* page = queue.Head(); 2629 if (page == NULL) 2630 break; 2631 2632 // lock the page's cache 2633 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2634 if (cache == NULL) 2635 continue; 2636 2637 if (page->State() != PAGE_STATE_ACTIVE) { 2638 // page is no longer in the cache or in this queue 2639 cache->ReleaseRefAndUnlock(); 2640 continue; 2641 } 2642 2643 if (page->busy) { 2644 // page is busy -- requeue at the end 2645 vm_page_requeue(page, true); 2646 cache->ReleaseRefAndUnlock(); 2647 continue; 2648 } 2649 2650 DEBUG_PAGE_ACCESS_START(page); 2651 2652 // Get the page active/modified flags and update the page's usage count. 2653 // We completely unmap inactive temporary pages. This saves us to 2654 // iterate through the inactive list as well, since we'll be notified 2655 // via page fault whenever such an inactive page is used again. 2656 // We don't remove the mappings of non-temporary pages, since we 2657 // wouldn't notice when those would become unused and could thus be 2658 // moved to the cached list. 2659 int32 usageCount; 2660 if (page->WiredCount() > 0 || page->usage_count > 0 2661 || !cache->temporary) { 2662 usageCount = vm_clear_page_mapping_accessed_flags(page); 2663 } else 2664 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2665 2666 if (usageCount > 0) { 2667 usageCount += page->usage_count + kPageUsageAdvance; 2668 if (usageCount > kPageUsageMax) 2669 usageCount = kPageUsageMax; 2670 // TODO: This would probably also be the place to reclaim swap space. 2671 } else { 2672 usageCount += page->usage_count - (int32)kPageUsageDecline; 2673 if (usageCount < 0) { 2674 usageCount = 0; 2675 set_page_state(page, PAGE_STATE_INACTIVE); 2676 } 2677 } 2678 2679 page->usage_count = usageCount; 2680 2681 DEBUG_PAGE_ACCESS_END(page); 2682 2683 cache->ReleaseRefAndUnlock(); 2684 } 2685 } 2686 2687 2688 static void 2689 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel) 2690 { 2691 int32 pagesToFree = pageStats.unsatisfiedReservations 2692 + sFreeOrCachedPagesTarget 2693 - (pageStats.totalFreePages + pageStats.cachedPages); 2694 if (pagesToFree <= 0) 2695 return; 2696 2697 bigtime_t time = system_time(); 2698 uint32 pagesScanned = 0; 2699 uint32 pagesToCached = 0; 2700 uint32 pagesToModified = 0; 2701 uint32 pagesToActive = 0; 2702 2703 // Determine how many pages at maximum to send to the modified queue. Since 2704 // it is relatively expensive to page out pages, we do that on a grander 2705 // scale only when things get desperate. 2706 uint32 maxToFlush = despairLevel <= 1 ? 
32 : 10000; 2707 2708 vm_page marker; 2709 init_page_marker(marker); 2710 2711 VMPageQueue& queue = sInactivePageQueue; 2712 InterruptsSpinLocker queueLocker(queue.GetLock()); 2713 uint32 maxToScan = queue.Count(); 2714 2715 vm_page* nextPage = queue.Head(); 2716 2717 while (pagesToFree > 0 && maxToScan > 0) { 2718 maxToScan--; 2719 2720 // get the next page 2721 vm_page* page = nextPage; 2722 if (page == NULL) 2723 break; 2724 nextPage = queue.Next(page); 2725 2726 if (page->busy) 2727 continue; 2728 2729 // mark the position 2730 queue.InsertAfter(page, &marker); 2731 queueLocker.Unlock(); 2732 2733 // lock the page's cache 2734 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2735 if (cache == NULL || page->busy 2736 || page->State() != PAGE_STATE_INACTIVE) { 2737 if (cache != NULL) 2738 cache->ReleaseRefAndUnlock(); 2739 queueLocker.Lock(); 2740 nextPage = queue.Next(&marker); 2741 queue.Remove(&marker); 2742 continue; 2743 } 2744 2745 pagesScanned++; 2746 2747 DEBUG_PAGE_ACCESS_START(page); 2748 2749 // Get the accessed count, clear the accessed/modified flags and 2750 // unmap the page, if it hasn't been accessed. 2751 int32 usageCount; 2752 if (page->WiredCount() > 0) 2753 usageCount = vm_clear_page_mapping_accessed_flags(page); 2754 else 2755 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2756 2757 // update usage count 2758 if (usageCount > 0) { 2759 usageCount += page->usage_count + kPageUsageAdvance; 2760 if (usageCount > kPageUsageMax) 2761 usageCount = kPageUsageMax; 2762 } else { 2763 usageCount += page->usage_count - (int32)kPageUsageDecline; 2764 if (usageCount < 0) 2765 usageCount = 0; 2766 } 2767 2768 page->usage_count = usageCount; 2769 2770 // Move to fitting queue or requeue: 2771 // * Active mapped pages go to the active queue. 2772 // * Inactive mapped (i.e. wired) pages are requeued. 2773 // * The remaining pages are cachable. Thus, if unmodified they go to 2774 // the cached queue, otherwise to the modified queue (up to a limit). 2775 // Note that until in the idle scanning we don't exempt pages of 2776 // temporary caches. Apparently we really need memory, so we better 2777 // page out memory as well. 
2778 bool isMapped = page->IsMapped(); 2779 if (usageCount > 0) { 2780 if (isMapped) { 2781 set_page_state(page, PAGE_STATE_ACTIVE); 2782 pagesToActive++; 2783 } else 2784 vm_page_requeue(page, true); 2785 } else if (isMapped) { 2786 vm_page_requeue(page, true); 2787 } else if (!page->modified) { 2788 set_page_state(page, PAGE_STATE_CACHED); 2789 pagesToFree--; 2790 pagesToCached++; 2791 } else if (maxToFlush > 0) { 2792 set_page_state(page, PAGE_STATE_MODIFIED); 2793 maxToFlush--; 2794 pagesToModified++; 2795 } else 2796 vm_page_requeue(page, true); 2797 2798 DEBUG_PAGE_ACCESS_END(page); 2799 2800 cache->ReleaseRefAndUnlock(); 2801 2802 // remove the marker 2803 queueLocker.Lock(); 2804 nextPage = queue.Next(&marker); 2805 queue.Remove(&marker); 2806 } 2807 2808 queueLocker.Unlock(); 2809 2810 time = system_time() - time; 2811 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2812 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2813 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2814 pagesToModified, pagesToActive); 2815 2816 // wake up the page writer, if we tossed it some pages 2817 if (pagesToModified > 0) 2818 sPageWriterCondition.WakeUp(); 2819 } 2820 2821 2822 static void 2823 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2824 { 2825 vm_page marker; 2826 init_page_marker(marker); 2827 2828 VMPageQueue& queue = sActivePageQueue; 2829 InterruptsSpinLocker queueLocker(queue.GetLock()); 2830 uint32 maxToScan = queue.Count(); 2831 2832 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2833 + sFreeOrCachedPagesTarget 2834 - (pageStats.totalFreePages + pageStats.cachedPages) 2835 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2836 if (pagesToDeactivate <= 0) 2837 return; 2838 2839 bigtime_t time = system_time(); 2840 uint32 pagesAccessed = 0; 2841 uint32 pagesToInactive = 0; 2842 uint32 pagesScanned = 0; 2843 2844 vm_page* nextPage = queue.Head(); 2845 2846 while (pagesToDeactivate > 0 && maxToScan > 0) { 2847 maxToScan--; 2848 2849 // get the next page 2850 vm_page* page = nextPage; 2851 if (page == NULL) 2852 break; 2853 nextPage = queue.Next(page); 2854 2855 if (page->busy) 2856 continue; 2857 2858 // mark the position 2859 queue.InsertAfter(page, &marker); 2860 queueLocker.Unlock(); 2861 2862 // lock the page's cache 2863 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2864 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2865 if (cache != NULL) 2866 cache->ReleaseRefAndUnlock(); 2867 queueLocker.Lock(); 2868 nextPage = queue.Next(&marker); 2869 queue.Remove(&marker); 2870 continue; 2871 } 2872 2873 pagesScanned++; 2874 2875 DEBUG_PAGE_ACCESS_START(page); 2876 2877 // Get the page active/modified flags and update the page's usage count. 2878 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2879 2880 if (usageCount > 0) { 2881 usageCount += page->usage_count + kPageUsageAdvance; 2882 if (usageCount > kPageUsageMax) 2883 usageCount = kPageUsageMax; 2884 pagesAccessed++; 2885 // TODO: This would probably also be the place to reclaim swap space. 
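// Example: a page whose mappings yielded an accessed count of 2 and whose
// previous usage_count was 10 ends up with
// min(2 + 10 + kPageUsageAdvance, kPageUsageMax) as its new usage_count.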
2886 } else { 2887 usageCount += page->usage_count - (int32)kPageUsageDecline; 2888 if (usageCount <= 0) { 2889 usageCount = 0; 2890 set_page_state(page, PAGE_STATE_INACTIVE); 2891 pagesToInactive++; 2892 } 2893 } 2894 2895 page->usage_count = usageCount; 2896 2897 DEBUG_PAGE_ACCESS_END(page); 2898 2899 cache->ReleaseRefAndUnlock(); 2900 2901 // remove the marker 2902 queueLocker.Lock(); 2903 nextPage = queue.Next(&marker); 2904 queue.Remove(&marker); 2905 } 2906 2907 time = system_time() - time; 2908 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2909 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed" 2910 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed); 2911 } 2912 2913 2914 static void 2915 page_daemon_idle_scan(page_stats& pageStats) 2916 { 2917 TRACE_DAEMON("page daemon: idle run\n"); 2918 2919 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2920 // We want more actually free pages, so free some from the cached 2921 // ones. 2922 uint32 freed = free_cached_pages( 2923 sFreePagesTarget - pageStats.totalFreePages, false); 2924 if (freed > 0) 2925 unreserve_pages(freed); 2926 get_page_stats(pageStats); 2927 } 2928 2929 // Walk the active list and move pages to the inactive queue. 2930 get_page_stats(pageStats); 2931 idle_scan_active_pages(pageStats); 2932 } 2933 2934 2935 static void 2936 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2937 { 2938 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %" 2939 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages, 2940 pageStats.cachedPages, pageStats.unsatisfiedReservations 2941 + sFreeOrCachedPagesTarget 2942 - (pageStats.totalFreePages + pageStats.cachedPages)); 2943 2944 // Walk the inactive list and transfer pages to the cached and modified 2945 // queues. 2946 full_scan_inactive_pages(pageStats, despairLevel); 2947 2948 // Free cached pages. Also wake up reservation waiters. 2949 get_page_stats(pageStats); 2950 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2951 - (pageStats.totalFreePages); 2952 if (pagesToFree > 0) { 2953 uint32 freed = free_cached_pages(pagesToFree, true); 2954 if (freed > 0) 2955 unreserve_pages(freed); 2956 } 2957 2958 // Walk the active list and move pages to the inactive queue. 2959 get_page_stats(pageStats); 2960 full_scan_active_pages(pageStats, despairLevel); 2961 } 2962 2963 2964 static status_t 2965 page_daemon(void* /*unused*/) 2966 { 2967 int32 despairLevel = 0; 2968 2969 while (true) { 2970 sPageDaemonCondition.ClearActivated(); 2971 2972 // evaluate the free pages situation 2973 page_stats pageStats; 2974 get_page_stats(pageStats); 2975 2976 if (!do_active_paging(pageStats)) { 2977 // Things look good -- just maintain statistics and keep the pool 2978 // of actually free pages full enough. 2979 despairLevel = 0; 2980 page_daemon_idle_scan(pageStats); 2981 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 2982 } else { 2983 // Not enough free pages. We need to do some real work. 2984 despairLevel = std::max(despairLevel + 1, (int32)3); 2985 page_daemon_full_scan(pageStats, despairLevel); 2986 2987 // Don't wait after the first full scan, but rather immediately 2988 // check whether we were successful in freeing enough pages and 2989 // re-run with increased despair level. The first scan is 2990 // conservative with respect to moving inactive modified pages to 2991 // the modified list to avoid thrashing. The second scan, however, 2992 // will not hold back. 
2993 if (despairLevel > 1) 2994 snooze(kBusyScanWaitInterval); 2995 } 2996 } 2997 2998 return B_OK; 2999 } 3000 3001 3002 /*! Returns how many pages could *not* be reserved. 3003 */ 3004 static uint32 3005 reserve_pages(uint32 count, int priority, bool dontWait) 3006 { 3007 int32 dontTouch = kPageReserveForPriority[priority]; 3008 3009 while (true) { 3010 count -= reserve_some_pages(count, dontTouch); 3011 if (count == 0) 3012 return 0; 3013 3014 if (sUnsatisfiedPageReservations == 0) { 3015 count -= free_cached_pages(count, dontWait); 3016 if (count == 0) 3017 return count; 3018 } 3019 3020 if (dontWait) 3021 return count; 3022 3023 // we need to wait for pages to become available 3024 3025 MutexLocker pageDeficitLocker(sPageDeficitLock); 3026 3027 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3028 sUnsatisfiedPageReservations += count; 3029 3030 if (sUnreservedFreePages > dontTouch) { 3031 // the situation changed 3032 sUnsatisfiedPageReservations -= count; 3033 continue; 3034 } 3035 3036 PageReservationWaiter waiter; 3037 waiter.dontTouch = dontTouch; 3038 waiter.missing = count; 3039 waiter.thread = thread_get_current_thread(); 3040 waiter.threadPriority = waiter.thread->priority; 3041 3042 // insert ordered (i.e. after all waiters with higher or equal priority) 3043 PageReservationWaiter* otherWaiter = NULL; 3044 for (PageReservationWaiterList::Iterator it 3045 = sPageReservationWaiters.GetIterator(); 3046 (otherWaiter = it.Next()) != NULL;) { 3047 if (waiter < *otherWaiter) 3048 break; 3049 } 3050 3051 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3052 3053 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3054 "waiting for pages"); 3055 3056 if (notifyDaemon) 3057 sPageDaemonCondition.WakeUp(); 3058 3059 pageDeficitLocker.Unlock(); 3060 3061 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3062 thread_block(); 3063 3064 pageDeficitLocker.Lock(); 3065 3066 return 0; 3067 } 3068 } 3069 3070 3071 // #pragma mark - private kernel API 3072 3073 3074 /*! Writes a range of modified pages of a cache to disk. 3075 You need to hold the VMCache lock when calling this function. 3076 Note that the cache lock is released in this function. 3077 \param cache The cache. 3078 \param firstPage Offset (in page size units) of the first page in the range. 3079 \param endPage End offset (in page size units) of the page range. The page 3080 at this offset is not included. 
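A typical call writes back all modified pages of a locked cache; an
illustrative sketch (this is what vm_page_write_modified_pages() below
does for the whole cache):
\code
cache->Lock();
vm_page_write_modified_page_range(cache, 0,
(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
cache->Unlock();
\endcode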
3081 */ 3082 status_t 3083 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3084 uint32 endPage) 3085 { 3086 static const int32 kMaxPages = 256; 3087 int32 maxPages = cache->MaxPagesPerWrite(); 3088 if (maxPages < 0 || maxPages > kMaxPages) 3089 maxPages = kMaxPages; 3090 3091 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3092 | HEAP_DONT_LOCK_KERNEL_SPACE; 3093 3094 PageWriteWrapper stackWrappers[2]; 3095 PageWriteWrapper* wrapperPool 3096 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3097 if (wrapperPool == NULL) { 3098 // don't fail, just limit our capabilities 3099 wrapperPool = stackWrappers; 3100 maxPages = 1; 3101 } 3102 3103 int32 nextWrapper = 0; 3104 3105 PageWriteWrapper* wrappers[maxPages]; 3106 int32 usedWrappers = 0; 3107 3108 PageWriteTransfer transfer; 3109 bool transferEmpty = true; 3110 3111 VMCachePagesTree::Iterator it 3112 = cache->pages.GetIterator(firstPage, true, true); 3113 3114 while (true) { 3115 vm_page* page = it.Next(); 3116 if (page == NULL || page->cache_offset >= endPage) { 3117 if (transferEmpty) 3118 break; 3119 3120 page = NULL; 3121 } 3122 3123 if (page != NULL) { 3124 if (page->busy 3125 || (page->State() != PAGE_STATE_MODIFIED 3126 && !vm_test_map_modification(page))) { 3127 page = NULL; 3128 } 3129 } 3130 3131 PageWriteWrapper* wrapper = NULL; 3132 if (page != NULL) { 3133 wrapper = &wrapperPool[nextWrapper++]; 3134 if (nextWrapper > maxPages) 3135 nextWrapper = 0; 3136 3137 DEBUG_PAGE_ACCESS_START(page); 3138 3139 wrapper->SetTo(page); 3140 3141 if (transferEmpty || transfer.AddPage(page)) { 3142 if (transferEmpty) { 3143 transfer.SetTo(NULL, page, maxPages); 3144 transferEmpty = false; 3145 } 3146 3147 DEBUG_PAGE_ACCESS_END(page); 3148 3149 wrappers[usedWrappers++] = wrapper; 3150 continue; 3151 } 3152 3153 DEBUG_PAGE_ACCESS_END(page); 3154 } 3155 3156 if (transferEmpty) 3157 continue; 3158 3159 cache->Unlock(); 3160 status_t status = transfer.Schedule(0); 3161 cache->Lock(); 3162 3163 for (int32 i = 0; i < usedWrappers; i++) 3164 wrappers[i]->Done(status); 3165 3166 usedWrappers = 0; 3167 3168 if (page != NULL) { 3169 transfer.SetTo(NULL, page, maxPages); 3170 wrappers[usedWrappers++] = wrapper; 3171 } else 3172 transferEmpty = true; 3173 } 3174 3175 if (wrapperPool != stackWrappers) 3176 delete[] wrapperPool; 3177 3178 return B_OK; 3179 } 3180 3181 3182 /*! You need to hold the VMCache lock when calling this function. 3183 Note that the cache lock is released in this function. 3184 */ 3185 status_t 3186 vm_page_write_modified_pages(VMCache *cache) 3187 { 3188 return vm_page_write_modified_page_range(cache, 0, 3189 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3190 } 3191 3192 3193 /*! Schedules the page writer to write back the specified \a page. 3194 Note, however, that it might not do this immediately, and it can well 3195 take several seconds until the page is actually written out. 3196 */ 3197 void 3198 vm_page_schedule_write_page(vm_page *page) 3199 { 3200 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3201 3202 vm_page_requeue(page, false); 3203 3204 sPageWriterCondition.WakeUp(); 3205 } 3206 3207 3208 /*! Cache must be locked. 
3209 */ 3210 void 3211 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3212 uint32 endPage) 3213 { 3214 uint32 modified = 0; 3215 for (VMCachePagesTree::Iterator it 3216 = cache->pages.GetIterator(firstPage, true, true); 3217 vm_page *page = it.Next();) { 3218 if (page->cache_offset >= endPage) 3219 break; 3220 3221 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3222 DEBUG_PAGE_ACCESS_START(page); 3223 vm_page_requeue(page, false); 3224 modified++; 3225 DEBUG_PAGE_ACCESS_END(page); 3226 } 3227 } 3228 3229 if (modified > 0) 3230 sPageWriterCondition.WakeUp(); 3231 } 3232 3233 3234 void 3235 vm_page_init_num_pages(kernel_args *args) 3236 { 3237 // calculate the size of memory by looking at the physical_memory_range array 3238 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3239 page_num_t physicalPagesEnd = sPhysicalPageOffset 3240 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3241 3242 sNonExistingPages = 0; 3243 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3244 3245 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3246 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3247 if (start > physicalPagesEnd) 3248 sNonExistingPages += start - physicalPagesEnd; 3249 physicalPagesEnd = start 3250 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3251 3252 #ifdef LIMIT_AVAILABLE_MEMORY 3253 page_num_t available 3254 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3255 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3256 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3257 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3258 break; 3259 } 3260 #endif 3261 } 3262 3263 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3264 sPhysicalPageOffset, physicalPagesEnd)); 3265 3266 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3267 } 3268 3269 3270 status_t 3271 vm_page_init(kernel_args *args) 3272 { 3273 TRACE(("vm_page_init: entry\n")); 3274 3275 // init page queues 3276 sModifiedPageQueue.Init("modified pages queue"); 3277 sInactivePageQueue.Init("inactive pages queue"); 3278 sActivePageQueue.Init("active pages queue"); 3279 sCachedPageQueue.Init("cached pages queue"); 3280 sFreePageQueue.Init("free pages queue"); 3281 sClearPageQueue.Init("clear pages queue"); 3282 3283 new (&sPageReservationWaiters) PageReservationWaiterList; 3284 3285 // map in the new free page table 3286 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3287 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3288 3289 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3290 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3291 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3292 3293 // initialize the free page table 3294 for (uint32 i = 0; i < sNumPages; i++) { 3295 sPages[i].Init(sPhysicalPageOffset + i); 3296 sFreePageQueue.Append(&sPages[i]); 3297 3298 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3299 sPages[i].allocation_tracking_info.Clear(); 3300 #endif 3301 } 3302 3303 sUnreservedFreePages = sNumPages; 3304 3305 TRACE(("initialized table\n")); 3306 3307 // mark the ranges between usable physical memory unused 3308 phys_addr_t previousEnd = 0; 3309 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3310 phys_addr_t base = args->physical_memory_range[i].start; 3311 phys_size_t size = args->physical_memory_range[i].size; 3312 if (base > previousEnd) { 3313 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3314 (base - previousEnd) / B_PAGE_SIZE, false); 3315 } 3316 previousEnd = base + size; 3317 } 3318 3319 // mark the allocated physical page ranges wired 3320 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3321 mark_page_range_in_use( 3322 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3323 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3324 } 3325 3326 // The target of actually free pages. This must be at least the system 3327 // reserve, but should be a few more pages, so we don't have to extract 3328 // a cached page with each allocation. 3329 sFreePagesTarget = VM_PAGE_RESERVE_USER 3330 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3331 3332 // The target of free + cached and inactive pages. On low-memory machines 3333 // keep things tight. free + cached is the pool of immediately allocatable 3334 // pages. We want a few inactive pages, so when we're actually paging, we 3335 // have a reasonably large set of pages to work with. 3336 if (sUnreservedFreePages < 16 * 1024) { 3337 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3338 sInactivePagesTarget = sFreePagesTarget / 3; 3339 } else { 3340 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3341 sInactivePagesTarget = sFreePagesTarget / 2; 3342 } 3343 3344 TRACE(("vm_page_init: exit\n")); 3345 3346 return B_OK; 3347 } 3348 3349 3350 status_t 3351 vm_page_init_post_area(kernel_args *args) 3352 { 3353 void *dummy; 3354 3355 dummy = sPages; 3356 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3357 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3358 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3359 3360 add_debugger_command("page_stats", &dump_page_stats, 3361 "Dump statistics about page usage"); 3362 add_debugger_command_etc("page", &dump_page, 3363 "Dump page info", 3364 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3365 "Prints information for the physical page. If neither \"-p\" nor\n" 3366 "\"-v\" are given, the provided address is interpreted as address of\n" 3367 "the vm_page data structure for the page in question. If \"-p\" is\n" 3368 "given, the address is the physical address of the page. If \"-v\" is\n" 3369 "given, the address is interpreted as virtual address in the current\n" 3370 "thread's address space and for the page it is mapped to (if any)\n" 3371 "information are printed. 
If \"-m\" is specified, the command will\n" 3372 "search all known address spaces for mappings to that page and print\n" 3373 "them.\n", 0); 3374 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3375 add_debugger_command("find_page", &find_page, 3376 "Find out which queue a page is actually in"); 3377 3378 #ifdef TRACK_PAGE_USAGE_STATS 3379 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3380 "Dumps statistics about page usage counts", 3381 "\n" 3382 "Dumps statistics about page usage counts.\n", 3383 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3384 #endif 3385 3386 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3387 add_debugger_command_etc("page_allocations_per_caller", 3388 &dump_page_allocations_per_caller, 3389 "Dump current page allocations summed up per caller", 3390 "[ -d <caller> ] [ -r ]\n" 3391 "The current allocations will by summed up by caller (their count)\n" 3392 "printed in decreasing order by count.\n" 3393 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3394 "including the respective stack trace.\n" 3395 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3396 "the information, so the next command invocation will only show the\n" 3397 "allocations made after the reset.\n", 0); 3398 add_debugger_command_etc("page_allocation_infos", 3399 &dump_page_allocation_infos, 3400 "Dump current page allocations", 3401 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3402 "[ --thread <thread ID> ]\n" 3403 "The current allocations filtered by optional values will be printed.\n" 3404 "The optional \"-p\" page number filters for a specific page,\n" 3405 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3406 "and/or threads can be filtered (these only work if a corresponding\n" 3407 "tracing entry is still available).\n" 3408 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3409 "callers are printed, where available\n", 0); 3410 #endif 3411 3412 return B_OK; 3413 } 3414 3415 3416 status_t 3417 vm_page_init_post_thread(kernel_args *args) 3418 { 3419 new (&sFreePageCondition) ConditionVariable; 3420 sFreePageCondition.Publish(&sFreePageQueue, "free page"); 3421 3422 // create a kernel thread to clear out pages 3423 3424 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3425 B_LOWEST_ACTIVE_PRIORITY, NULL); 3426 resume_thread(thread); 3427 3428 // start page writer 3429 3430 sPageWriterCondition.Init("page writer"); 3431 3432 thread = spawn_kernel_thread(&page_writer, "page writer", 3433 B_NORMAL_PRIORITY + 1, NULL); 3434 resume_thread(thread); 3435 3436 // start page daemon 3437 3438 sPageDaemonCondition.Init("page daemon"); 3439 3440 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3441 B_NORMAL_PRIORITY, NULL); 3442 resume_thread(thread); 3443 3444 return B_OK; 3445 } 3446 3447 3448 status_t 3449 vm_mark_page_inuse(page_num_t page) 3450 { 3451 return vm_mark_page_range_inuse(page, 1); 3452 } 3453 3454 3455 status_t 3456 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3457 { 3458 return mark_page_range_in_use(startPage, length, false); 3459 } 3460 3461 3462 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3463 */ 3464 void 3465 vm_page_unreserve_pages(vm_page_reservation* reservation) 3466 { 3467 uint32 count = reservation->count; 3468 reservation->count = 0; 3469 3470 if (count == 0) 3471 return; 3472 3473 TA(UnreservePages(count)); 3474 3475 unreserve_pages(count); 3476 } 3477 3478 3479 /*! 
With this call, you can reserve a number of free pages in the system. 3480 They will only be handed out to someone who has actually reserved them. 3481 This call returns as soon as the number of requested pages has been 3482 reached. 3483 The caller must not hold any cache lock or the function might deadlock. 3484 */ 3485 void 3486 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3487 int priority) 3488 { 3489 reservation->count = count; 3490 3491 if (count == 0) 3492 return; 3493 3494 TA(ReservePages(count)); 3495 3496 reserve_pages(count, priority, false); 3497 } 3498 3499 3500 bool 3501 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3502 int priority) 3503 { 3504 if (count == 0) { 3505 reservation->count = count; 3506 return true; 3507 } 3508 3509 uint32 remaining = reserve_pages(count, priority, true); 3510 if (remaining == 0) { 3511 TA(ReservePages(count)); 3512 reservation->count = count; 3513 return true; 3514 } 3515 3516 unreserve_pages(count - remaining); 3517 3518 return false; 3519 } 3520 3521 3522 vm_page * 3523 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3524 { 3525 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3526 ASSERT(pageState != PAGE_STATE_FREE); 3527 ASSERT(pageState != PAGE_STATE_CLEAR); 3528 3529 ASSERT(reservation->count > 0); 3530 reservation->count--; 3531 3532 VMPageQueue* queue; 3533 VMPageQueue* otherQueue; 3534 3535 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3536 queue = &sClearPageQueue; 3537 otherQueue = &sFreePageQueue; 3538 } else { 3539 queue = &sFreePageQueue; 3540 otherQueue = &sClearPageQueue; 3541 } 3542 3543 ReadLocker locker(sFreePageQueuesLock); 3544 3545 vm_page* page = queue->RemoveHeadUnlocked(); 3546 if (page == NULL) { 3547 // if the primary queue was empty, grab the page from the 3548 // secondary queue 3549 page = otherQueue->RemoveHeadUnlocked(); 3550 3551 if (page == NULL) { 3552 // Unlikely, but possible: the page we have reserved has moved 3553 // between the queues after we checked the first queue. Grab the 3554 // write locker to make sure this doesn't happen again. 
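// Holding the write lock lets us check both queues atomically, so a
// reserved page can no longer slip from one queue to the other between
// the two checks below.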
3555 locker.Unlock();
3556 WriteLocker writeLocker(sFreePageQueuesLock);
3557
3558 page = queue->RemoveHead();
3559 if (page == NULL)
3560 page = otherQueue->RemoveHead();
3561
3562 if (page == NULL) {
3563 panic("Had reserved page, but there is none!");
3564 return NULL;
3565 }
3566
3567 // downgrade to read lock
3568 locker.Lock();
3569 }
3570 }
3571
3572 if (page->CacheRef() != NULL)
3573 panic("supposed to be free page %p has cache\n", page);
3574
3575 DEBUG_PAGE_ACCESS_START(page);
3576
3577 int oldPageState = page->State();
3578 page->SetState(pageState);
3579 page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3580 page->usage_count = 0;
3581 page->accessed = false;
3582 page->modified = false;
3583
3584 locker.Unlock();
3585
3586 if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3587 sPageQueues[pageState].AppendUnlocked(page);
3588
3589 // clear the page, if we had to take it from the free queue and a clear
3590 // page was requested
3591 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3592 clear_page(page);
3593
3594 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3595 page->allocation_tracking_info.Init(
3596 TA(AllocatePage(page->physical_page_number)));
3597 #else
3598 TA(AllocatePage(page->physical_page_number));
3599 #endif
3600
3601 return page;
3602 }
3603
3604
3605 static void
3606 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3607 VMPageQueue::PageList& clearPages)
3608 {
3609 while (vm_page* page = freePages.RemoveHead()) {
3610 page->busy = false;
3611 page->SetState(PAGE_STATE_FREE);
3612 DEBUG_PAGE_ACCESS_END(page);
3613 sFreePageQueue.PrependUnlocked(page);
3614 }
3615
3616 while (vm_page* page = clearPages.RemoveHead()) {
3617 page->busy = false;
3618 page->SetState(PAGE_STATE_CLEAR);
3619 DEBUG_PAGE_ACCESS_END(page);
3620 sClearPageQueue.PrependUnlocked(page);
3621 }
3622 }
3623
3624
3625 /*! Tries to allocate a contiguous run of \a length pages starting at
3626 index \a start.
3627
3628 The caller must have write-locked the free/clear page queues. The function
3629 will unlock regardless of whether it succeeds or fails.
3630
3631 If the function fails, it cleans up after itself, i.e. it will free all
3632 pages it managed to allocate.
3633
3634 \param start The start index (into \c sPages) of the run.
3635 \param length The number of pages to allocate.
3636 \param flags Page allocation flags. Encodes the state the function shall
3637 set the allocated pages to, whether the pages shall be marked busy
3638 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3639 (VM_PAGE_ALLOC_CLEAR).
3640 \param freeClearQueueLocker WriteLocker for the free/clear page
3641 queues, passed in locked state. Will be unlocked by the function.
3642 \return The index of the first page that could not be allocated. \a length
3643 is returned when the function was successful.
3644 */
3645 static page_num_t
3646 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3647 WriteLocker& freeClearQueueLocker)
3648 {
3649 uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3650 ASSERT(pageState != PAGE_STATE_FREE);
3651 ASSERT(pageState != PAGE_STATE_CLEAR);
3652 ASSERT(start + length <= sNumPages);
3653
3654 // Pull the free/clear pages out of their respective queues. Cached pages
3655 // are allocated later.
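// The run is put together in three phases: 1. pages that are already
// free or clear are pulled out of their queues right here (still under
// the write lock), 2. pages that are merely cached are stolen from their
// caches via free_cached_page(), 3. if requested, the whole run is
// cleared and finally appended to the target queue.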
3656 page_num_t cachedPages = 0; 3657 VMPageQueue::PageList freePages; 3658 VMPageQueue::PageList clearPages; 3659 page_num_t i = 0; 3660 for (; i < length; i++) { 3661 bool pageAllocated = true; 3662 bool noPage = false; 3663 vm_page& page = sPages[start + i]; 3664 switch (page.State()) { 3665 case PAGE_STATE_CLEAR: 3666 DEBUG_PAGE_ACCESS_START(&page); 3667 sClearPageQueue.Remove(&page); 3668 clearPages.Add(&page); 3669 break; 3670 case PAGE_STATE_FREE: 3671 DEBUG_PAGE_ACCESS_START(&page); 3672 sFreePageQueue.Remove(&page); 3673 freePages.Add(&page); 3674 break; 3675 case PAGE_STATE_CACHED: 3676 // We allocate cached pages later. 3677 cachedPages++; 3678 pageAllocated = false; 3679 break; 3680 3681 default: 3682 // Probably a page was cached when our caller checked. Now it's 3683 // gone and we have to abort. 3684 noPage = true; 3685 break; 3686 } 3687 3688 if (noPage) 3689 break; 3690 3691 if (pageAllocated) { 3692 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3693 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3694 page.usage_count = 0; 3695 page.accessed = false; 3696 page.modified = false; 3697 } 3698 } 3699 3700 if (i < length) { 3701 // failed to allocate a page -- free all that we've got 3702 allocate_page_run_cleanup(freePages, clearPages); 3703 return i; 3704 } 3705 3706 freeClearQueueLocker.Unlock(); 3707 3708 if (cachedPages > 0) { 3709 // allocate the pages that weren't free but cached 3710 page_num_t freedCachedPages = 0; 3711 page_num_t nextIndex = start; 3712 vm_page* freePage = freePages.Head(); 3713 vm_page* clearPage = clearPages.Head(); 3714 while (cachedPages > 0) { 3715 // skip, if we've already got the page 3716 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3717 freePage = freePages.GetNext(freePage); 3718 nextIndex++; 3719 continue; 3720 } 3721 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3722 clearPage = clearPages.GetNext(clearPage); 3723 nextIndex++; 3724 continue; 3725 } 3726 3727 // free the page, if it is still cached 3728 vm_page& page = sPages[nextIndex]; 3729 if (!free_cached_page(&page, false)) { 3730 // TODO: if the page turns out to have been freed already, 3731 // there would be no need to fail 3732 break; 3733 } 3734 3735 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3736 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3737 page.usage_count = 0; 3738 page.accessed = false; 3739 page.modified = false; 3740 3741 freePages.InsertBefore(freePage, &page); 3742 freedCachedPages++; 3743 cachedPages--; 3744 nextIndex++; 3745 } 3746 3747 // If we have freed cached pages, we need to balance things. 3748 if (freedCachedPages > 0) 3749 unreserve_pages(freedCachedPages); 3750 3751 if (nextIndex - start < length) { 3752 // failed to allocate all cached pages -- free all that we've got 3753 freeClearQueueLocker.Lock(); 3754 allocate_page_run_cleanup(freePages, clearPages); 3755 freeClearQueueLocker.Unlock(); 3756 3757 return nextIndex - start; 3758 } 3759 } 3760 3761 // clear pages, if requested 3762 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3763 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3764 vm_page* page = it.Next();) { 3765 clear_page(page); 3766 } 3767 } 3768 3769 // add pages to target queue 3770 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3771 freePages.MoveFrom(&clearPages); 3772 sPageQueues[pageState].AppendUnlocked(freePages, length); 3773 } 3774 3775 // Note: We don't unreserve the pages since we pulled them out of the 3776 // free/clear queues without adjusting sUnreservedFreePages. 
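// The stolen cached pages above are the exception: free_cached_page()
// turned them into free pages without touching sUnreservedFreePages,
// which is why unreserve_pages(freedCachedPages) was called for them.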
3777 3778 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3779 AbstractTraceEntryWithStackTrace* traceEntry 3780 = TA(AllocatePageRun(start, length)); 3781 3782 for (page_num_t i = start; i < start + length; i++) 3783 sPages[i].allocation_tracking_info.Init(traceEntry); 3784 #else 3785 TA(AllocatePageRun(start, length)); 3786 #endif 3787 3788 return length; 3789 } 3790 3791 3792 /*! Allocate a physically contiguous range of pages. 3793 3794 \param flags Page allocation flags. Encodes the state the function shall 3795 set the allocated pages to, whether the pages shall be marked busy 3796 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3797 (VM_PAGE_ALLOC_CLEAR). 3798 \param length The number of contiguous pages to allocate. 3799 \param restrictions Restrictions to the physical addresses of the page run 3800 to allocate, including \c low_address, the first acceptable physical 3801 address where the page run may start, \c high_address, the last 3802 acceptable physical address where the page run may end (i.e. it must 3803 hold \code runStartAddress + length <= high_address \endcode), 3804 \c alignment, the alignment of the page run start address, and 3805 \c boundary, multiples of which the page run must not cross. 3806 Values set to \c 0 are ignored. 3807 \param priority The page reservation priority (as passed to 3808 vm_page_reserve_pages()). 3809 \return The first page of the allocated page run on success; \c NULL 3810 when the allocation failed. 3811 */ 3812 vm_page* 3813 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3814 const physical_address_restrictions* restrictions, int priority) 3815 { 3816 // compute start and end page index 3817 page_num_t requestedStart 3818 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3819 - sPhysicalPageOffset; 3820 page_num_t start = requestedStart; 3821 page_num_t end; 3822 if (restrictions->high_address > 0) { 3823 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3824 sPhysicalPageOffset) 3825 - sPhysicalPageOffset; 3826 end = std::min(end, sNumPages); 3827 } else 3828 end = sNumPages; 3829 3830 // compute alignment mask 3831 page_num_t alignmentMask 3832 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3833 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3834 // alignment must be a power of 2 3835 3836 // compute the boundary mask 3837 uint32 boundaryMask = 0; 3838 if (restrictions->boundary != 0) { 3839 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3840 // boundary must be a power of two and not less than alignment and 3841 // length 3842 ASSERT(((boundary - 1) & boundary) == 0); 3843 ASSERT(boundary >= alignmentMask + 1); 3844 ASSERT(boundary >= length); 3845 3846 boundaryMask = -boundary; 3847 } 3848 3849 vm_page_reservation reservation; 3850 vm_page_reserve_pages(&reservation, length, priority); 3851 3852 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3853 3854 // First we try to get a run with free pages only. If that fails, we also 3855 // consider cached pages. If there are only few free pages and many cached 3856 // ones, the odds are that we won't find enough contiguous ones, so we skip 3857 // the first iteration in this case. 3858 int32 freePages = sUnreservedFreePages; 3859 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3860 3861 for (;;) { 3862 if (alignmentMask != 0 || boundaryMask != 0) { 3863 page_num_t offsetStart = start + sPhysicalPageOffset; 3864 3865 // enforce alignment 3866 if ((offsetStart & alignmentMask) != 0) 3867 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3868 3869 // enforce boundary 3870 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3871 + length - 1)) & boundaryMask) != 0) { 3872 offsetStart = (offsetStart + length - 1) & boundaryMask; 3873 } 3874 3875 start = offsetStart - sPhysicalPageOffset; 3876 } 3877 3878 if (start + length > end) { 3879 if (useCached == 0) { 3880 // The first iteration with free pages only was unsuccessful. 3881 // Try again also considering cached pages. 3882 useCached = 1; 3883 start = requestedStart; 3884 continue; 3885 } 3886 3887 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3888 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3889 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3890 " boundary: %" B_PRIuPHYSADDR ") !", length, requestedStart, 3891 end, restrictions->alignment, restrictions->boundary); 3892 3893 freeClearQueueLocker.Unlock(); 3894 vm_page_unreserve_pages(&reservation); 3895 return NULL; 3896 } 3897 3898 bool foundRun = true; 3899 page_num_t i; 3900 for (i = 0; i < length; i++) { 3901 uint32 pageState = sPages[start + i].State(); 3902 if (pageState != PAGE_STATE_FREE 3903 && pageState != PAGE_STATE_CLEAR 3904 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3905 foundRun = false; 3906 break; 3907 } 3908 } 3909 3910 if (foundRun) { 3911 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3912 if (i == length) 3913 return &sPages[start]; 3914 3915 // apparently a cached page couldn't be allocated -- skip it and 3916 // continue 3917 freeClearQueueLocker.Lock(); 3918 } 3919 3920 start += i + 1; 3921 } 3922 } 3923 3924 3925 vm_page * 3926 vm_page_at_index(int32 index) 3927 { 3928 return &sPages[index]; 3929 } 3930 3931 3932 vm_page * 3933 vm_lookup_page(page_num_t pageNumber) 3934 { 3935 if (pageNumber < sPhysicalPageOffset) 3936 return NULL; 3937 3938 pageNumber -= sPhysicalPageOffset; 3939 if (pageNumber >= sNumPages) 3940 return NULL; 3941 3942 return &sPages[pageNumber]; 3943 } 3944 3945 3946 bool 3947 vm_page_is_dummy(struct vm_page *page) 3948 { 3949 return page < sPages || page >= sPages + sNumPages; 3950 } 3951 3952 3953 /*! Free the page that belonged to a certain cache. 3954 You can use vm_page_set_state() manually if you prefer, but only 3955 if the page does not equal PAGE_STATE_MODIFIED. 3956 */ 3957 void 3958 vm_page_free(VMCache *cache, vm_page *page) 3959 { 3960 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3961 && page->State() != PAGE_STATE_CLEAR); 3962 3963 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 3964 atomic_add(&sModifiedTemporaryPages, -1); 3965 3966 free_page(page, false); 3967 } 3968 3969 3970 void 3971 vm_page_set_state(vm_page *page, int pageState) 3972 { 3973 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3974 && page->State() != PAGE_STATE_CLEAR); 3975 3976 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) 3977 free_page(page, pageState == PAGE_STATE_CLEAR); 3978 else 3979 set_page_state(page, pageState); 3980 } 3981 3982 3983 /*! Moves a page to either the tail of the head of its current queue, 3984 depending on \a tail. 3985 The page must have a cache and the cache must be locked! 
3986 */ 3987 void 3988 vm_page_requeue(struct vm_page *page, bool tail) 3989 { 3990 PAGE_ASSERT(page, page->Cache() != NULL); 3991 page->Cache()->AssertLocked(); 3992 // DEBUG_PAGE_ACCESS_CHECK(page); 3993 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 3994 // when it requeues busy pages. The reason is that vm_soft_fault() 3995 // (respectively fault_get_page()) and the file cache keep newly 3996 // allocated pages accessed while they are reading them from disk. It 3997 // would probably be better to change that code and reenable this 3998 // check. 3999 4000 VMPageQueue *queue = NULL; 4001 4002 switch (page->State()) { 4003 case PAGE_STATE_ACTIVE: 4004 queue = &sActivePageQueue; 4005 break; 4006 case PAGE_STATE_INACTIVE: 4007 queue = &sInactivePageQueue; 4008 break; 4009 case PAGE_STATE_MODIFIED: 4010 queue = &sModifiedPageQueue; 4011 break; 4012 case PAGE_STATE_CACHED: 4013 queue = &sCachedPageQueue; 4014 break; 4015 case PAGE_STATE_FREE: 4016 case PAGE_STATE_CLEAR: 4017 panic("vm_page_requeue() called for free/clear page %p", page); 4018 return; 4019 case PAGE_STATE_WIRED: 4020 case PAGE_STATE_UNUSED: 4021 return; 4022 default: 4023 panic("vm_page_touch: vm_page %p in invalid state %d\n", 4024 page, page->State()); 4025 break; 4026 } 4027 4028 queue->RequeueUnlocked(page, tail); 4029 } 4030 4031 4032 page_num_t 4033 vm_page_num_pages(void) 4034 { 4035 return sNumPages - sNonExistingPages; 4036 } 4037 4038 4039 /*! There is a subtle distinction between the page counts returned by 4040 this function and vm_page_num_free_pages(): 4041 The latter returns the number of pages that are completely uncommitted, 4042 whereas this one returns the number of pages that are available for 4043 use by being reclaimed as well (IOW it factors in things like cache pages 4044 as available). 4045 */ 4046 page_num_t 4047 vm_page_num_available_pages(void) 4048 { 4049 return vm_available_memory() / B_PAGE_SIZE; 4050 } 4051 4052 4053 page_num_t 4054 vm_page_num_free_pages(void) 4055 { 4056 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 4057 return count > 0 ? count : 0; 4058 } 4059 4060 4061 page_num_t 4062 vm_page_num_unused_pages(void) 4063 { 4064 int32 count = sUnreservedFreePages; 4065 return count > 0 ? count : 0; 4066 } 4067 4068 4069 void 4070 vm_page_get_stats(system_info *info) 4071 { 4072 // Note: there's no locking protecting any of the queues or counters here, 4073 // so we run the risk of getting bogus values when evaluating them 4074 // throughout this function. As these stats are for informational purposes 4075 // only, it is not really worth introducing such locking. Therefore we just 4076 // ensure that we don't under- or overflow any of the values. 4077 4078 // The pages used for the block cache buffers. Those should not be counted 4079 // as used but as cached pages. 4080 // TODO: We should subtract the blocks that are in use ATM, since those 4081 // can't really be freed in a low memory situation. 4082 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE; 4083 4084 // Non-temporary modified pages are special as they represent pages that 4085 // can be written back, so they could be freed if necessary, for us 4086 // basically making them into cached pages with a higher overhead. The 4087 // modified queue count is therefore split into temporary and non-temporary 4088 // counts that are then added to the corresponding number. 
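// For example, if the modified queue currently holds 300 pages of which
// 120 belong to temporary caches, the 180 non-temporary ones are counted
// as cached below, the 120 temporary ones as used.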
4089 page_num_t modifiedNonTemporaryPages 4090 = (sModifiedPageQueue.Count() - sModifiedTemporaryPages); 4091 4092 info->max_pages = vm_page_num_pages(); 4093 info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages 4094 + blockCachePages; 4095 4096 // max_pages is composed of: 4097 // active + inactive + unused + wired + modified + cached + free + clear 4098 // So taking out the cached (including modified non-temporary), free and 4099 // clear ones leaves us with all used pages. 4100 int32 subtractPages = info->cached_pages + sFreePageQueue.Count() 4101 + sClearPageQueue.Count(); 4102 info->used_pages = subtractPages > info->max_pages 4103 ? 0 : info->max_pages - subtractPages; 4104 4105 if (info->used_pages + info->cached_pages > info->max_pages) { 4106 // Something was shuffled around while we were summing up the counts. 4107 // Make the values sane, preferring the worse case of more used pages. 4108 info->cached_pages = info->max_pages - info->used_pages; 4109 } 4110 4111 info->page_faults = vm_num_page_faults(); 4112 info->ignored_pages = sIgnoredPages; 4113 4114 // TODO: We don't consider pages used for page directories/tables yet. 4115 } 4116 4117 4118 /*! Returns the greatest address within the last page of accessible physical 4119 memory. 4120 The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff 4121 means the that the last page ends at exactly 4 GB. 4122 */ 4123 phys_addr_t 4124 vm_page_max_address() 4125 { 4126 return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1; 4127 } 4128 4129 4130 RANGE_MARKER_FUNCTION_END(vm_page) 4131
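// The following block is an illustrative usage sketch only, not part of the
// original implementation: it shows how the reservation and allocation
// interfaces above fit together. The example_* function names and the
// concrete sizes are made up; VM_PRIORITY_SYSTEM is assumed to be the
// "system" reservation priority declared in the VM headers.
#if 0

// Reserve, allocate, and later free a single zeroed page.
static void
example_allocate_single_page()
{
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
		// may block until a page becomes available

	vm_page* page = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		// consumes one reserved page and cannot fail

	vm_page_unreserve_pages(&reservation);
		// return whatever is left of the reservation (nothing here)

	// ... map and use page->physical_page_number ...

	vm_page_set_state(page, PAGE_STATE_FREE);
		// hand the page back to the free pool
}


// Allocate a physically contiguous 64 KiB run below 16 MB that does not
// cross a 64 KiB boundary (the classic ISA DMA constraints).
static void
example_allocate_contiguous_run()
{
	physical_address_restrictions restrictions = {};
	restrictions.high_address = 16 * 1024 * 1024;
	restrictions.boundary = 64 * 1024;

	vm_page* firstPage = vm_page_allocate_page_run(PAGE_STATE_WIRED,
		64 * 1024 / B_PAGE_SIZE, &restrictions, VM_PRIORITY_SYSTEM);
	if (firstPage == NULL)
		return;

	// the pages of a run are consecutive in sPages; free them when done
	for (page_num_t i = 0; i < 64 * 1024 / B_PAGE_SIZE; i++)
		vm_page_set_state(firstPage + i, PAGE_STATE_FREE);
}

#endif	// 0 - usage sketches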