1 /* 2 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <string.h> 12 #include <stdlib.h> 13 14 #include <algorithm> 15 16 #include <KernelExport.h> 17 #include <OS.h> 18 19 #include <AutoDeleter.h> 20 21 #include <arch/cpu.h> 22 #include <arch/vm_translation_map.h> 23 #include <block_cache.h> 24 #include <boot/kernel_args.h> 25 #include <condition_variable.h> 26 #include <elf.h> 27 #include <heap.h> 28 #include <kernel.h> 29 #include <low_resource_manager.h> 30 #include <thread.h> 31 #include <tracing.h> 32 #include <util/AutoLock.h> 33 #include <vfs.h> 34 #include <vm/vm.h> 35 #include <vm/vm_priv.h> 36 #include <vm/vm_page.h> 37 #include <vm/VMAddressSpace.h> 38 #include <vm/VMArea.h> 39 #include <vm/VMCache.h> 40 41 #include "IORequest.h" 42 #include "PageCacheLocker.h" 43 #include "VMAnonymousCache.h" 44 #include "VMPageQueue.h" 45 46 47 //#define TRACE_VM_PAGE 48 #ifdef TRACE_VM_PAGE 49 # define TRACE(x) dprintf x 50 #else 51 # define TRACE(x) ; 52 #endif 53 54 //#define TRACE_VM_DAEMONS 55 #ifdef TRACE_VM_DAEMONS 56 #define TRACE_DAEMON(x...) dprintf(x) 57 #else 58 #define TRACE_DAEMON(x...) do {} while (false) 59 #endif 60 61 //#define TRACK_PAGE_USAGE_STATS 1 62 63 #define PAGE_ASSERT(page, condition) \ 64 ASSERT_PRINT((condition), "page: %p", (page)) 65 66 #define SCRUB_SIZE 32 67 // this many pages will be cleared at once in the page scrubber thread 68 69 #define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY 70 // maximum I/O priority of the page writer 71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000 72 // the maximum I/O priority shall be reached when this many pages need to 73 // be written 74 75 76 // The page reserve an allocation of the certain priority must not touch. 77 static const size_t kPageReserveForPriority[] = { 78 VM_PAGE_RESERVE_USER, // user 79 VM_PAGE_RESERVE_SYSTEM, // system 80 0 // VIP 81 }; 82 83 // Minimum number of free pages the page daemon will try to achieve. 84 static uint32 sFreePagesTarget; 85 static uint32 sFreeOrCachedPagesTarget; 86 static uint32 sInactivePagesTarget; 87 88 // Wait interval between page daemon runs. 89 static const bigtime_t kIdleScanWaitInterval = 1000000LL; // 1 sec 90 static const bigtime_t kBusyScanWaitInterval = 500000LL; // 0.5 sec 91 92 // Number of idle runs after which we want to have processed the full active 93 // queue. 94 static const uint32 kIdleRunsForFullQueue = 20; 95 96 // Maximum limit for the vm_page::usage_count. 97 static const int32 kPageUsageMax = 64; 98 // vm_page::usage_count buff an accessed page receives in a scan. 99 static const int32 kPageUsageAdvance = 3; 100 // vm_page::usage_count debuff an unaccessed page receives in a scan. 
101 static const int32 kPageUsageDecline = 1; 102 103 int32 gMappedPagesCount; 104 105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 106 107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 113 114 static vm_page *sPages; 115 static page_num_t sPhysicalPageOffset; 116 static page_num_t sNumPages; 117 static page_num_t sNonExistingPages; 118 // pages in the sPages array that aren't backed by physical memory 119 static uint64 sIgnoredPages; 120 // pages of physical memory ignored by the boot loader (and thus not 121 // available here) 122 static int32 sUnreservedFreePages; 123 static int32 sUnsatisfiedPageReservations; 124 static int32 sModifiedTemporaryPages; 125 126 static ConditionVariable sFreePageCondition; 127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 128 129 // This lock must be used whenever the free or clear page queues are changed. 130 // If you need to work on both queues at the same time, you need to hold a write 131 // lock, otherwise, a read lock suffices (each queue still has a spinlock to 132 // guard against concurrent changes). 133 static rw_lock sFreePageQueuesLock 134 = RW_LOCK_INITIALIZER("free/clear page queues"); 135 136 #ifdef TRACK_PAGE_USAGE_STATS 137 static page_num_t sPageUsageArrays[512]; 138 static page_num_t* sPageUsage = sPageUsageArrays; 139 static page_num_t sPageUsagePageCount; 140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 141 static page_num_t sNextPageUsagePageCount; 142 #endif 143 144 145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 146 147 struct caller_info { 148 addr_t caller; 149 size_t count; 150 }; 151 152 static const int32 kCallerInfoTableSize = 1024; 153 static caller_info sCallerInfoTable[kCallerInfoTableSize]; 154 static int32 sCallerInfoCount = 0; 155 156 static caller_info* get_caller_info(addr_t caller); 157 158 159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page) 160 161 static const addr_t kVMPageCodeAddressRange[] = { 162 RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page) 163 }; 164 165 #endif 166 167 168 RANGE_MARKER_FUNCTION_BEGIN(vm_page) 169 170 171 struct page_stats { 172 int32 totalFreePages; 173 int32 unsatisfiedReservations; 174 int32 cachedPages; 175 }; 176 177 178 struct PageReservationWaiter 179 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 180 Thread* thread; 181 uint32 dontTouch; // reserve not to touch 182 uint32 missing; // pages missing for the reservation 183 int32 threadPriority; 184 185 bool operator<(const PageReservationWaiter& other) const 186 { 187 // Implies an order by descending VM priority (ascending dontTouch) 188 // and (secondarily) descending thread priority. 
189 if (dontTouch != other.dontTouch) 190 return dontTouch < other.dontTouch; 191 return threadPriority > other.threadPriority; 192 } 193 }; 194 195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 196 static PageReservationWaiterList sPageReservationWaiters; 197 198 199 struct DaemonCondition { 200 void Init(const char* name) 201 { 202 mutex_init(&fLock, "daemon condition"); 203 fCondition.Init(this, name); 204 fActivated = false; 205 } 206 207 bool Lock() 208 { 209 return mutex_lock(&fLock) == B_OK; 210 } 211 212 void Unlock() 213 { 214 mutex_unlock(&fLock); 215 } 216 217 bool Wait(bigtime_t timeout, bool clearActivated) 218 { 219 MutexLocker locker(fLock); 220 if (clearActivated) 221 fActivated = false; 222 else if (fActivated) 223 return true; 224 225 ConditionVariableEntry entry; 226 fCondition.Add(&entry); 227 228 locker.Unlock(); 229 230 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 231 } 232 233 void WakeUp() 234 { 235 if (fActivated) 236 return; 237 238 MutexLocker locker(fLock); 239 fActivated = true; 240 fCondition.NotifyOne(); 241 } 242 243 void ClearActivated() 244 { 245 MutexLocker locker(fLock); 246 fActivated = false; 247 } 248 249 private: 250 mutex fLock; 251 ConditionVariable fCondition; 252 bool fActivated; 253 }; 254 255 256 static DaemonCondition sPageWriterCondition; 257 static DaemonCondition sPageDaemonCondition; 258 259 260 #if PAGE_ALLOCATION_TRACING 261 262 namespace PageAllocationTracing { 263 264 class ReservePages : public AbstractTraceEntry { 265 public: 266 ReservePages(uint32 count) 267 : 268 fCount(count) 269 { 270 Initialized(); 271 } 272 273 virtual void AddDump(TraceOutput& out) 274 { 275 out.Print("page reserve: %" B_PRIu32, fCount); 276 } 277 278 private: 279 uint32 fCount; 280 }; 281 282 283 class UnreservePages : public AbstractTraceEntry { 284 public: 285 UnreservePages(uint32 count) 286 : 287 fCount(count) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("page unreserve: %" B_PRId32, fCount); 295 } 296 297 private: 298 uint32 fCount; 299 }; 300 301 302 class AllocatePage 303 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 304 public: 305 AllocatePage(page_num_t pageNumber) 306 : 307 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 308 fPageNumber(pageNumber) 309 { 310 Initialized(); 311 } 312 313 virtual void AddDump(TraceOutput& out) 314 { 315 out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber); 316 } 317 318 private: 319 page_num_t fPageNumber; 320 }; 321 322 323 class AllocatePageRun 324 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 325 public: 326 AllocatePageRun(page_num_t startPage, uint32 length) 327 : 328 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 329 fStartPage(startPage), 330 fLength(length) 331 { 332 Initialized(); 333 } 334 335 virtual void AddDump(TraceOutput& out) 336 { 337 out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %" 338 B_PRIu32, fStartPage, fLength); 339 } 340 341 private: 342 page_num_t fStartPage; 343 uint32 fLength; 344 }; 345 346 347 class FreePage 348 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 349 public: 350 FreePage(page_num_t pageNumber) 351 : 352 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 353 fPageNumber(pageNumber) 354 { 355 Initialized(); 356 } 357 358 virtual void AddDump(TraceOutput& out) 359 { 360 out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber); 361 } 362 363 private: 364 page_num_t 
fPageNumber; 365 }; 366 367 368 class ScrubbingPages : public AbstractTraceEntry { 369 public: 370 ScrubbingPages(uint32 count) 371 : 372 fCount(count) 373 { 374 Initialized(); 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("page scrubbing: %" B_PRId32, fCount); 380 } 381 382 private: 383 uint32 fCount; 384 }; 385 386 387 class ScrubbedPages : public AbstractTraceEntry { 388 public: 389 ScrubbedPages(uint32 count) 390 : 391 fCount(count) 392 { 393 Initialized(); 394 } 395 396 virtual void AddDump(TraceOutput& out) 397 { 398 out.Print("page scrubbed: %" B_PRId32, fCount); 399 } 400 401 private: 402 uint32 fCount; 403 }; 404 405 406 class StolenPage : public AbstractTraceEntry { 407 public: 408 StolenPage() 409 { 410 Initialized(); 411 } 412 413 virtual void AddDump(TraceOutput& out) 414 { 415 out.Print("page stolen"); 416 } 417 }; 418 419 } // namespace PageAllocationTracing 420 421 # define TA(x) new(std::nothrow) PageAllocationTracing::x 422 423 #else 424 # define TA(x) 425 #endif // PAGE_ALLOCATION_TRACING 426 427 428 #if PAGE_DAEMON_TRACING 429 430 namespace PageDaemonTracing { 431 432 class ActivatePage : public AbstractTraceEntry { 433 public: 434 ActivatePage(vm_page* page) 435 : 436 fCache(page->cache), 437 fPage(page) 438 { 439 Initialized(); 440 } 441 442 virtual void AddDump(TraceOutput& out) 443 { 444 out.Print("page activated: %p, cache: %p", fPage, fCache); 445 } 446 447 private: 448 VMCache* fCache; 449 vm_page* fPage; 450 }; 451 452 453 class DeactivatePage : public AbstractTraceEntry { 454 public: 455 DeactivatePage(vm_page* page) 456 : 457 fCache(page->cache), 458 fPage(page) 459 { 460 Initialized(); 461 } 462 463 virtual void AddDump(TraceOutput& out) 464 { 465 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 466 } 467 468 private: 469 VMCache* fCache; 470 vm_page* fPage; 471 }; 472 473 474 class FreedPageSwap : public AbstractTraceEntry { 475 public: 476 FreedPageSwap(vm_page* page) 477 : 478 fCache(page->cache), 479 fPage(page) 480 { 481 Initialized(); 482 } 483 484 virtual void AddDump(TraceOutput& out) 485 { 486 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 487 } 488 489 private: 490 VMCache* fCache; 491 vm_page* fPage; 492 }; 493 494 } // namespace PageDaemonTracing 495 496 # define TD(x) new(std::nothrow) PageDaemonTracing::x 497 498 #else 499 # define TD(x) 500 #endif // PAGE_DAEMON_TRACING 501 502 503 #if PAGE_WRITER_TRACING 504 505 namespace PageWriterTracing { 506 507 class WritePage : public AbstractTraceEntry { 508 public: 509 WritePage(vm_page* page) 510 : 511 fCache(page->Cache()), 512 fPage(page) 513 { 514 Initialized(); 515 } 516 517 virtual void AddDump(TraceOutput& out) 518 { 519 out.Print("page write: %p, cache: %p", fPage, fCache); 520 } 521 522 private: 523 VMCache* fCache; 524 vm_page* fPage; 525 }; 526 527 } // namespace PageWriterTracing 528 529 # define TPW(x) new(std::nothrow) PageWriterTracing::x 530 531 #else 532 # define TPW(x) 533 #endif // PAGE_WRITER_TRACING 534 535 536 #if PAGE_STATE_TRACING 537 538 namespace PageStateTracing { 539 540 class SetPageState : public AbstractTraceEntry { 541 public: 542 SetPageState(vm_page* page, uint8 newState) 543 : 544 fPage(page), 545 fOldState(page->State()), 546 fNewState(newState), 547 fBusy(page->busy), 548 fWired(page->WiredCount() > 0), 549 fMapped(!page->mappings.IsEmpty()), 550 fAccessed(page->accessed), 551 fModified(page->modified) 552 { 553 #if PAGE_STATE_TRACING_STACK_TRACE 554 fStackTrace = capture_tracing_stack_trace( 555 
PAGE_STATE_TRACING_STACK_TRACE, 0, true); 556 // Don't capture userland stack trace to avoid potential 557 // deadlocks. 558 #endif 559 Initialized(); 560 } 561 562 #if PAGE_STATE_TRACING_STACK_TRACE 563 virtual void DumpStackTrace(TraceOutput& out) 564 { 565 out.PrintStackTrace(fStackTrace); 566 } 567 #endif 568 569 virtual void AddDump(TraceOutput& out) 570 { 571 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 572 fBusy ? 'b' : '-', 573 fWired ? 'w' : '-', 574 fMapped ? 'm' : '-', 575 fAccessed ? 'a' : '-', 576 fModified ? 'm' : '-', 577 page_state_to_string(fOldState), 578 page_state_to_string(fNewState)); 579 } 580 581 private: 582 vm_page* fPage; 583 #if PAGE_STATE_TRACING_STACK_TRACE 584 tracing_stack_trace* fStackTrace; 585 #endif 586 uint8 fOldState; 587 uint8 fNewState; 588 bool fBusy : 1; 589 bool fWired : 1; 590 bool fMapped : 1; 591 bool fAccessed : 1; 592 bool fModified : 1; 593 }; 594 595 } // namespace PageStateTracing 596 597 # define TPS(x) new(std::nothrow) PageStateTracing::x 598 599 #else 600 # define TPS(x) 601 #endif // PAGE_STATE_TRACING 602 603 604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 605 606 namespace BKernel { 607 608 class AllocationTrackingCallback { 609 public: 610 virtual ~AllocationTrackingCallback(); 611 612 virtual bool ProcessTrackingInfo( 613 AllocationTrackingInfo* info, 614 page_num_t pageNumber) = 0; 615 }; 616 617 } 618 619 using BKernel::AllocationTrackingCallback; 620 621 622 class AllocationCollectorCallback : public AllocationTrackingCallback { 623 public: 624 AllocationCollectorCallback(bool resetInfos) 625 : 626 fResetInfos(resetInfos) 627 { 628 } 629 630 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 631 page_num_t pageNumber) 632 { 633 if (!info->IsInitialized()) 634 return true; 635 636 addr_t caller = 0; 637 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 638 639 if (traceEntry != NULL && info->IsTraceEntryValid()) { 640 caller = tracing_find_caller_in_stack_trace( 641 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 642 } 643 644 caller_info* callerInfo = get_caller_info(caller); 645 if (callerInfo == NULL) { 646 kprintf("out of space for caller infos\n"); 647 return false; 648 } 649 650 callerInfo->count++; 651 652 if (fResetInfos) 653 info->Clear(); 654 655 return true; 656 } 657 658 private: 659 bool fResetInfos; 660 }; 661 662 663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback { 664 public: 665 AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter, 666 team_id teamFilter, thread_id threadFilter) 667 : 668 fPrintStackTrace(printStackTrace), 669 fPageFilter(pageFilter), 670 fTeamFilter(teamFilter), 671 fThreadFilter(threadFilter) 672 { 673 } 674 675 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 676 page_num_t pageNumber) 677 { 678 if (!info->IsInitialized()) 679 return true; 680 681 if (fPageFilter != 0 && pageNumber != fPageFilter) 682 return true; 683 684 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 685 if (traceEntry != NULL && !info->IsTraceEntryValid()) 686 traceEntry = NULL; 687 688 if (traceEntry != NULL) { 689 if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter) 690 return true; 691 if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter) 692 return true; 693 } else { 694 // we need the info if we have filters set 695 if (fTeamFilter != -1 || fThreadFilter != -1) 696 return true; 697 } 698 699 kprintf("page number %#" B_PRIxPHYSADDR, pageNumber); 700 701 if (traceEntry 
!= NULL) { 702 kprintf(", team: %" B_PRId32 ", thread %" B_PRId32 703 ", time %" B_PRId64 "\n", traceEntry->TeamID(), 704 traceEntry->ThreadID(), traceEntry->Time()); 705 706 if (fPrintStackTrace) 707 tracing_print_stack_trace(traceEntry->StackTrace()); 708 } else 709 kprintf("\n"); 710 711 return true; 712 } 713 714 private: 715 bool fPrintStackTrace; 716 page_num_t fPageFilter; 717 team_id fTeamFilter; 718 thread_id fThreadFilter; 719 }; 720 721 722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback { 723 public: 724 AllocationDetailPrinterCallback(addr_t caller) 725 : 726 fCaller(caller) 727 { 728 } 729 730 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 731 page_num_t pageNumber) 732 { 733 if (!info->IsInitialized()) 734 return true; 735 736 addr_t caller = 0; 737 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 738 if (traceEntry != NULL && !info->IsTraceEntryValid()) 739 traceEntry = NULL; 740 741 if (traceEntry != NULL) { 742 caller = tracing_find_caller_in_stack_trace( 743 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 744 } 745 746 if (caller != fCaller) 747 return true; 748 749 kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber); 750 if (traceEntry != NULL) 751 tracing_print_stack_trace(traceEntry->StackTrace()); 752 753 return true; 754 } 755 756 private: 757 addr_t fCaller; 758 }; 759 760 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 761 762 763 static void 764 list_page(vm_page* page) 765 { 766 kprintf("0x%08" B_PRIxADDR " ", 767 (addr_t)(page->physical_page_number * B_PAGE_SIZE)); 768 switch (page->State()) { 769 case PAGE_STATE_ACTIVE: kprintf("A"); break; 770 case PAGE_STATE_INACTIVE: kprintf("I"); break; 771 case PAGE_STATE_MODIFIED: kprintf("M"); break; 772 case PAGE_STATE_CACHED: kprintf("C"); break; 773 case PAGE_STATE_FREE: kprintf("F"); break; 774 case PAGE_STATE_CLEAR: kprintf("L"); break; 775 case PAGE_STATE_WIRED: kprintf("W"); break; 776 case PAGE_STATE_UNUSED: kprintf("-"); break; 777 } 778 kprintf(" "); 779 if (page->busy) kprintf("B"); else kprintf("-"); 780 if (page->busy_writing) kprintf("W"); else kprintf("-"); 781 if (page->accessed) kprintf("A"); else kprintf("-"); 782 if (page->modified) kprintf("M"); else kprintf("-"); 783 if (page->unused) kprintf("U"); else kprintf("-"); 784 785 kprintf(" usage:%3u", page->usage_count); 786 kprintf(" wired:%5u", page->WiredCount()); 787 788 bool first = true; 789 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 790 vm_page_mapping* mapping; 791 while ((mapping = iterator.Next()) != NULL) { 792 if (first) { 793 kprintf(": "); 794 first = false; 795 } else 796 kprintf(", "); 797 798 kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name); 799 mapping = mapping->page_link.next; 800 } 801 } 802 803 804 static int 805 dump_page_list(int argc, char **argv) 806 { 807 kprintf("page table:\n"); 808 for (page_num_t i = 0; i < sNumPages; i++) { 809 if (sPages[i].State() != PAGE_STATE_UNUSED) { 810 list_page(&sPages[i]); 811 kprintf("\n"); 812 } 813 } 814 kprintf("end of page table\n"); 815 816 return 0; 817 } 818 819 820 static int 821 find_page(int argc, char **argv) 822 { 823 struct vm_page *page; 824 addr_t address; 825 int32 index = 1; 826 int i; 827 828 struct { 829 const char* name; 830 VMPageQueue* queue; 831 } pageQueueInfos[] = { 832 { "free", &sFreePageQueue }, 833 { "clear", &sClearPageQueue }, 834 { "modified", &sModifiedPageQueue }, 835 { "active", &sActivePageQueue }, 836 { "inactive", &sInactivePageQueue }, 837 { 
"cached", &sCachedPageQueue }, 838 { NULL, NULL } 839 }; 840 841 if (argc < 2 842 || strlen(argv[index]) <= 2 843 || argv[index][0] != '0' 844 || argv[index][1] != 'x') { 845 kprintf("usage: find_page <address>\n"); 846 return 0; 847 } 848 849 address = strtoul(argv[index], NULL, 0); 850 page = (vm_page*)address; 851 852 for (i = 0; pageQueueInfos[i].name; i++) { 853 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 854 while (vm_page* p = it.Next()) { 855 if (p == page) { 856 kprintf("found page %p in queue %p (%s)\n", page, 857 pageQueueInfos[i].queue, pageQueueInfos[i].name); 858 return 0; 859 } 860 } 861 } 862 863 kprintf("page %p isn't in any queue\n", page); 864 865 return 0; 866 } 867 868 869 const char * 870 page_state_to_string(int state) 871 { 872 switch(state) { 873 case PAGE_STATE_ACTIVE: 874 return "active"; 875 case PAGE_STATE_INACTIVE: 876 return "inactive"; 877 case PAGE_STATE_MODIFIED: 878 return "modified"; 879 case PAGE_STATE_CACHED: 880 return "cached"; 881 case PAGE_STATE_FREE: 882 return "free"; 883 case PAGE_STATE_CLEAR: 884 return "clear"; 885 case PAGE_STATE_WIRED: 886 return "wired"; 887 case PAGE_STATE_UNUSED: 888 return "unused"; 889 default: 890 return "unknown"; 891 } 892 } 893 894 895 static int 896 dump_page_long(int argc, char **argv) 897 { 898 bool addressIsPointer = true; 899 bool physical = false; 900 bool searchMappings = false; 901 int32 index = 1; 902 903 while (index < argc) { 904 if (argv[index][0] != '-') 905 break; 906 907 if (!strcmp(argv[index], "-p")) { 908 addressIsPointer = false; 909 physical = true; 910 } else if (!strcmp(argv[index], "-v")) { 911 addressIsPointer = false; 912 } else if (!strcmp(argv[index], "-m")) { 913 searchMappings = true; 914 } else { 915 print_debugger_command_usage(argv[0]); 916 return 0; 917 } 918 919 index++; 920 } 921 922 if (index + 1 != argc) { 923 print_debugger_command_usage(argv[0]); 924 return 0; 925 } 926 927 uint64 value; 928 if (!evaluate_debug_expression(argv[index], &value, false)) 929 return 0; 930 931 uint64 pageAddress = value; 932 struct vm_page* page; 933 934 if (addressIsPointer) { 935 page = (struct vm_page *)(addr_t)pageAddress; 936 } else { 937 if (!physical) { 938 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 939 940 if (debug_get_debugged_thread()->team->address_space != NULL) 941 addressSpace = debug_get_debugged_thread()->team->address_space; 942 943 uint32 flags = 0; 944 phys_addr_t physicalAddress; 945 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 946 &physicalAddress, &flags) != B_OK 947 || (flags & PAGE_PRESENT) == 0) { 948 kprintf("Virtual address not mapped to a physical page in this " 949 "address space.\n"); 950 return 0; 951 } 952 pageAddress = physicalAddress; 953 } 954 955 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 956 } 957 958 kprintf("PAGE: %p\n", page); 959 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 960 page->queue_link.previous); 961 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", 962 page->physical_page_number); 963 kprintf("cache: %p\n", page->Cache()); 964 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset); 965 kprintf("cache_next: %p\n", page->cache_next); 966 kprintf("state: %s\n", page_state_to_string(page->State())); 967 kprintf("wired_count: %d\n", page->WiredCount()); 968 kprintf("usage_count: %d\n", page->usage_count); 969 kprintf("busy: %d\n", page->busy); 970 kprintf("busy_writing: %d\n", page->busy_writing); 971 kprintf("accessed: %d\n", page->accessed); 972 
kprintf("modified: %d\n", page->modified); 973 #if DEBUG_PAGE_QUEUE 974 kprintf("queue: %p\n", page->queue); 975 #endif 976 #if DEBUG_PAGE_ACCESS 977 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 978 #endif 979 kprintf("area mappings:\n"); 980 981 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 982 vm_page_mapping *mapping; 983 while ((mapping = iterator.Next()) != NULL) { 984 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 985 mapping = mapping->page_link.next; 986 } 987 988 if (searchMappings) { 989 kprintf("all mappings:\n"); 990 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 991 while (addressSpace != NULL) { 992 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 993 for (addr_t address = addressSpace->Base(); pageCount != 0; 994 address += B_PAGE_SIZE, pageCount--) { 995 phys_addr_t physicalAddress; 996 uint32 flags = 0; 997 if (addressSpace->TranslationMap()->QueryInterrupt(address, 998 &physicalAddress, &flags) == B_OK 999 && (flags & PAGE_PRESENT) != 0 1000 && physicalAddress / B_PAGE_SIZE 1001 == page->physical_page_number) { 1002 VMArea* area = addressSpace->LookupArea(address); 1003 kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#" 1004 B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(), 1005 area != NULL ? area->id : -1, address, 1006 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 1007 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 1008 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 1009 (flags & PAGE_ACCESSED) != 0 ? " accessed" : ""); 1010 } 1011 } 1012 addressSpace = VMAddressSpace::DebugNext(addressSpace); 1013 } 1014 } 1015 1016 set_debug_variable("_cache", (addr_t)page->Cache()); 1017 #if DEBUG_PAGE_ACCESS 1018 set_debug_variable("_accessor", page->accessing_thread); 1019 #endif 1020 1021 return 0; 1022 } 1023 1024 1025 static int 1026 dump_page_queue(int argc, char **argv) 1027 { 1028 struct VMPageQueue *queue; 1029 1030 if (argc < 2) { 1031 kprintf("usage: page_queue <address/name> [list]\n"); 1032 return 0; 1033 } 1034 1035 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 1036 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 1037 else if (!strcmp(argv[1], "free")) 1038 queue = &sFreePageQueue; 1039 else if (!strcmp(argv[1], "clear")) 1040 queue = &sClearPageQueue; 1041 else if (!strcmp(argv[1], "modified")) 1042 queue = &sModifiedPageQueue; 1043 else if (!strcmp(argv[1], "active")) 1044 queue = &sActivePageQueue; 1045 else if (!strcmp(argv[1], "inactive")) 1046 queue = &sInactivePageQueue; 1047 else if (!strcmp(argv[1], "cached")) 1048 queue = &sCachedPageQueue; 1049 else { 1050 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 1051 return 0; 1052 } 1053 1054 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %" 1055 B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(), 1056 queue->Count()); 1057 1058 if (argc == 3) { 1059 struct vm_page *page = queue->Head(); 1060 1061 kprintf("page cache type state wired usage\n"); 1062 for (page_num_t i = 0; page; i++, page = queue->Next(page)) { 1063 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 1064 vm_cache_type_to_string(page->Cache()->type), 1065 page_state_to_string(page->State()), 1066 page->WiredCount(), page->usage_count); 1067 } 1068 } 1069 return 0; 1070 } 1071 1072 1073 static int 1074 dump_page_stats(int argc, char **argv) 1075 { 1076 page_num_t swappableModified = 0; 1077 page_num_t swappableModifiedInactive = 0; 1078 1079 size_t counter[8]; 1080 size_t busyCounter[8]; 1081 
memset(counter, 0, sizeof(counter)); 1082 memset(busyCounter, 0, sizeof(busyCounter)); 1083 1084 struct page_run { 1085 page_num_t start; 1086 page_num_t end; 1087 1088 page_num_t Length() const { return end - start; } 1089 }; 1090 1091 page_run currentFreeRun = { 0, 0 }; 1092 page_run currentCachedRun = { 0, 0 }; 1093 page_run longestFreeRun = { 0, 0 }; 1094 page_run longestCachedRun = { 0, 0 }; 1095 1096 for (page_num_t i = 0; i < sNumPages; i++) { 1097 if (sPages[i].State() > 7) { 1098 panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i, 1099 &sPages[i]); 1100 } 1101 1102 uint32 pageState = sPages[i].State(); 1103 1104 counter[pageState]++; 1105 if (sPages[i].busy) 1106 busyCounter[pageState]++; 1107 1108 if (pageState == PAGE_STATE_MODIFIED 1109 && sPages[i].Cache() != NULL 1110 && sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) { 1111 swappableModified++; 1112 if (sPages[i].usage_count == 0) 1113 swappableModifiedInactive++; 1114 } 1115 1116 // track free and cached pages runs 1117 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 1118 currentFreeRun.end = i + 1; 1119 currentCachedRun.end = i + 1; 1120 } else { 1121 if (currentFreeRun.Length() > longestFreeRun.Length()) 1122 longestFreeRun = currentFreeRun; 1123 currentFreeRun.start = currentFreeRun.end = i + 1; 1124 1125 if (pageState == PAGE_STATE_CACHED) { 1126 currentCachedRun.end = i + 1; 1127 } else { 1128 if (currentCachedRun.Length() > longestCachedRun.Length()) 1129 longestCachedRun = currentCachedRun; 1130 currentCachedRun.start = currentCachedRun.end = i + 1; 1131 } 1132 } 1133 } 1134 1135 kprintf("page stats:\n"); 1136 kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages); 1137 1138 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1139 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 1140 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1141 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 1142 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1143 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 1144 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1145 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 1146 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1147 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 1148 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1149 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 1150 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 1151 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 1152 1153 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 1154 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 1155 sUnsatisfiedPageReservations); 1156 kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount); 1157 kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %" 1158 B_PRIuPHYSADDR ")\n", longestFreeRun.Length(), 1159 sPages[longestFreeRun.start].physical_page_number); 1160 kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %" 1161 B_PRIuPHYSADDR ")\n", longestCachedRun.Length(), 1162 sPages[longestCachedRun.start].physical_page_number); 1163 1164 kprintf("waiting threads:\n"); 1165 for (PageReservationWaiterList::Iterator it 1166 = sPageReservationWaiters.GetIterator(); 1167 PageReservationWaiter* waiter = it.Next();) { 1168 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 1169 ", don't touch: %6" B_PRIu32 "\n", 
waiter->thread->id, 1170 waiter->missing, waiter->dontTouch); 1171 } 1172 1173 kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue, 1174 sFreePageQueue.Count()); 1175 kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue, 1176 sClearPageQueue.Count()); 1177 kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32 1178 " temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %" 1179 B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 1180 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 1181 kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n", 1182 &sActivePageQueue, sActivePageQueue.Count()); 1183 kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n", 1184 &sInactivePageQueue, sInactivePageQueue.Count()); 1185 kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n", 1186 &sCachedPageQueue, sCachedPageQueue.Count()); 1187 return 0; 1188 } 1189 1190 1191 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1192 1193 static caller_info* 1194 get_caller_info(addr_t caller) 1195 { 1196 // find the caller info 1197 for (int32 i = 0; i < sCallerInfoCount; i++) { 1198 if (caller == sCallerInfoTable[i].caller) 1199 return &sCallerInfoTable[i]; 1200 } 1201 1202 // not found, add a new entry, if there are free slots 1203 if (sCallerInfoCount >= kCallerInfoTableSize) 1204 return NULL; 1205 1206 caller_info* info = &sCallerInfoTable[sCallerInfoCount++]; 1207 info->caller = caller; 1208 info->count = 0; 1209 1210 return info; 1211 } 1212 1213 1214 static int 1215 caller_info_compare_count(const void* _a, const void* _b) 1216 { 1217 const caller_info* a = (const caller_info*)_a; 1218 const caller_info* b = (const caller_info*)_b; 1219 return (int)(b->count - a->count); 1220 } 1221 1222 1223 static int 1224 dump_page_allocations_per_caller(int argc, char** argv) 1225 { 1226 bool resetAllocationInfos = false; 1227 bool printDetails = false; 1228 addr_t caller = 0; 1229 1230 for (int32 i = 1; i < argc; i++) { 1231 if (strcmp(argv[i], "-d") == 0) { 1232 uint64 callerAddress; 1233 if (++i >= argc 1234 || !evaluate_debug_expression(argv[i], &callerAddress, true)) { 1235 print_debugger_command_usage(argv[0]); 1236 return 0; 1237 } 1238 1239 caller = callerAddress; 1240 printDetails = true; 1241 } else if (strcmp(argv[i], "-r") == 0) { 1242 resetAllocationInfos = true; 1243 } else { 1244 print_debugger_command_usage(argv[0]); 1245 return 0; 1246 } 1247 } 1248 1249 sCallerInfoCount = 0; 1250 1251 AllocationCollectorCallback collectorCallback(resetAllocationInfos); 1252 AllocationDetailPrinterCallback detailsCallback(caller); 1253 AllocationTrackingCallback& callback = printDetails 1254 ? 
(AllocationTrackingCallback&)detailsCallback 1255 : (AllocationTrackingCallback&)collectorCallback; 1256 1257 for (page_num_t i = 0; i < sNumPages; i++) 1258 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1259 1260 if (printDetails) 1261 return 0; 1262 1263 // sort the array 1264 qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info), 1265 &caller_info_compare_count); 1266 1267 kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount); 1268 1269 size_t totalAllocationCount = 0; 1270 1271 kprintf(" count caller\n"); 1272 kprintf("----------------------------------\n"); 1273 for (int32 i = 0; i < sCallerInfoCount; i++) { 1274 caller_info& info = sCallerInfoTable[i]; 1275 kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller); 1276 1277 const char* symbol; 1278 const char* imageName; 1279 bool exactMatch; 1280 addr_t baseAddress; 1281 1282 if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol, 1283 &imageName, &exactMatch) == B_OK) { 1284 kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol, 1285 info.caller - baseAddress, imageName, 1286 exactMatch ? "" : " (nearest)"); 1287 } else 1288 kprintf("\n"); 1289 1290 totalAllocationCount += info.count; 1291 } 1292 1293 kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n", 1294 totalAllocationCount); 1295 1296 return 0; 1297 } 1298 1299 1300 static int 1301 dump_page_allocation_infos(int argc, char** argv) 1302 { 1303 page_num_t pageFilter = 0; 1304 team_id teamFilter = -1; 1305 thread_id threadFilter = -1; 1306 bool printStackTraces = false; 1307 1308 for (int32 i = 1; i < argc; i++) { 1309 if (strcmp(argv[i], "--stacktrace") == 0) 1310 printStackTraces = true; 1311 else if (strcmp(argv[i], "-p") == 0) { 1312 uint64 pageNumber; 1313 if (++i >= argc 1314 || !evaluate_debug_expression(argv[i], &pageNumber, true)) { 1315 print_debugger_command_usage(argv[0]); 1316 return 0; 1317 } 1318 1319 pageFilter = pageNumber; 1320 } else if (strcmp(argv[i], "--team") == 0) { 1321 uint64 team; 1322 if (++i >= argc 1323 || !evaluate_debug_expression(argv[i], &team, true)) { 1324 print_debugger_command_usage(argv[0]); 1325 return 0; 1326 } 1327 1328 teamFilter = team; 1329 } else if (strcmp(argv[i], "--thread") == 0) { 1330 uint64 thread; 1331 if (++i >= argc 1332 || !evaluate_debug_expression(argv[i], &thread, true)) { 1333 print_debugger_command_usage(argv[0]); 1334 return 0; 1335 } 1336 1337 threadFilter = thread; 1338 } else { 1339 print_debugger_command_usage(argv[0]); 1340 return 0; 1341 } 1342 } 1343 1344 AllocationInfoPrinterCallback callback(printStackTraces, pageFilter, 1345 teamFilter, threadFilter); 1346 1347 for (page_num_t i = 0; i < sNumPages; i++) 1348 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1349 1350 return 0; 1351 } 1352 1353 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1354 1355 1356 #ifdef TRACK_PAGE_USAGE_STATS 1357 1358 static void 1359 track_page_usage(vm_page* page) 1360 { 1361 if (page->WiredCount() == 0) { 1362 sNextPageUsage[(int32)page->usage_count + 128]++; 1363 sNextPageUsagePageCount++; 1364 } 1365 } 1366 1367 1368 static void 1369 update_page_usage_stats() 1370 { 1371 std::swap(sPageUsage, sNextPageUsage); 1372 sPageUsagePageCount = sNextPageUsagePageCount; 1373 1374 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 1375 sNextPageUsagePageCount = 0; 1376 1377 // compute average 1378 if (sPageUsagePageCount > 0) { 1379 int64 sum = 0; 1380 for (int32 i = 0; i < 256; i++) 1381 sum += (int64)sPageUsage[i] * (i - 128); 1382 1383 
TRACE_DAEMON("average page usage: %f (%lu pages)\n", 1384 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 1385 } 1386 } 1387 1388 1389 static int 1390 dump_page_usage_stats(int argc, char** argv) 1391 { 1392 kprintf("distribution of page usage counts (%lu pages):", 1393 sPageUsagePageCount); 1394 1395 int64 sum = 0; 1396 for (int32 i = 0; i < 256; i++) { 1397 if (i % 8 == 0) 1398 kprintf("\n%4ld:", i - 128); 1399 1400 int64 count = sPageUsage[i]; 1401 sum += count * (i - 128); 1402 1403 kprintf(" %9llu", count); 1404 } 1405 1406 kprintf("\n\n"); 1407 1408 kprintf("average usage count: %f\n", 1409 sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0); 1410 1411 return 0; 1412 } 1413 1414 #endif // TRACK_PAGE_USAGE_STATS 1415 1416 1417 // #pragma mark - vm_page 1418 1419 1420 inline void 1421 vm_page::InitState(uint8 newState) 1422 { 1423 state = newState; 1424 } 1425 1426 1427 inline void 1428 vm_page::SetState(uint8 newState) 1429 { 1430 TPS(SetPageState(this, newState)); 1431 1432 state = newState; 1433 } 1434 1435 1436 // #pragma mark - 1437 1438 1439 static void 1440 get_page_stats(page_stats& _pageStats) 1441 { 1442 _pageStats.totalFreePages = sUnreservedFreePages; 1443 _pageStats.cachedPages = sCachedPageQueue.Count(); 1444 _pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations; 1445 // TODO: We don't get an actual snapshot here! 1446 } 1447 1448 1449 static bool 1450 do_active_paging(const page_stats& pageStats) 1451 { 1452 return pageStats.totalFreePages + pageStats.cachedPages 1453 < pageStats.unsatisfiedReservations 1454 + (int32)sFreeOrCachedPagesTarget; 1455 } 1456 1457 1458 /*! Reserves as many pages as possible from \c sUnreservedFreePages up to 1459 \a count. Doesn't touch the last \a dontTouch pages of 1460 \c sUnreservedFreePages, though. 1461 \return The number of actually reserved pages. 1462 */ 1463 static uint32 1464 reserve_some_pages(uint32 count, uint32 dontTouch) 1465 { 1466 while (true) { 1467 int32 freePages = atomic_get(&sUnreservedFreePages); 1468 if (freePages <= (int32)dontTouch) 1469 return 0; 1470 1471 int32 toReserve = std::min(count, freePages - dontTouch); 1472 if (atomic_test_and_set(&sUnreservedFreePages, 1473 freePages - toReserve, freePages) 1474 == freePages) { 1475 return toReserve; 1476 } 1477 1478 // the count changed in the meantime -- retry 1479 } 1480 } 1481 1482 1483 static void 1484 wake_up_page_reservation_waiters() 1485 { 1486 MutexLocker pageDeficitLocker(sPageDeficitLock); 1487 1488 // TODO: If this is a low priority thread, we might want to disable 1489 // interrupts or otherwise ensure that we aren't unscheduled. Otherwise 1490 // high priority threads wait be kept waiting while a medium priority thread 1491 // prevents us from running. 
1492 1493 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) { 1494 int32 reserved = reserve_some_pages(waiter->missing, 1495 waiter->dontTouch); 1496 if (reserved == 0) 1497 return; 1498 1499 atomic_add(&sUnsatisfiedPageReservations, -reserved); 1500 waiter->missing -= reserved; 1501 1502 if (waiter->missing > 0) 1503 return; 1504 1505 sPageReservationWaiters.Remove(waiter); 1506 1507 thread_unblock(waiter->thread, B_OK); 1508 } 1509 } 1510 1511 1512 static inline void 1513 unreserve_pages(uint32 count) 1514 { 1515 atomic_add(&sUnreservedFreePages, count); 1516 if (atomic_get(&sUnsatisfiedPageReservations) != 0) 1517 wake_up_page_reservation_waiters(); 1518 } 1519 1520 1521 static void 1522 free_page(vm_page* page, bool clear) 1523 { 1524 DEBUG_PAGE_ACCESS_CHECK(page); 1525 1526 PAGE_ASSERT(page, !page->IsMapped()); 1527 1528 VMPageQueue* fromQueue; 1529 1530 switch (page->State()) { 1531 case PAGE_STATE_ACTIVE: 1532 fromQueue = &sActivePageQueue; 1533 break; 1534 case PAGE_STATE_INACTIVE: 1535 fromQueue = &sInactivePageQueue; 1536 break; 1537 case PAGE_STATE_MODIFIED: 1538 fromQueue = &sModifiedPageQueue; 1539 break; 1540 case PAGE_STATE_CACHED: 1541 fromQueue = &sCachedPageQueue; 1542 break; 1543 case PAGE_STATE_FREE: 1544 case PAGE_STATE_CLEAR: 1545 panic("free_page(): page %p already free", page); 1546 return; 1547 case PAGE_STATE_WIRED: 1548 case PAGE_STATE_UNUSED: 1549 fromQueue = NULL; 1550 break; 1551 default: 1552 panic("free_page(): page %p in invalid state %d", 1553 page, page->State()); 1554 return; 1555 } 1556 1557 if (page->CacheRef() != NULL) 1558 panic("to be freed page %p has cache", page); 1559 if (page->IsMapped()) 1560 panic("to be freed page %p has mappings", page); 1561 1562 if (fromQueue != NULL) 1563 fromQueue->RemoveUnlocked(page); 1564 1565 TA(FreePage(page->physical_page_number)); 1566 1567 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1568 page->allocation_tracking_info.Clear(); 1569 #endif 1570 1571 ReadLocker locker(sFreePageQueuesLock); 1572 1573 DEBUG_PAGE_ACCESS_END(page); 1574 1575 if (clear) { 1576 page->SetState(PAGE_STATE_CLEAR); 1577 sClearPageQueue.PrependUnlocked(page); 1578 } else { 1579 page->SetState(PAGE_STATE_FREE); 1580 sFreePageQueue.PrependUnlocked(page); 1581 sFreePageCondition.NotifyAll(); 1582 } 1583 1584 locker.Unlock(); 1585 } 1586 1587 1588 /*! The caller must make sure that no-one else tries to change the page's state 1589 while the function is called. If the page has a cache, this can be done by 1590 locking the cache. 
1591 */ 1592 static void 1593 set_page_state(vm_page *page, int pageState) 1594 { 1595 DEBUG_PAGE_ACCESS_CHECK(page); 1596 1597 if (pageState == page->State()) 1598 return; 1599 1600 VMPageQueue* fromQueue; 1601 1602 switch (page->State()) { 1603 case PAGE_STATE_ACTIVE: 1604 fromQueue = &sActivePageQueue; 1605 break; 1606 case PAGE_STATE_INACTIVE: 1607 fromQueue = &sInactivePageQueue; 1608 break; 1609 case PAGE_STATE_MODIFIED: 1610 fromQueue = &sModifiedPageQueue; 1611 break; 1612 case PAGE_STATE_CACHED: 1613 fromQueue = &sCachedPageQueue; 1614 break; 1615 case PAGE_STATE_FREE: 1616 case PAGE_STATE_CLEAR: 1617 panic("set_page_state(): page %p is free/clear", page); 1618 return; 1619 case PAGE_STATE_WIRED: 1620 case PAGE_STATE_UNUSED: 1621 fromQueue = NULL; 1622 break; 1623 default: 1624 panic("set_page_state(): page %p in invalid state %d", 1625 page, page->State()); 1626 return; 1627 } 1628 1629 VMPageQueue* toQueue; 1630 1631 switch (pageState) { 1632 case PAGE_STATE_ACTIVE: 1633 toQueue = &sActivePageQueue; 1634 break; 1635 case PAGE_STATE_INACTIVE: 1636 toQueue = &sInactivePageQueue; 1637 break; 1638 case PAGE_STATE_MODIFIED: 1639 toQueue = &sModifiedPageQueue; 1640 break; 1641 case PAGE_STATE_CACHED: 1642 PAGE_ASSERT(page, !page->IsMapped()); 1643 PAGE_ASSERT(page, !page->modified); 1644 toQueue = &sCachedPageQueue; 1645 break; 1646 case PAGE_STATE_FREE: 1647 case PAGE_STATE_CLEAR: 1648 panic("set_page_state(): target state is free/clear"); 1649 return; 1650 case PAGE_STATE_WIRED: 1651 case PAGE_STATE_UNUSED: 1652 toQueue = NULL; 1653 break; 1654 default: 1655 panic("set_page_state(): invalid target state %d", pageState); 1656 return; 1657 } 1658 1659 VMCache* cache = page->Cache(); 1660 if (cache != NULL && cache->temporary) { 1661 if (pageState == PAGE_STATE_MODIFIED) 1662 atomic_add(&sModifiedTemporaryPages, 1); 1663 else if (page->State() == PAGE_STATE_MODIFIED) 1664 atomic_add(&sModifiedTemporaryPages, -1); 1665 } 1666 1667 // move the page 1668 if (toQueue == fromQueue) { 1669 // Note: Theoretically we are required to lock when changing the page 1670 // state, even if we don't change the queue. We actually don't have to 1671 // do this, though, since only for the active queue there are different 1672 // page states and active pages have a cache that must be locked at 1673 // this point. So we rely on the fact that everyone must lock the cache 1674 // before trying to change/interpret the page state. 1675 PAGE_ASSERT(page, cache != NULL); 1676 cache->AssertLocked(); 1677 page->SetState(pageState); 1678 } else { 1679 if (fromQueue != NULL) 1680 fromQueue->RemoveUnlocked(page); 1681 1682 page->SetState(pageState); 1683 1684 if (toQueue != NULL) 1685 toQueue->AppendUnlocked(page); 1686 } 1687 } 1688 1689 1690 /*! Moves a previously modified page into a now appropriate queue. 1691 The page queues must not be locked. 1692 */ 1693 static void 1694 move_page_to_appropriate_queue(vm_page *page) 1695 { 1696 DEBUG_PAGE_ACCESS_CHECK(page); 1697 1698 // Note, this logic must be in sync with what the page daemon does. 1699 int32 state; 1700 if (page->IsMapped()) 1701 state = PAGE_STATE_ACTIVE; 1702 else if (page->modified) 1703 state = PAGE_STATE_MODIFIED; 1704 else 1705 state = PAGE_STATE_CACHED; 1706 1707 // TODO: If free + cached pages are low, we might directly want to free the 1708 // page. 
1709 set_page_state(page, state); 1710 } 1711 1712 1713 static void 1714 clear_page(struct vm_page *page) 1715 { 1716 vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0, 1717 B_PAGE_SIZE); 1718 } 1719 1720 1721 static status_t 1722 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired) 1723 { 1724 TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#" 1725 B_PRIxPHYSADDR "\n", startPage, length)); 1726 1727 if (sPhysicalPageOffset > startPage) { 1728 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1729 "): start page is before free list\n", startPage, length); 1730 if (sPhysicalPageOffset - startPage >= length) 1731 return B_OK; 1732 length -= sPhysicalPageOffset - startPage; 1733 startPage = sPhysicalPageOffset; 1734 } 1735 1736 startPage -= sPhysicalPageOffset; 1737 1738 if (startPage + length > sNumPages) { 1739 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1740 "): range would extend past free list\n", startPage, length); 1741 if (startPage >= sNumPages) 1742 return B_OK; 1743 length = sNumPages - startPage; 1744 } 1745 1746 WriteLocker locker(sFreePageQueuesLock); 1747 1748 for (page_num_t i = 0; i < length; i++) { 1749 vm_page *page = &sPages[startPage + i]; 1750 switch (page->State()) { 1751 case PAGE_STATE_FREE: 1752 case PAGE_STATE_CLEAR: 1753 { 1754 // TODO: This violates the page reservation policy, since we remove pages from 1755 // the free/clear queues without having reserved them before. This should happen 1756 // in the early boot process only, though. 1757 DEBUG_PAGE_ACCESS_START(page); 1758 VMPageQueue& queue = page->State() == PAGE_STATE_FREE 1759 ? sFreePageQueue : sClearPageQueue; 1760 queue.Remove(page); 1761 page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED); 1762 page->busy = false; 1763 atomic_add(&sUnreservedFreePages, -1); 1764 DEBUG_PAGE_ACCESS_END(page); 1765 break; 1766 } 1767 case PAGE_STATE_WIRED: 1768 case PAGE_STATE_UNUSED: 1769 break; 1770 case PAGE_STATE_ACTIVE: 1771 case PAGE_STATE_INACTIVE: 1772 case PAGE_STATE_MODIFIED: 1773 case PAGE_STATE_CACHED: 1774 default: 1775 // uh 1776 dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR 1777 " in non-free state %d!\n", startPage + i, page->State()); 1778 break; 1779 } 1780 } 1781 1782 return B_OK; 1783 } 1784 1785 1786 /*! 1787 This is a background thread that wakes up when its condition is notified 1788 and moves some pages from the free queue over to the clear queue. 1789 Given enough time, it will clear out all pages from the free queue - we 1790 could probably slow it down after having reached a certain threshold. 1791 */ 1792 static int32 1793 page_scrubber(void *unused) 1794 { 1795 (void)(unused); 1796 1797 TRACE(("page_scrubber starting...\n")); 1798 1799 ConditionVariableEntry entry; 1800 for (;;) { 1801 while (sFreePageQueue.Count() == 0 1802 || atomic_get(&sUnreservedFreePages) 1803 < (int32)sFreePagesTarget) { 1804 sFreePageCondition.Add(&entry); 1805 entry.Wait(); 1806 } 1807 1808 // Since we temporarily remove pages from the free pages reserve, 1809 // we must make sure we don't cause a violation of the page 1810 // reservation warranty. The following is usually stricter than 1811 // necessary, because we don't have information on how many of the 1812 // reserved pages have already been allocated. 
1813 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1814 kPageReserveForPriority[VM_PRIORITY_USER]); 1815 if (reserved == 0) 1816 continue; 1817 1818 // get some pages from the free queue 1819 ReadLocker locker(sFreePageQueuesLock); 1820 1821 vm_page *page[SCRUB_SIZE]; 1822 int32 scrubCount = 0; 1823 for (int32 i = 0; i < reserved; i++) { 1824 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1825 if (page[i] == NULL) 1826 break; 1827 1828 DEBUG_PAGE_ACCESS_START(page[i]); 1829 1830 page[i]->SetState(PAGE_STATE_ACTIVE); 1831 page[i]->busy = true; 1832 scrubCount++; 1833 } 1834 1835 locker.Unlock(); 1836 1837 if (scrubCount == 0) { 1838 unreserve_pages(reserved); 1839 continue; 1840 } 1841 1842 TA(ScrubbingPages(scrubCount)); 1843 1844 // clear them 1845 for (int32 i = 0; i < scrubCount; i++) 1846 clear_page(page[i]); 1847 1848 locker.Lock(); 1849 1850 // and put them into the clear queue 1851 for (int32 i = 0; i < scrubCount; i++) { 1852 page[i]->SetState(PAGE_STATE_CLEAR); 1853 page[i]->busy = false; 1854 DEBUG_PAGE_ACCESS_END(page[i]); 1855 sClearPageQueue.PrependUnlocked(page[i]); 1856 } 1857 1858 locker.Unlock(); 1859 1860 unreserve_pages(reserved); 1861 1862 TA(ScrubbedPages(scrubCount)); 1863 1864 // wait at least 100ms between runs 1865 snooze(100 * 1000); 1866 } 1867 1868 return 0; 1869 } 1870 1871 1872 static void 1873 init_page_marker(vm_page &marker) 1874 { 1875 marker.SetCacheRef(NULL); 1876 marker.InitState(PAGE_STATE_UNUSED); 1877 marker.busy = true; 1878 #if DEBUG_PAGE_QUEUE 1879 marker.queue = NULL; 1880 #endif 1881 #if DEBUG_PAGE_ACCESS 1882 marker.accessing_thread = thread_get_current_thread_id(); 1883 #endif 1884 } 1885 1886 1887 static void 1888 remove_page_marker(struct vm_page &marker) 1889 { 1890 DEBUG_PAGE_ACCESS_CHECK(&marker); 1891 1892 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1893 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1894 1895 marker.SetState(PAGE_STATE_UNUSED); 1896 } 1897 1898 1899 static vm_page* 1900 next_modified_page(page_num_t& maxPagesToSee) 1901 { 1902 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1903 1904 while (maxPagesToSee > 0) { 1905 vm_page* page = sModifiedPageQueue.Head(); 1906 if (page == NULL) 1907 return NULL; 1908 1909 sModifiedPageQueue.Requeue(page, true); 1910 1911 maxPagesToSee--; 1912 1913 if (!page->busy) 1914 return page; 1915 } 1916 1917 return NULL; 1918 } 1919 1920 1921 // #pragma mark - 1922 1923 1924 class PageWriteTransfer; 1925 class PageWriteWrapper; 1926 1927 1928 class PageWriterRun { 1929 public: 1930 status_t Init(uint32 maxPages); 1931 1932 void PrepareNextRun(); 1933 void AddPage(vm_page* page); 1934 uint32 Go(); 1935 1936 void PageWritten(PageWriteTransfer* transfer, status_t status, 1937 bool partialTransfer, size_t bytesTransferred); 1938 1939 private: 1940 uint32 fMaxPages; 1941 uint32 fWrapperCount; 1942 uint32 fTransferCount; 1943 int32 fPendingTransfers; 1944 PageWriteWrapper* fWrappers; 1945 PageWriteTransfer* fTransfers; 1946 ConditionVariable fAllFinishedCondition; 1947 }; 1948 1949 1950 class PageWriteTransfer : public AsyncIOCallback { 1951 public: 1952 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1953 bool AddPage(vm_page* page); 1954 1955 status_t Schedule(uint32 flags); 1956 1957 void SetStatus(status_t status, size_t transferred); 1958 1959 status_t Status() const { return fStatus; } 1960 struct VMCache* Cache() const { return fCache; } 1961 uint32 PageCount() const { return fPageCount; } 1962 1963 virtual void IOFinished(status_t status, bool 
partialTransfer, 1964 generic_size_t bytesTransferred); 1965 private: 1966 PageWriterRun* fRun; 1967 struct VMCache* fCache; 1968 off_t fOffset; 1969 uint32 fPageCount; 1970 int32 fMaxPages; 1971 status_t fStatus; 1972 uint32 fVecCount; 1973 generic_io_vec fVecs[32]; // TODO: make dynamic/configurable 1974 }; 1975 1976 1977 class PageWriteWrapper { 1978 public: 1979 PageWriteWrapper(); 1980 ~PageWriteWrapper(); 1981 void SetTo(vm_page* page); 1982 bool Done(status_t result); 1983 1984 private: 1985 vm_page* fPage; 1986 struct VMCache* fCache; 1987 bool fIsActive; 1988 }; 1989 1990 1991 PageWriteWrapper::PageWriteWrapper() 1992 : 1993 fIsActive(false) 1994 { 1995 } 1996 1997 1998 PageWriteWrapper::~PageWriteWrapper() 1999 { 2000 if (fIsActive) 2001 panic("page write wrapper going out of scope but isn't completed"); 2002 } 2003 2004 2005 /*! The page's cache must be locked. 2006 */ 2007 void 2008 PageWriteWrapper::SetTo(vm_page* page) 2009 { 2010 DEBUG_PAGE_ACCESS_CHECK(page); 2011 2012 if (page->busy) 2013 panic("setting page write wrapper to busy page"); 2014 2015 if (fIsActive) 2016 panic("re-setting page write wrapper that isn't completed"); 2017 2018 fPage = page; 2019 fCache = page->Cache(); 2020 fIsActive = true; 2021 2022 fPage->busy = true; 2023 fPage->busy_writing = true; 2024 2025 // We have a modified page -- however, while we're writing it back, 2026 // the page might still be mapped. In order not to lose any changes to the 2027 // page, we mark it clean before actually writing it back; if 2028 // writing the page fails for some reason, we'll just keep it in the 2029 // modified page list, but that should happen only rarely. 2030 2031 // If the page is changed after we cleared the dirty flag, but before we 2032 // had the chance to write it back, then we'll write it again later -- that 2033 // will probably not happen that often, though. 2034 2035 vm_clear_map_flags(fPage, PAGE_MODIFIED); 2036 } 2037 2038 2039 /*! The page's cache must be locked. 2040 The page queues must not be locked. 2041 \return \c true if the page was written successfully respectively could be 2042 handled somehow, \c false otherwise. 2043 */ 2044 bool 2045 PageWriteWrapper::Done(status_t result) 2046 { 2047 if (!fIsActive) 2048 panic("completing page write wrapper that is not active"); 2049 2050 DEBUG_PAGE_ACCESS_START(fPage); 2051 2052 fPage->busy = false; 2053 // Set unbusy and notify later by hand, since we might free the page. 2054 2055 bool success = true; 2056 2057 if (result == B_OK) { 2058 // put it into the active/inactive queue 2059 move_page_to_appropriate_queue(fPage); 2060 fPage->busy_writing = false; 2061 DEBUG_PAGE_ACCESS_END(fPage); 2062 } else { 2063 // Writing the page failed. One reason would be that the cache has been 2064 // shrunk and the page does no longer belong to the file. Otherwise the 2065 // actual I/O failed, in which case we'll simply keep the page modified. 2066 2067 if (!fPage->busy_writing) { 2068 // The busy_writing flag was cleared. That means the cache has been 2069 // shrunk while we were trying to write the page and we have to free 2070 // it now. 2071 vm_remove_all_page_mappings(fPage); 2072 // TODO: Unmapping should already happen when resizing the cache! 2073 fCache->RemovePage(fPage); 2074 free_page(fPage, false); 2075 unreserve_pages(1); 2076 } else { 2077 // Writing the page failed -- mark the page modified and move it to 2078 // an appropriate queue other than the modified queue, so we don't 2079 // keep trying to write it over and over again. 
We keep 2080 // non-temporary pages in the modified queue, though, so they don't 2081 // get lost in the inactive queue. 2082 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 2083 strerror(result)); 2084 2085 fPage->modified = true; 2086 if (!fCache->temporary) 2087 set_page_state(fPage, PAGE_STATE_MODIFIED); 2088 else if (fPage->IsMapped()) 2089 set_page_state(fPage, PAGE_STATE_ACTIVE); 2090 else 2091 set_page_state(fPage, PAGE_STATE_INACTIVE); 2092 2093 fPage->busy_writing = false; 2094 DEBUG_PAGE_ACCESS_END(fPage); 2095 2096 success = false; 2097 } 2098 } 2099 2100 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 2101 fIsActive = false; 2102 2103 return success; 2104 } 2105 2106 2107 /*! The page's cache must be locked. 2108 */ 2109 void 2110 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 2111 { 2112 fRun = run; 2113 fCache = page->Cache(); 2114 fOffset = page->cache_offset; 2115 fPageCount = 1; 2116 fMaxPages = maxPages; 2117 fStatus = B_OK; 2118 2119 fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2120 fVecs[0].length = B_PAGE_SIZE; 2121 fVecCount = 1; 2122 } 2123 2124 2125 /*! The page's cache must be locked. 2126 */ 2127 bool 2128 PageWriteTransfer::AddPage(vm_page* page) 2129 { 2130 if (page->Cache() != fCache 2131 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 2132 return false; 2133 2134 phys_addr_t nextBase = fVecs[fVecCount - 1].base 2135 + fVecs[fVecCount - 1].length; 2136 2137 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2138 && (off_t)page->cache_offset == fOffset + fPageCount) { 2139 // append to last iovec 2140 fVecs[fVecCount - 1].length += B_PAGE_SIZE; 2141 fPageCount++; 2142 return true; 2143 } 2144 2145 nextBase = fVecs[0].base - B_PAGE_SIZE; 2146 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2147 && (off_t)page->cache_offset == fOffset - 1) { 2148 // prepend to first iovec and adjust offset 2149 fVecs[0].base = nextBase; 2150 fVecs[0].length += B_PAGE_SIZE; 2151 fOffset = page->cache_offset; 2152 fPageCount++; 2153 return true; 2154 } 2155 2156 if (((off_t)page->cache_offset == fOffset + fPageCount 2157 || (off_t)page->cache_offset == fOffset - 1) 2158 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 2159 // not physically contiguous or not in the right order 2160 uint32 vectorIndex; 2161 if ((off_t)page->cache_offset < fOffset) { 2162 // we are pre-pending another vector, move the other vecs 2163 for (uint32 i = fVecCount; i > 0; i--) 2164 fVecs[i] = fVecs[i - 1]; 2165 2166 fOffset = page->cache_offset; 2167 vectorIndex = 0; 2168 } else 2169 vectorIndex = fVecCount; 2170 2171 fVecs[vectorIndex].base 2172 = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2173 fVecs[vectorIndex].length = B_PAGE_SIZE; 2174 2175 fVecCount++; 2176 fPageCount++; 2177 return true; 2178 } 2179 2180 return false; 2181 } 2182 2183 2184 status_t 2185 PageWriteTransfer::Schedule(uint32 flags) 2186 { 2187 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 2188 generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT; 2189 2190 if (fRun != NULL) { 2191 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 2192 flags | B_PHYSICAL_IO_REQUEST, this); 2193 } 2194 2195 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 2196 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 2197 2198 SetStatus(status, writeLength); 2199 return fStatus; 2200 } 2201 2202 2203 void 2204 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 
2205 { 2206 // only succeed if all pages up to the last one have been written fully 2207 // and the last page has at least been written partially 2208 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2209 status = B_ERROR; 2210 2211 fStatus = status; 2212 } 2213 2214 2215 void 2216 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2217 generic_size_t bytesTransferred) 2218 { 2219 SetStatus(status, bytesTransferred); 2220 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2221 } 2222 2223 2224 status_t 2225 PageWriterRun::Init(uint32 maxPages) 2226 { 2227 fMaxPages = maxPages; 2228 fWrapperCount = 0; 2229 fTransferCount = 0; 2230 fPendingTransfers = 0; 2231 2232 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2233 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 2234 if (fWrappers == NULL || fTransfers == NULL) 2235 return B_NO_MEMORY; 2236 2237 return B_OK; 2238 } 2239 2240 2241 void 2242 PageWriterRun::PrepareNextRun() 2243 { 2244 fWrapperCount = 0; 2245 fTransferCount = 0; 2246 fPendingTransfers = 0; 2247 } 2248 2249 2250 /*! The page's cache must be locked. 2251 */ 2252 void 2253 PageWriterRun::AddPage(vm_page* page) 2254 { 2255 fWrappers[fWrapperCount++].SetTo(page); 2256 2257 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2258 fTransfers[fTransferCount++].SetTo(this, page, 2259 page->Cache()->MaxPagesPerAsyncWrite()); 2260 } 2261 } 2262 2263 2264 /*! Writes all pages previously added. 2265 \return The number of pages that could not be written or otherwise handled. 2266 */ 2267 uint32 2268 PageWriterRun::Go() 2269 { 2270 atomic_set(&fPendingTransfers, fTransferCount); 2271 2272 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2273 ConditionVariableEntry waitEntry; 2274 fAllFinishedCondition.Add(&waitEntry); 2275 2276 // schedule writes 2277 for (uint32 i = 0; i < fTransferCount; i++) 2278 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2279 2280 // wait until all pages have been written 2281 waitEntry.Wait(); 2282 2283 // mark pages depending on whether they could be written or not 2284 2285 uint32 failedPages = 0; 2286 uint32 wrapperIndex = 0; 2287 for (uint32 i = 0; i < fTransferCount; i++) { 2288 PageWriteTransfer& transfer = fTransfers[i]; 2289 transfer.Cache()->Lock(); 2290 2291 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2292 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2293 failedPages++; 2294 } 2295 2296 transfer.Cache()->Unlock(); 2297 } 2298 2299 ASSERT(wrapperIndex == fWrapperCount); 2300 2301 for (uint32 i = 0; i < fTransferCount; i++) { 2302 PageWriteTransfer& transfer = fTransfers[i]; 2303 struct VMCache* cache = transfer.Cache(); 2304 2305 // We've acquired a references for each page 2306 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2307 // We release the cache references after all pages were made 2308 // unbusy again - otherwise releasing a vnode could deadlock. 2309 cache->ReleaseStoreRef(); 2310 cache->ReleaseRef(); 2311 } 2312 } 2313 2314 return failedPages; 2315 } 2316 2317 2318 void 2319 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2320 bool partialTransfer, size_t bytesTransferred) 2321 { 2322 if (atomic_add(&fPendingTransfers, -1) == 1) 2323 fAllFinishedCondition.NotifyAll(); 2324 } 2325 2326 2327 /*! The page writer continuously takes some pages from the modified 2328 queue, writes them back, and moves them back to the active queue. 
2329 It runs in its own thread, and is only there to keep the number 2330 of modified pages low, so that more pages can be reused with 2331 fewer costs. 2332 */ 2333 status_t 2334 page_writer(void* /*unused*/) 2335 { 2336 const uint32 kNumPages = 256; 2337 #ifdef TRACE_VM_PAGE 2338 uint32 writtenPages = 0; 2339 bigtime_t lastWrittenTime = 0; 2340 bigtime_t pageCollectionTime = 0; 2341 bigtime_t pageWritingTime = 0; 2342 #endif 2343 2344 PageWriterRun run; 2345 if (run.Init(kNumPages) != B_OK) { 2346 panic("page writer: Failed to init PageWriterRun!"); 2347 return B_ERROR; 2348 } 2349 2350 page_num_t pagesSinceLastSuccessfulWrite = 0; 2351 2352 while (true) { 2353 // TODO: Maybe wait shorter when memory is low! 2354 if (sModifiedPageQueue.Count() < kNumPages) { 2355 sPageWriterCondition.Wait(3000000, true); 2356 // all 3 seconds when no one triggers us 2357 } 2358 2359 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2360 if (modifiedPages == 0) 2361 continue; 2362 2363 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2364 // We ran through the whole queue without being able to write a 2365 // single page. Take a break. 2366 snooze(500000); 2367 pagesSinceLastSuccessfulWrite = 0; 2368 } 2369 2370 #if ENABLE_SWAP_SUPPORT 2371 page_stats pageStats; 2372 get_page_stats(pageStats); 2373 bool activePaging = do_active_paging(pageStats); 2374 #endif 2375 2376 // depending on how urgent it becomes to get pages to disk, we adjust 2377 // our I/O priority 2378 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2379 int32 ioPriority = B_IDLE_PRIORITY; 2380 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2381 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2382 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2383 } else { 2384 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2385 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2386 } 2387 2388 thread_set_io_priority(ioPriority); 2389 2390 uint32 numPages = 0; 2391 run.PrepareNextRun(); 2392 2393 // TODO: make this laptop friendly, too (ie. only start doing 2394 // something if someone else did something or there is really 2395 // enough to do). 2396 2397 // collect pages to be written 2398 #ifdef TRACE_VM_PAGE 2399 pageCollectionTime -= system_time(); 2400 #endif 2401 2402 page_num_t maxPagesToSee = modifiedPages; 2403 2404 while (numPages < kNumPages && maxPagesToSee > 0) { 2405 vm_page *page = next_modified_page(maxPagesToSee); 2406 if (page == NULL) 2407 break; 2408 2409 PageCacheLocker cacheLocker(page, false); 2410 if (!cacheLocker.IsLocked()) 2411 continue; 2412 2413 VMCache *cache = page->Cache(); 2414 2415 // If the page is busy or its state has changed while we were 2416 // locking the cache, just ignore it. 2417 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2418 continue; 2419 2420 DEBUG_PAGE_ACCESS_START(page); 2421 2422 // Don't write back wired (locked) pages. 2423 if (page->WiredCount() > 0) { 2424 set_page_state(page, PAGE_STATE_ACTIVE); 2425 DEBUG_PAGE_ACCESS_END(page); 2426 continue; 2427 } 2428 2429 // Write back temporary pages only when we're actively paging. 2430 if (cache->temporary 2431 #if ENABLE_SWAP_SUPPORT 2432 && (!activePaging 2433 || !cache->CanWritePage( 2434 (off_t)page->cache_offset << PAGE_SHIFT)) 2435 #endif 2436 ) { 2437 // We can't/don't want to do anything with this page, so move it 2438 // to one of the other queues. 
2439 if (page->mappings.IsEmpty()) 2440 set_page_state(page, PAGE_STATE_INACTIVE); 2441 else 2442 set_page_state(page, PAGE_STATE_ACTIVE); 2443 2444 DEBUG_PAGE_ACCESS_END(page); 2445 continue; 2446 } 2447 2448 // We need our own reference to the store, as it might currently be 2449 // destroyed. 2450 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2451 DEBUG_PAGE_ACCESS_END(page); 2452 cacheLocker.Unlock(); 2453 thread_yield(); 2454 continue; 2455 } 2456 2457 run.AddPage(page); 2458 // TODO: We're possibly adding pages of different caches and 2459 // thus maybe of different underlying file systems here. This 2460 // is a potential problem for loop file systems/devices, since 2461 // we could mark a page busy that would need to be accessed 2462 // when writing back another page, thus causing a deadlock. 2463 2464 DEBUG_PAGE_ACCESS_END(page); 2465 2466 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2467 TPW(WritePage(page)); 2468 2469 cache->AcquireRefLocked(); 2470 numPages++; 2471 } 2472 2473 #ifdef TRACE_VM_PAGE 2474 pageCollectionTime += system_time(); 2475 #endif 2476 if (numPages == 0) 2477 continue; 2478 2479 // write pages to disk and do all the cleanup 2480 #ifdef TRACE_VM_PAGE 2481 pageWritingTime -= system_time(); 2482 #endif 2483 uint32 failedPages = run.Go(); 2484 #ifdef TRACE_VM_PAGE 2485 pageWritingTime += system_time(); 2486 2487 // debug output only... 2488 writtenPages += numPages; 2489 if (writtenPages >= 1024) { 2490 bigtime_t now = system_time(); 2491 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2492 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2493 (now - lastWrittenTime) / 1000, 2494 pageCollectionTime / 1000, pageWritingTime / 1000)); 2495 lastWrittenTime = now; 2496 2497 writtenPages -= 1024; 2498 pageCollectionTime = 0; 2499 pageWritingTime = 0; 2500 } 2501 #endif 2502 2503 if (failedPages == numPages) 2504 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2505 else 2506 pagesSinceLastSuccessfulWrite = 0; 2507 } 2508 2509 return B_OK; 2510 } 2511 2512 2513 // #pragma mark - 2514 2515 2516 // TODO: This should be done in the page daemon! 2517 #if 0 2518 #if ENABLE_SWAP_SUPPORT 2519 static bool 2520 free_page_swap_space(int32 index) 2521 { 2522 vm_page *page = vm_page_at_index(index); 2523 PageCacheLocker locker(page); 2524 if (!locker.IsLocked()) 2525 return false; 2526 2527 DEBUG_PAGE_ACCESS_START(page); 2528 2529 VMCache* cache = page->Cache(); 2530 if (cache->temporary && page->WiredCount() == 0 2531 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2532 && page->usage_count > 0) { 2533 // TODO: how to judge a page is highly active? 2534 if (swap_free_page_swap_space(page)) { 2535 // We need to mark the page modified, since otherwise it could be 2536 // stolen and we'd lose its data. 
2537 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2538 TD(FreedPageSwap(page)); 2539 DEBUG_PAGE_ACCESS_END(page); 2540 return true; 2541 } 2542 } 2543 DEBUG_PAGE_ACCESS_END(page); 2544 return false; 2545 } 2546 #endif 2547 #endif // 0 2548 2549 2550 static vm_page * 2551 find_cached_page_candidate(struct vm_page &marker) 2552 { 2553 DEBUG_PAGE_ACCESS_CHECK(&marker); 2554 2555 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2556 vm_page *page; 2557 2558 if (marker.State() == PAGE_STATE_UNUSED) { 2559 // Get the first free pages of the (in)active queue 2560 page = sCachedPageQueue.Head(); 2561 } else { 2562 // Get the next page of the current queue 2563 if (marker.State() != PAGE_STATE_CACHED) { 2564 panic("invalid marker %p state", &marker); 2565 return NULL; 2566 } 2567 2568 page = sCachedPageQueue.Next(&marker); 2569 sCachedPageQueue.Remove(&marker); 2570 marker.SetState(PAGE_STATE_UNUSED); 2571 } 2572 2573 while (page != NULL) { 2574 if (!page->busy) { 2575 // we found a candidate, insert marker 2576 marker.SetState(PAGE_STATE_CACHED); 2577 sCachedPageQueue.InsertAfter(page, &marker); 2578 return page; 2579 } 2580 2581 page = sCachedPageQueue.Next(page); 2582 } 2583 2584 return NULL; 2585 } 2586 2587 2588 static bool 2589 free_cached_page(vm_page *page, bool dontWait) 2590 { 2591 // try to lock the page's cache 2592 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2593 return false; 2594 VMCache* cache = page->Cache(); 2595 2596 AutoLocker<VMCache> cacheLocker(cache, true); 2597 MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache); 2598 2599 // check again if that page is still a candidate 2600 if (page->busy || page->State() != PAGE_STATE_CACHED) 2601 return false; 2602 2603 DEBUG_PAGE_ACCESS_START(page); 2604 2605 PAGE_ASSERT(page, !page->IsMapped()); 2606 PAGE_ASSERT(page, !page->modified); 2607 2608 // we can now steal this page 2609 2610 cache->RemovePage(page); 2611 // Now the page doesn't have cache anymore, so no one else (e.g. 2612 // vm_page_allocate_page_run() can pick it up), since they would be 2613 // required to lock the cache first, which would fail. 2614 2615 sCachedPageQueue.RemoveUnlocked(page); 2616 return true; 2617 } 2618 2619 2620 static uint32 2621 free_cached_pages(uint32 pagesToFree, bool dontWait) 2622 { 2623 vm_page marker; 2624 init_page_marker(marker); 2625 2626 uint32 pagesFreed = 0; 2627 2628 while (pagesFreed < pagesToFree) { 2629 vm_page *page = find_cached_page_candidate(marker); 2630 if (page == NULL) 2631 break; 2632 2633 if (free_cached_page(page, dontWait)) { 2634 ReadLocker locker(sFreePageQueuesLock); 2635 page->SetState(PAGE_STATE_FREE); 2636 DEBUG_PAGE_ACCESS_END(page); 2637 sFreePageQueue.PrependUnlocked(page); 2638 locker.Unlock(); 2639 2640 TA(StolenPage()); 2641 2642 pagesFreed++; 2643 } 2644 } 2645 2646 remove_page_marker(marker); 2647 2648 sFreePageCondition.NotifyAll(); 2649 2650 return pagesFreed; 2651 } 2652 2653 2654 static void 2655 idle_scan_active_pages(page_stats& pageStats) 2656 { 2657 VMPageQueue& queue = sActivePageQueue; 2658 2659 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2660 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2661 2662 while (maxToScan > 0) { 2663 maxToScan--; 2664 2665 // Get the next page. Note that we don't bother to lock here. We go with 2666 // the assumption that on all architectures reading/writing pointers is 2667 // atomic. Beyond that it doesn't really matter. 
		// We have to unlock the queue anyway to lock the page's cache, and
		// we'll recheck afterwards.
		vm_page* page = queue.Head();
		if (page == NULL)
			break;

		// lock the page's cache
		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
		if (cache == NULL)
			continue;

		if (page->State() != PAGE_STATE_ACTIVE) {
			// page is no longer in the cache or in this queue
			cache->ReleaseRefAndUnlock();
			continue;
		}

		if (page->busy) {
			// page is busy -- requeue at the end
			vm_page_requeue(page, true);
			cache->ReleaseRefAndUnlock();
			continue;
		}

		DEBUG_PAGE_ACCESS_START(page);

		// Get the page active/modified flags and update the page's usage
		// count. We completely unmap inactive temporary pages. This saves us
		// from having to iterate through the inactive list as well, since
		// we'll be notified via page fault whenever such an inactive page is
		// used again. We don't remove the mappings of non-temporary pages,
		// since we wouldn't notice when those would become unused and could
		// thus be moved to the cached list.
		int32 usageCount;
		if (page->WiredCount() > 0 || page->usage_count > 0
			|| !cache->temporary) {
			usageCount = vm_clear_page_mapping_accessed_flags(page);
		} else
			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);

		if (usageCount > 0) {
			usageCount += page->usage_count + kPageUsageAdvance;
			if (usageCount > kPageUsageMax)
				usageCount = kPageUsageMax;
			// TODO: This would probably also be the place to reclaim swap
			// space.
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount < 0) {
				usageCount = 0;
				set_page_state(page, PAGE_STATE_INACTIVE);
			}
		}

		page->usage_count = usageCount;

		DEBUG_PAGE_ACCESS_END(page);

		cache->ReleaseRefAndUnlock();
	}
}


static void
full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
{
	int32 pagesToFree = pageStats.unsatisfiedReservations
		+ sFreeOrCachedPagesTarget
		- (pageStats.totalFreePages + pageStats.cachedPages);
	if (pagesToFree <= 0)
		return;

	bigtime_t time = system_time();
	uint32 pagesScanned = 0;
	uint32 pagesToCached = 0;
	uint32 pagesToModified = 0;
	uint32 pagesToActive = 0;

	// Determine how many pages at maximum to send to the modified queue. Since
	// it is relatively expensive to page out pages, we do that on a grander
	// scale only when things get desperate.
	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;

	vm_page marker;
	init_page_marker(marker);

	VMPageQueue& queue = sInactivePageQueue;
	InterruptsSpinLocker queueLocker(queue.GetLock());
	uint32 maxToScan = queue.Count();

	vm_page* nextPage = queue.Head();

	while (pagesToFree > 0 && maxToScan > 0) {
		maxToScan--;

		// get the next page
		vm_page* page = nextPage;
		if (page == NULL)
			break;
		nextPage = queue.Next(page);

		if (page->busy)
			continue;

		// mark the position
		queue.InsertAfter(page, &marker);
		queueLocker.Unlock();

		// lock the page's cache
		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
		if (cache == NULL || page->busy
			|| page->State() != PAGE_STATE_INACTIVE) {
			if (cache != NULL)
				cache->ReleaseRefAndUnlock();
			queueLocker.Lock();
			nextPage = queue.Next(&marker);
			queue.Remove(&marker);
			continue;
		}

		pagesScanned++;

		DEBUG_PAGE_ACCESS_START(page);

		// Get the accessed count, clear the accessed/modified flags and
		// unmap the page, if it hasn't been accessed.
		int32 usageCount;
		if (page->WiredCount() > 0)
			usageCount = vm_clear_page_mapping_accessed_flags(page);
		else
			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);

		// update usage count
		if (usageCount > 0) {
			usageCount += page->usage_count + kPageUsageAdvance;
			if (usageCount > kPageUsageMax)
				usageCount = kPageUsageMax;
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount < 0)
				usageCount = 0;
		}

		page->usage_count = usageCount;

		// Move to fitting queue or requeue:
		// * Active mapped pages go to the active queue.
		// * Inactive mapped (i.e. wired) pages are requeued.
		// * The remaining pages are cachable. Thus, if unmodified they go to
		//   the cached queue, otherwise to the modified queue (up to a limit).
		//   Note that, unlike in the idle scanning, we don't exempt pages of
		//   temporary caches. Apparently we really need memory, so we better
		//   page out memory as well.
2819 bool isMapped = page->IsMapped(); 2820 if (usageCount > 0) { 2821 if (isMapped) { 2822 set_page_state(page, PAGE_STATE_ACTIVE); 2823 pagesToActive++; 2824 } else 2825 vm_page_requeue(page, true); 2826 } else if (isMapped) { 2827 vm_page_requeue(page, true); 2828 } else if (!page->modified) { 2829 set_page_state(page, PAGE_STATE_CACHED); 2830 pagesToFree--; 2831 pagesToCached++; 2832 } else if (maxToFlush > 0) { 2833 set_page_state(page, PAGE_STATE_MODIFIED); 2834 maxToFlush--; 2835 pagesToModified++; 2836 } else 2837 vm_page_requeue(page, true); 2838 2839 DEBUG_PAGE_ACCESS_END(page); 2840 2841 cache->ReleaseRefAndUnlock(); 2842 2843 // remove the marker 2844 queueLocker.Lock(); 2845 nextPage = queue.Next(&marker); 2846 queue.Remove(&marker); 2847 } 2848 2849 queueLocker.Unlock(); 2850 2851 time = system_time() - time; 2852 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2853 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2854 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2855 pagesToModified, pagesToActive); 2856 2857 // wake up the page writer, if we tossed it some pages 2858 if (pagesToModified > 0) 2859 sPageWriterCondition.WakeUp(); 2860 } 2861 2862 2863 static void 2864 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2865 { 2866 vm_page marker; 2867 init_page_marker(marker); 2868 2869 VMPageQueue& queue = sActivePageQueue; 2870 InterruptsSpinLocker queueLocker(queue.GetLock()); 2871 uint32 maxToScan = queue.Count(); 2872 2873 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2874 + sFreeOrCachedPagesTarget 2875 - (pageStats.totalFreePages + pageStats.cachedPages) 2876 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2877 if (pagesToDeactivate <= 0) 2878 return; 2879 2880 bigtime_t time = system_time(); 2881 uint32 pagesAccessed = 0; 2882 uint32 pagesToInactive = 0; 2883 uint32 pagesScanned = 0; 2884 2885 vm_page* nextPage = queue.Head(); 2886 2887 while (pagesToDeactivate > 0 && maxToScan > 0) { 2888 maxToScan--; 2889 2890 // get the next page 2891 vm_page* page = nextPage; 2892 if (page == NULL) 2893 break; 2894 nextPage = queue.Next(page); 2895 2896 if (page->busy) 2897 continue; 2898 2899 // mark the position 2900 queue.InsertAfter(page, &marker); 2901 queueLocker.Unlock(); 2902 2903 // lock the page's cache 2904 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2905 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2906 if (cache != NULL) 2907 cache->ReleaseRefAndUnlock(); 2908 queueLocker.Lock(); 2909 nextPage = queue.Next(&marker); 2910 queue.Remove(&marker); 2911 continue; 2912 } 2913 2914 pagesScanned++; 2915 2916 DEBUG_PAGE_ACCESS_START(page); 2917 2918 // Get the page active/modified flags and update the page's usage count. 2919 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2920 2921 if (usageCount > 0) { 2922 usageCount += page->usage_count + kPageUsageAdvance; 2923 if (usageCount > kPageUsageMax) 2924 usageCount = kPageUsageMax; 2925 pagesAccessed++; 2926 // TODO: This would probably also be the place to reclaim swap space. 
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount <= 0) {
				usageCount = 0;
				set_page_state(page, PAGE_STATE_INACTIVE);
				pagesToInactive++;
			}
		}

		page->usage_count = usageCount;

		DEBUG_PAGE_ACCESS_END(page);

		cache->ReleaseRefAndUnlock();

		// remove the marker
		queueLocker.Lock();
		nextPage = queue.Next(&marker);
		queue.Remove(&marker);
	}

	time = system_time() - time;
	TRACE_DAEMON("  -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
}


static void
page_daemon_idle_scan(page_stats& pageStats)
{
	TRACE_DAEMON("page daemon: idle run\n");

	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
		// We want more actually free pages, so free some from the cached
		// ones.
		uint32 freed = free_cached_pages(
			sFreePagesTarget - pageStats.totalFreePages, false);
		if (freed > 0)
			unreserve_pages(freed);
		get_page_stats(pageStats);
	}

	// Walk the active list and move pages to the inactive queue.
	get_page_stats(pageStats);
	idle_scan_active_pages(pageStats);
}


static void
page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
{
	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
		pageStats.cachedPages, pageStats.unsatisfiedReservations
			+ sFreeOrCachedPagesTarget
			- (pageStats.totalFreePages + pageStats.cachedPages));

	// Walk the inactive list and transfer pages to the cached and modified
	// queues.
	full_scan_inactive_pages(pageStats, despairLevel);

	// Free cached pages. Also wake up reservation waiters.
	get_page_stats(pageStats);
	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
		- (pageStats.totalFreePages);
	if (pagesToFree > 0) {
		uint32 freed = free_cached_pages(pagesToFree, true);
		if (freed > 0)
			unreserve_pages(freed);
	}

	// Walk the active list and move pages to the inactive queue.
	get_page_stats(pageStats);
	full_scan_active_pages(pageStats, despairLevel);
}


static status_t
page_daemon(void* /*unused*/)
{
	int32 despairLevel = 0;

	while (true) {
		sPageDaemonCondition.ClearActivated();

		// evaluate the free pages situation
		page_stats pageStats;
		get_page_stats(pageStats);

		if (!do_active_paging(pageStats)) {
			// Things look good -- just maintain statistics and keep the pool
			// of actually free pages full enough.
			despairLevel = 0;
			page_daemon_idle_scan(pageStats);
			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
		} else {
			// Not enough free pages. We need to do some real work.
			despairLevel = std::min(despairLevel + 1, (int32)3);
			page_daemon_full_scan(pageStats, despairLevel);

			// Don't wait after the first full scan, but rather immediately
			// check whether we were successful in freeing enough pages and
			// re-run with increased despair level. The first scan is
			// conservative with respect to moving inactive modified pages to
			// the modified list to avoid thrashing. The second scan, however,
			// will not hold back.
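			// (With the despair level capped at 3 above, consecutive full
			// scans run at level 1 -- conservative, no pause -- and only from
			// level 2 on take the pause below between runs.)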
3034 if (despairLevel > 1) 3035 snooze(kBusyScanWaitInterval); 3036 } 3037 } 3038 3039 return B_OK; 3040 } 3041 3042 3043 /*! Returns how many pages could *not* be reserved. 3044 */ 3045 static uint32 3046 reserve_pages(uint32 count, int priority, bool dontWait) 3047 { 3048 int32 dontTouch = kPageReserveForPriority[priority]; 3049 3050 while (true) { 3051 count -= reserve_some_pages(count, dontTouch); 3052 if (count == 0) 3053 return 0; 3054 3055 if (sUnsatisfiedPageReservations == 0) { 3056 count -= free_cached_pages(count, dontWait); 3057 if (count == 0) 3058 return count; 3059 } 3060 3061 if (dontWait) 3062 return count; 3063 3064 // we need to wait for pages to become available 3065 3066 MutexLocker pageDeficitLocker(sPageDeficitLock); 3067 3068 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3069 sUnsatisfiedPageReservations += count; 3070 3071 if (atomic_get(&sUnreservedFreePages) > dontTouch) { 3072 // the situation changed 3073 sUnsatisfiedPageReservations -= count; 3074 continue; 3075 } 3076 3077 PageReservationWaiter waiter; 3078 waiter.dontTouch = dontTouch; 3079 waiter.missing = count; 3080 waiter.thread = thread_get_current_thread(); 3081 waiter.threadPriority = waiter.thread->priority; 3082 3083 // insert ordered (i.e. after all waiters with higher or equal priority) 3084 PageReservationWaiter* otherWaiter = NULL; 3085 for (PageReservationWaiterList::Iterator it 3086 = sPageReservationWaiters.GetIterator(); 3087 (otherWaiter = it.Next()) != NULL;) { 3088 if (waiter < *otherWaiter) 3089 break; 3090 } 3091 3092 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3093 3094 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3095 "waiting for pages"); 3096 3097 if (notifyDaemon) 3098 sPageDaemonCondition.WakeUp(); 3099 3100 pageDeficitLocker.Unlock(); 3101 3102 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3103 thread_block(); 3104 3105 pageDeficitLocker.Lock(); 3106 3107 return 0; 3108 } 3109 } 3110 3111 3112 // #pragma mark - private kernel API 3113 3114 3115 /*! Writes a range of modified pages of a cache to disk. 3116 You need to hold the VMCache lock when calling this function. 3117 Note that the cache lock is released in this function. 3118 \param cache The cache. 3119 \param firstPage Offset (in page size units) of the first page in the range. 3120 \param endPage End offset (in page size units) of the page range. The page 3121 at this offset is not included. 
3122 */ 3123 status_t 3124 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3125 uint32 endPage) 3126 { 3127 static const int32 kMaxPages = 256; 3128 int32 maxPages = cache->MaxPagesPerWrite(); 3129 if (maxPages < 0 || maxPages > kMaxPages) 3130 maxPages = kMaxPages; 3131 3132 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3133 | HEAP_DONT_LOCK_KERNEL_SPACE; 3134 3135 PageWriteWrapper stackWrappersPool[2]; 3136 PageWriteWrapper* stackWrappers[1]; 3137 PageWriteWrapper* wrapperPool 3138 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3139 PageWriteWrapper** wrappers 3140 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages]; 3141 if (wrapperPool == NULL || wrappers == NULL) { 3142 // don't fail, just limit our capabilities 3143 delete[] wrapperPool; 3144 delete[] wrappers; 3145 wrapperPool = stackWrappersPool; 3146 wrappers = stackWrappers; 3147 maxPages = 1; 3148 } 3149 3150 int32 nextWrapper = 0; 3151 int32 usedWrappers = 0; 3152 3153 PageWriteTransfer transfer; 3154 bool transferEmpty = true; 3155 3156 VMCachePagesTree::Iterator it 3157 = cache->pages.GetIterator(firstPage, true, true); 3158 3159 while (true) { 3160 vm_page* page = it.Next(); 3161 if (page == NULL || page->cache_offset >= endPage) { 3162 if (transferEmpty) 3163 break; 3164 3165 page = NULL; 3166 } 3167 3168 if (page != NULL) { 3169 if (page->busy 3170 || (page->State() != PAGE_STATE_MODIFIED 3171 && !vm_test_map_modification(page))) { 3172 page = NULL; 3173 } 3174 } 3175 3176 PageWriteWrapper* wrapper = NULL; 3177 if (page != NULL) { 3178 wrapper = &wrapperPool[nextWrapper++]; 3179 if (nextWrapper > maxPages) 3180 nextWrapper = 0; 3181 3182 DEBUG_PAGE_ACCESS_START(page); 3183 3184 wrapper->SetTo(page); 3185 3186 if (transferEmpty || transfer.AddPage(page)) { 3187 if (transferEmpty) { 3188 transfer.SetTo(NULL, page, maxPages); 3189 transferEmpty = false; 3190 } 3191 3192 DEBUG_PAGE_ACCESS_END(page); 3193 3194 wrappers[usedWrappers++] = wrapper; 3195 continue; 3196 } 3197 3198 DEBUG_PAGE_ACCESS_END(page); 3199 } 3200 3201 if (transferEmpty) 3202 continue; 3203 3204 cache->Unlock(); 3205 status_t status = transfer.Schedule(0); 3206 cache->Lock(); 3207 3208 for (int32 i = 0; i < usedWrappers; i++) 3209 wrappers[i]->Done(status); 3210 3211 usedWrappers = 0; 3212 3213 if (page != NULL) { 3214 transfer.SetTo(NULL, page, maxPages); 3215 wrappers[usedWrappers++] = wrapper; 3216 } else 3217 transferEmpty = true; 3218 } 3219 3220 if (wrapperPool != stackWrappersPool) { 3221 delete[] wrapperPool; 3222 delete[] wrappers; 3223 } 3224 3225 return B_OK; 3226 } 3227 3228 3229 /*! You need to hold the VMCache lock when calling this function. 3230 Note that the cache lock is released in this function. 3231 */ 3232 status_t 3233 vm_page_write_modified_pages(VMCache *cache) 3234 { 3235 return vm_page_write_modified_page_range(cache, 0, 3236 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3237 } 3238 3239 3240 /*! Schedules the page writer to write back the specified \a page. 3241 Note, however, that it might not do this immediately, and it can well 3242 take several seconds until the page is actually written out. 3243 */ 3244 void 3245 vm_page_schedule_write_page(vm_page *page) 3246 { 3247 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3248 3249 vm_page_requeue(page, false); 3250 3251 sPageWriterCondition.WakeUp(); 3252 } 3253 3254 3255 /*! Cache must be locked. 
3256 */ 3257 void 3258 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3259 uint32 endPage) 3260 { 3261 uint32 modified = 0; 3262 for (VMCachePagesTree::Iterator it 3263 = cache->pages.GetIterator(firstPage, true, true); 3264 vm_page *page = it.Next();) { 3265 if (page->cache_offset >= endPage) 3266 break; 3267 3268 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3269 DEBUG_PAGE_ACCESS_START(page); 3270 vm_page_requeue(page, false); 3271 modified++; 3272 DEBUG_PAGE_ACCESS_END(page); 3273 } 3274 } 3275 3276 if (modified > 0) 3277 sPageWriterCondition.WakeUp(); 3278 } 3279 3280 3281 void 3282 vm_page_init_num_pages(kernel_args *args) 3283 { 3284 // calculate the size of memory by looking at the physical_memory_range array 3285 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3286 page_num_t physicalPagesEnd = sPhysicalPageOffset 3287 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3288 3289 sNonExistingPages = 0; 3290 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3291 3292 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3293 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3294 if (start > physicalPagesEnd) 3295 sNonExistingPages += start - physicalPagesEnd; 3296 physicalPagesEnd = start 3297 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3298 3299 #ifdef LIMIT_AVAILABLE_MEMORY 3300 page_num_t available 3301 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3302 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3303 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3304 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3305 break; 3306 } 3307 #endif 3308 } 3309 3310 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3311 sPhysicalPageOffset, physicalPagesEnd)); 3312 3313 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3314 } 3315 3316 3317 status_t 3318 vm_page_init(kernel_args *args) 3319 { 3320 TRACE(("vm_page_init: entry\n")); 3321 3322 // init page queues 3323 sModifiedPageQueue.Init("modified pages queue"); 3324 sInactivePageQueue.Init("inactive pages queue"); 3325 sActivePageQueue.Init("active pages queue"); 3326 sCachedPageQueue.Init("cached pages queue"); 3327 sFreePageQueue.Init("free pages queue"); 3328 sClearPageQueue.Init("clear pages queue"); 3329 3330 new (&sPageReservationWaiters) PageReservationWaiterList; 3331 3332 // map in the new free page table 3333 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3334 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3335 3336 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3337 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3338 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3339 3340 // initialize the free page table 3341 for (uint32 i = 0; i < sNumPages; i++) { 3342 sPages[i].Init(sPhysicalPageOffset + i); 3343 sFreePageQueue.Append(&sPages[i]); 3344 3345 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3346 sPages[i].allocation_tracking_info.Clear(); 3347 #endif 3348 } 3349 3350 sUnreservedFreePages = sNumPages; 3351 3352 TRACE(("initialized table\n")); 3353 3354 // mark the ranges between usable physical memory unused 3355 phys_addr_t previousEnd = 0; 3356 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3357 phys_addr_t base = args->physical_memory_range[i].start; 3358 phys_size_t size = args->physical_memory_range[i].size; 3359 if (base > previousEnd) { 3360 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3361 (base - previousEnd) / B_PAGE_SIZE, false); 3362 } 3363 previousEnd = base + size; 3364 } 3365 3366 // mark the allocated physical page ranges wired 3367 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3368 mark_page_range_in_use( 3369 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3370 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3371 } 3372 3373 // The target of actually free pages. This must be at least the system 3374 // reserve, but should be a few more pages, so we don't have to extract 3375 // a cached page with each allocation. 3376 sFreePagesTarget = VM_PAGE_RESERVE_USER 3377 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3378 3379 // The target of free + cached and inactive pages. On low-memory machines 3380 // keep things tight. free + cached is the pool of immediately allocatable 3381 // pages. We want a few inactive pages, so when we're actually paging, we 3382 // have a reasonably large set of pages to work with. 3383 if (sUnreservedFreePages < 16 * 1024) { 3384 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3385 sInactivePagesTarget = sFreePagesTarget / 3; 3386 } else { 3387 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3388 sInactivePagesTarget = sFreePagesTarget / 2; 3389 } 3390 3391 TRACE(("vm_page_init: exit\n")); 3392 3393 return B_OK; 3394 } 3395 3396 3397 status_t 3398 vm_page_init_post_area(kernel_args *args) 3399 { 3400 void *dummy; 3401 3402 dummy = sPages; 3403 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3404 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3405 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3406 3407 add_debugger_command("list_pages", &dump_page_list, 3408 "List physical pages"); 3409 add_debugger_command("page_stats", &dump_page_stats, 3410 "Dump statistics about page usage"); 3411 add_debugger_command_etc("page", &dump_page_long, 3412 "Dump page info", 3413 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3414 "Prints information for the physical page. If neither \"-p\" nor\n" 3415 "\"-v\" are given, the provided address is interpreted as address of\n" 3416 "the vm_page data structure for the page in question. If \"-p\" is\n" 3417 "given, the address is the physical address of the page. If \"-v\" is\n" 3418 "given, the address is interpreted as virtual address in the current\n" 3419 "thread's address space and for the page it is mapped to (if any)\n" 3420 "information are printed. 
If \"-m\" is specified, the command will\n" 3421 "search all known address spaces for mappings to that page and print\n" 3422 "them.\n", 0); 3423 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3424 add_debugger_command("find_page", &find_page, 3425 "Find out which queue a page is actually in"); 3426 3427 #ifdef TRACK_PAGE_USAGE_STATS 3428 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3429 "Dumps statistics about page usage counts", 3430 "\n" 3431 "Dumps statistics about page usage counts.\n", 3432 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3433 #endif 3434 3435 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3436 add_debugger_command_etc("page_allocations_per_caller", 3437 &dump_page_allocations_per_caller, 3438 "Dump current page allocations summed up per caller", 3439 "[ -d <caller> ] [ -r ]\n" 3440 "The current allocations will by summed up by caller (their count)\n" 3441 "printed in decreasing order by count.\n" 3442 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3443 "including the respective stack trace.\n" 3444 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3445 "the information, so the next command invocation will only show the\n" 3446 "allocations made after the reset.\n", 0); 3447 add_debugger_command_etc("page_allocation_infos", 3448 &dump_page_allocation_infos, 3449 "Dump current page allocations", 3450 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3451 "[ --thread <thread ID> ]\n" 3452 "The current allocations filtered by optional values will be printed.\n" 3453 "The optional \"-p\" page number filters for a specific page,\n" 3454 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3455 "and/or threads can be filtered (these only work if a corresponding\n" 3456 "tracing entry is still available).\n" 3457 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3458 "callers are printed, where available\n", 0); 3459 #endif 3460 3461 return B_OK; 3462 } 3463 3464 3465 status_t 3466 vm_page_init_post_thread(kernel_args *args) 3467 { 3468 new (&sFreePageCondition) ConditionVariable; 3469 3470 // create a kernel thread to clear out pages 3471 3472 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3473 B_LOWEST_ACTIVE_PRIORITY, NULL); 3474 resume_thread(thread); 3475 3476 // start page writer 3477 3478 sPageWriterCondition.Init("page writer"); 3479 3480 thread = spawn_kernel_thread(&page_writer, "page writer", 3481 B_NORMAL_PRIORITY + 1, NULL); 3482 resume_thread(thread); 3483 3484 // start page daemon 3485 3486 sPageDaemonCondition.Init("page daemon"); 3487 3488 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3489 B_NORMAL_PRIORITY, NULL); 3490 resume_thread(thread); 3491 3492 return B_OK; 3493 } 3494 3495 3496 status_t 3497 vm_mark_page_inuse(page_num_t page) 3498 { 3499 return vm_mark_page_range_inuse(page, 1); 3500 } 3501 3502 3503 status_t 3504 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3505 { 3506 return mark_page_range_in_use(startPage, length, false); 3507 } 3508 3509 3510 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3511 */ 3512 void 3513 vm_page_unreserve_pages(vm_page_reservation* reservation) 3514 { 3515 uint32 count = reservation->count; 3516 reservation->count = 0; 3517 3518 if (count == 0) 3519 return; 3520 3521 TA(UnreservePages(count)); 3522 3523 unreserve_pages(count); 3524 } 3525 3526 3527 /*! With this call, you can reserve a number of free pages in the system. 
3528 They will only be handed out to someone who has actually reserved them. 3529 This call returns as soon as the number of requested pages has been 3530 reached. 3531 The caller must not hold any cache lock or the function might deadlock. 3532 */ 3533 void 3534 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3535 int priority) 3536 { 3537 reservation->count = count; 3538 3539 if (count == 0) 3540 return; 3541 3542 TA(ReservePages(count)); 3543 3544 reserve_pages(count, priority, false); 3545 } 3546 3547 3548 bool 3549 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3550 int priority) 3551 { 3552 if (count == 0) { 3553 reservation->count = count; 3554 return true; 3555 } 3556 3557 uint32 remaining = reserve_pages(count, priority, true); 3558 if (remaining == 0) { 3559 TA(ReservePages(count)); 3560 reservation->count = count; 3561 return true; 3562 } 3563 3564 unreserve_pages(count - remaining); 3565 3566 return false; 3567 } 3568 3569 3570 vm_page * 3571 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3572 { 3573 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3574 ASSERT(pageState != PAGE_STATE_FREE); 3575 ASSERT(pageState != PAGE_STATE_CLEAR); 3576 3577 ASSERT(reservation->count > 0); 3578 reservation->count--; 3579 3580 VMPageQueue* queue; 3581 VMPageQueue* otherQueue; 3582 3583 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3584 queue = &sClearPageQueue; 3585 otherQueue = &sFreePageQueue; 3586 } else { 3587 queue = &sFreePageQueue; 3588 otherQueue = &sClearPageQueue; 3589 } 3590 3591 ReadLocker locker(sFreePageQueuesLock); 3592 3593 vm_page* page = queue->RemoveHeadUnlocked(); 3594 if (page == NULL) { 3595 // if the primary queue was empty, grab the page from the 3596 // secondary queue 3597 page = otherQueue->RemoveHeadUnlocked(); 3598 3599 if (page == NULL) { 3600 // Unlikely, but possible: the page we have reserved has moved 3601 // between the queues after we checked the first queue. Grab the 3602 // write locker to make sure this doesn't happen again. 
			locker.Unlock();
			WriteLocker writeLocker(sFreePageQueuesLock);

			page = queue->RemoveHead();
			if (page == NULL)
				page = otherQueue->RemoveHead();

			if (page == NULL) {
				panic("Had reserved page, but there is none!");
				return NULL;
			}

			// downgrade to read lock
			locker.Lock();
		}
	}

	if (page->CacheRef() != NULL)
		panic("supposed to be free page %p has cache\n", page);

	DEBUG_PAGE_ACCESS_START(page);

	int oldPageState = page->State();
	page->SetState(pageState);
	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
	page->usage_count = 0;
	page->accessed = false;
	page->modified = false;

	locker.Unlock();

	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
		sPageQueues[pageState].AppendUnlocked(page);

	// clear the page, if we had to take it from the free queue and a clear
	// page was requested
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
		clear_page(page);

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	page->allocation_tracking_info.Init(
		TA(AllocatePage(page->physical_page_number)));
#else
	TA(AllocatePage(page->physical_page_number));
#endif

	return page;
}


static void
allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
	VMPageQueue::PageList& clearPages)
{
	while (vm_page* page = freePages.RemoveHead()) {
		page->busy = false;
		page->SetState(PAGE_STATE_FREE);
		DEBUG_PAGE_ACCESS_END(page);
		sFreePageQueue.PrependUnlocked(page);
	}

	while (vm_page* page = clearPages.RemoveHead()) {
		page->busy = false;
		page->SetState(PAGE_STATE_CLEAR);
		DEBUG_PAGE_ACCESS_END(page);
		sClearPageQueue.PrependUnlocked(page);
	}

	sFreePageCondition.NotifyAll();
}


/*!	Tries to allocate a contiguous run of \a length pages starting at
	index \a start.

	The caller must have write-locked the free/clear page queues. The function
	will unlock regardless of whether it succeeds or fails.

	If the function fails, it cleans up after itself, i.e. it will free all
	pages it managed to allocate.

	\param start The start index (into \c sPages) of the run.
	\param length The number of pages to allocate.
	\param flags Page allocation flags. Encodes the state the function shall
		set the allocated pages to, whether the pages shall be marked busy
		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
		(VM_PAGE_ALLOC_CLEAR).
	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
		in locked state. Will be unlocked by the function.
	\return The index of the first page that could not be allocated. \a length
		is returned when the function was successful.
*/
static page_num_t
allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
	WriteLocker& freeClearQueueLocker)
{
	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
	ASSERT(pageState != PAGE_STATE_FREE);
	ASSERT(pageState != PAGE_STATE_CLEAR);
	ASSERT(start + length <= sNumPages);

	// Pull the free/clear pages out of their respective queues. Cached pages
	// are allocated later.
3706 page_num_t cachedPages = 0; 3707 VMPageQueue::PageList freePages; 3708 VMPageQueue::PageList clearPages; 3709 page_num_t i = 0; 3710 for (; i < length; i++) { 3711 bool pageAllocated = true; 3712 bool noPage = false; 3713 vm_page& page = sPages[start + i]; 3714 switch (page.State()) { 3715 case PAGE_STATE_CLEAR: 3716 DEBUG_PAGE_ACCESS_START(&page); 3717 sClearPageQueue.Remove(&page); 3718 clearPages.Add(&page); 3719 break; 3720 case PAGE_STATE_FREE: 3721 DEBUG_PAGE_ACCESS_START(&page); 3722 sFreePageQueue.Remove(&page); 3723 freePages.Add(&page); 3724 break; 3725 case PAGE_STATE_CACHED: 3726 // We allocate cached pages later. 3727 cachedPages++; 3728 pageAllocated = false; 3729 break; 3730 3731 default: 3732 // Probably a page was cached when our caller checked. Now it's 3733 // gone and we have to abort. 3734 noPage = true; 3735 break; 3736 } 3737 3738 if (noPage) 3739 break; 3740 3741 if (pageAllocated) { 3742 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3743 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3744 page.usage_count = 0; 3745 page.accessed = false; 3746 page.modified = false; 3747 } 3748 } 3749 3750 if (i < length) { 3751 // failed to allocate a page -- free all that we've got 3752 allocate_page_run_cleanup(freePages, clearPages); 3753 return i; 3754 } 3755 3756 freeClearQueueLocker.Unlock(); 3757 3758 if (cachedPages > 0) { 3759 // allocate the pages that weren't free but cached 3760 page_num_t freedCachedPages = 0; 3761 page_num_t nextIndex = start; 3762 vm_page* freePage = freePages.Head(); 3763 vm_page* clearPage = clearPages.Head(); 3764 while (cachedPages > 0) { 3765 // skip, if we've already got the page 3766 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3767 freePage = freePages.GetNext(freePage); 3768 nextIndex++; 3769 continue; 3770 } 3771 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3772 clearPage = clearPages.GetNext(clearPage); 3773 nextIndex++; 3774 continue; 3775 } 3776 3777 // free the page, if it is still cached 3778 vm_page& page = sPages[nextIndex]; 3779 if (!free_cached_page(&page, false)) { 3780 // TODO: if the page turns out to have been freed already, 3781 // there would be no need to fail 3782 break; 3783 } 3784 3785 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3786 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3787 page.usage_count = 0; 3788 page.accessed = false; 3789 page.modified = false; 3790 3791 freePages.InsertBefore(freePage, &page); 3792 freedCachedPages++; 3793 cachedPages--; 3794 nextIndex++; 3795 } 3796 3797 // If we have freed cached pages, we need to balance things. 3798 if (freedCachedPages > 0) 3799 unreserve_pages(freedCachedPages); 3800 3801 if (nextIndex - start < length) { 3802 // failed to allocate all cached pages -- free all that we've got 3803 freeClearQueueLocker.Lock(); 3804 allocate_page_run_cleanup(freePages, clearPages); 3805 freeClearQueueLocker.Unlock(); 3806 3807 return nextIndex - start; 3808 } 3809 } 3810 3811 // clear pages, if requested 3812 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3813 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3814 vm_page* page = it.Next();) { 3815 clear_page(page); 3816 } 3817 } 3818 3819 // add pages to target queue 3820 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3821 freePages.MoveFrom(&clearPages); 3822 sPageQueues[pageState].AppendUnlocked(freePages, length); 3823 } 3824 3825 // Note: We don't unreserve the pages since we pulled them out of the 3826 // free/clear queues without adjusting sUnreservedFreePages. 
3827 3828 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3829 AbstractTraceEntryWithStackTrace* traceEntry 3830 = TA(AllocatePageRun(start, length)); 3831 3832 for (page_num_t i = start; i < start + length; i++) 3833 sPages[i].allocation_tracking_info.Init(traceEntry); 3834 #else 3835 TA(AllocatePageRun(start, length)); 3836 #endif 3837 3838 return length; 3839 } 3840 3841 3842 /*! Allocate a physically contiguous range of pages. 3843 3844 \param flags Page allocation flags. Encodes the state the function shall 3845 set the allocated pages to, whether the pages shall be marked busy 3846 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3847 (VM_PAGE_ALLOC_CLEAR). 3848 \param length The number of contiguous pages to allocate. 3849 \param restrictions Restrictions to the physical addresses of the page run 3850 to allocate, including \c low_address, the first acceptable physical 3851 address where the page run may start, \c high_address, the last 3852 acceptable physical address where the page run may end (i.e. it must 3853 hold \code runStartAddress + length <= high_address \endcode), 3854 \c alignment, the alignment of the page run start address, and 3855 \c boundary, multiples of which the page run must not cross. 3856 Values set to \c 0 are ignored. 3857 \param priority The page reservation priority (as passed to 3858 vm_page_reserve_pages()). 3859 \return The first page of the allocated page run on success; \c NULL 3860 when the allocation failed. 3861 */ 3862 vm_page* 3863 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3864 const physical_address_restrictions* restrictions, int priority) 3865 { 3866 // compute start and end page index 3867 page_num_t requestedStart 3868 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3869 - sPhysicalPageOffset; 3870 page_num_t start = requestedStart; 3871 page_num_t end; 3872 if (restrictions->high_address > 0) { 3873 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3874 sPhysicalPageOffset) 3875 - sPhysicalPageOffset; 3876 end = std::min(end, sNumPages); 3877 } else 3878 end = sNumPages; 3879 3880 // compute alignment mask 3881 page_num_t alignmentMask 3882 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3883 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3884 // alignment must be a power of 2 3885 3886 // compute the boundary mask 3887 uint32 boundaryMask = 0; 3888 if (restrictions->boundary != 0) { 3889 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3890 // boundary must be a power of two and not less than alignment and 3891 // length 3892 ASSERT(((boundary - 1) & boundary) == 0); 3893 ASSERT(boundary >= alignmentMask + 1); 3894 ASSERT(boundary >= length); 3895 3896 boundaryMask = -boundary; 3897 } 3898 3899 vm_page_reservation reservation; 3900 vm_page_reserve_pages(&reservation, length, priority); 3901 3902 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3903 3904 // First we try to get a run with free pages only. If that fails, we also 3905 // consider cached pages. If there are only few free pages and many cached 3906 // ones, the odds are that we won't find enough contiguous ones, so we skip 3907 // the first iteration in this case. 3908 int32 freePages = sUnreservedFreePages; 3909 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3910 3911 for (;;) { 3912 if (alignmentMask != 0 || boundaryMask != 0) { 3913 page_num_t offsetStart = start + sPhysicalPageOffset; 3914 3915 // enforce alignment 3916 if ((offsetStart & alignmentMask) != 0) 3917 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3918 3919 // enforce boundary 3920 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3921 + length - 1)) & boundaryMask) != 0) { 3922 offsetStart = (offsetStart + length - 1) & boundaryMask; 3923 } 3924 3925 start = offsetStart - sPhysicalPageOffset; 3926 } 3927 3928 if (start + length > end) { 3929 if (useCached == 0) { 3930 // The first iteration with free pages only was unsuccessful. 3931 // Try again also considering cached pages. 3932 useCached = 1; 3933 start = requestedStart; 3934 continue; 3935 } 3936 3937 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3938 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3939 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3940 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart, 3941 end, restrictions->alignment, restrictions->boundary); 3942 3943 freeClearQueueLocker.Unlock(); 3944 vm_page_unreserve_pages(&reservation); 3945 return NULL; 3946 } 3947 3948 bool foundRun = true; 3949 page_num_t i; 3950 for (i = 0; i < length; i++) { 3951 uint32 pageState = sPages[start + i].State(); 3952 if (pageState != PAGE_STATE_FREE 3953 && pageState != PAGE_STATE_CLEAR 3954 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3955 foundRun = false; 3956 break; 3957 } 3958 } 3959 3960 if (foundRun) { 3961 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3962 if (i == length) 3963 return &sPages[start]; 3964 3965 // apparently a cached page couldn't be allocated -- skip it and 3966 // continue 3967 freeClearQueueLocker.Lock(); 3968 } 3969 3970 start += i + 1; 3971 } 3972 } 3973 3974 3975 vm_page * 3976 vm_page_at_index(int32 index) 3977 { 3978 return &sPages[index]; 3979 } 3980 3981 3982 vm_page * 3983 vm_lookup_page(page_num_t pageNumber) 3984 { 3985 if (pageNumber < sPhysicalPageOffset) 3986 return NULL; 3987 3988 pageNumber -= sPhysicalPageOffset; 3989 if (pageNumber >= sNumPages) 3990 return NULL; 3991 3992 return &sPages[pageNumber]; 3993 } 3994 3995 3996 bool 3997 vm_page_is_dummy(struct vm_page *page) 3998 { 3999 return page < sPages || page >= sPages + sNumPages; 4000 } 4001 4002 4003 /*! Free the page that belonged to a certain cache. 4004 You can use vm_page_set_state() manually if you prefer, but only 4005 if the page does not equal PAGE_STATE_MODIFIED. 4006 4007 \param cache The cache the page was previously owned by or NULL. The page 4008 must have been removed from its cache before calling this method in 4009 either case. 4010 \param page The page to free. 4011 \param reservation If not NULL, the page count of the reservation will be 4012 incremented, thus allowing to allocate another page for the freed one at 4013 a later time. 
*/
void
vm_page_free_etc(VMCache* cache, vm_page* page,
	vm_page_reservation* reservation)
{
	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
		&& page->State() != PAGE_STATE_CLEAR);

	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
		atomic_add(&sModifiedTemporaryPages, -1);

	free_page(page, false);
	if (reservation == NULL)
		unreserve_pages(1);
}


void
vm_page_set_state(vm_page *page, int pageState)
{
	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
		&& page->State() != PAGE_STATE_CLEAR);

	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
		free_page(page, pageState == PAGE_STATE_CLEAR);
		unreserve_pages(1);
	} else
		set_page_state(page, pageState);
}


/*!	Moves a page to either the tail or the head of its current queue,
	depending on \a tail.
	The page must have a cache and the cache must be locked!
*/
void
vm_page_requeue(struct vm_page *page, bool tail)
{
	PAGE_ASSERT(page, page->Cache() != NULL);
	page->Cache()->AssertLocked();
	// DEBUG_PAGE_ACCESS_CHECK(page);
		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
		// when it requeues busy pages. The reason is that vm_soft_fault()
		// (respectively fault_get_page()) and the file cache keep newly
		// allocated pages accessed while they are reading them from disk. It
		// would probably be better to change that code and reenable this
		// check.

	VMPageQueue *queue = NULL;

	switch (page->State()) {
		case PAGE_STATE_ACTIVE:
			queue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			queue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			queue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_CACHED:
			queue = &sCachedPageQueue;
			break;
		case PAGE_STATE_FREE:
		case PAGE_STATE_CLEAR:
			panic("vm_page_requeue() called for free/clear page %p", page);
			return;
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			return;
		default:
			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
				page, page->State());
			break;
	}

	queue->RequeueUnlocked(page, tail);
}


page_num_t
vm_page_num_pages(void)
{
	return sNumPages - sNonExistingPages;
}


/*!	There is a subtle distinction between the page counts returned by
	this function and vm_page_num_free_pages():
	The latter returns the number of pages that are completely uncommitted,
	whereas this one returns the number of pages that are available for
	use by being reclaimed as well (IOW it factors in things like cache pages
	as available).
*/
page_num_t
vm_page_num_available_pages(void)
{
	return vm_available_memory() / B_PAGE_SIZE;
}


page_num_t
vm_page_num_free_pages(void)
{
	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
	return count > 0 ? count : 0;
}


page_num_t
vm_page_num_unused_pages(void)
{
	int32 count = sUnreservedFreePages;
	return count > 0 ? count : 0;
}


void
vm_page_get_stats(system_info *info)
{
	// Note: there's no locking protecting any of the queues or counters here,
	// so we run the risk of getting bogus values when evaluating them
	// throughout this function. As these stats are for informational purposes
	// only, it is not really worth introducing such locking. Therefore we just
	// ensure that we don't under- or overflow any of the values.

	// The pages used for the block cache buffers. Those should not be counted
	// as used but as cached pages.
	// TODO: We should subtract the blocks that are in use ATM, since those
	// can't really be freed in a low memory situation.
	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
	info->block_cache_pages = blockCachePages;

	// Non-temporary modified pages are special as they represent pages that
	// can be written back, so they could be freed if necessary, which for our
	// purposes basically makes them cached pages with a higher overhead. The
	// modified queue count is therefore split into temporary and non-temporary
	// counts that are then added to the corresponding number.
	page_num_t modifiedNonTemporaryPages
		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);

	info->max_pages = vm_page_num_pages();
	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
		+ blockCachePages;

	// max_pages is composed of:
	//	active + inactive + unused + wired + modified + cached + free + clear
	// So taking out the cached (including modified non-temporary), free and
	// clear ones leaves us with all used pages.
	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
		+ sClearPageQueue.Count();
	info->used_pages = subtractPages > info->max_pages
		? 0 : info->max_pages - subtractPages;

	if (info->used_pages + info->cached_pages > info->max_pages) {
		// Something was shuffled around while we were summing up the counts.
		// Make the values sane, preferring the worse case of more used pages.
		info->cached_pages = info->max_pages - info->used_pages;
	}

	info->page_faults = vm_num_page_faults();
	info->ignored_pages = sIgnoredPages;

	// TODO: We don't consider pages used for page directories/tables yet.
}


/*!	Returns the greatest address within the last page of accessible physical
	memory.
	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
	means that the last page ends at exactly 4 GB.
*/
phys_addr_t
vm_page_max_address()
{
	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
}


RANGE_MARKER_FUNCTION_END(vm_page)
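

// A minimal usage sketch of the reservation/allocation interface above; this
// is an illustrative comment only, and it assumes VM_PRIORITY_SYSTEM as the
// reservation priority:
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
//
//	// Take one page out of the reservation; the flags combine the target
//	// page state with VM_PAGE_ALLOC_CLEAR to get a zeroed page.
//	vm_page* page = vm_page_allocate_page(&reservation,
//		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
//
//	// Give back whatever is still reserved (nothing in this case).
//	vm_page_unreserve_pages(&reservation);
//
//	// ... use the page ...
//
//	// Return the page to the free pool when it is no longer needed.
//	vm_page_set_state(page, PAGE_STATE_FREE);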