/*
 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <string.h>
#include <stdlib.h>

#include <algorithm>

#include <KernelExport.h>
#include <OS.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm_translation_map.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <condition_variable.h>
#include <elf.h>
#include <heap.h>
#include <kernel.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_priv.h>
#include <vm/vm_page.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "IORequest.h"
#include "PageCacheLocker.h"
#include "VMAnonymousCache.h"
#include "VMPageQueue.h"


//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

//#define TRACE_VM_DAEMONS
#ifdef TRACE_VM_DAEMONS
#define TRACE_DAEMON(x...) dprintf(x)
#else
#define TRACE_DAEMON(x...) do {} while (false)
#endif

//#define TRACK_PAGE_USAGE_STATS 1

#define PAGE_ASSERT(page, condition)	\
	ASSERT_PRINT((condition), "page: %p", (page))

#define SCRUB_SIZE 32
	// this many pages will be cleared at once in the page scrubber thread

#define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
	// maximum I/O priority of the page writer
#define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
	// the maximum I/O priority shall be reached when this many pages need to
	// be written


// The page reserve that an allocation of a given priority must not touch.
static const size_t kPageReserveForPriority[] = {
	VM_PAGE_RESERVE_USER,		// user
	VM_PAGE_RESERVE_SYSTEM,		// system
	0							// VIP
};

// Minimum number of free pages the page daemon will try to achieve.
static uint32 sFreePagesTarget;
static uint32 sFreeOrCachedPagesTarget;
static uint32 sInactivePagesTarget;

// Wait interval between page daemon runs.
static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec

// Number of idle runs after which we want to have processed the full active
// queue.
static const uint32 kIdleRunsForFullQueue = 20;

// Maximum limit for the vm_page::usage_count.
static const int32 kPageUsageMax = 64;
// vm_page::usage_count bonus an accessed page receives in a scan.
static const int32 kPageUsageAdvance = 3;
// vm_page::usage_count penalty an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1; 102 103 int32 gMappedPagesCount; 104 105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 106 107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 113 114 static vm_page *sPages; 115 static page_num_t sPhysicalPageOffset; 116 static page_num_t sNumPages; 117 static page_num_t sNonExistingPages; 118 // pages in the sPages array that aren't backed by physical memory 119 static uint64 sIgnoredPages; 120 // pages of physical memory ignored by the boot loader (and thus not 121 // available here) 122 static int32 sUnreservedFreePages; 123 static int32 sUnsatisfiedPageReservations; 124 static int32 sModifiedTemporaryPages; 125 126 static ConditionVariable sFreePageCondition; 127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 128 129 // This lock must be used whenever the free or clear page queues are changed. 130 // If you need to work on both queues at the same time, you need to hold a write 131 // lock, otherwise, a read lock suffices (each queue still has a spinlock to 132 // guard against concurrent changes). 133 static rw_lock sFreePageQueuesLock 134 = RW_LOCK_INITIALIZER("free/clear page queues"); 135 136 #ifdef TRACK_PAGE_USAGE_STATS 137 static page_num_t sPageUsageArrays[512]; 138 static page_num_t* sPageUsage = sPageUsageArrays; 139 static page_num_t sPageUsagePageCount; 140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 141 static page_num_t sNextPageUsagePageCount; 142 #endif 143 144 145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 146 147 struct caller_info { 148 addr_t caller; 149 size_t count; 150 }; 151 152 static const int32 kCallerInfoTableSize = 1024; 153 static caller_info sCallerInfoTable[kCallerInfoTableSize]; 154 static int32 sCallerInfoCount = 0; 155 156 static caller_info* get_caller_info(addr_t caller); 157 158 159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page) 160 161 static const addr_t kVMPageCodeAddressRange[] = { 162 RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page) 163 }; 164 165 #endif 166 167 168 RANGE_MARKER_FUNCTION_BEGIN(vm_page) 169 170 171 struct page_stats { 172 int32 totalFreePages; 173 int32 unsatisfiedReservations; 174 int32 cachedPages; 175 }; 176 177 178 struct PageReservationWaiter 179 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 180 Thread* thread; 181 uint32 dontTouch; // reserve not to touch 182 uint32 missing; // pages missing for the reservation 183 int32 threadPriority; 184 185 bool operator<(const PageReservationWaiter& other) const 186 { 187 // Implies an order by descending VM priority (ascending dontTouch) 188 // and (secondarily) descending thread priority. 
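		// Since dontTouch shrinks as the reservation's VM priority rises
		// (VIP reservations pass 0, see kPageReserveForPriority), the most
		// urgent waiters sort towards the head of the waiter list.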
189 if (dontTouch != other.dontTouch) 190 return dontTouch < other.dontTouch; 191 return threadPriority > other.threadPriority; 192 } 193 }; 194 195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 196 static PageReservationWaiterList sPageReservationWaiters; 197 198 199 struct DaemonCondition { 200 void Init(const char* name) 201 { 202 mutex_init(&fLock, "daemon condition"); 203 fCondition.Init(this, name); 204 fActivated = false; 205 } 206 207 bool Lock() 208 { 209 return mutex_lock(&fLock) == B_OK; 210 } 211 212 void Unlock() 213 { 214 mutex_unlock(&fLock); 215 } 216 217 bool Wait(bigtime_t timeout, bool clearActivated) 218 { 219 MutexLocker locker(fLock); 220 if (clearActivated) 221 fActivated = false; 222 else if (fActivated) 223 return true; 224 225 ConditionVariableEntry entry; 226 fCondition.Add(&entry); 227 228 locker.Unlock(); 229 230 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 231 } 232 233 void WakeUp() 234 { 235 if (fActivated) 236 return; 237 238 MutexLocker locker(fLock); 239 fActivated = true; 240 fCondition.NotifyOne(); 241 } 242 243 void ClearActivated() 244 { 245 MutexLocker locker(fLock); 246 fActivated = false; 247 } 248 249 private: 250 mutex fLock; 251 ConditionVariable fCondition; 252 bool fActivated; 253 }; 254 255 256 static DaemonCondition sPageWriterCondition; 257 static DaemonCondition sPageDaemonCondition; 258 259 260 #if PAGE_ALLOCATION_TRACING 261 262 namespace PageAllocationTracing { 263 264 class ReservePages : public AbstractTraceEntry { 265 public: 266 ReservePages(uint32 count) 267 : 268 fCount(count) 269 { 270 Initialized(); 271 } 272 273 virtual void AddDump(TraceOutput& out) 274 { 275 out.Print("page reserve: %" B_PRIu32, fCount); 276 } 277 278 private: 279 uint32 fCount; 280 }; 281 282 283 class UnreservePages : public AbstractTraceEntry { 284 public: 285 UnreservePages(uint32 count) 286 : 287 fCount(count) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("page unreserve: %" B_PRId32, fCount); 295 } 296 297 private: 298 uint32 fCount; 299 }; 300 301 302 class AllocatePage 303 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 304 public: 305 AllocatePage(page_num_t pageNumber) 306 : 307 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 308 fPageNumber(pageNumber) 309 { 310 Initialized(); 311 } 312 313 virtual void AddDump(TraceOutput& out) 314 { 315 out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber); 316 } 317 318 private: 319 page_num_t fPageNumber; 320 }; 321 322 323 class AllocatePageRun 324 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 325 public: 326 AllocatePageRun(page_num_t startPage, uint32 length) 327 : 328 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 329 fStartPage(startPage), 330 fLength(length) 331 { 332 Initialized(); 333 } 334 335 virtual void AddDump(TraceOutput& out) 336 { 337 out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %" 338 B_PRIu32, fStartPage, fLength); 339 } 340 341 private: 342 page_num_t fStartPage; 343 uint32 fLength; 344 }; 345 346 347 class FreePage 348 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) { 349 public: 350 FreePage(page_num_t pageNumber) 351 : 352 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true), 353 fPageNumber(pageNumber) 354 { 355 Initialized(); 356 } 357 358 virtual void AddDump(TraceOutput& out) 359 { 360 out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber); 361 } 362 363 private: 364 page_num_t 
fPageNumber; 365 }; 366 367 368 class ScrubbingPages : public AbstractTraceEntry { 369 public: 370 ScrubbingPages(uint32 count) 371 : 372 fCount(count) 373 { 374 Initialized(); 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("page scrubbing: %" B_PRId32, fCount); 380 } 381 382 private: 383 uint32 fCount; 384 }; 385 386 387 class ScrubbedPages : public AbstractTraceEntry { 388 public: 389 ScrubbedPages(uint32 count) 390 : 391 fCount(count) 392 { 393 Initialized(); 394 } 395 396 virtual void AddDump(TraceOutput& out) 397 { 398 out.Print("page scrubbed: %" B_PRId32, fCount); 399 } 400 401 private: 402 uint32 fCount; 403 }; 404 405 406 class StolenPage : public AbstractTraceEntry { 407 public: 408 StolenPage() 409 { 410 Initialized(); 411 } 412 413 virtual void AddDump(TraceOutput& out) 414 { 415 out.Print("page stolen"); 416 } 417 }; 418 419 } // namespace PageAllocationTracing 420 421 # define TA(x) new(std::nothrow) PageAllocationTracing::x 422 423 #else 424 # define TA(x) 425 #endif // PAGE_ALLOCATION_TRACING 426 427 428 #if PAGE_DAEMON_TRACING 429 430 namespace PageDaemonTracing { 431 432 class ActivatePage : public AbstractTraceEntry { 433 public: 434 ActivatePage(vm_page* page) 435 : 436 fCache(page->cache), 437 fPage(page) 438 { 439 Initialized(); 440 } 441 442 virtual void AddDump(TraceOutput& out) 443 { 444 out.Print("page activated: %p, cache: %p", fPage, fCache); 445 } 446 447 private: 448 VMCache* fCache; 449 vm_page* fPage; 450 }; 451 452 453 class DeactivatePage : public AbstractTraceEntry { 454 public: 455 DeactivatePage(vm_page* page) 456 : 457 fCache(page->cache), 458 fPage(page) 459 { 460 Initialized(); 461 } 462 463 virtual void AddDump(TraceOutput& out) 464 { 465 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 466 } 467 468 private: 469 VMCache* fCache; 470 vm_page* fPage; 471 }; 472 473 474 class FreedPageSwap : public AbstractTraceEntry { 475 public: 476 FreedPageSwap(vm_page* page) 477 : 478 fCache(page->cache), 479 fPage(page) 480 { 481 Initialized(); 482 } 483 484 virtual void AddDump(TraceOutput& out) 485 { 486 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 487 } 488 489 private: 490 VMCache* fCache; 491 vm_page* fPage; 492 }; 493 494 } // namespace PageDaemonTracing 495 496 # define TD(x) new(std::nothrow) PageDaemonTracing::x 497 498 #else 499 # define TD(x) 500 #endif // PAGE_DAEMON_TRACING 501 502 503 #if PAGE_WRITER_TRACING 504 505 namespace PageWriterTracing { 506 507 class WritePage : public AbstractTraceEntry { 508 public: 509 WritePage(vm_page* page) 510 : 511 fCache(page->Cache()), 512 fPage(page) 513 { 514 Initialized(); 515 } 516 517 virtual void AddDump(TraceOutput& out) 518 { 519 out.Print("page write: %p, cache: %p", fPage, fCache); 520 } 521 522 private: 523 VMCache* fCache; 524 vm_page* fPage; 525 }; 526 527 } // namespace PageWriterTracing 528 529 # define TPW(x) new(std::nothrow) PageWriterTracing::x 530 531 #else 532 # define TPW(x) 533 #endif // PAGE_WRITER_TRACING 534 535 536 #if PAGE_STATE_TRACING 537 538 namespace PageStateTracing { 539 540 class SetPageState : public AbstractTraceEntry { 541 public: 542 SetPageState(vm_page* page, uint8 newState) 543 : 544 fPage(page), 545 fOldState(page->State()), 546 fNewState(newState), 547 fBusy(page->busy), 548 fWired(page->WiredCount() > 0), 549 fMapped(!page->mappings.IsEmpty()), 550 fAccessed(page->accessed), 551 fModified(page->modified) 552 { 553 #if PAGE_STATE_TRACING_STACK_TRACE 554 fStackTrace = capture_tracing_stack_trace( 555 
PAGE_STATE_TRACING_STACK_TRACE, 0, true); 556 // Don't capture userland stack trace to avoid potential 557 // deadlocks. 558 #endif 559 Initialized(); 560 } 561 562 #if PAGE_STATE_TRACING_STACK_TRACE 563 virtual void DumpStackTrace(TraceOutput& out) 564 { 565 out.PrintStackTrace(fStackTrace); 566 } 567 #endif 568 569 virtual void AddDump(TraceOutput& out) 570 { 571 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 572 fBusy ? 'b' : '-', 573 fWired ? 'w' : '-', 574 fMapped ? 'm' : '-', 575 fAccessed ? 'a' : '-', 576 fModified ? 'm' : '-', 577 page_state_to_string(fOldState), 578 page_state_to_string(fNewState)); 579 } 580 581 private: 582 vm_page* fPage; 583 #if PAGE_STATE_TRACING_STACK_TRACE 584 tracing_stack_trace* fStackTrace; 585 #endif 586 uint8 fOldState; 587 uint8 fNewState; 588 bool fBusy : 1; 589 bool fWired : 1; 590 bool fMapped : 1; 591 bool fAccessed : 1; 592 bool fModified : 1; 593 }; 594 595 } // namespace PageStateTracing 596 597 # define TPS(x) new(std::nothrow) PageStateTracing::x 598 599 #else 600 # define TPS(x) 601 #endif // PAGE_STATE_TRACING 602 603 604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 605 606 namespace BKernel { 607 608 class AllocationTrackingCallback { 609 public: 610 virtual ~AllocationTrackingCallback(); 611 612 virtual bool ProcessTrackingInfo( 613 AllocationTrackingInfo* info, 614 page_num_t pageNumber) = 0; 615 }; 616 617 } 618 619 using BKernel::AllocationTrackingCallback; 620 621 622 class AllocationCollectorCallback : public AllocationTrackingCallback { 623 public: 624 AllocationCollectorCallback(bool resetInfos) 625 : 626 fResetInfos(resetInfos) 627 { 628 } 629 630 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 631 page_num_t pageNumber) 632 { 633 if (!info->IsInitialized()) 634 return true; 635 636 addr_t caller = 0; 637 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 638 639 if (traceEntry != NULL && info->IsTraceEntryValid()) { 640 caller = tracing_find_caller_in_stack_trace( 641 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 642 } 643 644 caller_info* callerInfo = get_caller_info(caller); 645 if (callerInfo == NULL) { 646 kprintf("out of space for caller infos\n"); 647 return false; 648 } 649 650 callerInfo->count++; 651 652 if (fResetInfos) 653 info->Clear(); 654 655 return true; 656 } 657 658 private: 659 bool fResetInfos; 660 }; 661 662 663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback { 664 public: 665 AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter, 666 team_id teamFilter, thread_id threadFilter) 667 : 668 fPrintStackTrace(printStackTrace), 669 fPageFilter(pageFilter), 670 fTeamFilter(teamFilter), 671 fThreadFilter(threadFilter) 672 { 673 } 674 675 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 676 page_num_t pageNumber) 677 { 678 if (!info->IsInitialized()) 679 return true; 680 681 if (fPageFilter != 0 && pageNumber != fPageFilter) 682 return true; 683 684 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 685 if (traceEntry != NULL && !info->IsTraceEntryValid()) 686 traceEntry = NULL; 687 688 if (traceEntry != NULL) { 689 if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter) 690 return true; 691 if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter) 692 return true; 693 } else { 694 // we need the info if we have filters set 695 if (fTeamFilter != -1 || fThreadFilter != -1) 696 return true; 697 } 698 699 kprintf("page number %#" B_PRIxPHYSADDR, pageNumber); 700 701 if (traceEntry 
!= NULL) { 702 kprintf(", team: %" B_PRId32 ", thread %" B_PRId32 703 ", time %" B_PRId64 "\n", traceEntry->TeamID(), 704 traceEntry->ThreadID(), traceEntry->Time()); 705 706 if (fPrintStackTrace) 707 tracing_print_stack_trace(traceEntry->StackTrace()); 708 } else 709 kprintf("\n"); 710 711 return true; 712 } 713 714 private: 715 bool fPrintStackTrace; 716 page_num_t fPageFilter; 717 team_id fTeamFilter; 718 thread_id fThreadFilter; 719 }; 720 721 722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback { 723 public: 724 AllocationDetailPrinterCallback(addr_t caller) 725 : 726 fCaller(caller) 727 { 728 } 729 730 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info, 731 page_num_t pageNumber) 732 { 733 if (!info->IsInitialized()) 734 return true; 735 736 addr_t caller = 0; 737 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry(); 738 if (traceEntry != NULL && !info->IsTraceEntryValid()) 739 traceEntry = NULL; 740 741 if (traceEntry != NULL) { 742 caller = tracing_find_caller_in_stack_trace( 743 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1); 744 } 745 746 if (caller != fCaller) 747 return true; 748 749 kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber); 750 if (traceEntry != NULL) 751 tracing_print_stack_trace(traceEntry->StackTrace()); 752 753 return true; 754 } 755 756 private: 757 addr_t fCaller; 758 }; 759 760 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 761 762 763 static void 764 list_page(vm_page* page) 765 { 766 kprintf("0x%08" B_PRIxADDR " ", 767 (addr_t)(page->physical_page_number * B_PAGE_SIZE)); 768 switch (page->State()) { 769 case PAGE_STATE_ACTIVE: kprintf("A"); break; 770 case PAGE_STATE_INACTIVE: kprintf("I"); break; 771 case PAGE_STATE_MODIFIED: kprintf("M"); break; 772 case PAGE_STATE_CACHED: kprintf("C"); break; 773 case PAGE_STATE_FREE: kprintf("F"); break; 774 case PAGE_STATE_CLEAR: kprintf("L"); break; 775 case PAGE_STATE_WIRED: kprintf("W"); break; 776 case PAGE_STATE_UNUSED: kprintf("-"); break; 777 } 778 kprintf(" "); 779 if (page->busy) kprintf("B"); else kprintf("-"); 780 if (page->busy_writing) kprintf("W"); else kprintf("-"); 781 if (page->accessed) kprintf("A"); else kprintf("-"); 782 if (page->modified) kprintf("M"); else kprintf("-"); 783 if (page->unused) kprintf("U"); else kprintf("-"); 784 785 kprintf(" usage:%3u", page->usage_count); 786 kprintf(" wired:%5u", page->WiredCount()); 787 788 bool first = true; 789 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 790 vm_page_mapping* mapping; 791 while ((mapping = iterator.Next()) != NULL) { 792 if (first) { 793 kprintf(": "); 794 first = false; 795 } else 796 kprintf(", "); 797 798 kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name); 799 mapping = mapping->page_link.next; 800 } 801 } 802 803 804 static int 805 dump_page_list(int argc, char **argv) 806 { 807 kprintf("page table:\n"); 808 for (page_num_t i = 0; i < sNumPages; i++) { 809 if (sPages[i].State() != PAGE_STATE_UNUSED) { 810 list_page(&sPages[i]); 811 kprintf("\n"); 812 } 813 } 814 kprintf("end of page table\n"); 815 816 return 0; 817 } 818 819 820 static int 821 find_page(int argc, char **argv) 822 { 823 struct vm_page *page; 824 addr_t address; 825 int32 index = 1; 826 int i; 827 828 struct { 829 const char* name; 830 VMPageQueue* queue; 831 } pageQueueInfos[] = { 832 { "free", &sFreePageQueue }, 833 { "clear", &sClearPageQueue }, 834 { "modified", &sModifiedPageQueue }, 835 { "active", &sActivePageQueue }, 836 { "inactive", &sInactivePageQueue }, 837 { 
"cached", &sCachedPageQueue }, 838 { NULL, NULL } 839 }; 840 841 if (argc < 2 842 || strlen(argv[index]) <= 2 843 || argv[index][0] != '0' 844 || argv[index][1] != 'x') { 845 kprintf("usage: find_page <address>\n"); 846 return 0; 847 } 848 849 address = strtoul(argv[index], NULL, 0); 850 page = (vm_page*)address; 851 852 for (i = 0; pageQueueInfos[i].name; i++) { 853 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 854 while (vm_page* p = it.Next()) { 855 if (p == page) { 856 kprintf("found page %p in queue %p (%s)\n", page, 857 pageQueueInfos[i].queue, pageQueueInfos[i].name); 858 return 0; 859 } 860 } 861 } 862 863 kprintf("page %p isn't in any queue\n", page); 864 865 return 0; 866 } 867 868 869 const char * 870 page_state_to_string(int state) 871 { 872 switch(state) { 873 case PAGE_STATE_ACTIVE: 874 return "active"; 875 case PAGE_STATE_INACTIVE: 876 return "inactive"; 877 case PAGE_STATE_MODIFIED: 878 return "modified"; 879 case PAGE_STATE_CACHED: 880 return "cached"; 881 case PAGE_STATE_FREE: 882 return "free"; 883 case PAGE_STATE_CLEAR: 884 return "clear"; 885 case PAGE_STATE_WIRED: 886 return "wired"; 887 case PAGE_STATE_UNUSED: 888 return "unused"; 889 default: 890 return "unknown"; 891 } 892 } 893 894 895 static int 896 dump_page_long(int argc, char **argv) 897 { 898 bool addressIsPointer = true; 899 bool physical = false; 900 bool searchMappings = false; 901 int32 index = 1; 902 903 while (index < argc) { 904 if (argv[index][0] != '-') 905 break; 906 907 if (!strcmp(argv[index], "-p")) { 908 addressIsPointer = false; 909 physical = true; 910 } else if (!strcmp(argv[index], "-v")) { 911 addressIsPointer = false; 912 } else if (!strcmp(argv[index], "-m")) { 913 searchMappings = true; 914 } else { 915 print_debugger_command_usage(argv[0]); 916 return 0; 917 } 918 919 index++; 920 } 921 922 if (index + 1 != argc) { 923 print_debugger_command_usage(argv[0]); 924 return 0; 925 } 926 927 uint64 value; 928 if (!evaluate_debug_expression(argv[index], &value, false)) 929 return 0; 930 931 uint64 pageAddress = value; 932 struct vm_page* page; 933 934 if (addressIsPointer) { 935 page = (struct vm_page *)(addr_t)pageAddress; 936 } else { 937 if (!physical) { 938 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 939 940 if (debug_get_debugged_thread()->team->address_space != NULL) 941 addressSpace = debug_get_debugged_thread()->team->address_space; 942 943 uint32 flags = 0; 944 phys_addr_t physicalAddress; 945 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 946 &physicalAddress, &flags) != B_OK 947 || (flags & PAGE_PRESENT) == 0) { 948 kprintf("Virtual address not mapped to a physical page in this " 949 "address space.\n"); 950 return 0; 951 } 952 pageAddress = physicalAddress; 953 } 954 955 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 956 } 957 958 kprintf("PAGE: %p\n", page); 959 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 960 page->queue_link.previous); 961 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", 962 page->physical_page_number); 963 kprintf("cache: %p\n", page->Cache()); 964 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset); 965 kprintf("cache_next: %p\n", page->cache_next); 966 kprintf("state: %s\n", page_state_to_string(page->State())); 967 kprintf("wired_count: %d\n", page->WiredCount()); 968 kprintf("usage_count: %d\n", page->usage_count); 969 kprintf("busy: %d\n", page->busy); 970 kprintf("busy_writing: %d\n", page->busy_writing); 971 kprintf("accessed: %d\n", page->accessed); 972 
kprintf("modified: %d\n", page->modified); 973 #if DEBUG_PAGE_QUEUE 974 kprintf("queue: %p\n", page->queue); 975 #endif 976 #if DEBUG_PAGE_ACCESS 977 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 978 #endif 979 kprintf("area mappings:\n"); 980 981 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 982 vm_page_mapping *mapping; 983 while ((mapping = iterator.Next()) != NULL) { 984 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 985 mapping = mapping->page_link.next; 986 } 987 988 if (searchMappings) { 989 kprintf("all mappings:\n"); 990 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 991 while (addressSpace != NULL) { 992 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 993 for (addr_t address = addressSpace->Base(); pageCount != 0; 994 address += B_PAGE_SIZE, pageCount--) { 995 phys_addr_t physicalAddress; 996 uint32 flags = 0; 997 if (addressSpace->TranslationMap()->QueryInterrupt(address, 998 &physicalAddress, &flags) == B_OK 999 && (flags & PAGE_PRESENT) != 0 1000 && physicalAddress / B_PAGE_SIZE 1001 == page->physical_page_number) { 1002 VMArea* area = addressSpace->LookupArea(address); 1003 kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#" 1004 B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(), 1005 area != NULL ? area->id : -1, address, 1006 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 1007 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 1008 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 1009 (flags & PAGE_ACCESSED) != 0 ? " accessed" : ""); 1010 } 1011 } 1012 addressSpace = VMAddressSpace::DebugNext(addressSpace); 1013 } 1014 } 1015 1016 set_debug_variable("_cache", (addr_t)page->Cache()); 1017 #if DEBUG_PAGE_ACCESS 1018 set_debug_variable("_accessor", page->accessing_thread); 1019 #endif 1020 1021 return 0; 1022 } 1023 1024 1025 static int 1026 dump_page_queue(int argc, char **argv) 1027 { 1028 struct VMPageQueue *queue; 1029 1030 if (argc < 2) { 1031 kprintf("usage: page_queue <address/name> [list]\n"); 1032 return 0; 1033 } 1034 1035 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 1036 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 1037 else if (!strcmp(argv[1], "free")) 1038 queue = &sFreePageQueue; 1039 else if (!strcmp(argv[1], "clear")) 1040 queue = &sClearPageQueue; 1041 else if (!strcmp(argv[1], "modified")) 1042 queue = &sModifiedPageQueue; 1043 else if (!strcmp(argv[1], "active")) 1044 queue = &sActivePageQueue; 1045 else if (!strcmp(argv[1], "inactive")) 1046 queue = &sInactivePageQueue; 1047 else if (!strcmp(argv[1], "cached")) 1048 queue = &sCachedPageQueue; 1049 else { 1050 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 1051 return 0; 1052 } 1053 1054 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %" 1055 B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(), 1056 queue->Count()); 1057 1058 if (argc == 3) { 1059 struct vm_page *page = queue->Head(); 1060 1061 kprintf("page cache type state wired usage\n"); 1062 for (page_num_t i = 0; page; i++, page = queue->Next(page)) { 1063 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 1064 vm_cache_type_to_string(page->Cache()->type), 1065 page_state_to_string(page->State()), 1066 page->WiredCount(), page->usage_count); 1067 } 1068 } 1069 return 0; 1070 } 1071 1072 1073 static int 1074 dump_page_stats(int argc, char **argv) 1075 { 1076 page_num_t swappableModified = 0; 1077 page_num_t swappableModifiedInactive = 0; 1078 1079 size_t counter[8]; 1080 size_t busyCounter[8]; 1081 
memset(counter, 0, sizeof(counter)); 1082 memset(busyCounter, 0, sizeof(busyCounter)); 1083 1084 struct page_run { 1085 page_num_t start; 1086 page_num_t end; 1087 1088 page_num_t Length() const { return end - start; } 1089 }; 1090 1091 page_run currentFreeRun = { 0, 0 }; 1092 page_run currentCachedRun = { 0, 0 }; 1093 page_run longestFreeRun = { 0, 0 }; 1094 page_run longestCachedRun = { 0, 0 }; 1095 1096 for (page_num_t i = 0; i < sNumPages; i++) { 1097 if (sPages[i].State() > 7) { 1098 panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i, 1099 &sPages[i]); 1100 } 1101 1102 uint32 pageState = sPages[i].State(); 1103 1104 counter[pageState]++; 1105 if (sPages[i].busy) 1106 busyCounter[pageState]++; 1107 1108 if (pageState == PAGE_STATE_MODIFIED 1109 && sPages[i].Cache() != NULL 1110 && sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) { 1111 swappableModified++; 1112 if (sPages[i].usage_count == 0) 1113 swappableModifiedInactive++; 1114 } 1115 1116 // track free and cached pages runs 1117 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 1118 currentFreeRun.end = i + 1; 1119 currentCachedRun.end = i + 1; 1120 } else { 1121 if (currentFreeRun.Length() > longestFreeRun.Length()) 1122 longestFreeRun = currentFreeRun; 1123 currentFreeRun.start = currentFreeRun.end = i + 1; 1124 1125 if (pageState == PAGE_STATE_CACHED) { 1126 currentCachedRun.end = i + 1; 1127 } else { 1128 if (currentCachedRun.Length() > longestCachedRun.Length()) 1129 longestCachedRun = currentCachedRun; 1130 currentCachedRun.start = currentCachedRun.end = i + 1; 1131 } 1132 } 1133 } 1134 1135 kprintf("page stats:\n"); 1136 kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages); 1137 1138 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1139 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 1140 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1141 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 1142 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1143 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 1144 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1145 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 1146 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1147 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 1148 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 1149 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 1150 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 1151 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 1152 1153 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 1154 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 1155 sUnsatisfiedPageReservations); 1156 kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount); 1157 kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %" 1158 B_PRIuPHYSADDR ")\n", longestFreeRun.Length(), 1159 sPages[longestFreeRun.start].physical_page_number); 1160 kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %" 1161 B_PRIuPHYSADDR ")\n", longestCachedRun.Length(), 1162 sPages[longestCachedRun.start].physical_page_number); 1163 1164 kprintf("waiting threads:\n"); 1165 for (PageReservationWaiterList::Iterator it 1166 = sPageReservationWaiters.GetIterator(); 1167 PageReservationWaiter* waiter = it.Next();) { 1168 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 1169 ", don't touch: %6" B_PRIu32 "\n", 
waiter->thread->id, 1170 waiter->missing, waiter->dontTouch); 1171 } 1172 1173 kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue, 1174 sFreePageQueue.Count()); 1175 kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue, 1176 sClearPageQueue.Count()); 1177 kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32 1178 " temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %" 1179 B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 1180 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 1181 kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n", 1182 &sActivePageQueue, sActivePageQueue.Count()); 1183 kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n", 1184 &sInactivePageQueue, sInactivePageQueue.Count()); 1185 kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n", 1186 &sCachedPageQueue, sCachedPageQueue.Count()); 1187 return 0; 1188 } 1189 1190 1191 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1192 1193 static caller_info* 1194 get_caller_info(addr_t caller) 1195 { 1196 // find the caller info 1197 for (int32 i = 0; i < sCallerInfoCount; i++) { 1198 if (caller == sCallerInfoTable[i].caller) 1199 return &sCallerInfoTable[i]; 1200 } 1201 1202 // not found, add a new entry, if there are free slots 1203 if (sCallerInfoCount >= kCallerInfoTableSize) 1204 return NULL; 1205 1206 caller_info* info = &sCallerInfoTable[sCallerInfoCount++]; 1207 info->caller = caller; 1208 info->count = 0; 1209 1210 return info; 1211 } 1212 1213 1214 static int 1215 caller_info_compare_count(const void* _a, const void* _b) 1216 { 1217 const caller_info* a = (const caller_info*)_a; 1218 const caller_info* b = (const caller_info*)_b; 1219 return (int)(b->count - a->count); 1220 } 1221 1222 1223 static int 1224 dump_page_allocations_per_caller(int argc, char** argv) 1225 { 1226 bool resetAllocationInfos = false; 1227 bool printDetails = false; 1228 addr_t caller = 0; 1229 1230 for (int32 i = 1; i < argc; i++) { 1231 if (strcmp(argv[i], "-d") == 0) { 1232 uint64 callerAddress; 1233 if (++i >= argc 1234 || !evaluate_debug_expression(argv[i], &callerAddress, true)) { 1235 print_debugger_command_usage(argv[0]); 1236 return 0; 1237 } 1238 1239 caller = callerAddress; 1240 printDetails = true; 1241 } else if (strcmp(argv[i], "-r") == 0) { 1242 resetAllocationInfos = true; 1243 } else { 1244 print_debugger_command_usage(argv[0]); 1245 return 0; 1246 } 1247 } 1248 1249 sCallerInfoCount = 0; 1250 1251 AllocationCollectorCallback collectorCallback(resetAllocationInfos); 1252 AllocationDetailPrinterCallback detailsCallback(caller); 1253 AllocationTrackingCallback& callback = printDetails 1254 ? 
(AllocationTrackingCallback&)detailsCallback 1255 : (AllocationTrackingCallback&)collectorCallback; 1256 1257 for (page_num_t i = 0; i < sNumPages; i++) 1258 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1259 1260 if (printDetails) 1261 return 0; 1262 1263 // sort the array 1264 qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info), 1265 &caller_info_compare_count); 1266 1267 kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount); 1268 1269 size_t totalAllocationCount = 0; 1270 1271 kprintf(" count caller\n"); 1272 kprintf("----------------------------------\n"); 1273 for (int32 i = 0; i < sCallerInfoCount; i++) { 1274 caller_info& info = sCallerInfoTable[i]; 1275 kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller); 1276 1277 const char* symbol; 1278 const char* imageName; 1279 bool exactMatch; 1280 addr_t baseAddress; 1281 1282 if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol, 1283 &imageName, &exactMatch) == B_OK) { 1284 kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol, 1285 info.caller - baseAddress, imageName, 1286 exactMatch ? "" : " (nearest)"); 1287 } else 1288 kprintf("\n"); 1289 1290 totalAllocationCount += info.count; 1291 } 1292 1293 kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n", 1294 totalAllocationCount); 1295 1296 return 0; 1297 } 1298 1299 1300 static int 1301 dump_page_allocation_infos(int argc, char** argv) 1302 { 1303 page_num_t pageFilter = 0; 1304 team_id teamFilter = -1; 1305 thread_id threadFilter = -1; 1306 bool printStackTraces = false; 1307 1308 for (int32 i = 1; i < argc; i++) { 1309 if (strcmp(argv[i], "--stacktrace") == 0) 1310 printStackTraces = true; 1311 else if (strcmp(argv[i], "-p") == 0) { 1312 uint64 pageNumber; 1313 if (++i >= argc 1314 || !evaluate_debug_expression(argv[i], &pageNumber, true)) { 1315 print_debugger_command_usage(argv[0]); 1316 return 0; 1317 } 1318 1319 pageFilter = pageNumber; 1320 } else if (strcmp(argv[i], "--team") == 0) { 1321 uint64 team; 1322 if (++i >= argc 1323 || !evaluate_debug_expression(argv[i], &team, true)) { 1324 print_debugger_command_usage(argv[0]); 1325 return 0; 1326 } 1327 1328 teamFilter = team; 1329 } else if (strcmp(argv[i], "--thread") == 0) { 1330 uint64 thread; 1331 if (++i >= argc 1332 || !evaluate_debug_expression(argv[i], &thread, true)) { 1333 print_debugger_command_usage(argv[0]); 1334 return 0; 1335 } 1336 1337 threadFilter = thread; 1338 } else { 1339 print_debugger_command_usage(argv[0]); 1340 return 0; 1341 } 1342 } 1343 1344 AllocationInfoPrinterCallback callback(printStackTraces, pageFilter, 1345 teamFilter, threadFilter); 1346 1347 for (page_num_t i = 0; i < sNumPages; i++) 1348 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i); 1349 1350 return 0; 1351 } 1352 1353 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1354 1355 1356 #ifdef TRACK_PAGE_USAGE_STATS 1357 1358 static void 1359 track_page_usage(vm_page* page) 1360 { 1361 if (page->WiredCount() == 0) { 1362 sNextPageUsage[(int32)page->usage_count + 128]++; 1363 sNextPageUsagePageCount++; 1364 } 1365 } 1366 1367 1368 static void 1369 update_page_usage_stats() 1370 { 1371 std::swap(sPageUsage, sNextPageUsage); 1372 sPageUsagePageCount = sNextPageUsagePageCount; 1373 1374 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 1375 sNextPageUsagePageCount = 0; 1376 1377 // compute average 1378 if (sPageUsagePageCount > 0) { 1379 int64 sum = 0; 1380 for (int32 i = 0; i < 256; i++) 1381 sum += (int64)sPageUsage[i] * (i - 128); 1382 1383 
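		// track_page_usage() stores the histogram with an offset of 128, so
		// (i - 128) recovers the actual usage count for the weighted average.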
TRACE_DAEMON("average page usage: %f (%lu pages)\n", 1384 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 1385 } 1386 } 1387 1388 1389 static int 1390 dump_page_usage_stats(int argc, char** argv) 1391 { 1392 kprintf("distribution of page usage counts (%lu pages):", 1393 sPageUsagePageCount); 1394 1395 int64 sum = 0; 1396 for (int32 i = 0; i < 256; i++) { 1397 if (i % 8 == 0) 1398 kprintf("\n%4ld:", i - 128); 1399 1400 int64 count = sPageUsage[i]; 1401 sum += count * (i - 128); 1402 1403 kprintf(" %9llu", count); 1404 } 1405 1406 kprintf("\n\n"); 1407 1408 kprintf("average usage count: %f\n", 1409 sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0); 1410 1411 return 0; 1412 } 1413 1414 #endif // TRACK_PAGE_USAGE_STATS 1415 1416 1417 // #pragma mark - vm_page 1418 1419 1420 inline void 1421 vm_page::InitState(uint8 newState) 1422 { 1423 state = newState; 1424 } 1425 1426 1427 inline void 1428 vm_page::SetState(uint8 newState) 1429 { 1430 TPS(SetPageState(this, newState)); 1431 1432 state = newState; 1433 } 1434 1435 1436 // #pragma mark - 1437 1438 1439 static void 1440 get_page_stats(page_stats& _pageStats) 1441 { 1442 _pageStats.totalFreePages = sUnreservedFreePages; 1443 _pageStats.cachedPages = sCachedPageQueue.Count(); 1444 _pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations; 1445 // TODO: We don't get an actual snapshot here! 1446 } 1447 1448 1449 static bool 1450 do_active_paging(const page_stats& pageStats) 1451 { 1452 return pageStats.totalFreePages + pageStats.cachedPages 1453 < pageStats.unsatisfiedReservations 1454 + (int32)sFreeOrCachedPagesTarget; 1455 } 1456 1457 1458 /*! Reserves as many pages as possible from \c sUnreservedFreePages up to 1459 \a count. Doesn't touch the last \a dontTouch pages of 1460 \c sUnreservedFreePages, though. 1461 \return The number of actually reserved pages. 1462 */ 1463 static uint32 1464 reserve_some_pages(uint32 count, uint32 dontTouch) 1465 { 1466 while (true) { 1467 int32 freePages = atomic_get(&sUnreservedFreePages); 1468 if (freePages <= (int32)dontTouch) 1469 return 0; 1470 1471 int32 toReserve = std::min(count, freePages - dontTouch); 1472 if (atomic_test_and_set(&sUnreservedFreePages, 1473 freePages - toReserve, freePages) 1474 == freePages) { 1475 return toReserve; 1476 } 1477 1478 // the count changed in the meantime -- retry 1479 } 1480 } 1481 1482 1483 static void 1484 wake_up_page_reservation_waiters() 1485 { 1486 MutexLocker pageDeficitLocker(sPageDeficitLock); 1487 1488 // TODO: If this is a low priority thread, we might want to disable 1489 // interrupts or otherwise ensure that we aren't unscheduled. Otherwise 1490 // high priority threads wait be kept waiting while a medium priority thread 1491 // prevents us from running. 
1492 1493 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) { 1494 int32 reserved = reserve_some_pages(waiter->missing, 1495 waiter->dontTouch); 1496 if (reserved == 0) 1497 return; 1498 1499 atomic_add(&sUnsatisfiedPageReservations, -reserved); 1500 waiter->missing -= reserved; 1501 1502 if (waiter->missing > 0) 1503 return; 1504 1505 sPageReservationWaiters.Remove(waiter); 1506 1507 thread_unblock(waiter->thread, B_OK); 1508 } 1509 } 1510 1511 1512 static inline void 1513 unreserve_pages(uint32 count) 1514 { 1515 atomic_add(&sUnreservedFreePages, count); 1516 if (atomic_get(&sUnsatisfiedPageReservations) != 0) 1517 wake_up_page_reservation_waiters(); 1518 } 1519 1520 1521 static void 1522 free_page(vm_page* page, bool clear) 1523 { 1524 DEBUG_PAGE_ACCESS_CHECK(page); 1525 1526 PAGE_ASSERT(page, !page->IsMapped()); 1527 1528 VMPageQueue* fromQueue; 1529 1530 switch (page->State()) { 1531 case PAGE_STATE_ACTIVE: 1532 fromQueue = &sActivePageQueue; 1533 break; 1534 case PAGE_STATE_INACTIVE: 1535 fromQueue = &sInactivePageQueue; 1536 break; 1537 case PAGE_STATE_MODIFIED: 1538 fromQueue = &sModifiedPageQueue; 1539 break; 1540 case PAGE_STATE_CACHED: 1541 fromQueue = &sCachedPageQueue; 1542 break; 1543 case PAGE_STATE_FREE: 1544 case PAGE_STATE_CLEAR: 1545 panic("free_page(): page %p already free", page); 1546 return; 1547 case PAGE_STATE_WIRED: 1548 case PAGE_STATE_UNUSED: 1549 fromQueue = NULL; 1550 break; 1551 default: 1552 panic("free_page(): page %p in invalid state %d", 1553 page, page->State()); 1554 return; 1555 } 1556 1557 if (page->CacheRef() != NULL) 1558 panic("to be freed page %p has cache", page); 1559 if (page->IsMapped()) 1560 panic("to be freed page %p has mappings", page); 1561 1562 if (fromQueue != NULL) 1563 fromQueue->RemoveUnlocked(page); 1564 1565 TA(FreePage(page->physical_page_number)); 1566 1567 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 1568 page->allocation_tracking_info.Clear(); 1569 #endif 1570 1571 ReadLocker locker(sFreePageQueuesLock); 1572 1573 DEBUG_PAGE_ACCESS_END(page); 1574 1575 if (clear) { 1576 page->SetState(PAGE_STATE_CLEAR); 1577 sClearPageQueue.PrependUnlocked(page); 1578 } else { 1579 page->SetState(PAGE_STATE_FREE); 1580 sFreePageQueue.PrependUnlocked(page); 1581 sFreePageCondition.NotifyAll(); 1582 } 1583 1584 locker.Unlock(); 1585 } 1586 1587 1588 /*! The caller must make sure that no-one else tries to change the page's state 1589 while the function is called. If the page has a cache, this can be done by 1590 locking the cache. 
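	The page queues must not be locked, though: moving the page between the
	global queues locks them internally as needed.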
1591 */ 1592 static void 1593 set_page_state(vm_page *page, int pageState) 1594 { 1595 DEBUG_PAGE_ACCESS_CHECK(page); 1596 1597 if (pageState == page->State()) 1598 return; 1599 1600 VMPageQueue* fromQueue; 1601 1602 switch (page->State()) { 1603 case PAGE_STATE_ACTIVE: 1604 fromQueue = &sActivePageQueue; 1605 break; 1606 case PAGE_STATE_INACTIVE: 1607 fromQueue = &sInactivePageQueue; 1608 break; 1609 case PAGE_STATE_MODIFIED: 1610 fromQueue = &sModifiedPageQueue; 1611 break; 1612 case PAGE_STATE_CACHED: 1613 fromQueue = &sCachedPageQueue; 1614 break; 1615 case PAGE_STATE_FREE: 1616 case PAGE_STATE_CLEAR: 1617 panic("set_page_state(): page %p is free/clear", page); 1618 return; 1619 case PAGE_STATE_WIRED: 1620 case PAGE_STATE_UNUSED: 1621 fromQueue = NULL; 1622 break; 1623 default: 1624 panic("set_page_state(): page %p in invalid state %d", 1625 page, page->State()); 1626 return; 1627 } 1628 1629 VMPageQueue* toQueue; 1630 1631 switch (pageState) { 1632 case PAGE_STATE_ACTIVE: 1633 toQueue = &sActivePageQueue; 1634 break; 1635 case PAGE_STATE_INACTIVE: 1636 toQueue = &sInactivePageQueue; 1637 break; 1638 case PAGE_STATE_MODIFIED: 1639 toQueue = &sModifiedPageQueue; 1640 break; 1641 case PAGE_STATE_CACHED: 1642 PAGE_ASSERT(page, !page->IsMapped()); 1643 PAGE_ASSERT(page, !page->modified); 1644 toQueue = &sCachedPageQueue; 1645 break; 1646 case PAGE_STATE_FREE: 1647 case PAGE_STATE_CLEAR: 1648 panic("set_page_state(): target state is free/clear"); 1649 return; 1650 case PAGE_STATE_WIRED: 1651 case PAGE_STATE_UNUSED: 1652 toQueue = NULL; 1653 break; 1654 default: 1655 panic("set_page_state(): invalid target state %d", pageState); 1656 return; 1657 } 1658 1659 VMCache* cache = page->Cache(); 1660 if (cache != NULL && cache->temporary) { 1661 if (pageState == PAGE_STATE_MODIFIED) 1662 atomic_add(&sModifiedTemporaryPages, 1); 1663 else if (page->State() == PAGE_STATE_MODIFIED) 1664 atomic_add(&sModifiedTemporaryPages, -1); 1665 } 1666 1667 // move the page 1668 if (toQueue == fromQueue) { 1669 // Note: Theoretically we are required to lock when changing the page 1670 // state, even if we don't change the queue. We actually don't have to 1671 // do this, though, since only for the active queue there are different 1672 // page states and active pages have a cache that must be locked at 1673 // this point. So we rely on the fact that everyone must lock the cache 1674 // before trying to change/interpret the page state. 1675 PAGE_ASSERT(page, cache != NULL); 1676 cache->AssertLocked(); 1677 page->SetState(pageState); 1678 } else { 1679 if (fromQueue != NULL) 1680 fromQueue->RemoveUnlocked(page); 1681 1682 page->SetState(pageState); 1683 1684 if (toQueue != NULL) 1685 toQueue->AppendUnlocked(page); 1686 } 1687 } 1688 1689 1690 /*! Moves a previously modified page into a now appropriate queue. 1691 The page queues must not be locked. 1692 */ 1693 static void 1694 move_page_to_appropriate_queue(vm_page *page) 1695 { 1696 DEBUG_PAGE_ACCESS_CHECK(page); 1697 1698 // Note, this logic must be in sync with what the page daemon does. 1699 int32 state; 1700 if (page->IsMapped()) 1701 state = PAGE_STATE_ACTIVE; 1702 else if (page->modified) 1703 state = PAGE_STATE_MODIFIED; 1704 else 1705 state = PAGE_STATE_CACHED; 1706 1707 // TODO: If free + cached pages are low, we might directly want to free the 1708 // page. 
1709 set_page_state(page, state); 1710 } 1711 1712 1713 static void 1714 clear_page(struct vm_page *page) 1715 { 1716 vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0, 1717 B_PAGE_SIZE); 1718 } 1719 1720 1721 static status_t 1722 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired) 1723 { 1724 TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#" 1725 B_PRIxPHYSADDR "\n", startPage, length)); 1726 1727 if (sPhysicalPageOffset > startPage) { 1728 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1729 "): start page is before free list\n", startPage, length); 1730 if (sPhysicalPageOffset - startPage >= length) 1731 return B_OK; 1732 length -= sPhysicalPageOffset - startPage; 1733 startPage = sPhysicalPageOffset; 1734 } 1735 1736 startPage -= sPhysicalPageOffset; 1737 1738 if (startPage + length > sNumPages) { 1739 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR 1740 "): range would extend past free list\n", startPage, length); 1741 if (startPage >= sNumPages) 1742 return B_OK; 1743 length = sNumPages - startPage; 1744 } 1745 1746 WriteLocker locker(sFreePageQueuesLock); 1747 1748 for (page_num_t i = 0; i < length; i++) { 1749 vm_page *page = &sPages[startPage + i]; 1750 switch (page->State()) { 1751 case PAGE_STATE_FREE: 1752 case PAGE_STATE_CLEAR: 1753 { 1754 // TODO: This violates the page reservation policy, since we remove pages from 1755 // the free/clear queues without having reserved them before. This should happen 1756 // in the early boot process only, though. 1757 DEBUG_PAGE_ACCESS_START(page); 1758 VMPageQueue& queue = page->State() == PAGE_STATE_FREE 1759 ? sFreePageQueue : sClearPageQueue; 1760 queue.Remove(page); 1761 page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED); 1762 page->busy = false; 1763 atomic_add(&sUnreservedFreePages, -1); 1764 DEBUG_PAGE_ACCESS_END(page); 1765 break; 1766 } 1767 case PAGE_STATE_WIRED: 1768 case PAGE_STATE_UNUSED: 1769 break; 1770 case PAGE_STATE_ACTIVE: 1771 case PAGE_STATE_INACTIVE: 1772 case PAGE_STATE_MODIFIED: 1773 case PAGE_STATE_CACHED: 1774 default: 1775 // uh 1776 dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR 1777 " in non-free state %d!\n", startPage + i, page->State()); 1778 break; 1779 } 1780 } 1781 1782 return B_OK; 1783 } 1784 1785 1786 /*! 1787 This is a background thread that wakes up when its condition is notified 1788 and moves some pages from the free queue over to the clear queue. 1789 Given enough time, it will clear out all pages from the free queue - we 1790 could probably slow it down after having reached a certain threshold. 1791 */ 1792 static int32 1793 page_scrubber(void *unused) 1794 { 1795 (void)(unused); 1796 1797 TRACE(("page_scrubber starting...\n")); 1798 1799 ConditionVariableEntry entry; 1800 for (;;) { 1801 while (sFreePageQueue.Count() == 0 1802 || atomic_get(&sUnreservedFreePages) 1803 < (int32)sFreePagesTarget) { 1804 sFreePageCondition.Add(&entry); 1805 entry.Wait(); 1806 } 1807 1808 // Since we temporarily remove pages from the free pages reserve, 1809 // we must make sure we don't cause a violation of the page 1810 // reservation warranty. The following is usually stricter than 1811 // necessary, because we don't have information on how many of the 1812 // reserved pages have already been allocated. 
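		// Reserving against the user-priority threshold means the scrubber
		// leaves at least as many free pages untouched as a user-priority
		// allocation would have to.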
1813 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1814 kPageReserveForPriority[VM_PRIORITY_USER]); 1815 if (reserved == 0) 1816 continue; 1817 1818 // get some pages from the free queue, mostly sorted 1819 ReadLocker locker(sFreePageQueuesLock); 1820 1821 vm_page *page[SCRUB_SIZE]; 1822 int32 scrubCount = 0; 1823 for (int32 i = 0; i < reserved; i++) { 1824 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1825 if (page[i] == NULL) 1826 break; 1827 1828 DEBUG_PAGE_ACCESS_START(page[i]); 1829 1830 page[i]->SetState(PAGE_STATE_ACTIVE); 1831 page[i]->busy = true; 1832 scrubCount++; 1833 } 1834 1835 locker.Unlock(); 1836 1837 if (scrubCount == 0) { 1838 unreserve_pages(reserved); 1839 continue; 1840 } 1841 1842 TA(ScrubbingPages(scrubCount)); 1843 1844 // clear them 1845 for (int32 i = 0; i < scrubCount; i++) 1846 clear_page(page[i]); 1847 1848 locker.Lock(); 1849 1850 // and put them into the clear queue 1851 // process the array reversed when prepending to preserve sequential order 1852 for (int32 i = scrubCount - 1; i >= 0; i--) { 1853 page[i]->SetState(PAGE_STATE_CLEAR); 1854 page[i]->busy = false; 1855 DEBUG_PAGE_ACCESS_END(page[i]); 1856 sClearPageQueue.PrependUnlocked(page[i]); 1857 } 1858 1859 locker.Unlock(); 1860 1861 unreserve_pages(reserved); 1862 1863 TA(ScrubbedPages(scrubCount)); 1864 1865 // wait at least 100ms between runs 1866 snooze(100 * 1000); 1867 } 1868 1869 return 0; 1870 } 1871 1872 1873 static void 1874 init_page_marker(vm_page &marker) 1875 { 1876 marker.SetCacheRef(NULL); 1877 marker.InitState(PAGE_STATE_UNUSED); 1878 marker.busy = true; 1879 #if DEBUG_PAGE_QUEUE 1880 marker.queue = NULL; 1881 #endif 1882 #if DEBUG_PAGE_ACCESS 1883 marker.accessing_thread = thread_get_current_thread_id(); 1884 #endif 1885 } 1886 1887 1888 static void 1889 remove_page_marker(struct vm_page &marker) 1890 { 1891 DEBUG_PAGE_ACCESS_CHECK(&marker); 1892 1893 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1894 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1895 1896 marker.SetState(PAGE_STATE_UNUSED); 1897 } 1898 1899 1900 static vm_page* 1901 next_modified_page(page_num_t& maxPagesToSee) 1902 { 1903 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1904 1905 while (maxPagesToSee > 0) { 1906 vm_page* page = sModifiedPageQueue.Head(); 1907 if (page == NULL) 1908 return NULL; 1909 1910 sModifiedPageQueue.Requeue(page, true); 1911 1912 maxPagesToSee--; 1913 1914 if (!page->busy) 1915 return page; 1916 } 1917 1918 return NULL; 1919 } 1920 1921 1922 // #pragma mark - 1923 1924 1925 class PageWriteTransfer; 1926 class PageWriteWrapper; 1927 1928 1929 class PageWriterRun { 1930 public: 1931 status_t Init(uint32 maxPages); 1932 1933 void PrepareNextRun(); 1934 void AddPage(vm_page* page); 1935 uint32 Go(); 1936 1937 void PageWritten(PageWriteTransfer* transfer, status_t status, 1938 bool partialTransfer, size_t bytesTransferred); 1939 1940 private: 1941 uint32 fMaxPages; 1942 uint32 fWrapperCount; 1943 uint32 fTransferCount; 1944 int32 fPendingTransfers; 1945 PageWriteWrapper* fWrappers; 1946 PageWriteTransfer* fTransfers; 1947 ConditionVariable fAllFinishedCondition; 1948 }; 1949 1950 1951 class PageWriteTransfer : public AsyncIOCallback { 1952 public: 1953 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1954 bool AddPage(vm_page* page); 1955 1956 status_t Schedule(uint32 flags); 1957 1958 void SetStatus(status_t status, size_t transferred); 1959 1960 status_t Status() const { return fStatus; } 1961 struct VMCache* Cache() const { return fCache; } 1962 uint32 
PageCount() const { return fPageCount; }

	virtual void IOFinished(status_t status, bool partialTransfer,
		generic_size_t bytesTransferred);

private:
	PageWriterRun*		fRun;
	struct VMCache*		fCache;
	off_t				fOffset;
	uint32				fPageCount;
	int32				fMaxPages;
	status_t			fStatus;
	uint32				fVecCount;
	generic_io_vec		fVecs[32];	// TODO: make dynamic/configurable
};


class PageWriteWrapper {
public:
	PageWriteWrapper();
	~PageWriteWrapper();
	void SetTo(vm_page* page);
	bool Done(status_t result);

private:
	vm_page*			fPage;
	struct VMCache*		fCache;
	bool				fIsActive;
};


PageWriteWrapper::PageWriteWrapper()
	:
	fIsActive(false)
{
}


PageWriteWrapper::~PageWriteWrapper()
{
	if (fIsActive)
		panic("page write wrapper going out of scope but isn't completed");
}


/*!	The page's cache must be locked.
*/
void
PageWriteWrapper::SetTo(vm_page* page)
{
	DEBUG_PAGE_ACCESS_CHECK(page);

	if (page->busy)
		panic("setting page write wrapper to busy page");

	if (fIsActive)
		panic("re-setting page write wrapper that isn't completed");

	fPage = page;
	fCache = page->Cache();
	fIsActive = true;

	fPage->busy = true;
	fPage->busy_writing = true;

	// We have a modified page -- however, while we're writing it back,
	// the page might still be mapped. In order not to lose any changes to the
	// page, we mark it clean before actually writing it back; if
	// writing the page fails for some reason, we'll just keep it in the
	// modified page list, but that should happen only rarely.

	// If the page is changed after we cleared the dirty flag, but before we
	// had the chance to write it back, then we'll write it again later -- that
	// will probably not happen that often, though.

	vm_clear_map_flags(fPage, PAGE_MODIFIED);
}


/*!	The page's cache must be locked.
	The page queues must not be locked.
	\return \c true if the page was written successfully or could otherwise be
		handled, \c false otherwise.
*/
bool
PageWriteWrapper::Done(status_t result)
{
	if (!fIsActive)
		panic("completing page write wrapper that is not active");

	DEBUG_PAGE_ACCESS_START(fPage);

	fPage->busy = false;
		// Set unbusy and notify later by hand, since we might free the page.

	bool success = true;

	if (result == B_OK) {
		// put it into the active/inactive queue
		move_page_to_appropriate_queue(fPage);
		fPage->busy_writing = false;
		DEBUG_PAGE_ACCESS_END(fPage);
	} else {
		// Writing the page failed. One reason would be that the cache has been
		// shrunk and the page no longer belongs to the file. Otherwise the
		// actual I/O failed, in which case we'll simply keep the page modified.

		if (!fPage->busy_writing) {
			// The busy_writing flag was cleared. That means the cache has been
			// shrunk while we were trying to write the page and we have to
			// free it now.
			vm_remove_all_page_mappings(fPage);
				// TODO: Unmapping should already happen when resizing the cache!
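			// Note that free_page() only puts the page back on the free
			// queue; the unreserve_pages(1) below updates the free-page
			// accounting and wakes up any reservation waiters.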
2074 fCache->RemovePage(fPage); 2075 free_page(fPage, false); 2076 unreserve_pages(1); 2077 } else { 2078 // Writing the page failed -- mark the page modified and move it to 2079 // an appropriate queue other than the modified queue, so we don't 2080 // keep trying to write it over and over again. We keep 2081 // non-temporary pages in the modified queue, though, so they don't 2082 // get lost in the inactive queue. 2083 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 2084 strerror(result)); 2085 2086 fPage->modified = true; 2087 if (!fCache->temporary) 2088 set_page_state(fPage, PAGE_STATE_MODIFIED); 2089 else if (fPage->IsMapped()) 2090 set_page_state(fPage, PAGE_STATE_ACTIVE); 2091 else 2092 set_page_state(fPage, PAGE_STATE_INACTIVE); 2093 2094 fPage->busy_writing = false; 2095 DEBUG_PAGE_ACCESS_END(fPage); 2096 2097 success = false; 2098 } 2099 } 2100 2101 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 2102 fIsActive = false; 2103 2104 return success; 2105 } 2106 2107 2108 /*! The page's cache must be locked. 2109 */ 2110 void 2111 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 2112 { 2113 fRun = run; 2114 fCache = page->Cache(); 2115 fOffset = page->cache_offset; 2116 fPageCount = 1; 2117 fMaxPages = maxPages; 2118 fStatus = B_OK; 2119 2120 fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2121 fVecs[0].length = B_PAGE_SIZE; 2122 fVecCount = 1; 2123 } 2124 2125 2126 /*! The page's cache must be locked. 2127 */ 2128 bool 2129 PageWriteTransfer::AddPage(vm_page* page) 2130 { 2131 if (page->Cache() != fCache 2132 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 2133 return false; 2134 2135 phys_addr_t nextBase = fVecs[fVecCount - 1].base 2136 + fVecs[fVecCount - 1].length; 2137 2138 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2139 && (off_t)page->cache_offset == fOffset + fPageCount) { 2140 // append to last iovec 2141 fVecs[fVecCount - 1].length += B_PAGE_SIZE; 2142 fPageCount++; 2143 return true; 2144 } 2145 2146 nextBase = fVecs[0].base - B_PAGE_SIZE; 2147 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase 2148 && (off_t)page->cache_offset == fOffset - 1) { 2149 // prepend to first iovec and adjust offset 2150 fVecs[0].base = nextBase; 2151 fVecs[0].length += B_PAGE_SIZE; 2152 fOffset = page->cache_offset; 2153 fPageCount++; 2154 return true; 2155 } 2156 2157 if (((off_t)page->cache_offset == fOffset + fPageCount 2158 || (off_t)page->cache_offset == fOffset - 1) 2159 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 2160 // not physically contiguous or not in the right order 2161 uint32 vectorIndex; 2162 if ((off_t)page->cache_offset < fOffset) { 2163 // we are pre-pending another vector, move the other vecs 2164 for (uint32 i = fVecCount; i > 0; i--) 2165 fVecs[i] = fVecs[i - 1]; 2166 2167 fOffset = page->cache_offset; 2168 vectorIndex = 0; 2169 } else 2170 vectorIndex = fVecCount; 2171 2172 fVecs[vectorIndex].base 2173 = (phys_addr_t)page->physical_page_number << PAGE_SHIFT; 2174 fVecs[vectorIndex].length = B_PAGE_SIZE; 2175 2176 fVecCount++; 2177 fPageCount++; 2178 return true; 2179 } 2180 2181 return false; 2182 } 2183 2184 2185 status_t 2186 PageWriteTransfer::Schedule(uint32 flags) 2187 { 2188 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 2189 generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT; 2190 2191 if (fRun != NULL) { 2192 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 2193 flags | B_PHYSICAL_IO_REQUEST, 
this); 2194 } 2195 2196 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 2197 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 2198 2199 SetStatus(status, writeLength); 2200 return fStatus; 2201 } 2202 2203 2204 void 2205 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 2206 { 2207 // only succeed if all pages up to the last one have been written fully 2208 // and the last page has at least been written partially 2209 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 2210 status = B_ERROR; 2211 2212 fStatus = status; 2213 } 2214 2215 2216 void 2217 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 2218 generic_size_t bytesTransferred) 2219 { 2220 SetStatus(status, bytesTransferred); 2221 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 2222 } 2223 2224 2225 status_t 2226 PageWriterRun::Init(uint32 maxPages) 2227 { 2228 fMaxPages = maxPages; 2229 fWrapperCount = 0; 2230 fTransferCount = 0; 2231 fPendingTransfers = 0; 2232 2233 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 2234 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 2235 if (fWrappers == NULL || fTransfers == NULL) 2236 return B_NO_MEMORY; 2237 2238 return B_OK; 2239 } 2240 2241 2242 void 2243 PageWriterRun::PrepareNextRun() 2244 { 2245 fWrapperCount = 0; 2246 fTransferCount = 0; 2247 fPendingTransfers = 0; 2248 } 2249 2250 2251 /*! The page's cache must be locked. 2252 */ 2253 void 2254 PageWriterRun::AddPage(vm_page* page) 2255 { 2256 fWrappers[fWrapperCount++].SetTo(page); 2257 2258 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 2259 fTransfers[fTransferCount++].SetTo(this, page, 2260 page->Cache()->MaxPagesPerAsyncWrite()); 2261 } 2262 } 2263 2264 2265 /*! Writes all pages previously added. 2266 \return The number of pages that could not be written or otherwise handled. 2267 */ 2268 uint32 2269 PageWriterRun::Go() 2270 { 2271 atomic_set(&fPendingTransfers, fTransferCount); 2272 2273 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 2274 ConditionVariableEntry waitEntry; 2275 fAllFinishedCondition.Add(&waitEntry); 2276 2277 // schedule writes 2278 for (uint32 i = 0; i < fTransferCount; i++) 2279 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 2280 2281 // wait until all pages have been written 2282 waitEntry.Wait(); 2283 2284 // mark pages depending on whether they could be written or not 2285 2286 uint32 failedPages = 0; 2287 uint32 wrapperIndex = 0; 2288 for (uint32 i = 0; i < fTransferCount; i++) { 2289 PageWriteTransfer& transfer = fTransfers[i]; 2290 transfer.Cache()->Lock(); 2291 2292 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2293 if (!fWrappers[wrapperIndex++].Done(transfer.Status())) 2294 failedPages++; 2295 } 2296 2297 transfer.Cache()->Unlock(); 2298 } 2299 2300 ASSERT(wrapperIndex == fWrapperCount); 2301 2302 for (uint32 i = 0; i < fTransferCount; i++) { 2303 PageWriteTransfer& transfer = fTransfers[i]; 2304 struct VMCache* cache = transfer.Cache(); 2305 2306 // We've acquired a references for each page 2307 for (uint32 j = 0; j < transfer.PageCount(); j++) { 2308 // We release the cache references after all pages were made 2309 // unbusy again - otherwise releasing a vnode could deadlock. 
2310 cache->ReleaseStoreRef(); 2311 cache->ReleaseRef(); 2312 } 2313 } 2314 2315 return failedPages; 2316 } 2317 2318 2319 void 2320 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 2321 bool partialTransfer, size_t bytesTransferred) 2322 { 2323 if (atomic_add(&fPendingTransfers, -1) == 1) 2324 fAllFinishedCondition.NotifyAll(); 2325 } 2326 2327 2328 /*! The page writer continuously takes some pages from the modified 2329 queue, writes them back, and moves them back to the active queue. 2330 It runs in its own thread, and is only there to keep the number 2331 of modified pages low, so that more pages can be reused with 2332 fewer costs. 2333 */ 2334 status_t 2335 page_writer(void* /*unused*/) 2336 { 2337 const uint32 kNumPages = 256; 2338 #ifdef TRACE_VM_PAGE 2339 uint32 writtenPages = 0; 2340 bigtime_t lastWrittenTime = 0; 2341 bigtime_t pageCollectionTime = 0; 2342 bigtime_t pageWritingTime = 0; 2343 #endif 2344 2345 PageWriterRun run; 2346 if (run.Init(kNumPages) != B_OK) { 2347 panic("page writer: Failed to init PageWriterRun!"); 2348 return B_ERROR; 2349 } 2350 2351 page_num_t pagesSinceLastSuccessfulWrite = 0; 2352 2353 while (true) { 2354 // TODO: Maybe wait shorter when memory is low! 2355 if (sModifiedPageQueue.Count() < kNumPages) { 2356 sPageWriterCondition.Wait(3000000, true); 2357 // all 3 seconds when no one triggers us 2358 } 2359 2360 page_num_t modifiedPages = sModifiedPageQueue.Count(); 2361 if (modifiedPages == 0) 2362 continue; 2363 2364 if (modifiedPages <= pagesSinceLastSuccessfulWrite) { 2365 // We ran through the whole queue without being able to write a 2366 // single page. Take a break. 2367 snooze(500000); 2368 pagesSinceLastSuccessfulWrite = 0; 2369 } 2370 2371 #if ENABLE_SWAP_SUPPORT 2372 page_stats pageStats; 2373 get_page_stats(pageStats); 2374 bool activePaging = do_active_paging(pageStats); 2375 #endif 2376 2377 // depending on how urgent it becomes to get pages to disk, we adjust 2378 // our I/O priority 2379 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 2380 int32 ioPriority = B_IDLE_PRIORITY; 2381 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 2382 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 2383 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 2384 } else { 2385 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 2386 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 2387 } 2388 2389 thread_set_io_priority(ioPriority); 2390 2391 uint32 numPages = 0; 2392 run.PrepareNextRun(); 2393 2394 // TODO: make this laptop friendly, too (ie. only start doing 2395 // something if someone else did something or there is really 2396 // enough to do). 2397 2398 // collect pages to be written 2399 #ifdef TRACE_VM_PAGE 2400 pageCollectionTime -= system_time(); 2401 #endif 2402 2403 page_num_t maxPagesToSee = modifiedPages; 2404 2405 while (numPages < kNumPages && maxPagesToSee > 0) { 2406 vm_page *page = next_modified_page(maxPagesToSee); 2407 if (page == NULL) 2408 break; 2409 2410 PageCacheLocker cacheLocker(page, false); 2411 if (!cacheLocker.IsLocked()) 2412 continue; 2413 2414 VMCache *cache = page->Cache(); 2415 2416 // If the page is busy or its state has changed while we were 2417 // locking the cache, just ignore it. 2418 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 2419 continue; 2420 2421 DEBUG_PAGE_ACCESS_START(page); 2422 2423 // Don't write back wired (locked) pages. 
2424 if (page->WiredCount() > 0) { 2425 set_page_state(page, PAGE_STATE_ACTIVE); 2426 DEBUG_PAGE_ACCESS_END(page); 2427 continue; 2428 } 2429 2430 // Write back temporary pages only when we're actively paging. 2431 if (cache->temporary 2432 #if ENABLE_SWAP_SUPPORT 2433 && (!activePaging 2434 || !cache->CanWritePage( 2435 (off_t)page->cache_offset << PAGE_SHIFT)) 2436 #endif 2437 ) { 2438 // We can't/don't want to do anything with this page, so move it 2439 // to one of the other queues. 2440 if (page->mappings.IsEmpty()) 2441 set_page_state(page, PAGE_STATE_INACTIVE); 2442 else 2443 set_page_state(page, PAGE_STATE_ACTIVE); 2444 2445 DEBUG_PAGE_ACCESS_END(page); 2446 continue; 2447 } 2448 2449 // We need our own reference to the store, as it might currently be 2450 // destroyed. 2451 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2452 DEBUG_PAGE_ACCESS_END(page); 2453 cacheLocker.Unlock(); 2454 thread_yield(); 2455 continue; 2456 } 2457 2458 run.AddPage(page); 2459 // TODO: We're possibly adding pages of different caches and 2460 // thus maybe of different underlying file systems here. This 2461 // is a potential problem for loop file systems/devices, since 2462 // we could mark a page busy that would need to be accessed 2463 // when writing back another page, thus causing a deadlock. 2464 2465 DEBUG_PAGE_ACCESS_END(page); 2466 2467 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2468 TPW(WritePage(page)); 2469 2470 cache->AcquireRefLocked(); 2471 numPages++; 2472 } 2473 2474 #ifdef TRACE_VM_PAGE 2475 pageCollectionTime += system_time(); 2476 #endif 2477 if (numPages == 0) 2478 continue; 2479 2480 // write pages to disk and do all the cleanup 2481 #ifdef TRACE_VM_PAGE 2482 pageWritingTime -= system_time(); 2483 #endif 2484 uint32 failedPages = run.Go(); 2485 #ifdef TRACE_VM_PAGE 2486 pageWritingTime += system_time(); 2487 2488 // debug output only... 2489 writtenPages += numPages; 2490 if (writtenPages >= 1024) { 2491 bigtime_t now = system_time(); 2492 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, " 2493 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n", 2494 (now - lastWrittenTime) / 1000, 2495 pageCollectionTime / 1000, pageWritingTime / 1000)); 2496 lastWrittenTime = now; 2497 2498 writtenPages -= 1024; 2499 pageCollectionTime = 0; 2500 pageWritingTime = 0; 2501 } 2502 #endif 2503 2504 if (failedPages == numPages) 2505 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee; 2506 else 2507 pagesSinceLastSuccessfulWrite = 0; 2508 } 2509 2510 return B_OK; 2511 } 2512 2513 2514 // #pragma mark - 2515 2516 2517 // TODO: This should be done in the page daemon! 2518 #if 0 2519 #if ENABLE_SWAP_SUPPORT 2520 static bool 2521 free_page_swap_space(int32 index) 2522 { 2523 vm_page *page = vm_page_at_index(index); 2524 PageCacheLocker locker(page); 2525 if (!locker.IsLocked()) 2526 return false; 2527 2528 DEBUG_PAGE_ACCESS_START(page); 2529 2530 VMCache* cache = page->Cache(); 2531 if (cache->temporary && page->WiredCount() == 0 2532 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2533 && page->usage_count > 0) { 2534 // TODO: how to judge a page is highly active? 2535 if (swap_free_page_swap_space(page)) { 2536 // We need to mark the page modified, since otherwise it could be 2537 // stolen and we'd lose its data. 
2538 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2539 TD(FreedPageSwap(page)); 2540 DEBUG_PAGE_ACCESS_END(page); 2541 return true; 2542 } 2543 } 2544 DEBUG_PAGE_ACCESS_END(page); 2545 return false; 2546 } 2547 #endif 2548 #endif // 0 2549 2550 2551 static vm_page * 2552 find_cached_page_candidate(struct vm_page &marker) 2553 { 2554 DEBUG_PAGE_ACCESS_CHECK(&marker); 2555 2556 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2557 vm_page *page; 2558 2559 if (marker.State() == PAGE_STATE_UNUSED) { 2560 // Get the first free pages of the (in)active queue 2561 page = sCachedPageQueue.Head(); 2562 } else { 2563 // Get the next page of the current queue 2564 if (marker.State() != PAGE_STATE_CACHED) { 2565 panic("invalid marker %p state", &marker); 2566 return NULL; 2567 } 2568 2569 page = sCachedPageQueue.Next(&marker); 2570 sCachedPageQueue.Remove(&marker); 2571 marker.SetState(PAGE_STATE_UNUSED); 2572 } 2573 2574 while (page != NULL) { 2575 if (!page->busy) { 2576 // we found a candidate, insert marker 2577 marker.SetState(PAGE_STATE_CACHED); 2578 sCachedPageQueue.InsertAfter(page, &marker); 2579 return page; 2580 } 2581 2582 page = sCachedPageQueue.Next(page); 2583 } 2584 2585 return NULL; 2586 } 2587 2588 2589 static bool 2590 free_cached_page(vm_page *page, bool dontWait) 2591 { 2592 // try to lock the page's cache 2593 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2594 return false; 2595 VMCache* cache = page->Cache(); 2596 2597 AutoLocker<VMCache> cacheLocker(cache, true); 2598 MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache); 2599 2600 // check again if that page is still a candidate 2601 if (page->busy || page->State() != PAGE_STATE_CACHED) 2602 return false; 2603 2604 DEBUG_PAGE_ACCESS_START(page); 2605 2606 PAGE_ASSERT(page, !page->IsMapped()); 2607 PAGE_ASSERT(page, !page->modified); 2608 2609 // we can now steal this page 2610 2611 cache->RemovePage(page); 2612 // Now the page doesn't have cache anymore, so no one else (e.g. 2613 // vm_page_allocate_page_run() can pick it up), since they would be 2614 // required to lock the cache first, which would fail. 2615 2616 sCachedPageQueue.RemoveUnlocked(page); 2617 return true; 2618 } 2619 2620 2621 static uint32 2622 free_cached_pages(uint32 pagesToFree, bool dontWait) 2623 { 2624 vm_page marker; 2625 init_page_marker(marker); 2626 2627 uint32 pagesFreed = 0; 2628 2629 while (pagesFreed < pagesToFree) { 2630 vm_page *page = find_cached_page_candidate(marker); 2631 if (page == NULL) 2632 break; 2633 2634 if (free_cached_page(page, dontWait)) { 2635 ReadLocker locker(sFreePageQueuesLock); 2636 page->SetState(PAGE_STATE_FREE); 2637 DEBUG_PAGE_ACCESS_END(page); 2638 sFreePageQueue.PrependUnlocked(page); 2639 locker.Unlock(); 2640 2641 TA(StolenPage()); 2642 2643 pagesFreed++; 2644 } 2645 } 2646 2647 remove_page_marker(marker); 2648 2649 sFreePageCondition.NotifyAll(); 2650 2651 return pagesFreed; 2652 } 2653 2654 2655 static void 2656 idle_scan_active_pages(page_stats& pageStats) 2657 { 2658 VMPageQueue& queue = sActivePageQueue; 2659 2660 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2661 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2662 2663 while (maxToScan > 0) { 2664 maxToScan--; 2665 2666 // Get the next page. Note that we don't bother to lock here. We go with 2667 // the assumption that on all architectures reading/writing pointers is 2668 // atomic. Beyond that it doesn't really matter. 
We have to unlock the 2669 // queue anyway to lock the page's cache, and we'll recheck afterwards. 2670 vm_page* page = queue.Head(); 2671 if (page == NULL) 2672 break; 2673 2674 // lock the page's cache 2675 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2676 if (cache == NULL) 2677 continue; 2678 2679 if (page->State() != PAGE_STATE_ACTIVE) { 2680 // page is no longer in the cache or in this queue 2681 cache->ReleaseRefAndUnlock(); 2682 continue; 2683 } 2684 2685 if (page->busy) { 2686 // page is busy -- requeue at the end 2687 vm_page_requeue(page, true); 2688 cache->ReleaseRefAndUnlock(); 2689 continue; 2690 } 2691 2692 DEBUG_PAGE_ACCESS_START(page); 2693 2694 // Get the page active/modified flags and update the page's usage count. 2695 // We completely unmap inactive temporary pages. This saves us to 2696 // iterate through the inactive list as well, since we'll be notified 2697 // via page fault whenever such an inactive page is used again. 2698 // We don't remove the mappings of non-temporary pages, since we 2699 // wouldn't notice when those would become unused and could thus be 2700 // moved to the cached list. 2701 int32 usageCount; 2702 if (page->WiredCount() > 0 || page->usage_count > 0 2703 || !cache->temporary) { 2704 usageCount = vm_clear_page_mapping_accessed_flags(page); 2705 } else 2706 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2707 2708 if (usageCount > 0) { 2709 usageCount += page->usage_count + kPageUsageAdvance; 2710 if (usageCount > kPageUsageMax) 2711 usageCount = kPageUsageMax; 2712 // TODO: This would probably also be the place to reclaim swap space. 2713 } else { 2714 usageCount += page->usage_count - (int32)kPageUsageDecline; 2715 if (usageCount < 0) { 2716 usageCount = 0; 2717 set_page_state(page, PAGE_STATE_INACTIVE); 2718 } 2719 } 2720 2721 page->usage_count = usageCount; 2722 2723 DEBUG_PAGE_ACCESS_END(page); 2724 2725 cache->ReleaseRefAndUnlock(); 2726 } 2727 } 2728 2729 2730 static void 2731 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel) 2732 { 2733 int32 pagesToFree = pageStats.unsatisfiedReservations 2734 + sFreeOrCachedPagesTarget 2735 - (pageStats.totalFreePages + pageStats.cachedPages); 2736 if (pagesToFree <= 0) 2737 return; 2738 2739 bigtime_t time = system_time(); 2740 uint32 pagesScanned = 0; 2741 uint32 pagesToCached = 0; 2742 uint32 pagesToModified = 0; 2743 uint32 pagesToActive = 0; 2744 2745 // Determine how many pages at maximum to send to the modified queue. Since 2746 // it is relatively expensive to page out pages, we do that on a grander 2747 // scale only when things get desperate. 2748 uint32 maxToFlush = despairLevel <= 1 ? 
32 : 10000; 2749 2750 vm_page marker; 2751 init_page_marker(marker); 2752 2753 VMPageQueue& queue = sInactivePageQueue; 2754 InterruptsSpinLocker queueLocker(queue.GetLock()); 2755 uint32 maxToScan = queue.Count(); 2756 2757 vm_page* nextPage = queue.Head(); 2758 2759 while (pagesToFree > 0 && maxToScan > 0) { 2760 maxToScan--; 2761 2762 // get the next page 2763 vm_page* page = nextPage; 2764 if (page == NULL) 2765 break; 2766 nextPage = queue.Next(page); 2767 2768 if (page->busy) 2769 continue; 2770 2771 // mark the position 2772 queue.InsertAfter(page, &marker); 2773 queueLocker.Unlock(); 2774 2775 // lock the page's cache 2776 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2777 if (cache == NULL || page->busy 2778 || page->State() != PAGE_STATE_INACTIVE) { 2779 if (cache != NULL) 2780 cache->ReleaseRefAndUnlock(); 2781 queueLocker.Lock(); 2782 nextPage = queue.Next(&marker); 2783 queue.Remove(&marker); 2784 continue; 2785 } 2786 2787 pagesScanned++; 2788 2789 DEBUG_PAGE_ACCESS_START(page); 2790 2791 // Get the accessed count, clear the accessed/modified flags and 2792 // unmap the page, if it hasn't been accessed. 2793 int32 usageCount; 2794 if (page->WiredCount() > 0) 2795 usageCount = vm_clear_page_mapping_accessed_flags(page); 2796 else 2797 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2798 2799 // update usage count 2800 if (usageCount > 0) { 2801 usageCount += page->usage_count + kPageUsageAdvance; 2802 if (usageCount > kPageUsageMax) 2803 usageCount = kPageUsageMax; 2804 } else { 2805 usageCount += page->usage_count - (int32)kPageUsageDecline; 2806 if (usageCount < 0) 2807 usageCount = 0; 2808 } 2809 2810 page->usage_count = usageCount; 2811 2812 // Move to fitting queue or requeue: 2813 // * Active mapped pages go to the active queue. 2814 // * Inactive mapped (i.e. wired) pages are requeued. 2815 // * The remaining pages are cachable. Thus, if unmodified they go to 2816 // the cached queue, otherwise to the modified queue (up to a limit). 2817 // Note that until in the idle scanning we don't exempt pages of 2818 // temporary caches. Apparently we really need memory, so we better 2819 // page out memory as well. 
2820 bool isMapped = page->IsMapped(); 2821 if (usageCount > 0) { 2822 if (isMapped) { 2823 set_page_state(page, PAGE_STATE_ACTIVE); 2824 pagesToActive++; 2825 } else 2826 vm_page_requeue(page, true); 2827 } else if (isMapped) { 2828 vm_page_requeue(page, true); 2829 } else if (!page->modified) { 2830 set_page_state(page, PAGE_STATE_CACHED); 2831 pagesToFree--; 2832 pagesToCached++; 2833 } else if (maxToFlush > 0) { 2834 set_page_state(page, PAGE_STATE_MODIFIED); 2835 maxToFlush--; 2836 pagesToModified++; 2837 } else 2838 vm_page_requeue(page, true); 2839 2840 DEBUG_PAGE_ACCESS_END(page); 2841 2842 cache->ReleaseRefAndUnlock(); 2843 2844 // remove the marker 2845 queueLocker.Lock(); 2846 nextPage = queue.Next(&marker); 2847 queue.Remove(&marker); 2848 } 2849 2850 queueLocker.Unlock(); 2851 2852 time = system_time() - time; 2853 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2854 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %" 2855 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached, 2856 pagesToModified, pagesToActive); 2857 2858 // wake up the page writer, if we tossed it some pages 2859 if (pagesToModified > 0) 2860 sPageWriterCondition.WakeUp(); 2861 } 2862 2863 2864 static void 2865 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2866 { 2867 vm_page marker; 2868 init_page_marker(marker); 2869 2870 VMPageQueue& queue = sActivePageQueue; 2871 InterruptsSpinLocker queueLocker(queue.GetLock()); 2872 uint32 maxToScan = queue.Count(); 2873 2874 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2875 + sFreeOrCachedPagesTarget 2876 - (pageStats.totalFreePages + pageStats.cachedPages) 2877 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2878 if (pagesToDeactivate <= 0) 2879 return; 2880 2881 bigtime_t time = system_time(); 2882 uint32 pagesAccessed = 0; 2883 uint32 pagesToInactive = 0; 2884 uint32 pagesScanned = 0; 2885 2886 vm_page* nextPage = queue.Head(); 2887 2888 while (pagesToDeactivate > 0 && maxToScan > 0) { 2889 maxToScan--; 2890 2891 // get the next page 2892 vm_page* page = nextPage; 2893 if (page == NULL) 2894 break; 2895 nextPage = queue.Next(page); 2896 2897 if (page->busy) 2898 continue; 2899 2900 // mark the position 2901 queue.InsertAfter(page, &marker); 2902 queueLocker.Unlock(); 2903 2904 // lock the page's cache 2905 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2906 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2907 if (cache != NULL) 2908 cache->ReleaseRefAndUnlock(); 2909 queueLocker.Lock(); 2910 nextPage = queue.Next(&marker); 2911 queue.Remove(&marker); 2912 continue; 2913 } 2914 2915 pagesScanned++; 2916 2917 DEBUG_PAGE_ACCESS_START(page); 2918 2919 // Get the page active/modified flags and update the page's usage count. 2920 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2921 2922 if (usageCount > 0) { 2923 usageCount += page->usage_count + kPageUsageAdvance; 2924 if (usageCount > kPageUsageMax) 2925 usageCount = kPageUsageMax; 2926 pagesAccessed++; 2927 // TODO: This would probably also be the place to reclaim swap space. 
2928 } else { 2929 usageCount += page->usage_count - (int32)kPageUsageDecline; 2930 if (usageCount <= 0) { 2931 usageCount = 0; 2932 set_page_state(page, PAGE_STATE_INACTIVE); 2933 pagesToInactive++; 2934 } 2935 } 2936 2937 page->usage_count = usageCount; 2938 2939 DEBUG_PAGE_ACCESS_END(page); 2940 2941 cache->ReleaseRefAndUnlock(); 2942 2943 // remove the marker 2944 queueLocker.Lock(); 2945 nextPage = queue.Next(&marker); 2946 queue.Remove(&marker); 2947 } 2948 2949 time = system_time() - time; 2950 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32 2951 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed" 2952 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed); 2953 } 2954 2955 2956 static void 2957 page_daemon_idle_scan(page_stats& pageStats) 2958 { 2959 TRACE_DAEMON("page daemon: idle run\n"); 2960 2961 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2962 // We want more actually free pages, so free some from the cached 2963 // ones. 2964 uint32 freed = free_cached_pages( 2965 sFreePagesTarget - pageStats.totalFreePages, false); 2966 if (freed > 0) 2967 unreserve_pages(freed); 2968 get_page_stats(pageStats); 2969 } 2970 2971 // Walk the active list and move pages to the inactive queue. 2972 get_page_stats(pageStats); 2973 idle_scan_active_pages(pageStats); 2974 } 2975 2976 2977 static void 2978 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2979 { 2980 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %" 2981 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages, 2982 pageStats.cachedPages, pageStats.unsatisfiedReservations 2983 + sFreeOrCachedPagesTarget 2984 - (pageStats.totalFreePages + pageStats.cachedPages)); 2985 2986 // Walk the inactive list and transfer pages to the cached and modified 2987 // queues. 2988 full_scan_inactive_pages(pageStats, despairLevel); 2989 2990 // Free cached pages. Also wake up reservation waiters. 2991 get_page_stats(pageStats); 2992 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2993 - (pageStats.totalFreePages); 2994 if (pagesToFree > 0) { 2995 uint32 freed = free_cached_pages(pagesToFree, true); 2996 if (freed > 0) 2997 unreserve_pages(freed); 2998 } 2999 3000 // Walk the active list and move pages to the inactive queue. 3001 get_page_stats(pageStats); 3002 full_scan_active_pages(pageStats, despairLevel); 3003 } 3004 3005 3006 static status_t 3007 page_daemon(void* /*unused*/) 3008 { 3009 int32 despairLevel = 0; 3010 3011 while (true) { 3012 sPageDaemonCondition.ClearActivated(); 3013 3014 // evaluate the free pages situation 3015 page_stats pageStats; 3016 get_page_stats(pageStats); 3017 3018 if (!do_active_paging(pageStats)) { 3019 // Things look good -- just maintain statistics and keep the pool 3020 // of actually free pages full enough. 3021 despairLevel = 0; 3022 page_daemon_idle_scan(pageStats); 3023 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 3024 } else { 3025 // Not enough free pages. We need to do some real work. 3026 despairLevel = std::max(despairLevel + 1, (int32)3); 3027 page_daemon_full_scan(pageStats, despairLevel); 3028 3029 // Don't wait after the first full scan, but rather immediately 3030 // check whether we were successful in freeing enough pages and 3031 // re-run with increased despair level. The first scan is 3032 // conservative with respect to moving inactive modified pages to 3033 // the modified list to avoid thrashing. The second scan, however, 3034 // will not hold back. 
3035 if (despairLevel > 1) 3036 snooze(kBusyScanWaitInterval); 3037 } 3038 } 3039 3040 return B_OK; 3041 } 3042 3043 3044 /*! Returns how many pages could *not* be reserved. 3045 */ 3046 static uint32 3047 reserve_pages(uint32 count, int priority, bool dontWait) 3048 { 3049 int32 dontTouch = kPageReserveForPriority[priority]; 3050 3051 while (true) { 3052 count -= reserve_some_pages(count, dontTouch); 3053 if (count == 0) 3054 return 0; 3055 3056 if (sUnsatisfiedPageReservations == 0) { 3057 count -= free_cached_pages(count, dontWait); 3058 if (count == 0) 3059 return count; 3060 } 3061 3062 if (dontWait) 3063 return count; 3064 3065 // we need to wait for pages to become available 3066 3067 MutexLocker pageDeficitLocker(sPageDeficitLock); 3068 3069 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 3070 sUnsatisfiedPageReservations += count; 3071 3072 if (atomic_get(&sUnreservedFreePages) > dontTouch) { 3073 // the situation changed 3074 sUnsatisfiedPageReservations -= count; 3075 continue; 3076 } 3077 3078 PageReservationWaiter waiter; 3079 waiter.dontTouch = dontTouch; 3080 waiter.missing = count; 3081 waiter.thread = thread_get_current_thread(); 3082 waiter.threadPriority = waiter.thread->priority; 3083 3084 // insert ordered (i.e. after all waiters with higher or equal priority) 3085 PageReservationWaiter* otherWaiter = NULL; 3086 for (PageReservationWaiterList::Iterator it 3087 = sPageReservationWaiters.GetIterator(); 3088 (otherWaiter = it.Next()) != NULL;) { 3089 if (waiter < *otherWaiter) 3090 break; 3091 } 3092 3093 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 3094 3095 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 3096 "waiting for pages"); 3097 3098 if (notifyDaemon) 3099 sPageDaemonCondition.WakeUp(); 3100 3101 pageDeficitLocker.Unlock(); 3102 3103 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 3104 thread_block(); 3105 3106 pageDeficitLocker.Lock(); 3107 3108 return 0; 3109 } 3110 } 3111 3112 3113 // #pragma mark - private kernel API 3114 3115 3116 /*! Writes a range of modified pages of a cache to disk. 3117 You need to hold the VMCache lock when calling this function. 3118 Note that the cache lock is released in this function. 3119 \param cache The cache. 3120 \param firstPage Offset (in page size units) of the first page in the range. 3121 \param endPage End offset (in page size units) of the page range. The page 3122 at this offset is not included. 
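A minimal usage sketch (illustrative only: it mirrors what
vm_page_write_modified_pages() below does for a cache's full page range):
\code
// the caller holds the cache lock
vm_page_write_modified_page_range(cache, 0,
(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
\endcode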
3123 */ 3124 status_t 3125 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 3126 uint32 endPage) 3127 { 3128 static const int32 kMaxPages = 256; 3129 int32 maxPages = cache->MaxPagesPerWrite(); 3130 if (maxPages < 0 || maxPages > kMaxPages) 3131 maxPages = kMaxPages; 3132 3133 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 3134 | HEAP_DONT_LOCK_KERNEL_SPACE; 3135 3136 PageWriteWrapper stackWrappersPool[2]; 3137 PageWriteWrapper* stackWrappers[1]; 3138 PageWriteWrapper* wrapperPool 3139 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 3140 PageWriteWrapper** wrappers 3141 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages]; 3142 if (wrapperPool == NULL || wrappers == NULL) { 3143 // don't fail, just limit our capabilities 3144 delete[] wrapperPool; 3145 delete[] wrappers; 3146 wrapperPool = stackWrappersPool; 3147 wrappers = stackWrappers; 3148 maxPages = 1; 3149 } 3150 3151 int32 nextWrapper = 0; 3152 int32 usedWrappers = 0; 3153 3154 PageWriteTransfer transfer; 3155 bool transferEmpty = true; 3156 3157 VMCachePagesTree::Iterator it 3158 = cache->pages.GetIterator(firstPage, true, true); 3159 3160 while (true) { 3161 vm_page* page = it.Next(); 3162 if (page == NULL || page->cache_offset >= endPage) { 3163 if (transferEmpty) 3164 break; 3165 3166 page = NULL; 3167 } 3168 3169 if (page != NULL) { 3170 if (page->busy 3171 || (page->State() != PAGE_STATE_MODIFIED 3172 && !vm_test_map_modification(page))) { 3173 page = NULL; 3174 } 3175 } 3176 3177 PageWriteWrapper* wrapper = NULL; 3178 if (page != NULL) { 3179 wrapper = &wrapperPool[nextWrapper++]; 3180 if (nextWrapper > maxPages) 3181 nextWrapper = 0; 3182 3183 DEBUG_PAGE_ACCESS_START(page); 3184 3185 wrapper->SetTo(page); 3186 3187 if (transferEmpty || transfer.AddPage(page)) { 3188 if (transferEmpty) { 3189 transfer.SetTo(NULL, page, maxPages); 3190 transferEmpty = false; 3191 } 3192 3193 DEBUG_PAGE_ACCESS_END(page); 3194 3195 wrappers[usedWrappers++] = wrapper; 3196 continue; 3197 } 3198 3199 DEBUG_PAGE_ACCESS_END(page); 3200 } 3201 3202 if (transferEmpty) 3203 continue; 3204 3205 cache->Unlock(); 3206 status_t status = transfer.Schedule(0); 3207 cache->Lock(); 3208 3209 for (int32 i = 0; i < usedWrappers; i++) 3210 wrappers[i]->Done(status); 3211 3212 usedWrappers = 0; 3213 3214 if (page != NULL) { 3215 transfer.SetTo(NULL, page, maxPages); 3216 wrappers[usedWrappers++] = wrapper; 3217 } else 3218 transferEmpty = true; 3219 } 3220 3221 if (wrapperPool != stackWrappersPool) { 3222 delete[] wrapperPool; 3223 delete[] wrappers; 3224 } 3225 3226 return B_OK; 3227 } 3228 3229 3230 /*! You need to hold the VMCache lock when calling this function. 3231 Note that the cache lock is released in this function. 3232 */ 3233 status_t 3234 vm_page_write_modified_pages(VMCache *cache) 3235 { 3236 return vm_page_write_modified_page_range(cache, 0, 3237 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 3238 } 3239 3240 3241 /*! Schedules the page writer to write back the specified \a page. 3242 Note, however, that it might not do this immediately, and it can well 3243 take several seconds until the page is actually written out. 3244 */ 3245 void 3246 vm_page_schedule_write_page(vm_page *page) 3247 { 3248 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 3249 3250 vm_page_requeue(page, false); 3251 3252 sPageWriterCondition.WakeUp(); 3253 } 3254 3255 3256 /*! Cache must be locked. 
3257 */ 3258 void 3259 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 3260 uint32 endPage) 3261 { 3262 uint32 modified = 0; 3263 for (VMCachePagesTree::Iterator it 3264 = cache->pages.GetIterator(firstPage, true, true); 3265 vm_page *page = it.Next();) { 3266 if (page->cache_offset >= endPage) 3267 break; 3268 3269 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 3270 DEBUG_PAGE_ACCESS_START(page); 3271 vm_page_requeue(page, false); 3272 modified++; 3273 DEBUG_PAGE_ACCESS_END(page); 3274 } 3275 } 3276 3277 if (modified > 0) 3278 sPageWriterCondition.WakeUp(); 3279 } 3280 3281 3282 void 3283 vm_page_init_num_pages(kernel_args *args) 3284 { 3285 // calculate the size of memory by looking at the physical_memory_range array 3286 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 3287 page_num_t physicalPagesEnd = sPhysicalPageOffset 3288 + args->physical_memory_range[0].size / B_PAGE_SIZE; 3289 3290 sNonExistingPages = 0; 3291 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE; 3292 3293 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) { 3294 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE; 3295 if (start > physicalPagesEnd) 3296 sNonExistingPages += start - physicalPagesEnd; 3297 physicalPagesEnd = start 3298 + args->physical_memory_range[i].size / B_PAGE_SIZE; 3299 3300 #ifdef LIMIT_AVAILABLE_MEMORY 3301 page_num_t available 3302 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages; 3303 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) { 3304 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages 3305 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 3306 break; 3307 } 3308 #endif 3309 } 3310 3311 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n", 3312 sPhysicalPageOffset, physicalPagesEnd)); 3313 3314 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 3315 } 3316 3317 3318 status_t 3319 vm_page_init(kernel_args *args) 3320 { 3321 TRACE(("vm_page_init: entry\n")); 3322 3323 // init page queues 3324 sModifiedPageQueue.Init("modified pages queue"); 3325 sInactivePageQueue.Init("inactive pages queue"); 3326 sActivePageQueue.Init("active pages queue"); 3327 sCachedPageQueue.Init("cached pages queue"); 3328 sFreePageQueue.Init("free pages queue"); 3329 sClearPageQueue.Init("clear pages queue"); 3330 3331 new (&sPageReservationWaiters) PageReservationWaiterList; 3332 3333 // map in the new free page table 3334 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 3335 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3336 3337 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR 3338 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages, 3339 (phys_addr_t)(sNumPages * sizeof(vm_page)))); 3340 3341 // initialize the free page table 3342 for (uint32 i = 0; i < sNumPages; i++) { 3343 sPages[i].Init(sPhysicalPageOffset + i); 3344 sFreePageQueue.Append(&sPages[i]); 3345 3346 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3347 sPages[i].allocation_tracking_info.Clear(); 3348 #endif 3349 } 3350 3351 sUnreservedFreePages = sNumPages; 3352 3353 TRACE(("initialized table\n")); 3354 3355 // mark the ranges between usable physical memory unused 3356 phys_addr_t previousEnd = 0; 3357 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3358 phys_addr_t base = args->physical_memory_range[i].start; 3359 phys_size_t size = args->physical_memory_range[i].size; 3360 if (base > previousEnd) { 3361 
mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 3362 (base - previousEnd) / B_PAGE_SIZE, false); 3363 } 3364 previousEnd = base + size; 3365 } 3366 3367 // mark the allocated physical page ranges wired 3368 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3369 mark_page_range_in_use( 3370 args->physical_allocated_range[i].start / B_PAGE_SIZE, 3371 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 3372 } 3373 3374 // The target of actually free pages. This must be at least the system 3375 // reserve, but should be a few more pages, so we don't have to extract 3376 // a cached page with each allocation. 3377 sFreePagesTarget = VM_PAGE_RESERVE_USER 3378 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024); 3379 3380 // The target of free + cached and inactive pages. On low-memory machines 3381 // keep things tight. free + cached is the pool of immediately allocatable 3382 // pages. We want a few inactive pages, so when we're actually paging, we 3383 // have a reasonably large set of pages to work with. 3384 if (sUnreservedFreePages < 16 * 1024) { 3385 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 3386 sInactivePagesTarget = sFreePagesTarget / 3; 3387 } else { 3388 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 3389 sInactivePagesTarget = sFreePagesTarget / 2; 3390 } 3391 3392 TRACE(("vm_page_init: exit\n")); 3393 3394 return B_OK; 3395 } 3396 3397 3398 status_t 3399 vm_page_init_post_area(kernel_args *args) 3400 { 3401 void *dummy; 3402 3403 dummy = sPages; 3404 create_area("page structures", &dummy, B_EXACT_ADDRESS, 3405 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 3406 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3407 3408 add_debugger_command("list_pages", &dump_page_list, 3409 "List physical pages"); 3410 add_debugger_command("page_stats", &dump_page_stats, 3411 "Dump statistics about page usage"); 3412 add_debugger_command_etc("page", &dump_page_long, 3413 "Dump page info", 3414 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 3415 "Prints information for the physical page. If neither \"-p\" nor\n" 3416 "\"-v\" are given, the provided address is interpreted as address of\n" 3417 "the vm_page data structure for the page in question. If \"-p\" is\n" 3418 "given, the address is the physical address of the page. If \"-v\" is\n" 3419 "given, the address is interpreted as virtual address in the current\n" 3420 "thread's address space and for the page it is mapped to (if any)\n" 3421 "information are printed. 
If \"-m\" is specified, the command will\n" 3422 "search all known address spaces for mappings to that page and print\n" 3423 "them.\n", 0); 3424 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 3425 add_debugger_command("find_page", &find_page, 3426 "Find out which queue a page is actually in"); 3427 3428 #ifdef TRACK_PAGE_USAGE_STATS 3429 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 3430 "Dumps statistics about page usage counts", 3431 "\n" 3432 "Dumps statistics about page usage counts.\n", 3433 B_KDEBUG_DONT_PARSE_ARGUMENTS); 3434 #endif 3435 3436 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3437 add_debugger_command_etc("page_allocations_per_caller", 3438 &dump_page_allocations_per_caller, 3439 "Dump current page allocations summed up per caller", 3440 "[ -d <caller> ] [ -r ]\n" 3441 "The current allocations will by summed up by caller (their count)\n" 3442 "printed in decreasing order by count.\n" 3443 "If \"-d\" is given, each allocation for caller <caller> is printed\n" 3444 "including the respective stack trace.\n" 3445 "If \"-r\" is given, the allocation infos are reset after gathering\n" 3446 "the information, so the next command invocation will only show the\n" 3447 "allocations made after the reset.\n", 0); 3448 add_debugger_command_etc("page_allocation_infos", 3449 &dump_page_allocation_infos, 3450 "Dump current page allocations", 3451 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] " 3452 "[ --thread <thread ID> ]\n" 3453 "The current allocations filtered by optional values will be printed.\n" 3454 "The optional \"-p\" page number filters for a specific page,\n" 3455 "with \"--team\" and \"--thread\" allocations by specific teams\n" 3456 "and/or threads can be filtered (these only work if a corresponding\n" 3457 "tracing entry is still available).\n" 3458 "If \"--stacktrace\" is given, then stack traces of the allocation\n" 3459 "callers are printed, where available\n", 0); 3460 #endif 3461 3462 return B_OK; 3463 } 3464 3465 3466 status_t 3467 vm_page_init_post_thread(kernel_args *args) 3468 { 3469 new (&sFreePageCondition) ConditionVariable; 3470 3471 // create a kernel thread to clear out pages 3472 3473 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 3474 B_LOWEST_ACTIVE_PRIORITY, NULL); 3475 resume_thread(thread); 3476 3477 // start page writer 3478 3479 sPageWriterCondition.Init("page writer"); 3480 3481 thread = spawn_kernel_thread(&page_writer, "page writer", 3482 B_NORMAL_PRIORITY + 1, NULL); 3483 resume_thread(thread); 3484 3485 // start page daemon 3486 3487 sPageDaemonCondition.Init("page daemon"); 3488 3489 thread = spawn_kernel_thread(&page_daemon, "page daemon", 3490 B_NORMAL_PRIORITY, NULL); 3491 resume_thread(thread); 3492 3493 return B_OK; 3494 } 3495 3496 3497 status_t 3498 vm_mark_page_inuse(page_num_t page) 3499 { 3500 return vm_mark_page_range_inuse(page, 1); 3501 } 3502 3503 3504 status_t 3505 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length) 3506 { 3507 return mark_page_range_in_use(startPage, length, false); 3508 } 3509 3510 3511 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3512 */ 3513 void 3514 vm_page_unreserve_pages(vm_page_reservation* reservation) 3515 { 3516 uint32 count = reservation->count; 3517 reservation->count = 0; 3518 3519 if (count == 0) 3520 return; 3521 3522 TA(UnreservePages(count)); 3523 3524 unreserve_pages(count); 3525 } 3526 3527 3528 /*! With this call, you can reserve a number of free pages in the system. 
3529 They will only be handed out to someone who has actually reserved them. 3530 This call returns as soon as the number of requested pages has been 3531 reached. 3532 The caller must not hold any cache lock or the function might deadlock. 3533 */ 3534 void 3535 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 3536 int priority) 3537 { 3538 reservation->count = count; 3539 3540 if (count == 0) 3541 return; 3542 3543 TA(ReservePages(count)); 3544 3545 reserve_pages(count, priority, false); 3546 } 3547 3548 3549 bool 3550 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 3551 int priority) 3552 { 3553 if (count == 0) { 3554 reservation->count = count; 3555 return true; 3556 } 3557 3558 uint32 remaining = reserve_pages(count, priority, true); 3559 if (remaining == 0) { 3560 TA(ReservePages(count)); 3561 reservation->count = count; 3562 return true; 3563 } 3564 3565 unreserve_pages(count - remaining); 3566 3567 return false; 3568 } 3569 3570 3571 vm_page * 3572 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3573 { 3574 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3575 ASSERT(pageState != PAGE_STATE_FREE); 3576 ASSERT(pageState != PAGE_STATE_CLEAR); 3577 3578 ASSERT(reservation->count > 0); 3579 reservation->count--; 3580 3581 VMPageQueue* queue; 3582 VMPageQueue* otherQueue; 3583 3584 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3585 queue = &sClearPageQueue; 3586 otherQueue = &sFreePageQueue; 3587 } else { 3588 queue = &sFreePageQueue; 3589 otherQueue = &sClearPageQueue; 3590 } 3591 3592 ReadLocker locker(sFreePageQueuesLock); 3593 3594 vm_page* page = queue->RemoveHeadUnlocked(); 3595 if (page == NULL) { 3596 // if the primary queue was empty, grab the page from the 3597 // secondary queue 3598 page = otherQueue->RemoveHeadUnlocked(); 3599 3600 if (page == NULL) { 3601 // Unlikely, but possible: the page we have reserved has moved 3602 // between the queues after we checked the first queue. Grab the 3603 // write locker to make sure this doesn't happen again. 
3604 locker.Unlock();
3605 WriteLocker writeLocker(sFreePageQueuesLock);
3606
3607 page = queue->RemoveHead();
3608 if (page == NULL)
3609 page = otherQueue->RemoveHead();
3610
3611 if (page == NULL) {
3612 panic("Had reserved page, but there is none!");
3613 return NULL;
3614 }
3615
3616 // downgrade to read lock
3617 locker.Lock();
3618 }
3619 }
3620
3621 if (page->CacheRef() != NULL)
3622 panic("supposed to be free page %p has cache\n", page);
3623
3624 DEBUG_PAGE_ACCESS_START(page);
3625
3626 int oldPageState = page->State();
3627 page->SetState(pageState);
3628 page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3629 page->usage_count = 0;
3630 page->accessed = false;
3631 page->modified = false;
3632
3633 locker.Unlock();
3634
3635 if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3636 sPageQueues[pageState].AppendUnlocked(page);
3637
3638 // clear the page, if we had to take it from the free queue and a clear
3639 // page was requested
3640 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3641 clear_page(page);
3642
3643 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3644 page->allocation_tracking_info.Init(
3645 TA(AllocatePage(page->physical_page_number)));
3646 #else
3647 TA(AllocatePage(page->physical_page_number));
3648 #endif
3649
3650 return page;
3651 }
3652
3653
3654 static void
3655 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3656 VMPageQueue::PageList& clearPages)
3657 {
3658 // Page lists are sorted, so remove tails before prepending to the respective queue.
3659
3660 while (vm_page* page = freePages.RemoveTail()) {
3661 page->busy = false;
3662 page->SetState(PAGE_STATE_FREE);
3663 DEBUG_PAGE_ACCESS_END(page);
3664 sFreePageQueue.PrependUnlocked(page);
3665 }
3666
3667 while (vm_page* page = clearPages.RemoveTail()) {
3668 page->busy = false;
3669 page->SetState(PAGE_STATE_CLEAR);
3670 DEBUG_PAGE_ACCESS_END(page);
3671 sClearPageQueue.PrependUnlocked(page);
3672 }
3673
3674 sFreePageCondition.NotifyAll();
3675 }
3676
3677
3678 /*! Tries to allocate a contiguous run of \a length pages starting at
3679 index \a start.
3680
3681 The caller must have write-locked the free/clear page queues. The function
3682 will unlock them regardless of whether it succeeds or fails.
3683
3684 If the function fails, it cleans up after itself, i.e. it will free all
3685 pages it managed to allocate.
3686
3687 \param start The start index (into \c sPages) of the run.
3688 \param length The number of pages to allocate.
3689 \param flags Page allocation flags. Encodes the state the function shall
3690 set the allocated pages to, whether the pages shall be marked busy
3691 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3692 (VM_PAGE_ALLOC_CLEAR).
3693 \param freeClearQueueLocker WriteLocker for the free/clear page queues,
3694 in locked state. Will be unlocked by the function.
3695 \return The index of the first page that could not be allocated. \a length
3696 is returned when the function was successful.
3697 */
3698 static page_num_t
3699 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3700 WriteLocker& freeClearQueueLocker)
3701 {
3702 uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3703 ASSERT(pageState != PAGE_STATE_FREE);
3704 ASSERT(pageState != PAGE_STATE_CLEAR);
3705 ASSERT(start + length <= sNumPages);
3706
3707 // Pull the free/clear pages out of their respective queues. Cached pages
3708 // are allocated later.
3709 page_num_t cachedPages = 0; 3710 VMPageQueue::PageList freePages; 3711 VMPageQueue::PageList clearPages; 3712 page_num_t i = 0; 3713 for (; i < length; i++) { 3714 bool pageAllocated = true; 3715 bool noPage = false; 3716 vm_page& page = sPages[start + i]; 3717 switch (page.State()) { 3718 case PAGE_STATE_CLEAR: 3719 DEBUG_PAGE_ACCESS_START(&page); 3720 sClearPageQueue.Remove(&page); 3721 clearPages.Add(&page); 3722 break; 3723 case PAGE_STATE_FREE: 3724 DEBUG_PAGE_ACCESS_START(&page); 3725 sFreePageQueue.Remove(&page); 3726 freePages.Add(&page); 3727 break; 3728 case PAGE_STATE_CACHED: 3729 // We allocate cached pages later. 3730 cachedPages++; 3731 pageAllocated = false; 3732 break; 3733 3734 default: 3735 // Probably a page was cached when our caller checked. Now it's 3736 // gone and we have to abort. 3737 noPage = true; 3738 break; 3739 } 3740 3741 if (noPage) 3742 break; 3743 3744 if (pageAllocated) { 3745 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3746 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3747 page.usage_count = 0; 3748 page.accessed = false; 3749 page.modified = false; 3750 } 3751 } 3752 3753 if (i < length) { 3754 // failed to allocate a page -- free all that we've got 3755 allocate_page_run_cleanup(freePages, clearPages); 3756 return i; 3757 } 3758 3759 freeClearQueueLocker.Unlock(); 3760 3761 if (cachedPages > 0) { 3762 // allocate the pages that weren't free but cached 3763 page_num_t freedCachedPages = 0; 3764 page_num_t nextIndex = start; 3765 vm_page* freePage = freePages.Head(); 3766 vm_page* clearPage = clearPages.Head(); 3767 while (cachedPages > 0) { 3768 // skip, if we've already got the page 3769 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3770 freePage = freePages.GetNext(freePage); 3771 nextIndex++; 3772 continue; 3773 } 3774 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3775 clearPage = clearPages.GetNext(clearPage); 3776 nextIndex++; 3777 continue; 3778 } 3779 3780 // free the page, if it is still cached 3781 vm_page& page = sPages[nextIndex]; 3782 if (!free_cached_page(&page, false)) { 3783 // TODO: if the page turns out to have been freed already, 3784 // there would be no need to fail 3785 break; 3786 } 3787 3788 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3789 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3790 page.usage_count = 0; 3791 page.accessed = false; 3792 page.modified = false; 3793 3794 freePages.InsertBefore(freePage, &page); 3795 freedCachedPages++; 3796 cachedPages--; 3797 nextIndex++; 3798 } 3799 3800 // If we have freed cached pages, we need to balance things. 3801 if (freedCachedPages > 0) 3802 unreserve_pages(freedCachedPages); 3803 3804 if (nextIndex - start < length) { 3805 // failed to allocate all cached pages -- free all that we've got 3806 freeClearQueueLocker.Lock(); 3807 allocate_page_run_cleanup(freePages, clearPages); 3808 freeClearQueueLocker.Unlock(); 3809 3810 return nextIndex - start; 3811 } 3812 } 3813 3814 // clear pages, if requested 3815 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3816 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3817 vm_page* page = it.Next();) { 3818 clear_page(page); 3819 } 3820 } 3821 3822 // add pages to target queue 3823 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3824 freePages.MoveFrom(&clearPages); 3825 sPageQueues[pageState].AppendUnlocked(freePages, length); 3826 } 3827 3828 // Note: We don't unreserve the pages since we pulled them out of the 3829 // free/clear queues without adjusting sUnreservedFreePages. 
3830 3831 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE 3832 AbstractTraceEntryWithStackTrace* traceEntry 3833 = TA(AllocatePageRun(start, length)); 3834 3835 for (page_num_t i = start; i < start + length; i++) 3836 sPages[i].allocation_tracking_info.Init(traceEntry); 3837 #else 3838 TA(AllocatePageRun(start, length)); 3839 #endif 3840 3841 return length; 3842 } 3843 3844 3845 /*! Allocate a physically contiguous range of pages. 3846 3847 \param flags Page allocation flags. Encodes the state the function shall 3848 set the allocated pages to, whether the pages shall be marked busy 3849 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared 3850 (VM_PAGE_ALLOC_CLEAR). 3851 \param length The number of contiguous pages to allocate. 3852 \param restrictions Restrictions to the physical addresses of the page run 3853 to allocate, including \c low_address, the first acceptable physical 3854 address where the page run may start, \c high_address, the last 3855 acceptable physical address where the page run may end (i.e. it must 3856 hold \code runStartAddress + length <= high_address \endcode), 3857 \c alignment, the alignment of the page run start address, and 3858 \c boundary, multiples of which the page run must not cross. 3859 Values set to \c 0 are ignored. 3860 \param priority The page reservation priority (as passed to 3861 vm_page_reserve_pages()). 3862 \return The first page of the allocated page run on success; \c NULL 3863 when the allocation failed. 3864 */ 3865 vm_page* 3866 vm_page_allocate_page_run(uint32 flags, page_num_t length, 3867 const physical_address_restrictions* restrictions, int priority) 3868 { 3869 // compute start and end page index 3870 page_num_t requestedStart 3871 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset) 3872 - sPhysicalPageOffset; 3873 page_num_t start = requestedStart; 3874 page_num_t end; 3875 if (restrictions->high_address > 0) { 3876 end = std::max(restrictions->high_address / B_PAGE_SIZE, 3877 sPhysicalPageOffset) 3878 - sPhysicalPageOffset; 3879 end = std::min(end, sNumPages); 3880 } else 3881 end = sNumPages; 3882 3883 // compute alignment mask 3884 page_num_t alignmentMask 3885 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1; 3886 ASSERT(((alignmentMask + 1) & alignmentMask) == 0); 3887 // alignment must be a power of 2 3888 3889 // compute the boundary mask 3890 uint32 boundaryMask = 0; 3891 if (restrictions->boundary != 0) { 3892 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE; 3893 // boundary must be a power of two and not less than alignment and 3894 // length 3895 ASSERT(((boundary - 1) & boundary) == 0); 3896 ASSERT(boundary >= alignmentMask + 1); 3897 ASSERT(boundary >= length); 3898 3899 boundaryMask = -boundary; 3900 } 3901 3902 vm_page_reservation reservation; 3903 vm_page_reserve_pages(&reservation, length, priority); 3904 3905 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3906 3907 // First we try to get a run with free pages only. If that fails, we also 3908 // consider cached pages. If there are only few free pages and many cached 3909 // ones, the odds are that we won't find enough contiguous ones, so we skip 3910 // the first iteration in this case. 3911 int32 freePages = sUnreservedFreePages; 3912 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3913 3914 for (;;) { 3915 if (alignmentMask != 0 || boundaryMask != 0) { 3916 page_num_t offsetStart = start + sPhysicalPageOffset; 3917 3918 // enforce alignment 3919 if ((offsetStart & alignmentMask) != 0) 3920 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask; 3921 3922 // enforce boundary 3923 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart 3924 + length - 1)) & boundaryMask) != 0) { 3925 offsetStart = (offsetStart + length - 1) & boundaryMask; 3926 } 3927 3928 start = offsetStart - sPhysicalPageOffset; 3929 } 3930 3931 if (start + length > end) { 3932 if (useCached == 0) { 3933 // The first iteration with free pages only was unsuccessful. 3934 // Try again also considering cached pages. 3935 useCached = 1; 3936 start = requestedStart; 3937 continue; 3938 } 3939 3940 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3941 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %" 3942 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR 3943 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart, 3944 end, restrictions->alignment, restrictions->boundary); 3945 3946 freeClearQueueLocker.Unlock(); 3947 vm_page_unreserve_pages(&reservation); 3948 return NULL; 3949 } 3950 3951 bool foundRun = true; 3952 page_num_t i; 3953 for (i = 0; i < length; i++) { 3954 uint32 pageState = sPages[start + i].State(); 3955 if (pageState != PAGE_STATE_FREE 3956 && pageState != PAGE_STATE_CLEAR 3957 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3958 foundRun = false; 3959 break; 3960 } 3961 } 3962 3963 if (foundRun) { 3964 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3965 if (i == length) 3966 return &sPages[start]; 3967 3968 // apparently a cached page couldn't be allocated -- skip it and 3969 // continue 3970 freeClearQueueLocker.Lock(); 3971 } 3972 3973 start += i + 1; 3974 } 3975 } 3976 3977 3978 vm_page * 3979 vm_page_at_index(int32 index) 3980 { 3981 return &sPages[index]; 3982 } 3983 3984 3985 vm_page * 3986 vm_lookup_page(page_num_t pageNumber) 3987 { 3988 if (pageNumber < sPhysicalPageOffset) 3989 return NULL; 3990 3991 pageNumber -= sPhysicalPageOffset; 3992 if (pageNumber >= sNumPages) 3993 return NULL; 3994 3995 return &sPages[pageNumber]; 3996 } 3997 3998 3999 bool 4000 vm_page_is_dummy(struct vm_page *page) 4001 { 4002 return page < sPages || page >= sPages + sNumPages; 4003 } 4004 4005 4006 /*! Free the page that belonged to a certain cache. 4007 You can use vm_page_set_state() manually if you prefer, but only 4008 if the page does not equal PAGE_STATE_MODIFIED. 4009 4010 \param cache The cache the page was previously owned by or NULL. The page 4011 must have been removed from its cache before calling this method in 4012 either case. 4013 \param page The page to free. 4014 \param reservation If not NULL, the page count of the reservation will be 4015 incremented, thus allowing to allocate another page for the freed one at 4016 a later time. 
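A minimal sketch of the intended call pattern (illustrative only;
"reservation" is a caller-owned vm_page_reservation):
\code
cache->RemovePage(page);
// the page must no longer be in the cache
vm_page_free_etc(cache, page, &reservation);
// with a non-NULL reservation the freed page is credited back to it,
// as described above
\endcode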
4017 */ 4018 void 4019 vm_page_free_etc(VMCache* cache, vm_page* page, 4020 vm_page_reservation* reservation) 4021 { 4022 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 4023 && page->State() != PAGE_STATE_CLEAR); 4024 4025 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 4026 atomic_add(&sModifiedTemporaryPages, -1); 4027 4028 free_page(page, false); 4029 if (reservation == NULL) 4030 unreserve_pages(1); 4031 } 4032 4033 4034 void 4035 vm_page_set_state(vm_page *page, int pageState) 4036 { 4037 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 4038 && page->State() != PAGE_STATE_CLEAR); 4039 4040 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 4041 free_page(page, pageState == PAGE_STATE_CLEAR); 4042 unreserve_pages(1); 4043 } else 4044 set_page_state(page, pageState); 4045 } 4046 4047 4048 /*! Moves a page to either the tail of the head of its current queue, 4049 depending on \a tail. 4050 The page must have a cache and the cache must be locked! 4051 */ 4052 void 4053 vm_page_requeue(struct vm_page *page, bool tail) 4054 { 4055 PAGE_ASSERT(page, page->Cache() != NULL); 4056 page->Cache()->AssertLocked(); 4057 // DEBUG_PAGE_ACCESS_CHECK(page); 4058 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 4059 // when it requeues busy pages. The reason is that vm_soft_fault() 4060 // (respectively fault_get_page()) and the file cache keep newly 4061 // allocated pages accessed while they are reading them from disk. It 4062 // would probably be better to change that code and reenable this 4063 // check. 4064 4065 VMPageQueue *queue = NULL; 4066 4067 switch (page->State()) { 4068 case PAGE_STATE_ACTIVE: 4069 queue = &sActivePageQueue; 4070 break; 4071 case PAGE_STATE_INACTIVE: 4072 queue = &sInactivePageQueue; 4073 break; 4074 case PAGE_STATE_MODIFIED: 4075 queue = &sModifiedPageQueue; 4076 break; 4077 case PAGE_STATE_CACHED: 4078 queue = &sCachedPageQueue; 4079 break; 4080 case PAGE_STATE_FREE: 4081 case PAGE_STATE_CLEAR: 4082 panic("vm_page_requeue() called for free/clear page %p", page); 4083 return; 4084 case PAGE_STATE_WIRED: 4085 case PAGE_STATE_UNUSED: 4086 return; 4087 default: 4088 panic("vm_page_touch: vm_page %p in invalid state %d\n", 4089 page, page->State()); 4090 break; 4091 } 4092 4093 queue->RequeueUnlocked(page, tail); 4094 } 4095 4096 4097 page_num_t 4098 vm_page_num_pages(void) 4099 { 4100 return sNumPages - sNonExistingPages; 4101 } 4102 4103 4104 /*! There is a subtle distinction between the page counts returned by 4105 this function and vm_page_num_free_pages(): 4106 The latter returns the number of pages that are completely uncommitted, 4107 whereas this one returns the number of pages that are available for 4108 use by being reclaimed as well (IOW it factors in things like cache pages 4109 as available). 4110 */ 4111 page_num_t 4112 vm_page_num_available_pages(void) 4113 { 4114 return vm_available_memory() / B_PAGE_SIZE; 4115 } 4116 4117 4118 page_num_t 4119 vm_page_num_free_pages(void) 4120 { 4121 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 4122 return count > 0 ? count : 0; 4123 } 4124 4125 4126 page_num_t 4127 vm_page_num_unused_pages(void) 4128 { 4129 int32 count = sUnreservedFreePages; 4130 return count > 0 ? count : 0; 4131 } 4132 4133 4134 void 4135 vm_page_get_stats(system_info *info) 4136 { 4137 // Note: there's no locking protecting any of the queues or counters here, 4138 // so we run the risk of getting bogus values when evaluating them 4139 // throughout this function. 
As these stats are for informational purposes 4140 // only, it is not really worth introducing such locking. Therefore we just 4141 // ensure that we don't under- or overflow any of the values. 4142 4143 // The pages used for the block cache buffers. Those should not be counted 4144 // as used but as cached pages. 4145 // TODO: We should subtract the blocks that are in use ATM, since those 4146 // can't really be freed in a low memory situation. 4147 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE; 4148 info->block_cache_pages = blockCachePages; 4149 4150 // Non-temporary modified pages are special as they represent pages that 4151 // can be written back, so they could be freed if necessary, for us 4152 // basically making them into cached pages with a higher overhead. The 4153 // modified queue count is therefore split into temporary and non-temporary 4154 // counts that are then added to the corresponding number. 4155 page_num_t modifiedNonTemporaryPages 4156 = (sModifiedPageQueue.Count() - sModifiedTemporaryPages); 4157 4158 info->max_pages = vm_page_num_pages(); 4159 info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages 4160 + blockCachePages; 4161 4162 // max_pages is composed of: 4163 // active + inactive + unused + wired + modified + cached + free + clear 4164 // So taking out the cached (including modified non-temporary), free and 4165 // clear ones leaves us with all used pages. 4166 uint32 subtractPages = info->cached_pages + sFreePageQueue.Count() 4167 + sClearPageQueue.Count(); 4168 info->used_pages = subtractPages > info->max_pages 4169 ? 0 : info->max_pages - subtractPages; 4170 4171 if (info->used_pages + info->cached_pages > info->max_pages) { 4172 // Something was shuffled around while we were summing up the counts. 4173 // Make the values sane, preferring the worse case of more used pages. 4174 info->cached_pages = info->max_pages - info->used_pages; 4175 } 4176 4177 info->page_faults = vm_num_page_faults(); 4178 info->ignored_pages = sIgnoredPages; 4179 4180 // TODO: We don't consider pages used for page directories/tables yet. 4181 } 4182 4183 4184 /*! Returns the greatest address within the last page of accessible physical 4185 memory. 4186 The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff 4187 means the that the last page ends at exactly 4 GB. 4188 */ 4189 phys_addr_t 4190 vm_page_max_address() 4191 { 4192 return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1; 4193 } 4194 4195 4196 RANGE_MARKER_FUNCTION_END(vm_page) 4197
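
// Usage sketch (comment only, not compiled): the typical life cycle of the
// page reservation API implemented above. The reservation priority and the
// page state/flags below are illustrative assumptions; callers choose what
// fits their context.
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
//		// may block until enough pages are free; the caller must not hold
//		// any cache lock here
//
//	vm_page* page = vm_page_allocate_page(&reservation,
//		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
//		// consumes one count of the reservation and returns a cleared page
//
//	// ... use the page ...
//
//	vm_page_set_state(page, PAGE_STATE_FREE);
//		// frees the page and implicitly unreserves it again
//	vm_page_unreserve_pages(&reservation);
//		// releases whatever was reserved but never allocated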