1 /* 2 * Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <string.h> 12 #include <stdlib.h> 13 14 #include <algorithm> 15 16 #include <KernelExport.h> 17 #include <OS.h> 18 19 #include <AutoDeleter.h> 20 21 #include <arch/cpu.h> 22 #include <arch/vm_translation_map.h> 23 #include <block_cache.h> 24 #include <boot/kernel_args.h> 25 #include <condition_variable.h> 26 #include <heap.h> 27 #include <kernel.h> 28 #include <low_resource_manager.h> 29 #include <thread.h> 30 #include <tracing.h> 31 #include <util/AutoLock.h> 32 #include <vfs.h> 33 #include <vm/vm.h> 34 #include <vm/vm_priv.h> 35 #include <vm/vm_page.h> 36 #include <vm/VMAddressSpace.h> 37 #include <vm/VMArea.h> 38 #include <vm/VMCache.h> 39 40 #include "IORequest.h" 41 #include "PageCacheLocker.h" 42 #include "VMAnonymousCache.h" 43 #include "VMPageQueue.h" 44 45 46 //#define TRACE_VM_PAGE 47 #ifdef TRACE_VM_PAGE 48 # define TRACE(x) dprintf x 49 #else 50 # define TRACE(x) ; 51 #endif 52 53 //#define TRACE_VM_DAEMONS 54 #ifdef TRACE_VM_DAEMONS 55 #define TRACE_DAEMON(x...) dprintf(x) 56 #else 57 #define TRACE_DAEMON(x...) do {} while (false) 58 #endif 59 60 //#define TRACK_PAGE_USAGE_STATS 1 61 62 #define PAGE_ASSERT(page, condition) \ 63 ASSERT_PRINT((condition), "page: %p", (page)) 64 65 #define SCRUB_SIZE 16 66 // this many pages will be cleared at once in the page scrubber thread 67 68 #define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY 69 // maximum I/O priority of the page writer 70 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000 71 // the maximum I/O priority shall be reached when this many pages need to 72 // be written 73 74 75 // The page reserve an allocation of the certain priority must not touch. 76 static const size_t kPageReserveForPriority[] = { 77 VM_PAGE_RESERVE_USER, // user 78 VM_PAGE_RESERVE_SYSTEM, // system 79 0 // VIP 80 }; 81 82 // Minimum number of free pages the page daemon will try to achieve. 83 static uint32 sFreePagesTarget; 84 static uint32 sFreeOrCachedPagesTarget; 85 static uint32 sInactivePagesTarget; 86 87 // Wait interval between page daemon runs. 88 static const bigtime_t kIdleScanWaitInterval = 1000000LL; // 1 sec 89 static const bigtime_t kBusyScanWaitInterval = 500000LL; // 0.5 sec 90 91 // Number of idle runs after which we want to have processed the full active 92 // queue. 93 static const uint32 kIdleRunsForFullQueue = 20; 94 95 // Maximum limit for the vm_page::usage_count. 96 static const int32 kPageUsageMax = 64; 97 // vm_page::usage_count buff an accessed page receives in a scan. 98 static const int32 kPageUsageAdvance = 3; 99 // vm_page::usage_count debuff an unaccessed page receives in a scan. 
100 static const int32 kPageUsageDecline = 1; 101 102 int32 gMappedPagesCount; 103 104 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 105 106 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 107 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 108 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 109 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 110 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 111 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 112 113 static vm_page *sPages; 114 static addr_t sPhysicalPageOffset; 115 static size_t sNumPages; 116 static vint32 sUnreservedFreePages; 117 static vint32 sUnsatisfiedPageReservations; 118 static vint32 sModifiedTemporaryPages; 119 120 static ConditionVariable sFreePageCondition; 121 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 122 123 static rw_lock sFreePageQueuesLock 124 = RW_LOCK_INITIALIZER("free/clear page queues"); 125 126 #ifdef TRACK_PAGE_USAGE_STATS 127 static page_num_t sPageUsageArrays[512]; 128 static page_num_t* sPageUsage = sPageUsageArrays; 129 static page_num_t sPageUsagePageCount; 130 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 131 static page_num_t sNextPageUsagePageCount; 132 #endif 133 134 135 struct page_stats { 136 int32 totalFreePages; 137 int32 unsatisfiedReservations; 138 int32 cachedPages; 139 }; 140 141 142 struct PageReservationWaiter 143 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 144 struct thread* thread; 145 uint32 dontTouch; // reserve not to touch 146 uint32 missing; // pages missing for the reservation 147 int32 threadPriority; 148 149 bool operator<(const PageReservationWaiter& other) const 150 { 151 // Implies an order by descending VM priority (ascending dontTouch) 152 // and (secondarily) descending thread priority. 
153 if (dontTouch != other.dontTouch) 154 return dontTouch < other.dontTouch; 155 return threadPriority > other.threadPriority; 156 } 157 }; 158 159 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 160 static PageReservationWaiterList sPageReservationWaiters; 161 162 163 struct DaemonCondition { 164 void Init(const char* name) 165 { 166 mutex_init(&fLock, "daemon condition"); 167 fCondition.Init(this, name); 168 fActivated = false; 169 } 170 171 bool Lock() 172 { 173 return mutex_lock(&fLock) == B_OK; 174 } 175 176 void Unlock() 177 { 178 mutex_unlock(&fLock); 179 } 180 181 bool Wait(bigtime_t timeout, bool clearActivated) 182 { 183 MutexLocker locker(fLock); 184 if (clearActivated) 185 fActivated = false; 186 else if (fActivated) 187 return true; 188 189 ConditionVariableEntry entry; 190 fCondition.Add(&entry); 191 192 locker.Unlock(); 193 194 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 195 } 196 197 void WakeUp() 198 { 199 if (fActivated) 200 return; 201 202 MutexLocker locker(fLock); 203 fActivated = true; 204 fCondition.NotifyOne(); 205 } 206 207 void ClearActivated() 208 { 209 MutexLocker locker(fLock); 210 fActivated = false; 211 } 212 213 private: 214 mutex fLock; 215 ConditionVariable fCondition; 216 bool fActivated; 217 }; 218 219 220 static DaemonCondition sPageWriterCondition; 221 static DaemonCondition sPageDaemonCondition; 222 223 224 #if PAGE_ALLOCATION_TRACING 225 226 namespace PageAllocationTracing { 227 228 class ReservePages : public AbstractTraceEntry { 229 public: 230 ReservePages(uint32 count) 231 : 232 fCount(count) 233 { 234 Initialized(); 235 } 236 237 virtual void AddDump(TraceOutput& out) 238 { 239 out.Print("page reserve: %lu", fCount); 240 } 241 242 private: 243 uint32 fCount; 244 }; 245 246 247 class UnreservePages : public AbstractTraceEntry { 248 public: 249 UnreservePages(uint32 count) 250 : 251 fCount(count) 252 { 253 Initialized(); 254 } 255 256 virtual void AddDump(TraceOutput& out) 257 { 258 out.Print("page unreserve: %lu", fCount); 259 } 260 261 private: 262 uint32 fCount; 263 }; 264 265 266 class AllocatePage : public AbstractTraceEntry { 267 public: 268 AllocatePage() 269 { 270 Initialized(); 271 } 272 273 virtual void AddDump(TraceOutput& out) 274 { 275 out.Print("page alloc"); 276 } 277 }; 278 279 280 class AllocatePageRun : public AbstractTraceEntry { 281 public: 282 AllocatePageRun(uint32 length) 283 : 284 fLength(length) 285 { 286 Initialized(); 287 } 288 289 virtual void AddDump(TraceOutput& out) 290 { 291 out.Print("page alloc run: length: %ld", fLength); 292 } 293 294 private: 295 uint32 fLength; 296 }; 297 298 299 class FreePage : public AbstractTraceEntry { 300 public: 301 FreePage() 302 { 303 Initialized(); 304 } 305 306 virtual void AddDump(TraceOutput& out) 307 { 308 out.Print("page free"); 309 } 310 }; 311 312 313 class ScrubbingPages : public AbstractTraceEntry { 314 public: 315 ScrubbingPages(uint32 count) 316 : 317 fCount(count) 318 { 319 Initialized(); 320 } 321 322 virtual void AddDump(TraceOutput& out) 323 { 324 out.Print("page scrubbing: %lu", fCount); 325 } 326 327 private: 328 uint32 fCount; 329 }; 330 331 332 class ScrubbedPages : public AbstractTraceEntry { 333 public: 334 ScrubbedPages(uint32 count) 335 : 336 fCount(count) 337 { 338 Initialized(); 339 } 340 341 virtual void AddDump(TraceOutput& out) 342 { 343 out.Print("page scrubbed: %lu", fCount); 344 } 345 346 private: 347 uint32 fCount; 348 }; 349 350 351 class StolenPage : public AbstractTraceEntry { 352 public: 353 StolenPage() 354 
{ 355 Initialized(); 356 } 357 358 virtual void AddDump(TraceOutput& out) 359 { 360 out.Print("page stolen"); 361 } 362 }; 363 364 } // namespace PageAllocationTracing 365 366 # define TA(x) new(std::nothrow) PageAllocationTracing::x 367 368 #else 369 # define TA(x) 370 #endif // PAGE_ALLOCATION_TRACING 371 372 373 #if PAGE_DAEMON_TRACING 374 375 namespace PageDaemonTracing { 376 377 class ActivatePage : public AbstractTraceEntry { 378 public: 379 ActivatePage(vm_page* page) 380 : 381 fCache(page->cache), 382 fPage(page) 383 { 384 Initialized(); 385 } 386 387 virtual void AddDump(TraceOutput& out) 388 { 389 out.Print("page activated: %p, cache: %p", fPage, fCache); 390 } 391 392 private: 393 VMCache* fCache; 394 vm_page* fPage; 395 }; 396 397 398 class DeactivatePage : public AbstractTraceEntry { 399 public: 400 DeactivatePage(vm_page* page) 401 : 402 fCache(page->cache), 403 fPage(page) 404 { 405 Initialized(); 406 } 407 408 virtual void AddDump(TraceOutput& out) 409 { 410 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 411 } 412 413 private: 414 VMCache* fCache; 415 vm_page* fPage; 416 }; 417 418 419 class FreedPageSwap : public AbstractTraceEntry { 420 public: 421 FreedPageSwap(vm_page* page) 422 : 423 fCache(page->cache), 424 fPage(page) 425 { 426 Initialized(); 427 } 428 429 virtual void AddDump(TraceOutput& out) 430 { 431 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 432 } 433 434 private: 435 VMCache* fCache; 436 vm_page* fPage; 437 }; 438 439 } // namespace PageDaemonTracing 440 441 # define TD(x) new(std::nothrow) PageDaemonTracing::x 442 443 #else 444 # define TD(x) 445 #endif // PAGE_DAEMON_TRACING 446 447 448 #if PAGE_WRITER_TRACING 449 450 namespace PageWriterTracing { 451 452 class WritePage : public AbstractTraceEntry { 453 public: 454 WritePage(vm_page* page) 455 : 456 fCache(page->Cache()), 457 fPage(page) 458 { 459 Initialized(); 460 } 461 462 virtual void AddDump(TraceOutput& out) 463 { 464 out.Print("page write: %p, cache: %p", fPage, fCache); 465 } 466 467 private: 468 VMCache* fCache; 469 vm_page* fPage; 470 }; 471 472 } // namespace PageWriterTracing 473 474 # define TPW(x) new(std::nothrow) PageWriterTracing::x 475 476 #else 477 # define TPW(x) 478 #endif // PAGE_WRITER_TRACING 479 480 481 #if PAGE_STATE_TRACING 482 483 namespace PageStateTracing { 484 485 class SetPageState : public AbstractTraceEntry { 486 public: 487 SetPageState(vm_page* page, uint8 newState) 488 : 489 fPage(page), 490 fOldState(page->State()), 491 fNewState(newState), 492 fBusy(page->busy), 493 fWired(page->wired_count > 0), 494 fMapped(!page->mappings.IsEmpty()), 495 fAccessed(page->accessed), 496 fModified(page->modified) 497 { 498 #if PAGE_STATE_TRACING_STACK_TRACE 499 fStackTrace = capture_tracing_stack_trace( 500 PAGE_STATE_TRACING_STACK_TRACE, 0, true); 501 // Don't capture userland stack trace to avoid potential 502 // deadlocks. 503 #endif 504 Initialized(); 505 } 506 507 #if PAGE_STATE_TRACING_STACK_TRACE 508 virtual void DumpStackTrace(TraceOutput& out) 509 { 510 out.PrintStackTrace(fStackTrace); 511 } 512 #endif 513 514 virtual void AddDump(TraceOutput& out) 515 { 516 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 517 fBusy ? 'b' : '-', 518 fWired ? 'w' : '-', 519 fMapped ? 'm' : '-', 520 fAccessed ? 'a' : '-', 521 fModified ? 
'm' : '-', 522 page_state_to_string(fOldState), 523 page_state_to_string(fNewState)); 524 } 525 526 private: 527 vm_page* fPage; 528 #if PAGE_STATE_TRACING_STACK_TRACE 529 tracing_stack_trace* fStackTrace; 530 #endif 531 uint8 fOldState; 532 uint8 fNewState; 533 bool fBusy : 1; 534 bool fWired : 1; 535 bool fMapped : 1; 536 bool fAccessed : 1; 537 bool fModified : 1; 538 }; 539 540 } // namespace PageStateTracing 541 542 # define TPS(x) new(std::nothrow) PageStateTracing::x 543 544 #else 545 # define TPS(x) 546 #endif // PAGE_STATE_TRACING 547 548 549 static int 550 find_page(int argc, char **argv) 551 { 552 struct vm_page *page; 553 addr_t address; 554 int32 index = 1; 555 int i; 556 557 struct { 558 const char* name; 559 VMPageQueue* queue; 560 } pageQueueInfos[] = { 561 { "free", &sFreePageQueue }, 562 { "clear", &sClearPageQueue }, 563 { "modified", &sModifiedPageQueue }, 564 { "active", &sActivePageQueue }, 565 { "inactive", &sInactivePageQueue }, 566 { "cached", &sCachedPageQueue }, 567 { NULL, NULL } 568 }; 569 570 if (argc < 2 571 || strlen(argv[index]) <= 2 572 || argv[index][0] != '0' 573 || argv[index][1] != 'x') { 574 kprintf("usage: find_page <address>\n"); 575 return 0; 576 } 577 578 address = strtoul(argv[index], NULL, 0); 579 page = (vm_page*)address; 580 581 for (i = 0; pageQueueInfos[i].name; i++) { 582 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 583 while (vm_page* p = it.Next()) { 584 if (p == page) { 585 kprintf("found page %p in queue %p (%s)\n", page, 586 pageQueueInfos[i].queue, pageQueueInfos[i].name); 587 return 0; 588 } 589 } 590 } 591 592 kprintf("page %p isn't in any queue\n", page); 593 594 return 0; 595 } 596 597 598 const char * 599 page_state_to_string(int state) 600 { 601 switch(state) { 602 case PAGE_STATE_ACTIVE: 603 return "active"; 604 case PAGE_STATE_INACTIVE: 605 return "inactive"; 606 case PAGE_STATE_MODIFIED: 607 return "modified"; 608 case PAGE_STATE_CACHED: 609 return "cached"; 610 case PAGE_STATE_FREE: 611 return "free"; 612 case PAGE_STATE_CLEAR: 613 return "clear"; 614 case PAGE_STATE_WIRED: 615 return "wired"; 616 case PAGE_STATE_UNUSED: 617 return "unused"; 618 default: 619 return "unknown"; 620 } 621 } 622 623 624 static int 625 dump_page(int argc, char **argv) 626 { 627 bool addressIsPointer = true; 628 bool physical = false; 629 bool searchMappings = false; 630 int32 index = 1; 631 632 while (index < argc) { 633 if (argv[index][0] != '-') 634 break; 635 636 if (!strcmp(argv[index], "-p")) { 637 addressIsPointer = false; 638 physical = true; 639 } else if (!strcmp(argv[index], "-v")) { 640 addressIsPointer = false; 641 } else if (!strcmp(argv[index], "-m")) { 642 searchMappings = true; 643 } else { 644 print_debugger_command_usage(argv[0]); 645 return 0; 646 } 647 648 index++; 649 } 650 651 if (index + 1 != argc) { 652 print_debugger_command_usage(argv[0]); 653 return 0; 654 } 655 656 uint64 value; 657 if (!evaluate_debug_expression(argv[index], &value, false)) 658 return 0; 659 660 addr_t pageAddress = (addr_t)value; 661 struct vm_page* page; 662 663 if (addressIsPointer) { 664 page = (struct vm_page *)pageAddress; 665 } else { 666 if (!physical) { 667 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 668 669 if (debug_get_debugged_thread()->team->address_space != NULL) 670 addressSpace = debug_get_debugged_thread()->team->address_space; 671 672 uint32 flags = 0; 673 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 674 &pageAddress, &flags) != B_OK 675 || (flags & PAGE_PRESENT) == 0) { 676 
kprintf("Virtual address not mapped to a physical page in this " 677 "address space.\n"); 678 return 0; 679 } 680 } 681 682 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 683 } 684 685 kprintf("PAGE: %p\n", page); 686 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 687 page->queue_link.previous); 688 kprintf("physical_number: %#lx\n", page->physical_page_number); 689 kprintf("cache: %p\n", page->Cache()); 690 kprintf("cache_offset: %ld\n", page->cache_offset); 691 kprintf("cache_next: %p\n", page->cache_next); 692 kprintf("state: %s\n", page_state_to_string(page->State())); 693 kprintf("wired_count: %d\n", page->wired_count); 694 kprintf("usage_count: %d\n", page->usage_count); 695 kprintf("busy: %d\n", page->busy); 696 kprintf("busy_writing: %d\n", page->busy_writing); 697 kprintf("accessed: %d\n", page->accessed); 698 kprintf("modified: %d\n", page->modified); 699 #if DEBUG_PAGE_QUEUE 700 kprintf("queue: %p\n", page->queue); 701 #endif 702 #if DEBUG_PAGE_ACCESS 703 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 704 #endif 705 kprintf("area mappings:\n"); 706 707 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 708 vm_page_mapping *mapping; 709 while ((mapping = iterator.Next()) != NULL) { 710 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 711 mapping = mapping->page_link.next; 712 } 713 714 if (searchMappings) { 715 kprintf("all mappings:\n"); 716 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 717 while (addressSpace != NULL) { 718 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 719 for (addr_t address = addressSpace->Base(); pageCount != 0; 720 address += B_PAGE_SIZE, pageCount--) { 721 addr_t physicalAddress; 722 uint32 flags = 0; 723 if (addressSpace->TranslationMap()->QueryInterrupt(address, 724 &physicalAddress, &flags) == B_OK 725 && (flags & PAGE_PRESENT) != 0 726 && physicalAddress / B_PAGE_SIZE 727 == page->physical_page_number) { 728 VMArea* area = addressSpace->LookupArea(address); 729 kprintf(" aspace %ld, area %ld: %#" B_PRIxADDR 730 " (%c%c%s%s)\n", addressSpace->ID(), 731 area != NULL ? area->id : -1, address, 732 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 733 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 734 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 735 (flags & PAGE_ACCESSED) != 0 ? 
" accessed" : ""); 736 } 737 } 738 addressSpace = VMAddressSpace::DebugNext(addressSpace); 739 } 740 } 741 742 set_debug_variable("_cache", (addr_t)page->Cache()); 743 #if DEBUG_PAGE_ACCESS 744 set_debug_variable("_accessor", page->accessing_thread); 745 #endif 746 747 return 0; 748 } 749 750 751 static int 752 dump_page_queue(int argc, char **argv) 753 { 754 struct VMPageQueue *queue; 755 756 if (argc < 2) { 757 kprintf("usage: page_queue <address/name> [list]\n"); 758 return 0; 759 } 760 761 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 762 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 763 if (!strcmp(argv[1], "free")) 764 queue = &sFreePageQueue; 765 else if (!strcmp(argv[1], "clear")) 766 queue = &sClearPageQueue; 767 else if (!strcmp(argv[1], "modified")) 768 queue = &sModifiedPageQueue; 769 else if (!strcmp(argv[1], "active")) 770 queue = &sActivePageQueue; 771 else if (!strcmp(argv[1], "inactive")) 772 queue = &sInactivePageQueue; 773 else if (!strcmp(argv[1], "cached")) 774 queue = &sCachedPageQueue; 775 else { 776 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 777 return 0; 778 } 779 780 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %ld\n", 781 queue, queue->Head(), queue->Tail(), queue->Count()); 782 783 if (argc == 3) { 784 struct vm_page *page = queue->Head(); 785 const char *type = "none"; 786 int i; 787 788 if (page->Cache() != NULL) { 789 switch (page->Cache()->type) { 790 case CACHE_TYPE_RAM: 791 type = "RAM"; 792 break; 793 case CACHE_TYPE_DEVICE: 794 type = "device"; 795 break; 796 case CACHE_TYPE_VNODE: 797 type = "vnode"; 798 break; 799 case CACHE_TYPE_NULL: 800 type = "null"; 801 break; 802 default: 803 type = "???"; 804 break; 805 } 806 } 807 808 kprintf("page cache type state wired usage\n"); 809 for (i = 0; page; i++, page = queue->Next(page)) { 810 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 811 type, page_state_to_string(page->State()), 812 page->wired_count, page->usage_count); 813 } 814 } 815 return 0; 816 } 817 818 819 static int 820 dump_page_stats(int argc, char **argv) 821 { 822 page_num_t swappableModified = 0; 823 page_num_t swappableModifiedInactive = 0; 824 825 size_t counter[8]; 826 size_t busyCounter[8]; 827 memset(counter, 0, sizeof(counter)); 828 memset(busyCounter, 0, sizeof(busyCounter)); 829 830 struct page_run { 831 page_num_t start; 832 page_num_t end; 833 834 page_num_t Length() const { return end - start; } 835 }; 836 837 page_run currentFreeRun = { 0, 0 }; 838 page_run currentCachedRun = { 0, 0 }; 839 page_run longestFreeRun = { 0, 0 }; 840 page_run longestCachedRun = { 0, 0 }; 841 842 for (addr_t i = 0; i < sNumPages; i++) { 843 if (sPages[i].State() > 7) 844 panic("page %li at %p has invalid state!\n", i, &sPages[i]); 845 846 uint32 pageState = sPages[i].State(); 847 848 counter[pageState]++; 849 if (sPages[i].busy) 850 busyCounter[pageState]++; 851 852 if (pageState == PAGE_STATE_MODIFIED 853 && sPages[i].Cache() != NULL 854 && sPages[i].Cache()->temporary && sPages[i].wired_count == 0) { 855 swappableModified++; 856 if (sPages[i].usage_count == 0) 857 swappableModifiedInactive++; 858 } 859 860 // track free and cached pages runs 861 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) { 862 currentFreeRun.end = i + 1; 863 currentCachedRun.end = i + 1; 864 } else { 865 if (currentFreeRun.Length() > longestFreeRun.Length()) 866 longestFreeRun = currentFreeRun; 867 currentFreeRun.start = currentFreeRun.end = i + 1; 868 869 if (pageState == 
PAGE_STATE_CACHED) { 870 currentCachedRun.end = i + 1; 871 } else { 872 if (currentCachedRun.Length() > longestCachedRun.Length()) 873 longestCachedRun = currentCachedRun; 874 currentCachedRun.start = currentCachedRun.end = i + 1; 875 } 876 } 877 } 878 879 kprintf("page stats:\n"); 880 kprintf("total: %lu\n", sNumPages); 881 882 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 883 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 884 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 885 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 886 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 887 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 888 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 889 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 890 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 891 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 892 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 893 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 894 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 895 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 896 897 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 898 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 899 sUnsatisfiedPageReservations); 900 kprintf("mapped pages: %lu\n", gMappedPagesCount); 901 kprintf("longest free pages run: %" B_PRIuSIZE " pages (at %" B_PRIuSIZE 902 ")\n", longestFreeRun.Length(), 903 sPages[longestFreeRun.start].physical_page_number); 904 kprintf("longest free/cached pages run: %" B_PRIuSIZE " pages (at %" 905 B_PRIuSIZE ")\n", longestCachedRun.Length(), 906 sPages[longestCachedRun.start].physical_page_number); 907 908 kprintf("waiting threads:\n"); 909 for (PageReservationWaiterList::Iterator it 910 = sPageReservationWaiters.GetIterator(); 911 PageReservationWaiter* waiter = it.Next();) { 912 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 913 ", don't touch: %6" B_PRIu32 "\n", waiter->thread->id, 914 waiter->missing, waiter->dontTouch); 915 } 916 917 kprintf("\nfree queue: %p, count = %ld\n", &sFreePageQueue, 918 sFreePageQueue.Count()); 919 kprintf("clear queue: %p, count = %ld\n", &sClearPageQueue, 920 sClearPageQueue.Count()); 921 kprintf("modified queue: %p, count = %ld (%ld temporary, %lu swappable, " 922 "inactive: %lu)\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 923 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 924 kprintf("active queue: %p, count = %ld\n", &sActivePageQueue, 925 sActivePageQueue.Count()); 926 kprintf("inactive queue: %p, count = %ld\n", &sInactivePageQueue, 927 sInactivePageQueue.Count()); 928 kprintf("cached queue: %p, count = %ld\n", &sCachedPageQueue, 929 sCachedPageQueue.Count()); 930 return 0; 931 } 932 933 934 #ifdef TRACK_PAGE_USAGE_STATS 935 936 static void 937 track_page_usage(vm_page* page) 938 { 939 if (page->wired_count == 0) { 940 sNextPageUsage[(int32)page->usage_count + 128]++; 941 sNextPageUsagePageCount++; 942 } 943 } 944 945 946 static void 947 update_page_usage_stats() 948 { 949 std::swap(sPageUsage, sNextPageUsage); 950 sPageUsagePageCount = sNextPageUsagePageCount; 951 952 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 953 sNextPageUsagePageCount = 0; 954 955 // compute average 956 if (sPageUsagePageCount > 0) { 957 int64 sum = 0; 958 for (int32 i = 0; i < 256; i++) 959 sum += (int64)sPageUsage[i] * (i - 128); 
		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
	}
}


static int
dump_page_usage_stats(int argc, char** argv)
{
	kprintf("distribution of page usage counts (%lu pages):",
		sPageUsagePageCount);

	int64 sum = 0;
	for (int32 i = 0; i < 256; i++) {
		if (i % 8 == 0)
			kprintf("\n%4ld:", i - 128);

		int64 count = sPageUsage[i];
		sum += count * (i - 128);

		kprintf(" %9llu", count);
	}

	kprintf("\n\n");

	kprintf("average usage count: %f\n",
		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);

	return 0;
}

#endif	// TRACK_PAGE_USAGE_STATS


// #pragma mark - vm_page


inline void
vm_page::InitState(uint8 newState)
{
	state = newState;
}


inline void
vm_page::SetState(uint8 newState)
{
	TPS(SetPageState(this, newState));

	state = newState;
}


// #pragma mark -


static void
get_page_stats(page_stats& _pageStats)
{
	_pageStats.totalFreePages = sUnreservedFreePages;
	_pageStats.cachedPages = sCachedPageQueue.Count();
	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
		// TODO: We don't get an actual snapshot here!
}


static bool
do_active_paging(const page_stats& pageStats)
{
	return pageStats.totalFreePages + pageStats.cachedPages
		< pageStats.unsatisfiedReservations
			+ (int32)sFreeOrCachedPagesTarget;
}


/*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
	\a count. Doesn't touch the last \a dontTouch pages of
	\c sUnreservedFreePages, though.
	\return The number of actually reserved pages.
*/
static uint32
reserve_some_pages(uint32 count, uint32 dontTouch)
{
	while (true) {
		int32 freePages = sUnreservedFreePages;
		if (freePages <= (int32)dontTouch)
			return 0;

		int32 toReserve = std::min(count, freePages - dontTouch);
		if (atomic_test_and_set(&sUnreservedFreePages,
				freePages - toReserve, freePages)
					== freePages) {
			return toReserve;
		}

		// the count changed in the meantime -- retry
	}
}


static void
wake_up_page_reservation_waiters()
{
	MutexLocker pageDeficitLocker(sPageDeficitLock);

	// TODO: If this is a low priority thread, we might want to disable
	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
	// high priority threads might be kept waiting while a medium priority
	// thread prevents us from running.
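	// Waiters are ordered so that the head of the list is the most privileged
	// one (smallest dontTouch, then highest thread priority; cf.
	// PageReservationWaiter::operator<()). We serve them in that order and
	// stop as soon as the head waiter cannot be satisfied completely.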
1070 1071 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) { 1072 int32 reserved = reserve_some_pages(waiter->missing, 1073 waiter->dontTouch); 1074 if (reserved == 0) 1075 return; 1076 1077 atomic_add(&sUnsatisfiedPageReservations, -reserved); 1078 waiter->missing -= reserved; 1079 1080 if (waiter->missing > 0) 1081 return; 1082 1083 sPageReservationWaiters.Remove(waiter); 1084 1085 InterruptsSpinLocker threadLocker(gThreadSpinlock); 1086 thread_unblock_locked(waiter->thread, B_OK); 1087 } 1088 } 1089 1090 1091 static inline void 1092 unreserve_pages(uint32 count) 1093 { 1094 atomic_add(&sUnreservedFreePages, count); 1095 if (sUnsatisfiedPageReservations != 0) 1096 wake_up_page_reservation_waiters(); 1097 } 1098 1099 1100 static void 1101 free_page(vm_page* page, bool clear) 1102 { 1103 DEBUG_PAGE_ACCESS_CHECK(page); 1104 1105 PAGE_ASSERT(page, !page->IsMapped()); 1106 1107 VMPageQueue* fromQueue; 1108 1109 switch (page->State()) { 1110 case PAGE_STATE_ACTIVE: 1111 fromQueue = &sActivePageQueue; 1112 break; 1113 case PAGE_STATE_INACTIVE: 1114 fromQueue = &sInactivePageQueue; 1115 break; 1116 case PAGE_STATE_MODIFIED: 1117 fromQueue = &sModifiedPageQueue; 1118 break; 1119 case PAGE_STATE_CACHED: 1120 fromQueue = &sCachedPageQueue; 1121 break; 1122 case PAGE_STATE_FREE: 1123 case PAGE_STATE_CLEAR: 1124 panic("free_page(): page %p already free", page); 1125 return; 1126 case PAGE_STATE_WIRED: 1127 case PAGE_STATE_UNUSED: 1128 fromQueue = NULL; 1129 break; 1130 default: 1131 panic("free_page(): page %p in invalid state %d", 1132 page, page->State()); 1133 return; 1134 } 1135 1136 if (page->CacheRef() != NULL) 1137 panic("to be freed page %p has cache", page); 1138 if (page->IsMapped()) 1139 panic("to be freed page %p has mappings", page); 1140 1141 if (fromQueue != NULL) 1142 fromQueue->RemoveUnlocked(page); 1143 1144 TA(FreePage()); 1145 1146 ReadLocker locker(sFreePageQueuesLock); 1147 1148 DEBUG_PAGE_ACCESS_END(page); 1149 1150 if (clear) { 1151 page->SetState(PAGE_STATE_CLEAR); 1152 sClearPageQueue.PrependUnlocked(page); 1153 } else { 1154 page->SetState(PAGE_STATE_FREE); 1155 sFreePageQueue.PrependUnlocked(page); 1156 } 1157 1158 locker.Unlock(); 1159 1160 unreserve_pages(1); 1161 } 1162 1163 1164 /*! The caller must make sure that no-one else tries to change the page's state 1165 while the function is called. If the page has a cache, this can be done by 1166 locking the cache. 
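	The page queues must not be locked.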
1167 */ 1168 static void 1169 set_page_state(vm_page *page, int pageState) 1170 { 1171 DEBUG_PAGE_ACCESS_CHECK(page); 1172 1173 if (pageState == page->State()) 1174 return; 1175 1176 VMPageQueue* fromQueue; 1177 1178 switch (page->State()) { 1179 case PAGE_STATE_ACTIVE: 1180 fromQueue = &sActivePageQueue; 1181 break; 1182 case PAGE_STATE_INACTIVE: 1183 fromQueue = &sInactivePageQueue; 1184 break; 1185 case PAGE_STATE_MODIFIED: 1186 fromQueue = &sModifiedPageQueue; 1187 break; 1188 case PAGE_STATE_CACHED: 1189 fromQueue = &sCachedPageQueue; 1190 break; 1191 case PAGE_STATE_FREE: 1192 case PAGE_STATE_CLEAR: 1193 panic("set_page_state(): page %p is free/clear", page); 1194 return; 1195 case PAGE_STATE_WIRED: 1196 case PAGE_STATE_UNUSED: 1197 fromQueue = NULL; 1198 break; 1199 default: 1200 panic("set_page_state(): page %p in invalid state %d", 1201 page, page->State()); 1202 return; 1203 } 1204 1205 VMPageQueue* toQueue; 1206 1207 switch (pageState) { 1208 case PAGE_STATE_ACTIVE: 1209 toQueue = &sActivePageQueue; 1210 break; 1211 case PAGE_STATE_INACTIVE: 1212 toQueue = &sInactivePageQueue; 1213 break; 1214 case PAGE_STATE_MODIFIED: 1215 toQueue = &sModifiedPageQueue; 1216 break; 1217 case PAGE_STATE_CACHED: 1218 PAGE_ASSERT(page, !page->IsMapped()); 1219 PAGE_ASSERT(page, !page->modified); 1220 toQueue = &sCachedPageQueue; 1221 break; 1222 case PAGE_STATE_FREE: 1223 case PAGE_STATE_CLEAR: 1224 panic("set_page_state(): target state is free/clear"); 1225 return; 1226 case PAGE_STATE_WIRED: 1227 case PAGE_STATE_UNUSED: 1228 toQueue = NULL; 1229 break; 1230 default: 1231 panic("set_page_state(): invalid target state %d", pageState); 1232 return; 1233 } 1234 1235 VMCache* cache = page->Cache(); 1236 if (cache != NULL && cache->temporary) { 1237 if (pageState == PAGE_STATE_MODIFIED) 1238 atomic_add(&sModifiedTemporaryPages, 1); 1239 else if (page->State() == PAGE_STATE_MODIFIED) 1240 atomic_add(&sModifiedTemporaryPages, -1); 1241 } 1242 1243 // move the page 1244 if (toQueue == fromQueue) { 1245 // Note: Theoretically we are required to lock when changing the page 1246 // state, even if we don't change the queue. We actually don't have to 1247 // do this, though, since only for the active queue there are different 1248 // page states and active pages have a cache that must be locked at 1249 // this point. So we rely on the fact that everyone must lock the cache 1250 // before trying to change/interpret the page state. 1251 PAGE_ASSERT(page, cache != NULL); 1252 cache->AssertLocked(); 1253 page->SetState(pageState); 1254 } else { 1255 if (fromQueue != NULL) 1256 fromQueue->RemoveUnlocked(page); 1257 1258 page->SetState(pageState); 1259 1260 if (toQueue != NULL) 1261 toQueue->AppendUnlocked(page); 1262 } 1263 } 1264 1265 1266 /*! Moves a previously modified page into a now appropriate queue. 1267 The page queues must not be locked. 1268 */ 1269 static void 1270 move_page_to_appropriate_queue(vm_page *page) 1271 { 1272 DEBUG_PAGE_ACCESS_CHECK(page); 1273 1274 // Note, this logic must be in sync with what the page daemon does. 1275 int32 state; 1276 if (page->IsMapped()) 1277 state = PAGE_STATE_ACTIVE; 1278 else if (page->modified) 1279 state = PAGE_STATE_MODIFIED; 1280 else 1281 state = PAGE_STATE_CACHED; 1282 1283 // TODO: If free + cached pages are low, we might directly want to free the 1284 // page. 
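	// Mapped pages stay active, unmapped modified pages go to the modified
	// queue, and clean unmapped pages become reusable cached pages.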
	set_page_state(page, state);
}


static void
clear_page(struct vm_page *page)
{
	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
		B_PAGE_SIZE);
}


static status_t
mark_page_range_in_use(addr_t startPage, size_t length, bool wired)
{
	TRACE(("mark_page_range_in_use: start 0x%lx, len 0x%lx\n",
		startPage, length));

	if (sPhysicalPageOffset > startPage) {
		dprintf("mark_page_range_in_use(%#" B_PRIxADDR ", %#" B_PRIxSIZE "): "
			"start page is before free list\n", startPage, length);
		if (sPhysicalPageOffset - startPage >= length)
			return B_OK;
		length -= sPhysicalPageOffset - startPage;
		startPage = sPhysicalPageOffset;
	}

	startPage -= sPhysicalPageOffset;

	if (startPage + length > sNumPages) {
		dprintf("mark_page_range_in_use(%#" B_PRIxADDR ", %#" B_PRIxSIZE "): "
			"range would extend past free list\n", startPage, length);
		if (startPage >= sNumPages)
			return B_OK;
		length = sNumPages - startPage;
	}

	WriteLocker locker(sFreePageQueuesLock);

	for (size_t i = 0; i < length; i++) {
		vm_page *page = &sPages[startPage + i];
		switch (page->State()) {
			case PAGE_STATE_FREE:
			case PAGE_STATE_CLEAR:
			{
				// TODO: This violates the page reservation policy, since we
				// remove pages from the free/clear queues without having
				// reserved them before. This should happen in the early boot
				// process only, though.
				DEBUG_PAGE_ACCESS_START(page);
				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
					? sFreePageQueue : sClearPageQueue;
				queue.Remove(page);
				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
				page->busy = false;
				atomic_add(&sUnreservedFreePages, -1);
				DEBUG_PAGE_ACCESS_END(page);
				break;
			}
			case PAGE_STATE_WIRED:
			case PAGE_STATE_UNUSED:
				break;
			case PAGE_STATE_ACTIVE:
			case PAGE_STATE_INACTIVE:
			case PAGE_STATE_MODIFIED:
			case PAGE_STATE_CACHED:
			default:
				// uh
				dprintf("mark_page_range_in_use: page 0x%lx in non-free state "
					"%d!\n", startPage + i, page->State());
				break;
		}
	}

	return B_OK;
}


/*!	This is a background thread that wakes up every now and then (every 100ms)
	and moves some pages from the free queue over to the clear queue.
	Given enough time, it will clear out all pages from the free queue - we
	could probably slow it down after having reached a certain threshold.
*/
static int32
page_scrubber(void *unused)
{
	(void)(unused);

	TRACE(("page_scrubber starting...\n"));

	for (;;) {
		snooze(100000); // 100ms

		if (sFreePageQueue.Count() == 0
				|| sUnreservedFreePages < (int32)sFreePagesTarget) {
			continue;
		}

		// Since we temporarily remove pages from the free pages reserve,
		// we must make sure we don't cause a violation of the page
		// reservation warranty. The following is usually stricter than
		// necessary, because we don't have information on how many of the
		// reserved pages have already been allocated.
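		// Note that reserve_some_pages() may return fewer than SCRUB_SIZE
		// pages; we only take that many pages off the free queue below.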
1388 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1389 kPageReserveForPriority[VM_PRIORITY_USER]); 1390 if (reserved == 0) 1391 continue; 1392 1393 // get some pages from the free queue 1394 ReadLocker locker(sFreePageQueuesLock); 1395 1396 vm_page *page[SCRUB_SIZE]; 1397 int32 scrubCount = 0; 1398 for (int32 i = 0; i < reserved; i++) { 1399 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1400 if (page[i] == NULL) 1401 break; 1402 1403 DEBUG_PAGE_ACCESS_START(page[i]); 1404 1405 page[i]->SetState(PAGE_STATE_ACTIVE); 1406 page[i]->busy = true; 1407 scrubCount++; 1408 } 1409 1410 locker.Unlock(); 1411 1412 if (scrubCount == 0) { 1413 unreserve_pages(reserved); 1414 continue; 1415 } 1416 1417 TA(ScrubbingPages(scrubCount)); 1418 1419 // clear them 1420 for (int32 i = 0; i < scrubCount; i++) 1421 clear_page(page[i]); 1422 1423 locker.Lock(); 1424 1425 // and put them into the clear queue 1426 for (int32 i = 0; i < scrubCount; i++) { 1427 page[i]->SetState(PAGE_STATE_CLEAR); 1428 page[i]->busy = false; 1429 DEBUG_PAGE_ACCESS_END(page[i]); 1430 sClearPageQueue.PrependUnlocked(page[i]); 1431 } 1432 1433 locker.Unlock(); 1434 1435 unreserve_pages(reserved); 1436 1437 TA(ScrubbedPages(scrubCount)); 1438 } 1439 1440 return 0; 1441 } 1442 1443 1444 static void 1445 init_page_marker(vm_page &marker) 1446 { 1447 marker.SetCacheRef(NULL); 1448 marker.InitState(PAGE_STATE_UNUSED); 1449 marker.busy = true; 1450 #if DEBUG_PAGE_QUEUE 1451 marker.queue = NULL; 1452 #endif 1453 #if DEBUG_PAGE_ACCESS 1454 marker.accessing_thread = thread_get_current_thread_id(); 1455 #endif 1456 } 1457 1458 1459 static void 1460 remove_page_marker(struct vm_page &marker) 1461 { 1462 DEBUG_PAGE_ACCESS_CHECK(&marker); 1463 1464 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1465 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1466 1467 marker.SetState(PAGE_STATE_UNUSED); 1468 } 1469 1470 1471 static vm_page * 1472 next_modified_page(struct vm_page &marker) 1473 { 1474 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1475 vm_page *page; 1476 1477 DEBUG_PAGE_ACCESS_CHECK(&marker); 1478 1479 if (marker.State() == PAGE_STATE_MODIFIED) { 1480 page = sModifiedPageQueue.Next(&marker); 1481 sModifiedPageQueue.Remove(&marker); 1482 marker.SetState(PAGE_STATE_UNUSED); 1483 } else 1484 page = sModifiedPageQueue.Head(); 1485 1486 for (; page != NULL; page = sModifiedPageQueue.Next(page)) { 1487 if (!page->busy) { 1488 // insert marker 1489 marker.SetState(PAGE_STATE_MODIFIED); 1490 sModifiedPageQueue.InsertAfter(page, &marker); 1491 return page; 1492 } 1493 } 1494 1495 return NULL; 1496 } 1497 1498 1499 // #pragma mark - 1500 1501 1502 class PageWriteTransfer; 1503 class PageWriteWrapper; 1504 1505 1506 class PageWriterRun { 1507 public: 1508 status_t Init(uint32 maxPages); 1509 1510 void PrepareNextRun(); 1511 void AddPage(vm_page* page); 1512 void Go(); 1513 1514 void PageWritten(PageWriteTransfer* transfer, status_t status, 1515 bool partialTransfer, size_t bytesTransferred); 1516 1517 private: 1518 uint32 fMaxPages; 1519 uint32 fWrapperCount; 1520 uint32 fTransferCount; 1521 vint32 fPendingTransfers; 1522 PageWriteWrapper* fWrappers; 1523 PageWriteTransfer* fTransfers; 1524 ConditionVariable fAllFinishedCondition; 1525 }; 1526 1527 1528 class PageWriteTransfer : public AsyncIOCallback { 1529 public: 1530 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1531 bool AddPage(vm_page* page); 1532 1533 status_t Schedule(uint32 flags); 1534 1535 void SetStatus(status_t status, size_t transferred); 1536 1537 
status_t Status() const { return fStatus; } 1538 struct VMCache* Cache() const { return fCache; } 1539 uint32 PageCount() const { return fPageCount; } 1540 1541 virtual void IOFinished(status_t status, bool partialTransfer, 1542 size_t bytesTransferred); 1543 private: 1544 PageWriterRun* fRun; 1545 struct VMCache* fCache; 1546 off_t fOffset; 1547 uint32 fPageCount; 1548 int32 fMaxPages; 1549 status_t fStatus; 1550 uint32 fVecCount; 1551 iovec fVecs[32]; // TODO: make dynamic/configurable 1552 }; 1553 1554 1555 class PageWriteWrapper { 1556 public: 1557 PageWriteWrapper(); 1558 ~PageWriteWrapper(); 1559 void SetTo(vm_page* page); 1560 void Done(status_t result); 1561 1562 private: 1563 vm_page* fPage; 1564 struct VMCache* fCache; 1565 bool fIsActive; 1566 }; 1567 1568 1569 PageWriteWrapper::PageWriteWrapper() 1570 : 1571 fIsActive(false) 1572 { 1573 } 1574 1575 1576 PageWriteWrapper::~PageWriteWrapper() 1577 { 1578 if (fIsActive) 1579 panic("page write wrapper going out of scope but isn't completed"); 1580 } 1581 1582 1583 /*! The page's cache must be locked. 1584 */ 1585 void 1586 PageWriteWrapper::SetTo(vm_page* page) 1587 { 1588 DEBUG_PAGE_ACCESS_CHECK(page); 1589 1590 if (page->busy) 1591 panic("setting page write wrapper to busy page"); 1592 1593 if (fIsActive) 1594 panic("re-setting page write wrapper that isn't completed"); 1595 1596 fPage = page; 1597 fCache = page->Cache(); 1598 fIsActive = true; 1599 1600 fPage->busy = true; 1601 fPage->busy_writing = true; 1602 1603 // We have a modified page -- however, while we're writing it back, 1604 // the page might still be mapped. In order not to lose any changes to the 1605 // page, we mark it clean before actually writing it back; if 1606 // writing the page fails for some reason, we'll just keep it in the 1607 // modified page list, but that should happen only rarely. 1608 1609 // If the page is changed after we cleared the dirty flag, but before we 1610 // had the chance to write it back, then we'll write it again later -- that 1611 // will probably not happen that often, though. 1612 1613 vm_clear_map_flags(fPage, PAGE_MODIFIED); 1614 } 1615 1616 1617 /*! The page's cache must be locked. 1618 The page queues must not be locked. 1619 */ 1620 void 1621 PageWriteWrapper::Done(status_t result) 1622 { 1623 if (!fIsActive) 1624 panic("completing page write wrapper that is not active"); 1625 1626 DEBUG_PAGE_ACCESS_START(fPage); 1627 1628 fPage->busy = false; 1629 // Set unbusy and notify later by hand, since we might free the page. 1630 1631 if (result == B_OK) { 1632 // put it into the active/inactive queue 1633 move_page_to_appropriate_queue(fPage); 1634 fPage->busy_writing = false; 1635 DEBUG_PAGE_ACCESS_END(fPage); 1636 } else { 1637 // Writing the page failed. One reason would be that the cache has been 1638 // shrunk and the page does no longer belong to the file. Otherwise the 1639 // actual I/O failed, in which case we'll simply keep the page modified. 1640 1641 if (!fPage->busy_writing) { 1642 // The busy_writing flag was cleared. That means the cache has been 1643 // shrunk while we were trying to write the page and we have to free 1644 // it now. 1645 vm_remove_all_page_mappings(fPage); 1646 // TODO: Unmapping should already happen when resizing the cache! 1647 fCache->RemovePage(fPage); 1648 free_page(fPage, false); 1649 } else { 1650 // Writing the page failed -- mark the page modified and move it to 1651 // an appropriate queue other than the modified queue, so we don't 1652 // keep trying to write it over and over again. 
We keep 1653 // non-temporary pages in the modified queue, though, so they don't 1654 // get lost in the inactive queue. 1655 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 1656 strerror(result)); 1657 1658 fPage->modified = true; 1659 if (!fCache->temporary) 1660 set_page_state(fPage, PAGE_STATE_MODIFIED); 1661 else if (fPage->IsMapped()) 1662 set_page_state(fPage, PAGE_STATE_ACTIVE); 1663 else 1664 set_page_state(fPage, PAGE_STATE_INACTIVE); 1665 1666 fPage->busy_writing = false; 1667 DEBUG_PAGE_ACCESS_END(fPage); 1668 } 1669 } 1670 1671 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 1672 fIsActive = false; 1673 } 1674 1675 1676 /*! The page's cache must be locked. 1677 */ 1678 void 1679 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 1680 { 1681 fRun = run; 1682 fCache = page->Cache(); 1683 fOffset = page->cache_offset; 1684 fPageCount = 1; 1685 fMaxPages = maxPages; 1686 fStatus = B_OK; 1687 1688 fVecs[0].iov_base = (void*)(page->physical_page_number << PAGE_SHIFT); 1689 fVecs[0].iov_len = B_PAGE_SIZE; 1690 fVecCount = 1; 1691 } 1692 1693 1694 /*! The page's cache must be locked. 1695 */ 1696 bool 1697 PageWriteTransfer::AddPage(vm_page* page) 1698 { 1699 if (page->Cache() != fCache 1700 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 1701 return false; 1702 1703 addr_t nextBase 1704 = (addr_t)fVecs[fVecCount - 1].iov_base + fVecs[fVecCount - 1].iov_len; 1705 1706 if (page->physical_page_number << PAGE_SHIFT == nextBase 1707 && page->cache_offset == fOffset + fPageCount) { 1708 // append to last iovec 1709 fVecs[fVecCount - 1].iov_len += B_PAGE_SIZE; 1710 fPageCount++; 1711 return true; 1712 } 1713 1714 nextBase = (addr_t)fVecs[0].iov_base - B_PAGE_SIZE; 1715 if (page->physical_page_number << PAGE_SHIFT == nextBase 1716 && page->cache_offset == fOffset - 1) { 1717 // prepend to first iovec and adjust offset 1718 fVecs[0].iov_base = (void*)nextBase; 1719 fVecs[0].iov_len += B_PAGE_SIZE; 1720 fOffset = page->cache_offset; 1721 fPageCount++; 1722 return true; 1723 } 1724 1725 if ((page->cache_offset == fOffset + fPageCount 1726 || page->cache_offset == fOffset - 1) 1727 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 1728 // not physically contiguous or not in the right order 1729 uint32 vectorIndex; 1730 if (page->cache_offset < fOffset) { 1731 // we are pre-pending another vector, move the other vecs 1732 for (uint32 i = fVecCount; i > 0; i--) 1733 fVecs[i] = fVecs[i - 1]; 1734 1735 fOffset = page->cache_offset; 1736 vectorIndex = 0; 1737 } else 1738 vectorIndex = fVecCount; 1739 1740 fVecs[vectorIndex].iov_base 1741 = (void*)(page->physical_page_number << PAGE_SHIFT); 1742 fVecs[vectorIndex].iov_len = B_PAGE_SIZE; 1743 1744 fVecCount++; 1745 fPageCount++; 1746 return true; 1747 } 1748 1749 return false; 1750 } 1751 1752 1753 status_t 1754 PageWriteTransfer::Schedule(uint32 flags) 1755 { 1756 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 1757 size_t writeLength = fPageCount << PAGE_SHIFT; 1758 1759 if (fRun != NULL) { 1760 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 1761 flags | B_PHYSICAL_IO_REQUEST, this); 1762 } 1763 1764 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 1765 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 1766 1767 SetStatus(status, writeLength); 1768 return fStatus; 1769 } 1770 1771 1772 void 1773 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 1774 { 1775 // only succeed if all pages up to the last one have been written fully 1776 // and the 
last page has at least been written partially 1777 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 1778 status = B_ERROR; 1779 1780 fStatus = status; 1781 } 1782 1783 1784 void 1785 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 1786 size_t bytesTransferred) 1787 { 1788 SetStatus(status, bytesTransferred); 1789 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 1790 } 1791 1792 1793 status_t 1794 PageWriterRun::Init(uint32 maxPages) 1795 { 1796 fMaxPages = maxPages; 1797 fWrapperCount = 0; 1798 fTransferCount = 0; 1799 fPendingTransfers = 0; 1800 1801 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 1802 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 1803 if (fWrappers == NULL || fTransfers == NULL) 1804 return B_NO_MEMORY; 1805 1806 return B_OK; 1807 } 1808 1809 1810 void 1811 PageWriterRun::PrepareNextRun() 1812 { 1813 fWrapperCount = 0; 1814 fTransferCount = 0; 1815 fPendingTransfers = 0; 1816 } 1817 1818 1819 /*! The page's cache must be locked. 1820 */ 1821 void 1822 PageWriterRun::AddPage(vm_page* page) 1823 { 1824 fWrappers[fWrapperCount++].SetTo(page); 1825 1826 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 1827 fTransfers[fTransferCount++].SetTo(this, page, 1828 page->Cache()->MaxPagesPerAsyncWrite()); 1829 } 1830 } 1831 1832 1833 void 1834 PageWriterRun::Go() 1835 { 1836 fPendingTransfers = fTransferCount; 1837 1838 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 1839 ConditionVariableEntry waitEntry; 1840 fAllFinishedCondition.Add(&waitEntry); 1841 1842 // schedule writes 1843 for (uint32 i = 0; i < fTransferCount; i++) 1844 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 1845 1846 // wait until all pages have been written 1847 waitEntry.Wait(); 1848 1849 // mark pages depending on whether they could be written or not 1850 1851 uint32 wrapperIndex = 0; 1852 for (uint32 i = 0; i < fTransferCount; i++) { 1853 PageWriteTransfer& transfer = fTransfers[i]; 1854 transfer.Cache()->Lock(); 1855 1856 for (uint32 j = 0; j < transfer.PageCount(); j++) 1857 fWrappers[wrapperIndex++].Done(transfer.Status()); 1858 1859 transfer.Cache()->Unlock(); 1860 } 1861 1862 ASSERT(wrapperIndex == fWrapperCount); 1863 1864 for (uint32 i = 0; i < fTransferCount; i++) { 1865 PageWriteTransfer& transfer = fTransfers[i]; 1866 struct VMCache* cache = transfer.Cache(); 1867 1868 // We've acquired a references for each page 1869 for (uint32 j = 0; j < transfer.PageCount(); j++) { 1870 // We release the cache references after all pages were made 1871 // unbusy again - otherwise releasing a vnode could deadlock. 1872 cache->ReleaseStoreRef(); 1873 cache->ReleaseRef(); 1874 } 1875 } 1876 } 1877 1878 1879 void 1880 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 1881 bool partialTransfer, size_t bytesTransferred) 1882 { 1883 if (atomic_add(&fPendingTransfers, -1) == 1) 1884 fAllFinishedCondition.NotifyAll(); 1885 } 1886 1887 1888 /*! The page writer continuously takes some pages from the modified 1889 queue, writes them back, and moves them back to the active queue. 1890 It runs in its own thread, and is only there to keep the number 1891 of modified pages low, so that more pages can be reused with 1892 fewer costs. 
1893 */ 1894 status_t 1895 page_writer(void* /*unused*/) 1896 { 1897 const uint32 kNumPages = 256; 1898 uint32 writtenPages = 0; 1899 bigtime_t lastWrittenTime = 0; 1900 bigtime_t pageCollectionTime = 0; 1901 bigtime_t pageWritingTime = 0; 1902 1903 PageWriterRun run; 1904 if (run.Init(kNumPages) != B_OK) { 1905 panic("page writer: Failed to init PageWriterRun!"); 1906 return B_ERROR; 1907 } 1908 1909 vm_page marker; 1910 init_page_marker(marker); 1911 1912 while (true) { 1913 // TODO: Maybe wait shorter when memory is low! 1914 if (sModifiedPageQueue.Count() < kNumPages) { 1915 sPageWriterCondition.Wait(3000000, true); 1916 // all 3 seconds when no one triggers us 1917 } 1918 1919 int32 modifiedPages = sModifiedPageQueue.Count(); 1920 if (modifiedPages == 0) 1921 continue; 1922 1923 #if ENABLE_SWAP_SUPPORT 1924 page_stats pageStats; 1925 get_page_stats(pageStats); 1926 bool activePaging = do_active_paging(pageStats); 1927 #endif 1928 1929 // depending on how urgent it becomes to get pages to disk, we adjust 1930 // our I/O priority 1931 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 1932 int32 ioPriority = B_IDLE_PRIORITY; 1933 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 1934 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 1935 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 1936 } else { 1937 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 1938 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 1939 } 1940 1941 thread_set_io_priority(ioPriority); 1942 1943 uint32 numPages = 0; 1944 run.PrepareNextRun(); 1945 1946 // TODO: make this laptop friendly, too (ie. only start doing 1947 // something if someone else did something or there is really 1948 // enough to do). 1949 1950 // collect pages to be written 1951 pageCollectionTime -= system_time(); 1952 1953 while (numPages < kNumPages) { 1954 vm_page *page = next_modified_page(marker); 1955 if (page == NULL) 1956 break; 1957 1958 PageCacheLocker cacheLocker(page, false); 1959 if (!cacheLocker.IsLocked()) 1960 continue; 1961 1962 VMCache *cache = page->Cache(); 1963 1964 // If the page is busy or its state has changed while we were 1965 // locking the cache, just ignore it. 1966 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 1967 continue; 1968 1969 DEBUG_PAGE_ACCESS_START(page); 1970 1971 // Don't write back wired (locked) pages. 1972 if (page->wired_count > 0) { 1973 set_page_state(page, PAGE_STATE_ACTIVE); 1974 DEBUG_PAGE_ACCESS_END(page); 1975 continue; 1976 } 1977 1978 // Write back temporary pages only when we're actively paging. 1979 if (cache->temporary 1980 #if ENABLE_SWAP_SUPPORT 1981 && (!activePaging 1982 || !cache->CanWritePage( 1983 (off_t)page->cache_offset << PAGE_SHIFT)) 1984 #endif 1985 ) { 1986 // We can't/don't want to do anything with this page, so move it 1987 // to one of the other queues. 1988 if (page->mappings.IsEmpty()) 1989 set_page_state(page, PAGE_STATE_INACTIVE); 1990 else 1991 set_page_state(page, PAGE_STATE_ACTIVE); 1992 1993 DEBUG_PAGE_ACCESS_END(page); 1994 continue; 1995 } 1996 1997 // We need our own reference to the store, as it might currently be 1998 // destroyed. 
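			// If acquiring the store reference fails, skip this page for now
			// and yield before moving on to the next one.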
1999 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 2000 DEBUG_PAGE_ACCESS_END(page); 2001 cacheLocker.Unlock(); 2002 thread_yield(true); 2003 continue; 2004 } 2005 2006 run.AddPage(page); 2007 2008 DEBUG_PAGE_ACCESS_END(page); 2009 2010 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 2011 TPW(WritePage(page)); 2012 2013 cache->AcquireRefLocked(); 2014 numPages++; 2015 } 2016 2017 pageCollectionTime += system_time(); 2018 2019 if (numPages == 0) 2020 continue; 2021 2022 // write pages to disk and do all the cleanup 2023 pageWritingTime -= system_time(); 2024 run.Go(); 2025 pageWritingTime += system_time(); 2026 2027 // debug output only... 2028 writtenPages += numPages; 2029 if (writtenPages >= 1024) { 2030 bigtime_t now = system_time(); 2031 TRACE(("page writer: wrote 1024 pages (total: %llu ms, " 2032 "collect: %llu ms, write: %llu ms)\n", 2033 (now - lastWrittenTime) / 1000, 2034 pageCollectionTime / 1000, pageWritingTime / 1000)); 2035 writtenPages -= 1024; 2036 lastWrittenTime = now; 2037 pageCollectionTime = 0; 2038 pageWritingTime = 0; 2039 } 2040 } 2041 2042 remove_page_marker(marker); 2043 return B_OK; 2044 } 2045 2046 2047 // #pragma mark - 2048 2049 2050 // TODO: This should be done in the page daemon! 2051 #if 0 2052 #if ENABLE_SWAP_SUPPORT 2053 static bool 2054 free_page_swap_space(int32 index) 2055 { 2056 vm_page *page = vm_page_at_index(index); 2057 PageCacheLocker locker(page); 2058 if (!locker.IsLocked()) 2059 return false; 2060 2061 DEBUG_PAGE_ACCESS_START(page); 2062 2063 VMCache* cache = page->Cache(); 2064 if (cache->temporary && page->wired_count == 0 2065 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2066 && page->usage_count > 0) { 2067 // TODO: how to judge a page is highly active? 2068 if (swap_free_page_swap_space(page)) { 2069 // We need to mark the page modified, since otherwise it could be 2070 // stolen and we'd lose its data. 
2071 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2072 TD(FreedPageSwap(page)); 2073 DEBUG_PAGE_ACCESS_END(page); 2074 return true; 2075 } 2076 } 2077 DEBUG_PAGE_ACCESS_END(page); 2078 return false; 2079 } 2080 #endif 2081 #endif // 0 2082 2083 2084 static vm_page * 2085 find_cached_page_candidate(struct vm_page &marker) 2086 { 2087 DEBUG_PAGE_ACCESS_CHECK(&marker); 2088 2089 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2090 vm_page *page; 2091 2092 if (marker.State() == PAGE_STATE_UNUSED) { 2093 // Get the first free pages of the (in)active queue 2094 page = sCachedPageQueue.Head(); 2095 } else { 2096 // Get the next page of the current queue 2097 if (marker.State() != PAGE_STATE_CACHED) { 2098 panic("invalid marker %p state", &marker); 2099 return NULL; 2100 } 2101 2102 page = sCachedPageQueue.Next(&marker); 2103 sCachedPageQueue.Remove(&marker); 2104 marker.SetState(PAGE_STATE_UNUSED); 2105 } 2106 2107 while (page != NULL) { 2108 if (!page->busy) { 2109 // we found a candidate, insert marker 2110 marker.SetState(PAGE_STATE_CACHED); 2111 sCachedPageQueue.InsertAfter(page, &marker); 2112 return page; 2113 } 2114 2115 page = sCachedPageQueue.Next(page); 2116 } 2117 2118 return NULL; 2119 } 2120 2121 2122 static bool 2123 free_cached_page(vm_page *page, bool dontWait) 2124 { 2125 // try to lock the page's cache 2126 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2127 return false; 2128 VMCache* cache = page->Cache(); 2129 2130 AutoLocker<VMCache> cacheLocker(cache, true); 2131 MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked); 2132 2133 // check again if that page is still a candidate 2134 if (page->busy || page->State() != PAGE_STATE_CACHED) 2135 return false; 2136 2137 DEBUG_PAGE_ACCESS_START(page); 2138 2139 PAGE_ASSERT(page, !page->IsMapped()); 2140 PAGE_ASSERT(page, !page->modified); 2141 2142 // we can now steal this page 2143 2144 cache->RemovePage(page); 2145 // Now the page doesn't have cache anymore, so no one else (e.g. 2146 // vm_page_allocate_page_run() can pick it up), since they would be 2147 // required to lock the cache first, which would fail. 2148 2149 sCachedPageQueue.RemoveUnlocked(page); 2150 return true; 2151 } 2152 2153 2154 static uint32 2155 free_cached_pages(uint32 pagesToFree, bool dontWait) 2156 { 2157 vm_page marker; 2158 init_page_marker(marker); 2159 2160 uint32 pagesFreed = 0; 2161 2162 while (pagesFreed < pagesToFree) { 2163 vm_page *page = find_cached_page_candidate(marker); 2164 if (page == NULL) 2165 break; 2166 2167 if (free_cached_page(page, dontWait)) { 2168 ReadLocker locker(sFreePageQueuesLock); 2169 page->SetState(PAGE_STATE_FREE); 2170 DEBUG_PAGE_ACCESS_END(page); 2171 sFreePageQueue.PrependUnlocked(page); 2172 locker.Unlock(); 2173 2174 TA(StolenPage()); 2175 2176 pagesFreed++; 2177 } 2178 } 2179 2180 remove_page_marker(marker); 2181 2182 return pagesFreed; 2183 } 2184 2185 2186 static void 2187 idle_scan_active_pages(page_stats& pageStats) 2188 { 2189 VMPageQueue& queue = sActivePageQueue; 2190 2191 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2192 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2193 2194 while (maxToScan > 0) { 2195 maxToScan--; 2196 2197 // Get the next page. Note that we don't bother to lock here. We go with 2198 // the assumption that on all architectures reading/writing pointers is 2199 // atomic. Beyond that it doesn't really matter. We have to unlock the 2200 // queue anyway to lock the page's cache, and we'll recheck afterwards. 
2201 		vm_page* page = queue.Head();
2202 		if (page == NULL)
2203 			break;
2204 
2205 		// lock the page's cache
2206 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2207 		if (cache == NULL)
2208 			continue;
2209 
2210 		if (page->State() != PAGE_STATE_ACTIVE) {
2211 			// page is no longer in the cache or in this queue
2212 			cache->ReleaseRefAndUnlock();
2213 			continue;
2214 		}
2215 
2216 		if (page->busy) {
2217 			// page is busy -- requeue at the end
2218 			vm_page_requeue(page, true);
2219 			cache->ReleaseRefAndUnlock();
2220 			continue;
2221 		}
2222 
2223 		DEBUG_PAGE_ACCESS_START(page);
2224 
2225 		// Get the page active/modified flags and update the page's usage count.
2226 		// We completely unmap inactive temporary pages. This saves us from
2227 		// having to iterate through the inactive list as well, since we'll be
2228 		// notified via page fault whenever such an inactive page is used again.
2229 		// We don't remove the mappings of non-temporary pages, since we
2230 		// wouldn't notice when those would become unused and could thus be
2231 		// moved to the cached list.
2232 		int32 usageCount;
2233 		if (page->wired_count > 0 || page->usage_count > 0 || !cache->temporary)
2234 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2235 		else
2236 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2237 
2238 		if (usageCount > 0) {
2239 			usageCount += page->usage_count + kPageUsageAdvance;
2240 			if (usageCount > kPageUsageMax)
2241 				usageCount = kPageUsageMax;
2242 			// TODO: This would probably also be the place to reclaim swap space.
2243 		} else {
2244 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2245 			if (usageCount < 0) {
2246 				usageCount = 0;
2247 				set_page_state(page, PAGE_STATE_INACTIVE);
2248 			}
2249 		}
2250 
2251 		page->usage_count = usageCount;
2252 
2253 		DEBUG_PAGE_ACCESS_END(page);
2254 
2255 		cache->ReleaseRefAndUnlock();
2256 	}
2257 }
2258 
2259 
2260 static void
2261 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2262 {
2263 	int32 pagesToFree = pageStats.unsatisfiedReservations
2264 		+ sFreeOrCachedPagesTarget
2265 		- (pageStats.totalFreePages + pageStats.cachedPages);
2266 	if (pagesToFree <= 0)
2267 		return;
2268 
2269 	bigtime_t time = system_time();
2270 	uint32 pagesScanned = 0;
2271 	uint32 pagesToCached = 0;
2272 	uint32 pagesToModified = 0;
2273 	uint32 pagesToActive = 0;
2274 
2275 	// Determine how many pages at maximum to send to the modified queue. Since
2276 	// it is relatively expensive to page out pages, we do that on a grander
2277 	// scale only when things get desperate.
2278 	uint32 maxToFlush = despairLevel <= 1 ?
32 : 10000; 2279 2280 vm_page marker; 2281 init_page_marker(marker); 2282 2283 VMPageQueue& queue = sInactivePageQueue; 2284 InterruptsSpinLocker queueLocker(queue.GetLock()); 2285 uint32 maxToScan = queue.Count(); 2286 2287 vm_page* nextPage = queue.Head(); 2288 2289 while (pagesToFree > 0 && maxToScan > 0) { 2290 maxToScan--; 2291 2292 // get the next page 2293 vm_page* page = nextPage; 2294 if (page == NULL) 2295 break; 2296 nextPage = queue.Next(page); 2297 2298 if (page->busy) 2299 continue; 2300 2301 // mark the position 2302 queue.InsertAfter(page, &marker); 2303 queueLocker.Unlock(); 2304 2305 // lock the page's cache 2306 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2307 if (cache == NULL || page->busy 2308 || page->State() != PAGE_STATE_INACTIVE) { 2309 if (cache != NULL) 2310 cache->ReleaseRefAndUnlock(); 2311 queueLocker.Lock(); 2312 nextPage = queue.Next(&marker); 2313 queue.Remove(&marker); 2314 continue; 2315 } 2316 2317 pagesScanned++; 2318 2319 DEBUG_PAGE_ACCESS_START(page); 2320 2321 // Get the accessed count, clear the accessed/modified flags and 2322 // unmap the page, if it hasn't been accessed. 2323 int32 usageCount; 2324 if (page->wired_count > 0) 2325 usageCount = vm_clear_page_mapping_accessed_flags(page); 2326 else 2327 usageCount = vm_remove_all_page_mappings_if_unaccessed(page); 2328 2329 // update usage count 2330 if (usageCount > 0) { 2331 usageCount += page->usage_count + kPageUsageAdvance; 2332 if (usageCount > kPageUsageMax) 2333 usageCount = kPageUsageMax; 2334 } else { 2335 usageCount += page->usage_count - (int32)kPageUsageDecline; 2336 if (usageCount < 0) 2337 usageCount = 0; 2338 } 2339 2340 page->usage_count = usageCount; 2341 2342 // Move to fitting queue or requeue: 2343 // * Active mapped pages go to the active queue. 2344 // * Inactive mapped (i.e. wired) pages are requeued. 2345 // * The remaining pages are cachable. Thus, if unmodified they go to 2346 // the cached queue, otherwise to the modified queue (up to a limit). 2347 // Note that until in the idle scanning we don't exempt pages of 2348 // temporary caches. Apparently we really need memory, so we better 2349 // page out memory as well. 
2350 bool isMapped = page->IsMapped(); 2351 if (usageCount > 0) { 2352 if (isMapped) { 2353 set_page_state(page, PAGE_STATE_ACTIVE); 2354 pagesToActive++; 2355 } else 2356 vm_page_requeue(page, true); 2357 } else if (isMapped) { 2358 vm_page_requeue(page, true); 2359 } else if (!page->modified) { 2360 set_page_state(page, PAGE_STATE_CACHED); 2361 pagesToFree--; 2362 pagesToCached++; 2363 } else if (maxToFlush > 0) { 2364 set_page_state(page, PAGE_STATE_MODIFIED); 2365 maxToFlush--; 2366 pagesToModified++; 2367 } else 2368 vm_page_requeue(page, true); 2369 2370 DEBUG_PAGE_ACCESS_END(page); 2371 2372 cache->ReleaseRefAndUnlock(); 2373 2374 // remove the marker 2375 queueLocker.Lock(); 2376 nextPage = queue.Next(&marker); 2377 queue.Remove(&marker); 2378 } 2379 2380 queueLocker.Unlock(); 2381 2382 time = system_time() - time; 2383 TRACE_DAEMON(" -> inactive scan (%7lld us): scanned: %7lu, " 2384 "moved: %lu -> cached, %lu -> modified, %lu -> active\n", time, 2385 pagesScanned, pagesToCached, pagesToModified, pagesToActive); 2386 2387 // wake up the page writer, if we tossed it some pages 2388 if (pagesToModified > 0) 2389 sPageWriterCondition.WakeUp(); 2390 } 2391 2392 2393 static void 2394 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2395 { 2396 vm_page marker; 2397 init_page_marker(marker); 2398 2399 VMPageQueue& queue = sActivePageQueue; 2400 InterruptsSpinLocker queueLocker(queue.GetLock()); 2401 uint32 maxToScan = queue.Count(); 2402 2403 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2404 + sFreeOrCachedPagesTarget 2405 - (pageStats.totalFreePages + pageStats.cachedPages) 2406 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2407 if (pagesToDeactivate <= 0) 2408 return; 2409 2410 bigtime_t time = system_time(); 2411 uint32 pagesAccessed = 0; 2412 uint32 pagesToInactive = 0; 2413 uint32 pagesScanned = 0; 2414 2415 vm_page* nextPage = queue.Head(); 2416 2417 while (pagesToDeactivate > 0 && maxToScan > 0) { 2418 maxToScan--; 2419 2420 // get the next page 2421 vm_page* page = nextPage; 2422 if (page == NULL) 2423 break; 2424 nextPage = queue.Next(page); 2425 2426 if (page->busy) 2427 continue; 2428 2429 // mark the position 2430 queue.InsertAfter(page, &marker); 2431 queueLocker.Unlock(); 2432 2433 // lock the page's cache 2434 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2435 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2436 if (cache != NULL) 2437 cache->ReleaseRefAndUnlock(); 2438 queueLocker.Lock(); 2439 nextPage = queue.Next(&marker); 2440 queue.Remove(&marker); 2441 continue; 2442 } 2443 2444 pagesScanned++; 2445 2446 DEBUG_PAGE_ACCESS_START(page); 2447 2448 // Get the page active/modified flags and update the page's usage count. 2449 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2450 2451 if (usageCount > 0) { 2452 usageCount += page->usage_count + kPageUsageAdvance; 2453 if (usageCount > kPageUsageMax) 2454 usageCount = kPageUsageMax; 2455 pagesAccessed++; 2456 // TODO: This would probably also be the place to reclaim swap space. 
2457 } else { 2458 usageCount += page->usage_count - (int32)kPageUsageDecline; 2459 if (usageCount <= 0) { 2460 usageCount = 0; 2461 set_page_state(page, PAGE_STATE_INACTIVE); 2462 pagesToInactive++; 2463 } 2464 } 2465 2466 page->usage_count = usageCount; 2467 2468 DEBUG_PAGE_ACCESS_END(page); 2469 2470 cache->ReleaseRefAndUnlock(); 2471 2472 // remove the marker 2473 queueLocker.Lock(); 2474 nextPage = queue.Next(&marker); 2475 queue.Remove(&marker); 2476 } 2477 2478 time = system_time() - time; 2479 TRACE_DAEMON(" -> active scan (%7lld us): scanned: %7lu, " 2480 "moved: %lu -> inactive, encountered %lu accessed ones\n", time, 2481 pagesScanned, pagesToInactive, pagesAccessed); 2482 } 2483 2484 2485 static void 2486 page_daemon_idle_scan(page_stats& pageStats) 2487 { 2488 TRACE_DAEMON("page daemon: idle run\n"); 2489 2490 if (pageStats.totalFreePages < (int32)sFreePagesTarget) { 2491 // We want more actually free pages, so free some from the cached 2492 // ones. 2493 uint32 freed = free_cached_pages( 2494 sFreePagesTarget - pageStats.totalFreePages, false); 2495 if (freed > 0) 2496 unreserve_pages(freed); 2497 get_page_stats(pageStats); 2498 } 2499 2500 // Walk the active list and move pages to the inactive queue. 2501 get_page_stats(pageStats); 2502 idle_scan_active_pages(pageStats); 2503 } 2504 2505 2506 static void 2507 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel) 2508 { 2509 TRACE_DAEMON("page daemon: full run: free: %lu, cached: %lu, " 2510 "to free: %lu\n", pageStats.totalFreePages, pageStats.cachedPages, 2511 pageStats.unsatisfiedReservations + sFreeOrCachedPagesTarget 2512 - (pageStats.totalFreePages + pageStats.cachedPages)); 2513 2514 // Walk the inactive list and transfer pages to the cached and modified 2515 // queues. 2516 full_scan_inactive_pages(pageStats, despairLevel); 2517 2518 // Free cached pages. Also wake up reservation waiters. 2519 get_page_stats(pageStats); 2520 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget 2521 - (pageStats.totalFreePages); 2522 if (pagesToFree > 0) { 2523 uint32 freed = free_cached_pages(pagesToFree, true); 2524 if (freed > 0) 2525 unreserve_pages(freed); 2526 } 2527 2528 // Walk the active list and move pages to the inactive queue. 2529 get_page_stats(pageStats); 2530 full_scan_active_pages(pageStats, despairLevel); 2531 } 2532 2533 2534 static status_t 2535 page_daemon(void* /*unused*/) 2536 { 2537 int32 despairLevel = 0; 2538 2539 while (true) { 2540 sPageDaemonCondition.ClearActivated(); 2541 2542 // evaluate the free pages situation 2543 page_stats pageStats; 2544 get_page_stats(pageStats); 2545 2546 if (!do_active_paging(pageStats)) { 2547 // Things look good -- just maintain statistics and keep the pool 2548 // of actually free pages full enough. 2549 despairLevel = 0; 2550 page_daemon_idle_scan(pageStats); 2551 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false); 2552 } else { 2553 // Not enough free pages. We need to do some real work. 2554 despairLevel = std::max(despairLevel + 1, (int32)3); 2555 page_daemon_full_scan(pageStats, despairLevel); 2556 2557 // Don't wait after the first full scan, but rather immediately 2558 // check whether we were successful in freeing enough pages and 2559 // re-run with increased despair level. The first scan is 2560 // conservative with respect to moving inactive modified pages to 2561 // the modified list to avoid thrashing. The second scan, however, 2562 // will not hold back. 
2563 if (despairLevel > 1) 2564 snooze(kBusyScanWaitInterval); 2565 } 2566 } 2567 2568 return B_OK; 2569 } 2570 2571 2572 /*! Returns how many pages could *not* be reserved. 2573 */ 2574 static uint32 2575 reserve_pages(uint32 count, int priority, bool dontWait) 2576 { 2577 int32 dontTouch = kPageReserveForPriority[priority]; 2578 2579 while (true) { 2580 count -= reserve_some_pages(count, dontTouch); 2581 if (count == 0) 2582 return 0; 2583 2584 if (sUnsatisfiedPageReservations == 0) { 2585 count -= free_cached_pages(count, dontWait); 2586 if (count == 0) 2587 return count; 2588 } 2589 2590 if (dontWait) 2591 return count; 2592 2593 // we need to wait for pages to become available 2594 2595 MutexLocker pageDeficitLocker(sPageDeficitLock); 2596 2597 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 2598 sUnsatisfiedPageReservations += count; 2599 2600 if (sUnreservedFreePages > dontTouch) { 2601 // the situation changed 2602 sUnsatisfiedPageReservations -= count; 2603 continue; 2604 } 2605 2606 PageReservationWaiter waiter; 2607 waiter.dontTouch = dontTouch; 2608 waiter.missing = count; 2609 waiter.thread = thread_get_current_thread(); 2610 waiter.threadPriority = waiter.thread->priority; 2611 2612 // insert ordered (i.e. after all waiters with higher or equal priority) 2613 PageReservationWaiter* otherWaiter = NULL; 2614 for (PageReservationWaiterList::Iterator it 2615 = sPageReservationWaiters.GetIterator(); 2616 (otherWaiter = it.Next()) != NULL;) { 2617 if (waiter < *otherWaiter) 2618 break; 2619 } 2620 2621 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 2622 2623 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 2624 "waiting for pages"); 2625 2626 if (notifyDaemon) 2627 sPageDaemonCondition.WakeUp(); 2628 2629 pageDeficitLocker.Unlock(); 2630 2631 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 2632 thread_block(); 2633 2634 pageDeficitLocker.Lock(); 2635 2636 return 0; 2637 } 2638 } 2639 2640 2641 // #pragma mark - private kernel API 2642 2643 2644 /*! Writes a range of modified pages of a cache to disk. 2645 You need to hold the VMCache lock when calling this function. 2646 Note that the cache lock is released in this function. 2647 \param cache The cache. 2648 \param firstPage Offset (in page size units) of the first page in the range. 2649 \param endPage End offset (in page size units) of the page range. The page 2650 at this offset is not included. 
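	\return \c B_OK. Errors encountered while scheduling the individual write
		transfers are reported to the affected PageWriteWrapper objects and
		are not propagated to the caller.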
2651 */ 2652 status_t 2653 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 2654 uint32 endPage) 2655 { 2656 static const int32 kMaxPages = 256; 2657 int32 maxPages = cache->MaxPagesPerWrite(); 2658 if (maxPages < 0 || maxPages > kMaxPages) 2659 maxPages = kMaxPages; 2660 2661 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 2662 | HEAP_DONT_LOCK_KERNEL_SPACE; 2663 2664 PageWriteWrapper stackWrappers[2]; 2665 PageWriteWrapper* wrapperPool 2666 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 2667 if (wrapperPool == NULL) { 2668 // don't fail, just limit our capabilities 2669 wrapperPool = stackWrappers; 2670 maxPages = 1; 2671 } 2672 2673 int32 nextWrapper = 0; 2674 2675 PageWriteWrapper* wrappers[maxPages]; 2676 int32 usedWrappers = 0; 2677 2678 PageWriteTransfer transfer; 2679 bool transferEmpty = true; 2680 2681 VMCachePagesTree::Iterator it 2682 = cache->pages.GetIterator(firstPage, true, true); 2683 2684 while (true) { 2685 vm_page* page = it.Next(); 2686 if (page == NULL || page->cache_offset >= endPage) { 2687 if (transferEmpty) 2688 break; 2689 2690 page = NULL; 2691 } 2692 2693 if (page != NULL) { 2694 if (page->busy 2695 || (page->State() != PAGE_STATE_MODIFIED 2696 && !vm_test_map_modification(page))) { 2697 page = NULL; 2698 } 2699 } 2700 2701 PageWriteWrapper* wrapper = NULL; 2702 if (page != NULL) { 2703 wrapper = &wrapperPool[nextWrapper++]; 2704 if (nextWrapper > maxPages) 2705 nextWrapper = 0; 2706 2707 DEBUG_PAGE_ACCESS_START(page); 2708 2709 wrapper->SetTo(page); 2710 2711 if (transferEmpty || transfer.AddPage(page)) { 2712 if (transferEmpty) { 2713 transfer.SetTo(NULL, page, maxPages); 2714 transferEmpty = false; 2715 } 2716 2717 DEBUG_PAGE_ACCESS_END(page); 2718 2719 wrappers[usedWrappers++] = wrapper; 2720 continue; 2721 } 2722 2723 DEBUG_PAGE_ACCESS_END(page); 2724 } 2725 2726 if (transferEmpty) 2727 continue; 2728 2729 cache->Unlock(); 2730 status_t status = transfer.Schedule(0); 2731 cache->Lock(); 2732 2733 for (int32 i = 0; i < usedWrappers; i++) 2734 wrappers[i]->Done(status); 2735 2736 usedWrappers = 0; 2737 2738 if (page != NULL) { 2739 transfer.SetTo(NULL, page, maxPages); 2740 wrappers[usedWrappers++] = wrapper; 2741 } else 2742 transferEmpty = true; 2743 } 2744 2745 if (wrapperPool != stackWrappers) 2746 delete[] wrapperPool; 2747 2748 return B_OK; 2749 } 2750 2751 2752 /*! You need to hold the VMCache lock when calling this function. 2753 Note that the cache lock is released in this function. 2754 */ 2755 status_t 2756 vm_page_write_modified_pages(VMCache *cache) 2757 { 2758 return vm_page_write_modified_page_range(cache, 0, 2759 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 2760 } 2761 2762 2763 /*! Schedules the page writer to write back the specified \a page. 2764 Note, however, that it might not do this immediately, and it can well 2765 take several seconds until the page is actually written out. 2766 */ 2767 void 2768 vm_page_schedule_write_page(vm_page *page) 2769 { 2770 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 2771 2772 vm_page_requeue(page, false); 2773 2774 sPageWriterCondition.WakeUp(); 2775 } 2776 2777 2778 /*! Cache must be locked. 
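	Requeues all non-busy modified pages of \a cache whose offsets lie in the
	range [\a firstPage, \a endPage) (in units of page size; \a endPage is
	exclusive) and wakes up the page writer if at least one page was requeued.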
2779 */ 2780 void 2781 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 2782 uint32 endPage) 2783 { 2784 uint32 modified = 0; 2785 for (VMCachePagesTree::Iterator it 2786 = cache->pages.GetIterator(firstPage, true, true); 2787 vm_page *page = it.Next();) { 2788 if (page->cache_offset >= endPage) 2789 break; 2790 2791 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 2792 DEBUG_PAGE_ACCESS_START(page); 2793 vm_page_requeue(page, false); 2794 modified++; 2795 DEBUG_PAGE_ACCESS_END(page); 2796 } 2797 } 2798 2799 if (modified > 0) 2800 sPageWriterCondition.WakeUp(); 2801 } 2802 2803 2804 void 2805 vm_page_init_num_pages(kernel_args *args) 2806 { 2807 // calculate the size of memory by looking at the physical_memory_range array 2808 addr_t physicalPagesEnd = 0; 2809 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 2810 2811 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 2812 physicalPagesEnd = (args->physical_memory_range[i].start 2813 + args->physical_memory_range[i].size) / B_PAGE_SIZE; 2814 } 2815 2816 TRACE(("first phys page = 0x%lx, end 0x%lx\n", sPhysicalPageOffset, 2817 physicalPagesEnd)); 2818 2819 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 2820 2821 #ifdef LIMIT_AVAILABLE_MEMORY 2822 if (sNumPages > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) 2823 sNumPages = LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 2824 #endif 2825 } 2826 2827 2828 status_t 2829 vm_page_init(kernel_args *args) 2830 { 2831 TRACE(("vm_page_init: entry\n")); 2832 2833 // init page queues 2834 sModifiedPageQueue.Init("modified pages queue"); 2835 sInactivePageQueue.Init("inactive pages queue"); 2836 sActivePageQueue.Init("active pages queue"); 2837 sCachedPageQueue.Init("cached pages queue"); 2838 sFreePageQueue.Init("free pages queue"); 2839 sClearPageQueue.Init("clear pages queue"); 2840 2841 new (&sPageReservationWaiters) PageReservationWaiterList; 2842 2843 // map in the new free page table 2844 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 2845 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false); 2846 2847 TRACE(("vm_init: putting free_page_table @ %p, # ents %ld (size 0x%x)\n", 2848 sPages, sNumPages, (unsigned int)(sNumPages * sizeof(vm_page)))); 2849 2850 // initialize the free page table 2851 for (uint32 i = 0; i < sNumPages; i++) { 2852 sPages[i].physical_page_number = sPhysicalPageOffset + i; 2853 sPages[i].InitState(PAGE_STATE_FREE); 2854 new(&sPages[i].mappings) vm_page_mappings(); 2855 sPages[i].wired_count = 0; 2856 sPages[i].usage_count = 0; 2857 sPages[i].busy_writing = false; 2858 sPages[i].SetCacheRef(NULL); 2859 #if DEBUG_PAGE_QUEUE 2860 sPages[i].queue = NULL; 2861 #endif 2862 #if DEBUG_PAGE_ACCESS 2863 sPages[i].accessing_thread = -1; 2864 #endif 2865 sFreePageQueue.Append(&sPages[i]); 2866 } 2867 2868 sUnreservedFreePages = sNumPages; 2869 2870 TRACE(("initialized table\n")); 2871 2872 // mark the ranges between usable physical memory unused 2873 addr_t previousEnd = 0; 2874 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 2875 addr_t base = args->physical_memory_range[i].start; 2876 addr_t size = args->physical_memory_range[i].size; 2877 if (base > previousEnd) { 2878 mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 2879 (base - previousEnd) / B_PAGE_SIZE, false); 2880 } 2881 previousEnd = base + size; 2882 } 2883 2884 // mark the allocated physical page ranges wired 2885 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 2886 
mark_page_range_in_use( 2887 args->physical_allocated_range[i].start / B_PAGE_SIZE, 2888 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 2889 } 2890 2891 // The target of actually free pages. This must be at least the system 2892 // reserve, but should be a few more pages, so we don't have to extract 2893 // a cached page with each allocation. 2894 sFreePagesTarget = VM_PAGE_RESERVE_USER 2895 + std::max((uint32)32, sNumPages / 1024); 2896 2897 // The target of free + cached and inactive pages. On low-memory machines 2898 // keep things tight. free + cached is the pool of immediately allocatable 2899 // pages. We want a few inactive pages, so when we're actually paging, we 2900 // have a reasonably large set of pages to work with. 2901 if (sUnreservedFreePages < 16 * 1024) { 2902 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 2903 sInactivePagesTarget = sFreePagesTarget / 3; 2904 } else { 2905 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 2906 sInactivePagesTarget = sFreePagesTarget / 2; 2907 } 2908 2909 TRACE(("vm_page_init: exit\n")); 2910 2911 return B_OK; 2912 } 2913 2914 2915 status_t 2916 vm_page_init_post_area(kernel_args *args) 2917 { 2918 void *dummy; 2919 2920 dummy = sPages; 2921 create_area("page structures", &dummy, B_EXACT_ADDRESS, 2922 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 2923 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2924 2925 add_debugger_command("page_stats", &dump_page_stats, 2926 "Dump statistics about page usage"); 2927 add_debugger_command_etc("page", &dump_page, 2928 "Dump page info", 2929 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 2930 "Prints information for the physical page. If neither \"-p\" nor\n" 2931 "\"-v\" are given, the provided address is interpreted as address of\n" 2932 "the vm_page data structure for the page in question. If \"-p\" is\n" 2933 "given, the address is the physical address of the page. If \"-v\" is\n" 2934 "given, the address is interpreted as virtual address in the current\n" 2935 "thread's address space and for the page it is mapped to (if any)\n" 2936 "information are printed. 
If \"-m\" is specified, the command will\n" 2937 "search all known address spaces for mappings to that page and print\n" 2938 "them.\n", 0); 2939 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 2940 add_debugger_command("find_page", &find_page, 2941 "Find out which queue a page is actually in"); 2942 2943 #ifdef TRACK_PAGE_USAGE_STATS 2944 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 2945 "Dumps statistics about page usage counts", 2946 "\n" 2947 "Dumps statistics about page usage counts.\n", 2948 B_KDEBUG_DONT_PARSE_ARGUMENTS); 2949 #endif 2950 2951 return B_OK; 2952 } 2953 2954 2955 status_t 2956 vm_page_init_post_thread(kernel_args *args) 2957 { 2958 new (&sFreePageCondition) ConditionVariable; 2959 sFreePageCondition.Publish(&sFreePageQueue, "free page"); 2960 2961 // create a kernel thread to clear out pages 2962 2963 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 2964 B_LOWEST_ACTIVE_PRIORITY, NULL); 2965 resume_thread(thread); 2966 2967 // start page writer 2968 2969 sPageWriterCondition.Init("page writer"); 2970 2971 thread = spawn_kernel_thread(&page_writer, "page writer", 2972 B_NORMAL_PRIORITY + 1, NULL); 2973 resume_thread(thread); 2974 2975 // start page daemon 2976 2977 sPageDaemonCondition.Init("page daemon"); 2978 2979 thread = spawn_kernel_thread(&page_daemon, "page daemon", 2980 B_NORMAL_PRIORITY, NULL); 2981 resume_thread(thread); 2982 2983 return B_OK; 2984 } 2985 2986 2987 status_t 2988 vm_mark_page_inuse(addr_t page) 2989 { 2990 return vm_mark_page_range_inuse(page, 1); 2991 } 2992 2993 2994 status_t 2995 vm_mark_page_range_inuse(addr_t startPage, addr_t length) 2996 { 2997 return mark_page_range_in_use(startPage, length, false); 2998 } 2999 3000 3001 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 3002 */ 3003 void 3004 vm_page_unreserve_pages(vm_page_reservation* reservation) 3005 { 3006 uint32 count = reservation->count; 3007 reservation->count = 0; 3008 3009 if (count == 0) 3010 return; 3011 3012 TA(UnreservePages(count)); 3013 3014 unreserve_pages(count); 3015 } 3016 3017 3018 /*! With this call, you can reserve a number of free pages in the system. 3019 They will only be handed out to someone who has actually reserved them. 3020 This call returns as soon as the number of requested pages has been 3021 reached. 3022 The caller must not hold any cache lock or the function might deadlock. 
3023 */
3024 void
3025 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3026 	int priority)
3027 {
3028 	reservation->count = count;
3029 
3030 	if (count == 0)
3031 		return;
3032 
3033 	TA(ReservePages(count));
3034 
3035 	reserve_pages(count, priority, false);
3036 }
3037 
3038 
3039 bool
3040 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3041 	int priority)
3042 {
3043 	if (count == 0) {
3044 		reservation->count = count;
3045 		return true;
3046 	}
3047 
3048 	uint32 remaining = reserve_pages(count, priority, true);
3049 	if (remaining == 0) {
3050 		TA(ReservePages(count));
3051 		reservation->count = count;
3052 		return true;
3053 	}
3054 
3055 	unreserve_pages(count - remaining);
3056 
3057 	return false;
3058 }
3059 
3060 
3061 vm_page *
3062 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3063 {
3064 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3065 	ASSERT(pageState != PAGE_STATE_FREE);
3066 	ASSERT(pageState != PAGE_STATE_CLEAR);
3067 
3068 	ASSERT(reservation->count > 0);
3069 	reservation->count--;
3070 
3071 	VMPageQueue* queue;
3072 	VMPageQueue* otherQueue;
3073 
3074 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3075 		queue = &sClearPageQueue;
3076 		otherQueue = &sFreePageQueue;
3077 	} else {
3078 		queue = &sFreePageQueue;
3079 		otherQueue = &sClearPageQueue;
3080 	}
3081 
3082 	TA(AllocatePage());
3083 
3084 	ReadLocker locker(sFreePageQueuesLock);
3085 
3086 	vm_page* page = queue->RemoveHeadUnlocked();
3087 	if (page == NULL) {
3088 		// if the primary queue was empty, grab the page from the
3089 		// secondary queue
3090 		page = otherQueue->RemoveHeadUnlocked();
3091 
3092 		if (page == NULL) {
3093 			// Unlikely, but possible: the page we have reserved has moved
3094 			// between the queues after we checked the first queue. Grab the
3095 			// write locker to make sure this doesn't happen again.
3096 			locker.Unlock();
3097 			WriteLocker writeLocker(sFreePageQueuesLock);
3098 
3099 			page = queue->RemoveHead();
3100 			if (page == NULL)
3101 				page = otherQueue->RemoveHead();
3102 
3103 			if (page == NULL) {
3104 				panic("Had reserved page, but there is none!");
3105 				return NULL;
3106 			}
3107 
3108 			// downgrade to read lock
3109 			locker.Lock();
3110 		}
3111 	}
3112 
3113 	if (page->CacheRef() != NULL)
3114 		panic("supposed to be free page %p has cache\n", page);
3115 
3116 	DEBUG_PAGE_ACCESS_START(page);
3117 
3118 	int oldPageState = page->State();
3119 	page->SetState(pageState);
3120 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3121 	page->usage_count = 0;
3122 	page->accessed = false;
3123 	page->modified = false;
3124 
3125 	locker.Unlock();
3126 
3127 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3128 		sPageQueues[pageState].AppendUnlocked(page);
3129 
3130 	// clear the page, if we had to take it from the free queue and a clear
3131 	// page was requested
3132 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3133 		clear_page(page);
3134 
3135 	return page;
3136 }
3137 
3138 
3139 static void
3140 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3141 	VMPageQueue::PageList& clearPages)
3142 {
3143 	while (vm_page* page = freePages.RemoveHead()) {
3144 		page->busy = false;
3145 		page->SetState(PAGE_STATE_FREE);
3146 		DEBUG_PAGE_ACCESS_END(page);
3147 		sFreePageQueue.PrependUnlocked(page);
3148 	}
3149 
3150 	while (vm_page* page = clearPages.RemoveHead()) {
3151 		page->busy = false;
3152 		page->SetState(PAGE_STATE_CLEAR);
3153 		DEBUG_PAGE_ACCESS_END(page);
3154 		sClearPageQueue.PrependUnlocked(page);
3155 	}
3156 
3157 }
3158 
3159 
3160 /*! Tries to allocate a contiguous run of \a length pages starting at
3161 	index \a start.
3162 
3163 	The caller must have write-locked the free/clear page queues. The function
3164 	will unlock regardless of whether it succeeds or fails.
3165 
3166 	If the function fails, it cleans up after itself, i.e. it will free all
3167 	pages it managed to allocate.
3168 
3169 	\param start The start index (into \c sPages) of the run.
3170 	\param length The number of pages to allocate.
3171 	\param flags Page allocation flags. Encodes the state the function shall
3172 		set the allocated pages to, whether the pages shall be marked busy
3173 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3174 		(VM_PAGE_ALLOC_CLEAR).
3175 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3176 		passed in locked state. Will be unlocked by the function.
3177 	\return The index of the first page that could not be allocated. \a length
3178 		is returned when the function was successful.
3179 */
3180 static page_num_t
3181 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3182 	WriteLocker& freeClearQueueLocker)
3183 {
3184 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3185 	ASSERT(pageState != PAGE_STATE_FREE);
3186 	ASSERT(pageState != PAGE_STATE_CLEAR);
3187 
3188 	TA(AllocatePageRun(length));
3189 
3190 	// Pull the free/clear pages out of their respective queues. Cached pages
3191 	// are allocated later.
3192 	page_num_t cachedPages = 0;
3193 	VMPageQueue::PageList freePages;
3194 	VMPageQueue::PageList clearPages;
3195 	page_num_t i = 0;
3196 	for (; i < length; i++) {
3197 		bool pageAllocated = true;
3198 		bool noPage = false;
3199 		vm_page& page = sPages[start + i];
3200 		switch (page.State()) {
3201 			case PAGE_STATE_CLEAR:
3202 				DEBUG_PAGE_ACCESS_START(&page);
3203 				sClearPageQueue.Remove(&page);
3204 				clearPages.Add(&page);
3205 				break;
3206 			case PAGE_STATE_FREE:
3207 				DEBUG_PAGE_ACCESS_START(&page);
3208 				sFreePageQueue.Remove(&page);
3209 				freePages.Add(&page);
3210 				break;
3211 			case PAGE_STATE_CACHED:
3212 				// We allocate cached pages later.
3213 				cachedPages++;
3214 				pageAllocated = false;
3215 				break;
3216 
3217 			default:
3218 				// Probably a page was cached when our caller checked. Now it's
3219 				// gone and we have to abort.
3220 noPage = true; 3221 break; 3222 } 3223 3224 if (noPage) 3225 break; 3226 3227 if (pageAllocated) { 3228 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3229 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3230 page.usage_count = 0; 3231 page.accessed = false; 3232 page.modified = false; 3233 } 3234 } 3235 3236 if (i < length) { 3237 // failed to allocate a page -- free all that we've got 3238 allocate_page_run_cleanup(freePages, clearPages); 3239 return i; 3240 } 3241 3242 freeClearQueueLocker.Unlock(); 3243 3244 if (cachedPages > 0) { 3245 // allocate the pages that weren't free but cached 3246 page_num_t freedCachedPages = 0; 3247 page_num_t nextIndex = start; 3248 vm_page* freePage = freePages.Head(); 3249 vm_page* clearPage = clearPages.Head(); 3250 while (cachedPages > 0) { 3251 // skip, if we've already got the page 3252 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) { 3253 freePage = freePages.GetNext(freePage); 3254 nextIndex++; 3255 continue; 3256 } 3257 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) { 3258 clearPage = clearPages.GetNext(clearPage); 3259 nextIndex++; 3260 continue; 3261 } 3262 3263 // free the page, if it is still cached 3264 vm_page& page = sPages[nextIndex]; 3265 if (!free_cached_page(&page, false)) 3266 break; 3267 3268 page.SetState(flags & VM_PAGE_ALLOC_STATE); 3269 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0; 3270 page.usage_count = 0; 3271 page.accessed = false; 3272 page.modified = false; 3273 3274 freePages.InsertBefore(freePage, &page); 3275 freedCachedPages++; 3276 cachedPages--; 3277 nextIndex++; 3278 } 3279 3280 // If we have freed cached pages, we need to balance things. 3281 if (freedCachedPages > 0) 3282 unreserve_pages(freedCachedPages); 3283 3284 if (nextIndex - start < length) { 3285 // failed to allocate all cached pages -- free all that we've got 3286 freeClearQueueLocker.Lock(); 3287 allocate_page_run_cleanup(freePages, clearPages); 3288 freeClearQueueLocker.Unlock(); 3289 3290 return nextIndex - start; 3291 } 3292 } 3293 3294 // clear pages, if requested 3295 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3296 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator(); 3297 vm_page* page = it.Next();) { 3298 clear_page(page); 3299 } 3300 } 3301 3302 // add pages to target queue 3303 if (pageState < PAGE_STATE_FIRST_UNQUEUED) { 3304 freePages.MoveFrom(&clearPages); 3305 sPageQueues[pageState].AppendUnlocked(freePages, length); 3306 } 3307 3308 // Note: We don't unreserve the pages since we pulled them out of the 3309 // free/clear queues without adjusting sUnreservedFreePages. 3310 3311 return length; 3312 } 3313 3314 3315 vm_page * 3316 vm_page_allocate_page_run(uint32 flags, addr_t base, size_t length, 3317 int priority) 3318 { 3319 uint32 start = base >> PAGE_SHIFT; 3320 3321 vm_page_reservation reservation; 3322 vm_page_reserve_pages(&reservation, length, priority); 3323 3324 WriteLocker freeClearQueueLocker(sFreePageQueuesLock); 3325 3326 // First we try to get a run with free pages only. If that fails, we also 3327 // consider cached pages. If there are only few free pages and many cached 3328 // ones, the odds are that we won't find enough contiguous ones, so we skip 3329 // the first iteration in this case. 3330 int32 freePages = sUnreservedFreePages; 3331 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 
0 : 1; 3332 3333 for (;;) { 3334 bool foundRun = true; 3335 if (start + length > sNumPages) { 3336 if (useCached == 0) { 3337 // The first iteration with free pages only was unsuccessful. 3338 // Try again also considering cached pages. 3339 useCached = 1; 3340 start = base >> PAGE_SHIFT; 3341 continue; 3342 } 3343 3344 dprintf("vm_page_allocate_page_run(): Failed to allocate run of " 3345 "length %" B_PRIuSIZE " in second iteration!", length); 3346 3347 freeClearQueueLocker.Unlock(); 3348 vm_page_unreserve_pages(&reservation); 3349 return NULL; 3350 } 3351 3352 uint32 i; 3353 for (i = 0; i < length; i++) { 3354 uint32 pageState = sPages[start + i].State(); 3355 if (pageState != PAGE_STATE_FREE 3356 && pageState != PAGE_STATE_CLEAR 3357 && (pageState != PAGE_STATE_CACHED || useCached == 0)) { 3358 foundRun = false; 3359 break; 3360 } 3361 } 3362 3363 if (foundRun) { 3364 i = allocate_page_run(start, length, flags, freeClearQueueLocker); 3365 if (i == length) 3366 return &sPages[start]; 3367 3368 // apparently a cached page couldn't be allocated -- skip it and 3369 // continue 3370 freeClearQueueLocker.Lock(); 3371 } 3372 3373 start += i + 1; 3374 } 3375 } 3376 3377 3378 vm_page * 3379 vm_page_at_index(int32 index) 3380 { 3381 return &sPages[index]; 3382 } 3383 3384 3385 vm_page * 3386 vm_lookup_page(addr_t pageNumber) 3387 { 3388 if (pageNumber < sPhysicalPageOffset) 3389 return NULL; 3390 3391 pageNumber -= sPhysicalPageOffset; 3392 if (pageNumber >= sNumPages) 3393 return NULL; 3394 3395 return &sPages[pageNumber]; 3396 } 3397 3398 3399 bool 3400 vm_page_is_dummy(struct vm_page *page) 3401 { 3402 return page < sPages || page >= sPages + sNumPages; 3403 } 3404 3405 3406 /*! Free the page that belonged to a certain cache. 3407 You can use vm_page_set_state() manually if you prefer, but only 3408 if the page does not equal PAGE_STATE_MODIFIED. 3409 */ 3410 void 3411 vm_page_free(VMCache *cache, vm_page *page) 3412 { 3413 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3414 && page->State() != PAGE_STATE_CLEAR); 3415 3416 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary) 3417 atomic_add(&sModifiedTemporaryPages, -1); 3418 3419 free_page(page, false); 3420 } 3421 3422 3423 void 3424 vm_page_set_state(vm_page *page, int pageState) 3425 { 3426 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE 3427 && page->State() != PAGE_STATE_CLEAR); 3428 3429 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) 3430 free_page(page, pageState == PAGE_STATE_CLEAR); 3431 else 3432 set_page_state(page, pageState); 3433 } 3434 3435 3436 /*! Moves a page to either the tail of the head of its current queue, 3437 depending on \a tail. 3438 The page must have a cache and the cache must be locked! 3439 */ 3440 void 3441 vm_page_requeue(struct vm_page *page, bool tail) 3442 { 3443 PAGE_ASSERT(page, page->Cache() != NULL); 3444 page->Cache()->AssertLocked(); 3445 // DEBUG_PAGE_ACCESS_CHECK(page); 3446 // TODO: This assertion cannot be satisfied by idle_scan_active_pages() 3447 // when it requeues busy pages. The reason is that vm_soft_fault() 3448 // (respectively fault_get_page()) and the file cache keep newly 3449 // allocated pages accessed while they are reading them from disk. It 3450 // would probably be better to change that code and reenable this 3451 // check. 
3452 3453 VMPageQueue *queue = NULL; 3454 3455 switch (page->State()) { 3456 case PAGE_STATE_ACTIVE: 3457 queue = &sActivePageQueue; 3458 break; 3459 case PAGE_STATE_INACTIVE: 3460 queue = &sInactivePageQueue; 3461 break; 3462 case PAGE_STATE_MODIFIED: 3463 queue = &sModifiedPageQueue; 3464 break; 3465 case PAGE_STATE_CACHED: 3466 queue = &sCachedPageQueue; 3467 break; 3468 case PAGE_STATE_FREE: 3469 case PAGE_STATE_CLEAR: 3470 panic("vm_page_requeue() called for free/clear page %p", page); 3471 return; 3472 case PAGE_STATE_WIRED: 3473 case PAGE_STATE_UNUSED: 3474 return; 3475 default: 3476 panic("vm_page_touch: vm_page %p in invalid state %d\n", 3477 page, page->State()); 3478 break; 3479 } 3480 3481 queue->RequeueUnlocked(page, tail); 3482 } 3483 3484 3485 size_t 3486 vm_page_num_pages(void) 3487 { 3488 return sNumPages; 3489 } 3490 3491 3492 /*! There is a subtle distinction between the page counts returned by 3493 this function and vm_page_num_free_pages(): 3494 The latter returns the number of pages that are completely uncommitted, 3495 whereas this one returns the number of pages that are available for 3496 use by being reclaimed as well (IOW it factors in things like cache pages 3497 as available). 3498 */ 3499 size_t 3500 vm_page_num_available_pages(void) 3501 { 3502 return vm_available_memory() / B_PAGE_SIZE; 3503 } 3504 3505 3506 size_t 3507 vm_page_num_free_pages(void) 3508 { 3509 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 3510 return count > 0 ? count : 0; 3511 } 3512 3513 3514 size_t 3515 vm_page_num_unused_pages(void) 3516 { 3517 int32 count = sUnreservedFreePages; 3518 return count > 0 ? count : 0; 3519 } 3520 3521 3522 void 3523 vm_page_get_stats(system_info *info) 3524 { 3525 // Get free pages count -- not really exact, since we don't know how many 3526 // of the reserved pages have already been allocated, but good citizens 3527 // unreserve chunk-wise as they are allocating the pages, if they have 3528 // reserved a larger quantity. 3529 int32 free = sUnreservedFreePages; 3530 if (free < 0) 3531 free = 0; 3532 3533 // The pages used for the block cache buffers. Those should not be counted 3534 // as used but as cached pages. 3535 // TODO: We should subtract the blocks that are in use ATM, since those 3536 // can't really be freed in a low memory situation. 3537 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE; 3538 3539 info->max_pages = sNumPages; 3540 info->used_pages = gMappedPagesCount - blockCachePages; 3541 info->cached_pages = sNumPages >= (uint32)free + info->used_pages 3542 ? sNumPages - free - info->used_pages : 0; 3543 info->page_faults = vm_num_page_faults(); 3544 3545 // TODO: We don't consider pages used for page directories/tables yet. 3546 } 3547