1 /* 2 * Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <signal.h> 12 #include <string.h> 13 #include <stdlib.h> 14 15 #include <algorithm> 16 17 #include <KernelExport.h> 18 #include <OS.h> 19 20 #include <AutoDeleter.h> 21 22 #include <arch/cpu.h> 23 #include <arch/vm_translation_map.h> 24 #include <block_cache.h> 25 #include <boot/kernel_args.h> 26 #include <condition_variable.h> 27 #include <heap.h> 28 #include <kernel.h> 29 #include <low_resource_manager.h> 30 #include <thread.h> 31 #include <tracing.h> 32 #include <util/AutoLock.h> 33 #include <vfs.h> 34 #include <vm/vm.h> 35 #include <vm/vm_priv.h> 36 #include <vm/vm_page.h> 37 #include <vm/VMAddressSpace.h> 38 #include <vm/VMArea.h> 39 #include <vm/VMCache.h> 40 41 #include "IORequest.h" 42 #include "PageCacheLocker.h" 43 #include "VMAnonymousCache.h" 44 #include "VMPageQueue.h" 45 46 47 //#define TRACE_VM_PAGE 48 #ifdef TRACE_VM_PAGE 49 # define TRACE(x) dprintf x 50 #else 51 # define TRACE(x) ; 52 #endif 53 54 //#define TRACE_VM_DAEMONS 55 #ifdef TRACE_VM_DAEMONS 56 #define TRACE_DAEMON(x...) dprintf(x) 57 #else 58 #define TRACE_DAEMON(x...) do {} while (false) 59 #endif 60 61 //#define TRACK_PAGE_USAGE_STATS 1 62 63 #define PAGE_ASSERT(page, condition) \ 64 ASSERT_PRINT((condition), "page: %p", (page)) 65 66 #define SCRUB_SIZE 16 67 // this many pages will be cleared at once in the page scrubber thread 68 69 #define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY 70 // maximum I/O priority of the page writer 71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000 72 // the maximum I/O priority shall be reached when this many pages need to 73 // be written 74 75 76 // The page reserve an allocation of the certain priority must not touch. 77 static const size_t kPageReserveForPriority[] = { 78 VM_PAGE_RESERVE_USER, // user 79 VM_PAGE_RESERVE_SYSTEM, // system 80 0 // VIP 81 }; 82 83 // Minimum number of free pages the page daemon will try to achieve. 84 static uint32 sFreePagesTarget; 85 static uint32 sFreeOrCachedPagesTarget; 86 static uint32 sInactivePagesTarget; 87 88 // Wait interval between page daemon runs. 89 static const bigtime_t kIdleScanWaitInterval = 1000000LL; // 1 sec 90 static const bigtime_t kBusyScanWaitInterval = 500000LL; // 0.5 sec 91 92 // Number of idle runs after which we want to have processed the full active 93 // queue. 94 static const uint32 kIdleRunsForFullQueue = 20; 95 96 // Maximum limit for the vm_page::usage_count. 97 static const int32 kPageUsageMax = 64; 98 // vm_page::usage_count buff an accessed page receives in a scan. 99 static const int32 kPageUsageAdvance = 3; 100 // vm_page::usage_count debuff an unaccessed page receives in a scan. 
101 static const int32 kPageUsageDecline = 1; 102 103 int32 gMappedPagesCount; 104 105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT]; 106 107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE]; 108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR]; 109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED]; 110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE]; 111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE]; 112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED]; 113 114 static vm_page *sPages; 115 static addr_t sPhysicalPageOffset; 116 static size_t sNumPages; 117 static vint32 sUnreservedFreePages; 118 static vint32 sUnsatisfiedPageReservations; 119 static vint32 sModifiedTemporaryPages; 120 121 static ConditionVariable sFreePageCondition; 122 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit"); 123 124 static rw_lock sFreePageQueuesLock 125 = RW_LOCK_INITIALIZER("free/clear page queues"); 126 127 #ifdef TRACK_PAGE_USAGE_STATS 128 static page_num_t sPageUsageArrays[512]; 129 static page_num_t* sPageUsage = sPageUsageArrays; 130 static page_num_t sPageUsagePageCount; 131 static page_num_t* sNextPageUsage = sPageUsageArrays + 256; 132 static page_num_t sNextPageUsagePageCount; 133 #endif 134 135 136 struct page_stats { 137 int32 totalFreePages; 138 int32 unsatisfiedReservations; 139 int32 cachedPages; 140 }; 141 142 143 struct PageReservationWaiter 144 : public DoublyLinkedListLinkImpl<PageReservationWaiter> { 145 struct thread* thread; 146 uint32 dontTouch; // reserve not to touch 147 uint32 missing; // pages missing for the reservation 148 int32 threadPriority; 149 150 bool operator<(const PageReservationWaiter& other) const 151 { 152 // Implies an order by descending VM priority (ascending dontTouch) 153 // and (secondarily) descending thread priority. 
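		// (This ordering is presumably what keeps sPageReservationWaiters
		// sorted, so that e.g. a VIP reservation waiter (dontTouch == 0, cf.
		// kPageReserveForPriority) is satisfied and woken before system and
		// user reservation waiters.)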
154 if (dontTouch != other.dontTouch) 155 return dontTouch < other.dontTouch; 156 return threadPriority > other.threadPriority; 157 } 158 }; 159 160 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList; 161 static PageReservationWaiterList sPageReservationWaiters; 162 163 164 struct DaemonCondition { 165 void Init(const char* name) 166 { 167 mutex_init(&fLock, "daemon condition"); 168 fCondition.Init(this, name); 169 fActivated = false; 170 } 171 172 bool Lock() 173 { 174 return mutex_lock(&fLock) == B_OK; 175 } 176 177 void Unlock() 178 { 179 mutex_unlock(&fLock); 180 } 181 182 bool Wait(bigtime_t timeout, bool clearActivated) 183 { 184 MutexLocker locker(fLock); 185 if (clearActivated) 186 fActivated = false; 187 else if (fActivated) 188 return true; 189 190 ConditionVariableEntry entry; 191 fCondition.Add(&entry); 192 193 locker.Unlock(); 194 195 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK; 196 } 197 198 void WakeUp() 199 { 200 if (fActivated) 201 return; 202 203 MutexLocker locker(fLock); 204 fActivated = true; 205 fCondition.NotifyOne(); 206 } 207 208 void ClearActivated() 209 { 210 MutexLocker locker(fLock); 211 fActivated = false; 212 } 213 214 private: 215 mutex fLock; 216 ConditionVariable fCondition; 217 bool fActivated; 218 }; 219 220 221 static DaemonCondition sPageWriterCondition; 222 static DaemonCondition sPageDaemonCondition; 223 224 225 #if PAGE_ALLOCATION_TRACING 226 227 namespace PageAllocationTracing { 228 229 class ReservePages : public AbstractTraceEntry { 230 public: 231 ReservePages(uint32 count) 232 : 233 fCount(count) 234 { 235 Initialized(); 236 } 237 238 virtual void AddDump(TraceOutput& out) 239 { 240 out.Print("page reserve: %lu", fCount); 241 } 242 243 private: 244 uint32 fCount; 245 }; 246 247 248 class UnreservePages : public AbstractTraceEntry { 249 public: 250 UnreservePages(uint32 count) 251 : 252 fCount(count) 253 { 254 Initialized(); 255 } 256 257 virtual void AddDump(TraceOutput& out) 258 { 259 out.Print("page unreserve: %lu", fCount); 260 } 261 262 private: 263 uint32 fCount; 264 }; 265 266 267 class AllocatePage : public AbstractTraceEntry { 268 public: 269 AllocatePage() 270 { 271 Initialized(); 272 } 273 274 virtual void AddDump(TraceOutput& out) 275 { 276 out.Print("page alloc"); 277 } 278 }; 279 280 281 class AllocatePageRun : public AbstractTraceEntry { 282 public: 283 AllocatePageRun(uint32 length) 284 : 285 fLength(length) 286 { 287 Initialized(); 288 } 289 290 virtual void AddDump(TraceOutput& out) 291 { 292 out.Print("page alloc run: length: %ld", fLength); 293 } 294 295 private: 296 uint32 fLength; 297 }; 298 299 300 class FreePage : public AbstractTraceEntry { 301 public: 302 FreePage() 303 { 304 Initialized(); 305 } 306 307 virtual void AddDump(TraceOutput& out) 308 { 309 out.Print("page free"); 310 } 311 }; 312 313 314 class ScrubbingPages : public AbstractTraceEntry { 315 public: 316 ScrubbingPages(uint32 count) 317 : 318 fCount(count) 319 { 320 Initialized(); 321 } 322 323 virtual void AddDump(TraceOutput& out) 324 { 325 out.Print("page scrubbing: %lu", fCount); 326 } 327 328 private: 329 uint32 fCount; 330 }; 331 332 333 class ScrubbedPages : public AbstractTraceEntry { 334 public: 335 ScrubbedPages(uint32 count) 336 : 337 fCount(count) 338 { 339 Initialized(); 340 } 341 342 virtual void AddDump(TraceOutput& out) 343 { 344 out.Print("page scrubbed: %lu", fCount); 345 } 346 347 private: 348 uint32 fCount; 349 }; 350 351 352 class StolenPage : public AbstractTraceEntry { 353 public: 354 StolenPage() 355 
{ 356 Initialized(); 357 } 358 359 virtual void AddDump(TraceOutput& out) 360 { 361 out.Print("page stolen"); 362 } 363 }; 364 365 } // namespace PageAllocationTracing 366 367 # define TA(x) new(std::nothrow) PageAllocationTracing::x 368 369 #else 370 # define TA(x) 371 #endif // PAGE_ALLOCATION_TRACING 372 373 374 #if PAGE_DAEMON_TRACING 375 376 namespace PageDaemonTracing { 377 378 class ActivatePage : public AbstractTraceEntry { 379 public: 380 ActivatePage(vm_page* page) 381 : 382 fCache(page->cache), 383 fPage(page) 384 { 385 Initialized(); 386 } 387 388 virtual void AddDump(TraceOutput& out) 389 { 390 out.Print("page activated: %p, cache: %p", fPage, fCache); 391 } 392 393 private: 394 VMCache* fCache; 395 vm_page* fPage; 396 }; 397 398 399 class DeactivatePage : public AbstractTraceEntry { 400 public: 401 DeactivatePage(vm_page* page) 402 : 403 fCache(page->cache), 404 fPage(page) 405 { 406 Initialized(); 407 } 408 409 virtual void AddDump(TraceOutput& out) 410 { 411 out.Print("page deactivated: %p, cache: %p", fPage, fCache); 412 } 413 414 private: 415 VMCache* fCache; 416 vm_page* fPage; 417 }; 418 419 420 class FreedPageSwap : public AbstractTraceEntry { 421 public: 422 FreedPageSwap(vm_page* page) 423 : 424 fCache(page->cache), 425 fPage(page) 426 { 427 Initialized(); 428 } 429 430 virtual void AddDump(TraceOutput& out) 431 { 432 out.Print("page swap freed: %p, cache: %p", fPage, fCache); 433 } 434 435 private: 436 VMCache* fCache; 437 vm_page* fPage; 438 }; 439 440 } // namespace PageDaemonTracing 441 442 # define TD(x) new(std::nothrow) PageDaemonTracing::x 443 444 #else 445 # define TD(x) 446 #endif // PAGE_DAEMON_TRACING 447 448 449 #if PAGE_WRITER_TRACING 450 451 namespace PageWriterTracing { 452 453 class WritePage : public AbstractTraceEntry { 454 public: 455 WritePage(vm_page* page) 456 : 457 fCache(page->Cache()), 458 fPage(page) 459 { 460 Initialized(); 461 } 462 463 virtual void AddDump(TraceOutput& out) 464 { 465 out.Print("page write: %p, cache: %p", fPage, fCache); 466 } 467 468 private: 469 VMCache* fCache; 470 vm_page* fPage; 471 }; 472 473 } // namespace PageWriterTracing 474 475 # define TPW(x) new(std::nothrow) PageWriterTracing::x 476 477 #else 478 # define TPW(x) 479 #endif // PAGE_WRITER_TRACING 480 481 482 #if PAGE_STATE_TRACING 483 484 namespace PageStateTracing { 485 486 class SetPageState : public AbstractTraceEntry { 487 public: 488 SetPageState(vm_page* page, uint8 newState) 489 : 490 fPage(page), 491 fOldState(page->State()), 492 fNewState(newState), 493 fBusy(page->busy), 494 fWired(page->wired_count > 0), 495 fMapped(!page->mappings.IsEmpty()), 496 fAccessed(page->accessed), 497 fModified(page->modified) 498 { 499 #if PAGE_STATE_TRACING_STACK_TRACE 500 fStackTrace = capture_tracing_stack_trace( 501 PAGE_STATE_TRACING_STACK_TRACE, 0, true); 502 // Don't capture userland stack trace to avoid potential 503 // deadlocks. 504 #endif 505 Initialized(); 506 } 507 508 #if PAGE_STATE_TRACING_STACK_TRACE 509 virtual void DumpStackTrace(TraceOutput& out) 510 { 511 out.PrintStackTrace(fStackTrace); 512 } 513 #endif 514 515 virtual void AddDump(TraceOutput& out) 516 { 517 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage, 518 fBusy ? 'b' : '-', 519 fWired ? 'w' : '-', 520 fMapped ? 'm' : '-', 521 fAccessed ? 'a' : '-', 522 fModified ? 
'm' : '-', 523 page_state_to_string(fOldState), 524 page_state_to_string(fNewState)); 525 } 526 527 private: 528 vm_page* fPage; 529 #if PAGE_STATE_TRACING_STACK_TRACE 530 tracing_stack_trace* fStackTrace; 531 #endif 532 uint8 fOldState; 533 uint8 fNewState; 534 bool fBusy : 1; 535 bool fWired : 1; 536 bool fMapped : 1; 537 bool fAccessed : 1; 538 bool fModified : 1; 539 }; 540 541 } // namespace PageStateTracing 542 543 # define TPS(x) new(std::nothrow) PageStateTracing::x 544 545 #else 546 # define TPS(x) 547 #endif // PAGE_STATE_TRACING 548 549 550 static int 551 find_page(int argc, char **argv) 552 { 553 struct vm_page *page; 554 addr_t address; 555 int32 index = 1; 556 int i; 557 558 struct { 559 const char* name; 560 VMPageQueue* queue; 561 } pageQueueInfos[] = { 562 { "free", &sFreePageQueue }, 563 { "clear", &sClearPageQueue }, 564 { "modified", &sModifiedPageQueue }, 565 { "active", &sActivePageQueue }, 566 { "inactive", &sInactivePageQueue }, 567 { "cached", &sCachedPageQueue }, 568 { NULL, NULL } 569 }; 570 571 if (argc < 2 572 || strlen(argv[index]) <= 2 573 || argv[index][0] != '0' 574 || argv[index][1] != 'x') { 575 kprintf("usage: find_page <address>\n"); 576 return 0; 577 } 578 579 address = strtoul(argv[index], NULL, 0); 580 page = (vm_page*)address; 581 582 for (i = 0; pageQueueInfos[i].name; i++) { 583 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator(); 584 while (vm_page* p = it.Next()) { 585 if (p == page) { 586 kprintf("found page %p in queue %p (%s)\n", page, 587 pageQueueInfos[i].queue, pageQueueInfos[i].name); 588 return 0; 589 } 590 } 591 } 592 593 kprintf("page %p isn't in any queue\n", page); 594 595 return 0; 596 } 597 598 599 const char * 600 page_state_to_string(int state) 601 { 602 switch(state) { 603 case PAGE_STATE_ACTIVE: 604 return "active"; 605 case PAGE_STATE_INACTIVE: 606 return "inactive"; 607 case PAGE_STATE_MODIFIED: 608 return "modified"; 609 case PAGE_STATE_CACHED: 610 return "cached"; 611 case PAGE_STATE_FREE: 612 return "free"; 613 case PAGE_STATE_CLEAR: 614 return "clear"; 615 case PAGE_STATE_WIRED: 616 return "wired"; 617 case PAGE_STATE_UNUSED: 618 return "unused"; 619 default: 620 return "unknown"; 621 } 622 } 623 624 625 static int 626 dump_page(int argc, char **argv) 627 { 628 bool addressIsPointer = true; 629 bool physical = false; 630 bool searchMappings = false; 631 int32 index = 1; 632 633 while (index < argc) { 634 if (argv[index][0] != '-') 635 break; 636 637 if (!strcmp(argv[index], "-p")) { 638 addressIsPointer = false; 639 physical = true; 640 } else if (!strcmp(argv[index], "-v")) { 641 addressIsPointer = false; 642 } else if (!strcmp(argv[index], "-m")) { 643 searchMappings = true; 644 } else { 645 print_debugger_command_usage(argv[0]); 646 return 0; 647 } 648 649 index++; 650 } 651 652 if (index + 1 != argc) { 653 print_debugger_command_usage(argv[0]); 654 return 0; 655 } 656 657 uint64 value; 658 if (!evaluate_debug_expression(argv[index], &value, false)) 659 return 0; 660 661 addr_t pageAddress = (addr_t)value; 662 struct vm_page* page; 663 664 if (addressIsPointer) { 665 page = (struct vm_page *)pageAddress; 666 } else { 667 if (!physical) { 668 VMAddressSpace *addressSpace = VMAddressSpace::Kernel(); 669 670 if (debug_get_debugged_thread()->team->address_space != NULL) 671 addressSpace = debug_get_debugged_thread()->team->address_space; 672 673 uint32 flags = 0; 674 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress, 675 &pageAddress, &flags) != B_OK 676 || (flags & PAGE_PRESENT) == 0) { 677 
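				// The query failed or the page isn't currently mapped; there
				// is no physical page to look up.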
kprintf("Virtual address not mapped to a physical page in this " 678 "address space.\n"); 679 return 0; 680 } 681 } 682 683 page = vm_lookup_page(pageAddress / B_PAGE_SIZE); 684 } 685 686 kprintf("PAGE: %p\n", page); 687 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next, 688 page->queue_link.previous); 689 kprintf("physical_number: %#lx\n", page->physical_page_number); 690 kprintf("cache: %p\n", page->Cache()); 691 kprintf("cache_offset: %ld\n", page->cache_offset); 692 kprintf("cache_next: %p\n", page->cache_next); 693 kprintf("state: %s\n", page_state_to_string(page->State())); 694 kprintf("wired_count: %d\n", page->wired_count); 695 kprintf("usage_count: %d\n", page->usage_count); 696 kprintf("busy: %d\n", page->busy); 697 kprintf("busy_writing: %d\n", page->busy_writing); 698 kprintf("accessed: %d\n", page->accessed); 699 kprintf("modified: %d\n", page->modified); 700 #if DEBUG_PAGE_QUEUE 701 kprintf("queue: %p\n", page->queue); 702 #endif 703 #if DEBUG_PAGE_ACCESS 704 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread); 705 #endif 706 kprintf("area mappings:\n"); 707 708 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 709 vm_page_mapping *mapping; 710 while ((mapping = iterator.Next()) != NULL) { 711 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id); 712 mapping = mapping->page_link.next; 713 } 714 715 if (searchMappings) { 716 kprintf("all mappings:\n"); 717 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 718 while (addressSpace != NULL) { 719 size_t pageCount = addressSpace->Size() / B_PAGE_SIZE; 720 for (addr_t address = addressSpace->Base(); pageCount != 0; 721 address += B_PAGE_SIZE, pageCount--) { 722 addr_t physicalAddress; 723 uint32 flags = 0; 724 if (addressSpace->TranslationMap()->QueryInterrupt(address, 725 &physicalAddress, &flags) == B_OK 726 && (flags & PAGE_PRESENT) != 0 727 && physicalAddress / B_PAGE_SIZE 728 == page->physical_page_number) { 729 VMArea* area = addressSpace->LookupArea(address); 730 kprintf(" aspace %ld, area %ld: %#" B_PRIxADDR 731 " (%c%c%s%s)\n", addressSpace->ID(), 732 area != NULL ? area->id : -1, address, 733 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-', 734 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-', 735 (flags & PAGE_MODIFIED) != 0 ? " modified" : "", 736 (flags & PAGE_ACCESSED) != 0 ? 
" accessed" : ""); 737 } 738 } 739 addressSpace = VMAddressSpace::DebugNext(addressSpace); 740 } 741 } 742 743 set_debug_variable("_cache", (addr_t)page->Cache()); 744 #if DEBUG_PAGE_ACCESS 745 set_debug_variable("_accessor", page->accessing_thread); 746 #endif 747 748 return 0; 749 } 750 751 752 static int 753 dump_page_queue(int argc, char **argv) 754 { 755 struct VMPageQueue *queue; 756 757 if (argc < 2) { 758 kprintf("usage: page_queue <address/name> [list]\n"); 759 return 0; 760 } 761 762 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 763 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16); 764 if (!strcmp(argv[1], "free")) 765 queue = &sFreePageQueue; 766 else if (!strcmp(argv[1], "clear")) 767 queue = &sClearPageQueue; 768 else if (!strcmp(argv[1], "modified")) 769 queue = &sModifiedPageQueue; 770 else if (!strcmp(argv[1], "active")) 771 queue = &sActivePageQueue; 772 else if (!strcmp(argv[1], "inactive")) 773 queue = &sInactivePageQueue; 774 else if (!strcmp(argv[1], "cached")) 775 queue = &sCachedPageQueue; 776 else { 777 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 778 return 0; 779 } 780 781 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %ld\n", 782 queue, queue->Head(), queue->Tail(), queue->Count()); 783 784 if (argc == 3) { 785 struct vm_page *page = queue->Head(); 786 const char *type = "none"; 787 int i; 788 789 if (page->Cache() != NULL) { 790 switch (page->Cache()->type) { 791 case CACHE_TYPE_RAM: 792 type = "RAM"; 793 break; 794 case CACHE_TYPE_DEVICE: 795 type = "device"; 796 break; 797 case CACHE_TYPE_VNODE: 798 type = "vnode"; 799 break; 800 case CACHE_TYPE_NULL: 801 type = "null"; 802 break; 803 default: 804 type = "???"; 805 break; 806 } 807 } 808 809 kprintf("page cache type state wired usage\n"); 810 for (i = 0; page; i++, page = queue->Next(page)) { 811 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(), 812 type, page_state_to_string(page->State()), 813 page->wired_count, page->usage_count); 814 } 815 } 816 return 0; 817 } 818 819 820 static int 821 dump_page_stats(int argc, char **argv) 822 { 823 page_num_t swappableModified = 0; 824 page_num_t swappableModifiedInactive = 0; 825 size_t counter[8]; 826 size_t busyCounter[8]; 827 addr_t i; 828 829 memset(counter, 0, sizeof(counter)); 830 memset(busyCounter, 0, sizeof(busyCounter)); 831 832 for (i = 0; i < sNumPages; i++) { 833 if (sPages[i].State() > 7) 834 panic("page %li at %p has invalid state!\n", i, &sPages[i]); 835 836 counter[sPages[i].State()]++; 837 if (sPages[i].busy) 838 busyCounter[sPages[i].State()]++; 839 840 if (sPages[i].State() == PAGE_STATE_MODIFIED 841 && sPages[i].Cache() != NULL 842 && sPages[i].Cache()->temporary && sPages[i].wired_count == 0) { 843 swappableModified++; 844 if (sPages[i].usage_count == 0) 845 swappableModifiedInactive++; 846 } 847 } 848 849 kprintf("page stats:\n"); 850 kprintf("total: %lu\n", sNumPages); 851 852 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 853 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]); 854 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 855 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]); 856 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 857 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]); 858 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 859 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]); 860 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 861 
counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]); 862 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n", 863 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]); 864 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]); 865 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]); 866 867 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages); 868 kprintf("unsatisfied page reservations: %" B_PRId32 "\n", 869 sUnsatisfiedPageReservations); 870 kprintf("mapped pages: %lu\n", gMappedPagesCount); 871 872 kprintf("waiting threads:\n"); 873 for (PageReservationWaiterList::Iterator it 874 = sPageReservationWaiters.GetIterator(); 875 PageReservationWaiter* waiter = it.Next();) { 876 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32 877 ", don't touch: %6" B_PRIu32 "\n", waiter->thread->id, 878 waiter->missing, waiter->dontTouch); 879 } 880 881 kprintf("\nfree queue: %p, count = %ld\n", &sFreePageQueue, 882 sFreePageQueue.Count()); 883 kprintf("clear queue: %p, count = %ld\n", &sClearPageQueue, 884 sClearPageQueue.Count()); 885 kprintf("modified queue: %p, count = %ld (%ld temporary, %lu swappable, " 886 "inactive: %lu)\n", &sModifiedPageQueue, sModifiedPageQueue.Count(), 887 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 888 kprintf("active queue: %p, count = %ld\n", &sActivePageQueue, 889 sActivePageQueue.Count()); 890 kprintf("inactive queue: %p, count = %ld\n", &sInactivePageQueue, 891 sInactivePageQueue.Count()); 892 kprintf("cached queue: %p, count = %ld\n", &sCachedPageQueue, 893 sCachedPageQueue.Count()); 894 return 0; 895 } 896 897 898 #ifdef TRACK_PAGE_USAGE_STATS 899 900 static void 901 track_page_usage(vm_page* page) 902 { 903 if (page->wired_count == 0) { 904 sNextPageUsage[(int32)page->usage_count + 128]++; 905 sNextPageUsagePageCount++; 906 } 907 } 908 909 910 static void 911 update_page_usage_stats() 912 { 913 std::swap(sPageUsage, sNextPageUsage); 914 sPageUsagePageCount = sNextPageUsagePageCount; 915 916 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256); 917 sNextPageUsagePageCount = 0; 918 919 // compute average 920 if (sPageUsagePageCount > 0) { 921 int64 sum = 0; 922 for (int32 i = 0; i < 256; i++) 923 sum += (int64)sPageUsage[i] * (i - 128); 924 925 TRACE_DAEMON("average page usage: %f (%lu pages)\n", 926 (float)sum / sPageUsagePageCount, sPageUsagePageCount); 927 } 928 } 929 930 931 static int 932 dump_page_usage_stats(int argc, char** argv) 933 { 934 kprintf("distribution of page usage counts (%lu pages):", 935 sPageUsagePageCount); 936 937 int64 sum = 0; 938 for (int32 i = 0; i < 256; i++) { 939 if (i % 8 == 0) 940 kprintf("\n%4ld:", i - 128); 941 942 int64 count = sPageUsage[i]; 943 sum += count * (i - 128); 944 945 kprintf(" %9llu", count); 946 } 947 948 kprintf("\n\n"); 949 950 kprintf("average usage count: %f\n", 951 sPageUsagePageCount > 0 ? 
		(float)sum / sPageUsagePageCount : 0);

	return 0;
}

#endif	// TRACK_PAGE_USAGE_STATS


// #pragma mark - vm_page


inline void
vm_page::InitState(uint8 newState)
{
	state = newState;
}


inline void
vm_page::SetState(uint8 newState)
{
	TPS(SetPageState(this, newState));

	state = newState;
}


// #pragma mark -


static void
get_page_stats(page_stats& _pageStats)
{
	_pageStats.totalFreePages = sUnreservedFreePages;
	_pageStats.cachedPages = sCachedPageQueue.Count();
	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
		// TODO: We don't get an actual snapshot here!
}


static bool
do_active_paging(const page_stats& pageStats)
{
	return pageStats.totalFreePages + pageStats.cachedPages
		< pageStats.unsatisfiedReservations
			+ (int32)sFreeOrCachedPagesTarget;
}


/*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
	\a count. Doesn't touch the last \a dontTouch pages of
	\c sUnreservedFreePages, though.
	\return The number of actually reserved pages.
*/
static uint32
reserve_some_pages(uint32 count, uint32 dontTouch)
{
	while (true) {
		int32 freePages = sUnreservedFreePages;
		if (freePages <= (int32)dontTouch)
			return 0;

		int32 toReserve = std::min(count, freePages - dontTouch);
		if (atomic_test_and_set(&sUnreservedFreePages,
				freePages - toReserve, freePages)
			== freePages) {
			return toReserve;
		}

		// the count changed in the meantime -- retry
	}
}


static void
wake_up_page_reservation_waiters()
{
	MutexLocker pageDeficitLocker(sPageDeficitLock);

	// TODO: If this is a low priority thread, we might want to disable
	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
	// high priority threads will be kept waiting while a medium priority
	// thread prevents us from running.
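	// The waiter list is kept sorted by PageReservationWaiter::operator<, so
	// we always try to satisfy the most important reservation first and stop
	// as soon as one waiter cannot be satisfied completely.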
1034 1035 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) { 1036 int32 reserved = reserve_some_pages(waiter->missing, 1037 waiter->dontTouch); 1038 if (reserved == 0) 1039 return; 1040 1041 atomic_add(&sUnsatisfiedPageReservations, -reserved); 1042 waiter->missing -= reserved; 1043 1044 if (waiter->missing > 0) 1045 return; 1046 1047 sPageReservationWaiters.Remove(waiter); 1048 1049 InterruptsSpinLocker threadLocker(gThreadSpinlock); 1050 thread_unblock_locked(waiter->thread, B_OK); 1051 } 1052 } 1053 1054 1055 static inline void 1056 unreserve_pages(uint32 count) 1057 { 1058 atomic_add(&sUnreservedFreePages, count); 1059 if (sUnsatisfiedPageReservations != 0) 1060 wake_up_page_reservation_waiters(); 1061 } 1062 1063 1064 static void 1065 free_page(vm_page* page, bool clear) 1066 { 1067 DEBUG_PAGE_ACCESS_CHECK(page); 1068 1069 PAGE_ASSERT(page, !page->IsMapped()); 1070 1071 VMPageQueue* fromQueue; 1072 1073 switch (page->State()) { 1074 case PAGE_STATE_ACTIVE: 1075 fromQueue = &sActivePageQueue; 1076 break; 1077 case PAGE_STATE_INACTIVE: 1078 fromQueue = &sInactivePageQueue; 1079 break; 1080 case PAGE_STATE_MODIFIED: 1081 fromQueue = &sModifiedPageQueue; 1082 break; 1083 case PAGE_STATE_CACHED: 1084 fromQueue = &sCachedPageQueue; 1085 break; 1086 case PAGE_STATE_FREE: 1087 case PAGE_STATE_CLEAR: 1088 panic("free_page(): page %p already free", page); 1089 return; 1090 case PAGE_STATE_WIRED: 1091 case PAGE_STATE_UNUSED: 1092 fromQueue = NULL; 1093 break; 1094 default: 1095 panic("free_page(): page %p in invalid state %d", 1096 page, page->State()); 1097 return; 1098 } 1099 1100 if (page->CacheRef() != NULL) 1101 panic("to be freed page %p has cache", page); 1102 if (page->IsMapped()) 1103 panic("to be freed page %p has mappings", page); 1104 1105 if (fromQueue != NULL) 1106 fromQueue->RemoveUnlocked(page); 1107 1108 TA(FreePage()); 1109 1110 ReadLocker locker(sFreePageQueuesLock); 1111 1112 DEBUG_PAGE_ACCESS_END(page); 1113 1114 if (clear) { 1115 page->SetState(PAGE_STATE_CLEAR); 1116 sClearPageQueue.PrependUnlocked(page); 1117 } else { 1118 page->SetState(PAGE_STATE_FREE); 1119 sFreePageQueue.PrependUnlocked(page); 1120 } 1121 1122 locker.Unlock(); 1123 1124 unreserve_pages(1); 1125 } 1126 1127 1128 /*! The caller must make sure that no-one else tries to change the page's state 1129 while the function is called. If the page has a cache, this can be done by 1130 locking the cache. 
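	Note that neither the page's current state nor the target state may be
	PAGE_STATE_FREE or PAGE_STATE_CLEAR -- this function panics on both;
	free_page() is responsible for moving pages into those queues.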
1131 */ 1132 static void 1133 set_page_state(vm_page *page, int pageState) 1134 { 1135 DEBUG_PAGE_ACCESS_CHECK(page); 1136 1137 if (pageState == page->State()) 1138 return; 1139 1140 VMPageQueue* fromQueue; 1141 1142 switch (page->State()) { 1143 case PAGE_STATE_ACTIVE: 1144 fromQueue = &sActivePageQueue; 1145 break; 1146 case PAGE_STATE_INACTIVE: 1147 fromQueue = &sInactivePageQueue; 1148 break; 1149 case PAGE_STATE_MODIFIED: 1150 fromQueue = &sModifiedPageQueue; 1151 break; 1152 case PAGE_STATE_CACHED: 1153 fromQueue = &sCachedPageQueue; 1154 break; 1155 case PAGE_STATE_FREE: 1156 case PAGE_STATE_CLEAR: 1157 panic("set_page_state(): page %p is free/clear", page); 1158 return; 1159 case PAGE_STATE_WIRED: 1160 case PAGE_STATE_UNUSED: 1161 fromQueue = NULL; 1162 break; 1163 default: 1164 panic("set_page_state(): page %p in invalid state %d", 1165 page, page->State()); 1166 return; 1167 } 1168 1169 VMPageQueue* toQueue; 1170 1171 switch (pageState) { 1172 case PAGE_STATE_ACTIVE: 1173 toQueue = &sActivePageQueue; 1174 break; 1175 case PAGE_STATE_INACTIVE: 1176 toQueue = &sInactivePageQueue; 1177 break; 1178 case PAGE_STATE_MODIFIED: 1179 toQueue = &sModifiedPageQueue; 1180 break; 1181 case PAGE_STATE_CACHED: 1182 PAGE_ASSERT(page, !page->IsMapped()); 1183 PAGE_ASSERT(page, !page->modified); 1184 toQueue = &sCachedPageQueue; 1185 break; 1186 case PAGE_STATE_FREE: 1187 case PAGE_STATE_CLEAR: 1188 panic("set_page_state(): target state is free/clear"); 1189 return; 1190 case PAGE_STATE_WIRED: 1191 case PAGE_STATE_UNUSED: 1192 toQueue = NULL; 1193 break; 1194 default: 1195 panic("set_page_state(): invalid target state %d", pageState); 1196 return; 1197 } 1198 1199 VMCache* cache = page->Cache(); 1200 if (cache != NULL && cache->temporary) { 1201 if (pageState == PAGE_STATE_MODIFIED) 1202 atomic_add(&sModifiedTemporaryPages, 1); 1203 else if (page->State() == PAGE_STATE_MODIFIED) 1204 atomic_add(&sModifiedTemporaryPages, -1); 1205 } 1206 1207 // move the page 1208 if (toQueue == fromQueue) { 1209 // Note: Theoretically we are required to lock when changing the page 1210 // state, even if we don't change the queue. We actually don't have to 1211 // do this, though, since only for the active queue there are different 1212 // page states and active pages have a cache that must be locked at 1213 // this point. So we rely on the fact that everyone must lock the cache 1214 // before trying to change/interpret the page state. 1215 PAGE_ASSERT(page, cache != NULL); 1216 cache->AssertLocked(); 1217 page->SetState(pageState); 1218 } else { 1219 if (fromQueue != NULL) 1220 fromQueue->RemoveUnlocked(page); 1221 1222 page->SetState(pageState); 1223 1224 if (toQueue != NULL) 1225 toQueue->AppendUnlocked(page); 1226 } 1227 } 1228 1229 1230 /*! Moves a previously modified page into a now appropriate queue. 1231 The page queues must not be locked. 1232 */ 1233 static void 1234 move_page_to_appropriate_queue(vm_page *page) 1235 { 1236 DEBUG_PAGE_ACCESS_CHECK(page); 1237 1238 // Note, this logic must be in sync with what the page daemon does. 1239 int32 state; 1240 if (page->IsMapped()) 1241 state = PAGE_STATE_ACTIVE; 1242 else if (page->modified) 1243 state = PAGE_STATE_MODIFIED; 1244 else 1245 state = PAGE_STATE_CACHED; 1246 1247 // TODO: If free + cached pages are low, we might directly want to free the 1248 // page. 
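	// (This roughly mirrors the queue choice the page daemon makes in
	// full_scan_inactive_pages().)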
	set_page_state(page, state);
}


static void
clear_page(struct vm_page *page)
{
	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
		B_PAGE_SIZE);
}


static status_t
mark_page_range_in_use(addr_t startPage, addr_t length, bool wired)
{
	TRACE(("mark_page_range_in_use: start 0x%lx, len 0x%lx\n",
		startPage, length));

	if (sPhysicalPageOffset > startPage) {
		TRACE(("mark_page_range_in_use: start page %ld is before free list\n",
			startPage));
		return B_BAD_VALUE;
	}
	startPage -= sPhysicalPageOffset;
	if (startPage + length > sNumPages) {
		TRACE(("mark_page_range_in_use: range would extend past free list\n"));
		return B_BAD_VALUE;
	}

	WriteLocker locker(sFreePageQueuesLock);

	for (addr_t i = 0; i < length; i++) {
		vm_page *page = &sPages[startPage + i];
		switch (page->State()) {
			case PAGE_STATE_FREE:
			case PAGE_STATE_CLEAR:
			{
				// TODO: This violates the page reservation policy, since we
				// remove pages from the free/clear queues without having
				// reserved them before. This should happen in the early boot
				// process only, though.
				DEBUG_PAGE_ACCESS_START(page);
				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
					? sFreePageQueue : sClearPageQueue;
				queue.Remove(page);
				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
				page->busy = false;
				atomic_add(&sUnreservedFreePages, -1);
				DEBUG_PAGE_ACCESS_END(page);
				break;
			}
			case PAGE_STATE_WIRED:
			case PAGE_STATE_UNUSED:
				break;
			case PAGE_STATE_ACTIVE:
			case PAGE_STATE_INACTIVE:
			case PAGE_STATE_MODIFIED:
			case PAGE_STATE_CACHED:
			default:
				// uh
				dprintf("mark_page_range_in_use: page 0x%lx in non-free state "
					"%d!\n", startPage + i, page->State());
				break;
		}
	}

	return B_OK;
}


/*!
	This is a background thread that wakes up every now and then (every 100ms)
	and moves some pages from the free queue over to the clear queue.
	Given enough time, it will clear out all pages from the free queue - we
	could probably slow it down after having reached a certain threshold.
*/
static int32
page_scrubber(void *unused)
{
	(void)(unused);

	TRACE(("page_scrubber starting...\n"));

	for (;;) {
		snooze(100000);	// 100ms

		if (sFreePageQueue.Count() == 0
			|| sUnreservedFreePages < (int32)sFreePagesTarget) {
			continue;
		}

		// Since we temporarily remove pages from the free pages reserve,
		// we must make sure we don't cause a violation of the page
		// reservation guarantee. The following is usually stricter than
		// necessary, because we don't have information on how many of the
		// reserved pages have already been allocated.
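		// Using the user reserve as "dontTouch" makes the scrubber behave
		// like an ordinary user-priority consumer: it never dips into the
		// pages held back from user allocations.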
1344 int32 reserved = reserve_some_pages(SCRUB_SIZE, 1345 kPageReserveForPriority[VM_PRIORITY_USER]); 1346 if (reserved == 0) 1347 continue; 1348 1349 // get some pages from the free queue 1350 ReadLocker locker(sFreePageQueuesLock); 1351 1352 vm_page *page[SCRUB_SIZE]; 1353 int32 scrubCount = 0; 1354 for (int32 i = 0; i < reserved; i++) { 1355 page[i] = sFreePageQueue.RemoveHeadUnlocked(); 1356 if (page[i] == NULL) 1357 break; 1358 1359 DEBUG_PAGE_ACCESS_START(page[i]); 1360 1361 page[i]->SetState(PAGE_STATE_ACTIVE); 1362 page[i]->busy = true; 1363 scrubCount++; 1364 } 1365 1366 locker.Unlock(); 1367 1368 if (scrubCount == 0) { 1369 unreserve_pages(reserved); 1370 continue; 1371 } 1372 1373 TA(ScrubbingPages(scrubCount)); 1374 1375 // clear them 1376 for (int32 i = 0; i < scrubCount; i++) 1377 clear_page(page[i]); 1378 1379 locker.Lock(); 1380 1381 // and put them into the clear queue 1382 for (int32 i = 0; i < scrubCount; i++) { 1383 page[i]->SetState(PAGE_STATE_CLEAR); 1384 page[i]->busy = false; 1385 DEBUG_PAGE_ACCESS_END(page[i]); 1386 sClearPageQueue.PrependUnlocked(page[i]); 1387 } 1388 1389 locker.Unlock(); 1390 1391 unreserve_pages(reserved); 1392 1393 TA(ScrubbedPages(scrubCount)); 1394 } 1395 1396 return 0; 1397 } 1398 1399 1400 static void 1401 init_page_marker(vm_page &marker) 1402 { 1403 marker.SetCacheRef(NULL); 1404 marker.InitState(PAGE_STATE_UNUSED); 1405 marker.busy = true; 1406 #if DEBUG_PAGE_QUEUE 1407 marker.queue = NULL; 1408 #endif 1409 #if DEBUG_PAGE_ACCESS 1410 marker.accessing_thread = thread_get_current_thread_id(); 1411 #endif 1412 } 1413 1414 1415 static void 1416 remove_page_marker(struct vm_page &marker) 1417 { 1418 DEBUG_PAGE_ACCESS_CHECK(&marker); 1419 1420 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED) 1421 sPageQueues[marker.State()].RemoveUnlocked(&marker); 1422 1423 marker.SetState(PAGE_STATE_UNUSED); 1424 } 1425 1426 1427 static vm_page * 1428 next_modified_page(struct vm_page &marker) 1429 { 1430 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock()); 1431 vm_page *page; 1432 1433 DEBUG_PAGE_ACCESS_CHECK(&marker); 1434 1435 if (marker.State() == PAGE_STATE_MODIFIED) { 1436 page = sModifiedPageQueue.Next(&marker); 1437 sModifiedPageQueue.Remove(&marker); 1438 marker.SetState(PAGE_STATE_UNUSED); 1439 } else 1440 page = sModifiedPageQueue.Head(); 1441 1442 for (; page != NULL; page = sModifiedPageQueue.Next(page)) { 1443 if (!page->busy) { 1444 // insert marker 1445 marker.SetState(PAGE_STATE_MODIFIED); 1446 sModifiedPageQueue.InsertAfter(page, &marker); 1447 return page; 1448 } 1449 } 1450 1451 return NULL; 1452 } 1453 1454 1455 // #pragma mark - 1456 1457 1458 class PageWriteTransfer; 1459 class PageWriteWrapper; 1460 1461 1462 class PageWriterRun { 1463 public: 1464 status_t Init(uint32 maxPages); 1465 1466 void PrepareNextRun(); 1467 void AddPage(vm_page* page); 1468 void Go(); 1469 1470 void PageWritten(PageWriteTransfer* transfer, status_t status, 1471 bool partialTransfer, size_t bytesTransferred); 1472 1473 private: 1474 uint32 fMaxPages; 1475 uint32 fWrapperCount; 1476 uint32 fTransferCount; 1477 vint32 fPendingTransfers; 1478 PageWriteWrapper* fWrappers; 1479 PageWriteTransfer* fTransfers; 1480 ConditionVariable fAllFinishedCondition; 1481 }; 1482 1483 1484 class PageWriteTransfer : public AsyncIOCallback { 1485 public: 1486 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages); 1487 bool AddPage(vm_page* page); 1488 1489 status_t Schedule(uint32 flags); 1490 1491 void SetStatus(status_t status, size_t transferred); 1492 1493 
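	// Note: SetStatus() converts a short transfer (one that didn't at least
	// reach the last page) into an error; see its definition below.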
status_t Status() const { return fStatus; } 1494 struct VMCache* Cache() const { return fCache; } 1495 uint32 PageCount() const { return fPageCount; } 1496 1497 virtual void IOFinished(status_t status, bool partialTransfer, 1498 size_t bytesTransferred); 1499 private: 1500 PageWriterRun* fRun; 1501 struct VMCache* fCache; 1502 off_t fOffset; 1503 uint32 fPageCount; 1504 int32 fMaxPages; 1505 status_t fStatus; 1506 uint32 fVecCount; 1507 iovec fVecs[32]; // TODO: make dynamic/configurable 1508 }; 1509 1510 1511 class PageWriteWrapper { 1512 public: 1513 PageWriteWrapper(); 1514 ~PageWriteWrapper(); 1515 void SetTo(vm_page* page); 1516 void Done(status_t result); 1517 1518 private: 1519 vm_page* fPage; 1520 struct VMCache* fCache; 1521 bool fIsActive; 1522 }; 1523 1524 1525 PageWriteWrapper::PageWriteWrapper() 1526 : 1527 fIsActive(false) 1528 { 1529 } 1530 1531 1532 PageWriteWrapper::~PageWriteWrapper() 1533 { 1534 if (fIsActive) 1535 panic("page write wrapper going out of scope but isn't completed"); 1536 } 1537 1538 1539 /*! The page's cache must be locked. 1540 */ 1541 void 1542 PageWriteWrapper::SetTo(vm_page* page) 1543 { 1544 DEBUG_PAGE_ACCESS_CHECK(page); 1545 1546 if (page->busy) 1547 panic("setting page write wrapper to busy page"); 1548 1549 if (fIsActive) 1550 panic("re-setting page write wrapper that isn't completed"); 1551 1552 fPage = page; 1553 fCache = page->Cache(); 1554 fIsActive = true; 1555 1556 fPage->busy = true; 1557 fPage->busy_writing = true; 1558 1559 // We have a modified page -- however, while we're writing it back, 1560 // the page might still be mapped. In order not to lose any changes to the 1561 // page, we mark it clean before actually writing it back; if 1562 // writing the page fails for some reason, we'll just keep it in the 1563 // modified page list, but that should happen only rarely. 1564 1565 // If the page is changed after we cleared the dirty flag, but before we 1566 // had the chance to write it back, then we'll write it again later -- that 1567 // will probably not happen that often, though. 1568 1569 vm_clear_map_flags(fPage, PAGE_MODIFIED); 1570 } 1571 1572 1573 /*! The page's cache must be locked. 1574 The page queues must not be locked. 1575 */ 1576 void 1577 PageWriteWrapper::Done(status_t result) 1578 { 1579 if (!fIsActive) 1580 panic("completing page write wrapper that is not active"); 1581 1582 DEBUG_PAGE_ACCESS_START(fPage); 1583 1584 fPage->busy = false; 1585 // Set unbusy and notify later by hand, since we might free the page. 1586 1587 if (result == B_OK) { 1588 // put it into the active/inactive queue 1589 move_page_to_appropriate_queue(fPage); 1590 fPage->busy_writing = false; 1591 DEBUG_PAGE_ACCESS_END(fPage); 1592 } else { 1593 // Writing the page failed. One reason would be that the cache has been 1594 // shrunk and the page does no longer belong to the file. Otherwise the 1595 // actual I/O failed, in which case we'll simply keep the page modified. 1596 1597 if (!fPage->busy_writing) { 1598 // The busy_writing flag was cleared. That means the cache has been 1599 // shrunk while we were trying to write the page and we have to free 1600 // it now. 1601 vm_remove_all_page_mappings(fPage); 1602 // TODO: Unmapping should already happen when resizing the cache! 1603 fCache->RemovePage(fPage); 1604 free_page(fPage, false); 1605 } else { 1606 // Writing the page failed -- mark the page modified and move it to 1607 // an appropriate queue other than the modified queue, so we don't 1608 // keep trying to write it over and over again. 
We keep 1609 // non-temporary pages in the modified queue, though, so they don't 1610 // get lost in the inactive queue. 1611 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage, 1612 strerror(result)); 1613 1614 fPage->modified = true; 1615 if (!fCache->temporary) 1616 set_page_state(fPage, PAGE_STATE_MODIFIED); 1617 else if (fPage->IsMapped()) 1618 set_page_state(fPage, PAGE_STATE_ACTIVE); 1619 else 1620 set_page_state(fPage, PAGE_STATE_INACTIVE); 1621 1622 fPage->busy_writing = false; 1623 DEBUG_PAGE_ACCESS_END(fPage); 1624 } 1625 } 1626 1627 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY); 1628 fIsActive = false; 1629 } 1630 1631 1632 /*! The page's cache must be locked. 1633 */ 1634 void 1635 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages) 1636 { 1637 fRun = run; 1638 fCache = page->Cache(); 1639 fOffset = page->cache_offset; 1640 fPageCount = 1; 1641 fMaxPages = maxPages; 1642 fStatus = B_OK; 1643 1644 fVecs[0].iov_base = (void*)(page->physical_page_number << PAGE_SHIFT); 1645 fVecs[0].iov_len = B_PAGE_SIZE; 1646 fVecCount = 1; 1647 } 1648 1649 1650 /*! The page's cache must be locked. 1651 */ 1652 bool 1653 PageWriteTransfer::AddPage(vm_page* page) 1654 { 1655 if (page->Cache() != fCache 1656 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages)) 1657 return false; 1658 1659 addr_t nextBase 1660 = (addr_t)fVecs[fVecCount - 1].iov_base + fVecs[fVecCount - 1].iov_len; 1661 1662 if (page->physical_page_number << PAGE_SHIFT == nextBase 1663 && page->cache_offset == fOffset + fPageCount) { 1664 // append to last iovec 1665 fVecs[fVecCount - 1].iov_len += B_PAGE_SIZE; 1666 fPageCount++; 1667 return true; 1668 } 1669 1670 nextBase = (addr_t)fVecs[0].iov_base - B_PAGE_SIZE; 1671 if (page->physical_page_number << PAGE_SHIFT == nextBase 1672 && page->cache_offset == fOffset - 1) { 1673 // prepend to first iovec and adjust offset 1674 fVecs[0].iov_base = (void*)nextBase; 1675 fVecs[0].iov_len += B_PAGE_SIZE; 1676 fOffset = page->cache_offset; 1677 fPageCount++; 1678 return true; 1679 } 1680 1681 if ((page->cache_offset == fOffset + fPageCount 1682 || page->cache_offset == fOffset - 1) 1683 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) { 1684 // not physically contiguous or not in the right order 1685 uint32 vectorIndex; 1686 if (page->cache_offset < fOffset) { 1687 // we are pre-pending another vector, move the other vecs 1688 for (uint32 i = fVecCount; i > 0; i--) 1689 fVecs[i] = fVecs[i - 1]; 1690 1691 fOffset = page->cache_offset; 1692 vectorIndex = 0; 1693 } else 1694 vectorIndex = fVecCount; 1695 1696 fVecs[vectorIndex].iov_base 1697 = (void*)(page->physical_page_number << PAGE_SHIFT); 1698 fVecs[vectorIndex].iov_len = B_PAGE_SIZE; 1699 1700 fVecCount++; 1701 fPageCount++; 1702 return true; 1703 } 1704 1705 return false; 1706 } 1707 1708 1709 status_t 1710 PageWriteTransfer::Schedule(uint32 flags) 1711 { 1712 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT; 1713 size_t writeLength = fPageCount << PAGE_SHIFT; 1714 1715 if (fRun != NULL) { 1716 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength, 1717 flags | B_PHYSICAL_IO_REQUEST, this); 1718 } 1719 1720 status_t status = fCache->Write(writeOffset, fVecs, fVecCount, 1721 flags | B_PHYSICAL_IO_REQUEST, &writeLength); 1722 1723 SetStatus(status, writeLength); 1724 return fStatus; 1725 } 1726 1727 1728 void 1729 PageWriteTransfer::SetStatus(status_t status, size_t transferred) 1730 { 1731 // only succeed if all pages up to the last one have been written fully 1732 // and the 
last page has at least been written partially 1733 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE) 1734 status = B_ERROR; 1735 1736 fStatus = status; 1737 } 1738 1739 1740 void 1741 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer, 1742 size_t bytesTransferred) 1743 { 1744 SetStatus(status, bytesTransferred); 1745 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred); 1746 } 1747 1748 1749 status_t 1750 PageWriterRun::Init(uint32 maxPages) 1751 { 1752 fMaxPages = maxPages; 1753 fWrapperCount = 0; 1754 fTransferCount = 0; 1755 fPendingTransfers = 0; 1756 1757 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages]; 1758 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages]; 1759 if (fWrappers == NULL || fTransfers == NULL) 1760 return B_NO_MEMORY; 1761 1762 return B_OK; 1763 } 1764 1765 1766 void 1767 PageWriterRun::PrepareNextRun() 1768 { 1769 fWrapperCount = 0; 1770 fTransferCount = 0; 1771 fPendingTransfers = 0; 1772 } 1773 1774 1775 /*! The page's cache must be locked. 1776 */ 1777 void 1778 PageWriterRun::AddPage(vm_page* page) 1779 { 1780 fWrappers[fWrapperCount++].SetTo(page); 1781 1782 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) { 1783 fTransfers[fTransferCount++].SetTo(this, page, 1784 page->Cache()->MaxPagesPerAsyncWrite()); 1785 } 1786 } 1787 1788 1789 void 1790 PageWriterRun::Go() 1791 { 1792 fPendingTransfers = fTransferCount; 1793 1794 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 1795 ConditionVariableEntry waitEntry; 1796 fAllFinishedCondition.Add(&waitEntry); 1797 1798 // schedule writes 1799 for (uint32 i = 0; i < fTransferCount; i++) 1800 fTransfers[i].Schedule(B_VIP_IO_REQUEST); 1801 1802 // wait until all pages have been written 1803 waitEntry.Wait(); 1804 1805 // mark pages depending on whether they could be written or not 1806 1807 uint32 wrapperIndex = 0; 1808 for (uint32 i = 0; i < fTransferCount; i++) { 1809 PageWriteTransfer& transfer = fTransfers[i]; 1810 transfer.Cache()->Lock(); 1811 1812 for (uint32 j = 0; j < transfer.PageCount(); j++) 1813 fWrappers[wrapperIndex++].Done(transfer.Status()); 1814 1815 transfer.Cache()->Unlock(); 1816 } 1817 1818 ASSERT(wrapperIndex == fWrapperCount); 1819 1820 for (uint32 i = 0; i < fTransferCount; i++) { 1821 PageWriteTransfer& transfer = fTransfers[i]; 1822 struct VMCache* cache = transfer.Cache(); 1823 1824 // We've acquired a references for each page 1825 for (uint32 j = 0; j < transfer.PageCount(); j++) { 1826 // We release the cache references after all pages were made 1827 // unbusy again - otherwise releasing a vnode could deadlock. 1828 cache->ReleaseStoreRef(); 1829 cache->ReleaseRef(); 1830 } 1831 } 1832 } 1833 1834 1835 void 1836 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status, 1837 bool partialTransfer, size_t bytesTransferred) 1838 { 1839 if (atomic_add(&fPendingTransfers, -1) == 1) 1840 fAllFinishedCondition.NotifyAll(); 1841 } 1842 1843 1844 /*! The page writer continuously takes some pages from the modified 1845 queue, writes them back, and moves them back to the active queue. 1846 It runs in its own thread, and is only there to keep the number 1847 of modified pages low, so that more pages can be reused with 1848 fewer costs. 
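	Each run collects at most kNumPages modified pages; the writer's I/O
	priority is scaled with the size of the modified queue, up to
	MAX_PAGE_WRITER_IO_PRIORITY (see MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD).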
1849 */ 1850 status_t 1851 page_writer(void* /*unused*/) 1852 { 1853 const uint32 kNumPages = 256; 1854 uint32 writtenPages = 0; 1855 bigtime_t lastWrittenTime = 0; 1856 bigtime_t pageCollectionTime = 0; 1857 bigtime_t pageWritingTime = 0; 1858 1859 PageWriterRun run; 1860 if (run.Init(kNumPages) != B_OK) { 1861 panic("page writer: Failed to init PageWriterRun!"); 1862 return B_ERROR; 1863 } 1864 1865 vm_page marker; 1866 init_page_marker(marker); 1867 1868 while (true) { 1869 // TODO: Maybe wait shorter when memory is low! 1870 if (sModifiedPageQueue.Count() < kNumPages) { 1871 sPageWriterCondition.Wait(3000000, true); 1872 // all 3 seconds when no one triggers us 1873 } 1874 1875 int32 modifiedPages = sModifiedPageQueue.Count(); 1876 if (modifiedPages == 0) 1877 continue; 1878 1879 #if ENABLE_SWAP_SUPPORT 1880 page_stats pageStats; 1881 get_page_stats(pageStats); 1882 bool activePaging = do_active_paging(pageStats); 1883 #endif 1884 1885 // depending on how urgent it becomes to get pages to disk, we adjust 1886 // our I/O priority 1887 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 1888 int32 ioPriority = B_IDLE_PRIORITY; 1889 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 1890 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 1891 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 1892 } else { 1893 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 1894 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 1895 } 1896 1897 thread_set_io_priority(ioPriority); 1898 1899 uint32 numPages = 0; 1900 run.PrepareNextRun(); 1901 1902 // TODO: make this laptop friendly, too (ie. only start doing 1903 // something if someone else did something or there is really 1904 // enough to do). 1905 1906 // collect pages to be written 1907 pageCollectionTime -= system_time(); 1908 1909 while (numPages < kNumPages) { 1910 vm_page *page = next_modified_page(marker); 1911 if (page == NULL) 1912 break; 1913 1914 PageCacheLocker cacheLocker(page, false); 1915 if (!cacheLocker.IsLocked()) 1916 continue; 1917 1918 VMCache *cache = page->Cache(); 1919 1920 // If the page is busy or its state has changed while we were 1921 // locking the cache, just ignore it. 1922 if (page->busy || page->State() != PAGE_STATE_MODIFIED) 1923 continue; 1924 1925 DEBUG_PAGE_ACCESS_START(page); 1926 1927 // Don't write back wired (locked) pages. 1928 if (page->wired_count > 0) { 1929 set_page_state(page, PAGE_STATE_ACTIVE); 1930 DEBUG_PAGE_ACCESS_END(page); 1931 continue; 1932 } 1933 1934 // Write back temporary pages only when we're actively paging. 1935 if (cache->temporary 1936 #if ENABLE_SWAP_SUPPORT 1937 && (!activePaging 1938 || !cache->CanWritePage( 1939 (off_t)page->cache_offset << PAGE_SHIFT)) 1940 #endif 1941 ) { 1942 // We can't/don't want to do anything with this page, so move it 1943 // to one of the other queues. 1944 if (page->mappings.IsEmpty()) 1945 set_page_state(page, PAGE_STATE_INACTIVE); 1946 else 1947 set_page_state(page, PAGE_STATE_ACTIVE); 1948 1949 DEBUG_PAGE_ACCESS_END(page); 1950 continue; 1951 } 1952 1953 // We need our own reference to the store, as it might currently be 1954 // destroyed. 
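			// If that fails, the page is skipped for this run: we unlock the
			// cache and yield, since the store is most likely in the process
			// of being destroyed right now.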
1955 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 1956 DEBUG_PAGE_ACCESS_END(page); 1957 cacheLocker.Unlock(); 1958 thread_yield(true); 1959 continue; 1960 } 1961 1962 run.AddPage(page); 1963 1964 DEBUG_PAGE_ACCESS_END(page); 1965 1966 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 1967 TPW(WritePage(page)); 1968 1969 cache->AcquireRefLocked(); 1970 numPages++; 1971 } 1972 1973 pageCollectionTime += system_time(); 1974 1975 if (numPages == 0) 1976 continue; 1977 1978 // write pages to disk and do all the cleanup 1979 pageWritingTime -= system_time(); 1980 run.Go(); 1981 pageWritingTime += system_time(); 1982 1983 // debug output only... 1984 writtenPages += numPages; 1985 if (writtenPages >= 1024) { 1986 bigtime_t now = system_time(); 1987 TRACE(("page writer: wrote 1024 pages (total: %llu ms, " 1988 "collect: %llu ms, write: %llu ms)\n", 1989 (now - lastWrittenTime) / 1000, 1990 pageCollectionTime / 1000, pageWritingTime / 1000)); 1991 writtenPages -= 1024; 1992 lastWrittenTime = now; 1993 pageCollectionTime = 0; 1994 pageWritingTime = 0; 1995 } 1996 } 1997 1998 remove_page_marker(marker); 1999 return B_OK; 2000 } 2001 2002 2003 // #pragma mark - 2004 2005 2006 // TODO: This should be done in the page daemon! 2007 #if 0 2008 #if ENABLE_SWAP_SUPPORT 2009 static bool 2010 free_page_swap_space(int32 index) 2011 { 2012 vm_page *page = vm_page_at_index(index); 2013 PageCacheLocker locker(page); 2014 if (!locker.IsLocked()) 2015 return false; 2016 2017 DEBUG_PAGE_ACCESS_START(page); 2018 2019 VMCache* cache = page->Cache(); 2020 if (cache->temporary && page->wired_count == 0 2021 && cache->HasPage(page->cache_offset << PAGE_SHIFT) 2022 && page->usage_count > 0) { 2023 // TODO: how to judge a page is highly active? 2024 if (swap_free_page_swap_space(page)) { 2025 // We need to mark the page modified, since otherwise it could be 2026 // stolen and we'd lose its data. 
2027 vm_page_set_state(page, PAGE_STATE_MODIFIED); 2028 TD(FreedPageSwap(page)); 2029 DEBUG_PAGE_ACCESS_END(page); 2030 return true; 2031 } 2032 } 2033 DEBUG_PAGE_ACCESS_END(page); 2034 return false; 2035 } 2036 #endif 2037 #endif // 0 2038 2039 2040 static vm_page * 2041 find_cached_page_candidate(struct vm_page &marker) 2042 { 2043 DEBUG_PAGE_ACCESS_CHECK(&marker); 2044 2045 InterruptsSpinLocker locker(sCachedPageQueue.GetLock()); 2046 vm_page *page; 2047 2048 if (marker.State() == PAGE_STATE_UNUSED) { 2049 // Get the first free pages of the (in)active queue 2050 page = sCachedPageQueue.Head(); 2051 } else { 2052 // Get the next page of the current queue 2053 if (marker.State() != PAGE_STATE_CACHED) { 2054 panic("invalid marker %p state", &marker); 2055 return NULL; 2056 } 2057 2058 page = sCachedPageQueue.Next(&marker); 2059 sCachedPageQueue.Remove(&marker); 2060 marker.SetState(PAGE_STATE_UNUSED); 2061 } 2062 2063 while (page != NULL) { 2064 if (!page->busy) { 2065 // we found a candidate, insert marker 2066 marker.SetState(PAGE_STATE_CACHED); 2067 sCachedPageQueue.InsertAfter(page, &marker); 2068 return page; 2069 } 2070 2071 page = sCachedPageQueue.Next(page); 2072 } 2073 2074 return NULL; 2075 } 2076 2077 2078 static bool 2079 free_cached_page(vm_page *page, bool dontWait) 2080 { 2081 // try to lock the page's cache 2082 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL) 2083 return false; 2084 VMCache* cache = page->Cache(); 2085 2086 AutoLocker<VMCache> cacheLocker(cache, true); 2087 MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked); 2088 2089 // check again if that page is still a candidate 2090 if (page->busy || page->State() != PAGE_STATE_CACHED) 2091 return false; 2092 2093 DEBUG_PAGE_ACCESS_START(page); 2094 2095 PAGE_ASSERT(page, !page->IsMapped()); 2096 PAGE_ASSERT(page, !page->modified); 2097 2098 // we can now steal this page 2099 2100 cache->RemovePage(page); 2101 2102 sCachedPageQueue.RemoveUnlocked(page); 2103 return true; 2104 } 2105 2106 2107 static uint32 2108 free_cached_pages(uint32 pagesToFree, bool dontWait) 2109 { 2110 vm_page marker; 2111 init_page_marker(marker); 2112 2113 uint32 pagesFreed = 0; 2114 2115 while (pagesFreed < pagesToFree) { 2116 vm_page *page = find_cached_page_candidate(marker); 2117 if (page == NULL) 2118 break; 2119 2120 if (free_cached_page(page, dontWait)) { 2121 ReadLocker locker(sFreePageQueuesLock); 2122 page->SetState(PAGE_STATE_FREE); 2123 DEBUG_PAGE_ACCESS_END(page); 2124 sFreePageQueue.PrependUnlocked(page); 2125 locker.Unlock(); 2126 2127 TA(StolenPage()); 2128 2129 pagesFreed++; 2130 } 2131 } 2132 2133 remove_page_marker(marker); 2134 2135 return pagesFreed; 2136 } 2137 2138 2139 static void 2140 idle_scan_active_pages(page_stats& pageStats) 2141 { 2142 VMPageQueue& queue = sActivePageQueue; 2143 2144 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs. 2145 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1; 2146 2147 while (maxToScan > 0) { 2148 maxToScan--; 2149 2150 // Get the next page. Note that we don't bother to lock here. We go with 2151 // the assumption that on all architectures reading/writing pointers is 2152 // atomic. Beyond that it doesn't really matter. We have to unlock the 2153 // queue anyway to lock the page's cache, and we'll recheck afterwards. 
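		// (Unlike the modified/cached queue scans, no marker page is needed
		// here, since we only ever look at the queue's head.)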
		vm_page* page = queue.Head();
		if (page == NULL)
			break;

		// lock the page's cache
		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
		if (cache == NULL)
			continue;

		if (page->State() != PAGE_STATE_ACTIVE) {
			// page is no longer in the cache or in this queue
			cache->ReleaseRefAndUnlock();
			continue;
		}

		DEBUG_PAGE_ACCESS_START(page);

		if (page->busy) {
			// page is busy -- requeue at the end
			vm_page_requeue(page, true);
			DEBUG_PAGE_ACCESS_END(page);
			cache->ReleaseRefAndUnlock();
			continue;
		}

		// Get the page active/modified flags and update the page's usage count.
		// We completely unmap inactive temporary pages. This saves us from
		// having to iterate through the inactive list as well, since we'll be
		// notified via page fault whenever such an inactive page is used again.
		// We don't remove the mappings of non-temporary pages, since we
		// wouldn't notice when those would become unused and could thus be
		// moved to the cached list.
		int32 usageCount;
		if (page->wired_count > 0 || page->usage_count > 0 || !cache->temporary)
			usageCount = vm_clear_page_mapping_accessed_flags(page);
		else
			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);

		if (usageCount > 0) {
			usageCount += page->usage_count + kPageUsageAdvance;
			if (usageCount > kPageUsageMax)
				usageCount = kPageUsageMax;
			// TODO: This would probably also be the place to reclaim swap space.
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount < 0) {
				usageCount = 0;
				set_page_state(page, PAGE_STATE_INACTIVE);
			}
		}

		page->usage_count = usageCount;

		DEBUG_PAGE_ACCESS_END(page);

		cache->ReleaseRefAndUnlock();
	}
}


static void
full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
{
	int32 pagesToFree = pageStats.unsatisfiedReservations
		+ sFreeOrCachedPagesTarget
		- (pageStats.totalFreePages + pageStats.cachedPages);
	if (pagesToFree <= 0)
		return;

	bigtime_t time = system_time();
	uint32 pagesScanned = 0;
	uint32 pagesToCached = 0;
	uint32 pagesToModified = 0;
	uint32 pagesToActive = 0;

	// Determine how many pages at maximum to send to the modified queue. Since
	// it is relatively expensive to page out pages, we do that on a grander
	// scale only when things get desperate.
	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;

	vm_page marker;
	init_page_marker(marker);

	VMPageQueue& queue = sInactivePageQueue;
	InterruptsSpinLocker queueLocker(queue.GetLock());
	uint32 maxToScan = queue.Count();

	vm_page* nextPage = queue.Head();

	while (pagesToFree > 0 && maxToScan > 0) {
		maxToScan--;

		// get the next page
		vm_page* page = nextPage;
		if (page == NULL)
			break;
		nextPage = queue.Next(page);

		if (page->busy)
			continue;

		// mark the position
		queue.InsertAfter(page, &marker);
		queueLocker.Unlock();

		// lock the page's cache
		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
		if (cache == NULL || page->busy
			|| page->State() != PAGE_STATE_INACTIVE) {
			if (cache != NULL)
				cache->ReleaseRefAndUnlock();
			queueLocker.Lock();
			nextPage = queue.Next(&marker);
			queue.Remove(&marker);
			continue;
		}

		pagesScanned++;

		DEBUG_PAGE_ACCESS_START(page);

		// Get the accessed count, clear the accessed/modified flags and
		// unmap the page, if it hasn't been accessed.
		int32 usageCount;
		if (page->wired_count > 0)
			usageCount = vm_clear_page_mapping_accessed_flags(page);
		else
			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);

		// update usage count
		if (usageCount > 0) {
			usageCount += page->usage_count + kPageUsageAdvance;
			if (usageCount > kPageUsageMax)
				usageCount = kPageUsageMax;
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount < 0)
				usageCount = 0;
		}

		page->usage_count = usageCount;

		// Move to fitting queue or requeue:
		// * Active mapped pages go to the active queue.
		// * Inactive mapped (i.e. wired) pages are requeued.
		// * The remaining pages are cachable. Thus, if unmodified they go to
		//   the cached queue, otherwise to the modified queue (up to a limit).
		//   Note that unlike in the idle scanning we don't exempt pages of
		//   temporary caches. Apparently we really need memory, so we better
		//   page out memory as well.
2304 bool isMapped = page->IsMapped(); 2305 if (usageCount > 0) { 2306 if (isMapped) { 2307 set_page_state(page, PAGE_STATE_ACTIVE); 2308 pagesToActive++; 2309 } else 2310 vm_page_requeue(page, true); 2311 } else if (isMapped) { 2312 vm_page_requeue(page, true); 2313 } else if (!page->modified) { 2314 set_page_state(page, PAGE_STATE_CACHED); 2315 pagesToFree--; 2316 pagesToCached++; 2317 } else if (maxToFlush > 0) { 2318 set_page_state(page, PAGE_STATE_MODIFIED); 2319 maxToFlush--; 2320 pagesToModified++; 2321 } else 2322 vm_page_requeue(page, true); 2323 2324 DEBUG_PAGE_ACCESS_END(page); 2325 2326 cache->ReleaseRefAndUnlock(); 2327 2328 // remove the marker 2329 queueLocker.Lock(); 2330 nextPage = queue.Next(&marker); 2331 queue.Remove(&marker); 2332 } 2333 2334 queueLocker.Unlock(); 2335 2336 time = system_time() - time; 2337 TRACE_DAEMON(" -> inactive scan (%7lld us): scanned: %7lu, " 2338 "moved: %lu -> cached, %lu -> modified, %lu -> active\n", time, 2339 pagesScanned, pagesToCached, pagesToModified, pagesToActive); 2340 2341 // wake up the page writer, if we tossed it some pages 2342 if (pagesToModified > 0) 2343 sPageWriterCondition.WakeUp(); 2344 } 2345 2346 2347 static void 2348 full_scan_active_pages(page_stats& pageStats, int32 despairLevel) 2349 { 2350 vm_page marker; 2351 init_page_marker(marker); 2352 2353 VMPageQueue& queue = sActivePageQueue; 2354 InterruptsSpinLocker queueLocker(queue.GetLock()); 2355 uint32 maxToScan = queue.Count(); 2356 2357 int32 pagesToDeactivate = pageStats.unsatisfiedReservations 2358 + sFreeOrCachedPagesTarget 2359 - (pageStats.totalFreePages + pageStats.cachedPages) 2360 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0); 2361 if (pagesToDeactivate <= 0) 2362 return; 2363 2364 bigtime_t time = system_time(); 2365 uint32 pagesAccessed = 0; 2366 uint32 pagesToInactive = 0; 2367 uint32 pagesScanned = 0; 2368 2369 vm_page* nextPage = queue.Head(); 2370 2371 while (pagesToDeactivate > 0 && maxToScan > 0) { 2372 maxToScan--; 2373 2374 // get the next page 2375 vm_page* page = nextPage; 2376 if (page == NULL) 2377 break; 2378 nextPage = queue.Next(page); 2379 2380 if (page->busy) 2381 continue; 2382 2383 // mark the position 2384 queue.InsertAfter(page, &marker); 2385 queueLocker.Unlock(); 2386 2387 // lock the page's cache 2388 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true); 2389 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) { 2390 if (cache != NULL) 2391 cache->ReleaseRefAndUnlock(); 2392 queueLocker.Lock(); 2393 nextPage = queue.Next(&marker); 2394 queue.Remove(&marker); 2395 continue; 2396 } 2397 2398 pagesScanned++; 2399 2400 DEBUG_PAGE_ACCESS_START(page); 2401 2402 // Get the page active/modified flags and update the page's usage count. 2403 int32 usageCount = vm_clear_page_mapping_accessed_flags(page); 2404 2405 if (usageCount > 0) { 2406 usageCount += page->usage_count + kPageUsageAdvance; 2407 if (usageCount > kPageUsageMax) 2408 usageCount = kPageUsageMax; 2409 pagesAccessed++; 2410 // TODO: This would probably also be the place to reclaim swap space. 
		} else {
			usageCount += page->usage_count - (int32)kPageUsageDecline;
			if (usageCount <= 0) {
				usageCount = 0;
				set_page_state(page, PAGE_STATE_INACTIVE);
				pagesToInactive++;
			}
		}

		page->usage_count = usageCount;

		DEBUG_PAGE_ACCESS_END(page);

		cache->ReleaseRefAndUnlock();

		// remove the marker
		queueLocker.Lock();
		nextPage = queue.Next(&marker);
		queue.Remove(&marker);
	}

	queueLocker.Unlock();

	time = system_time() - time;
	TRACE_DAEMON(" -> active scan (%7lld us): scanned: %7lu, "
		"moved: %lu -> inactive, encountered %lu accessed ones\n", time,
		pagesScanned, pagesToInactive, pagesAccessed);
}


static void
page_daemon_idle_scan(page_stats& pageStats)
{
	TRACE_DAEMON("page daemon: idle run\n");

	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
		// We want more actually free pages, so free some from the cached
		// ones.
		uint32 freed = free_cached_pages(
			sFreePagesTarget - pageStats.totalFreePages, false);
		if (freed > 0)
			unreserve_pages(freed);
		get_page_stats(pageStats);
	}

	// Walk the active list and move pages to the inactive queue.
	get_page_stats(pageStats);
	idle_scan_active_pages(pageStats);
}


static void
page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
{
	TRACE_DAEMON("page daemon: full run: free: %lu, cached: %lu, "
		"to free: %lu\n", pageStats.totalFreePages, pageStats.cachedPages,
		pageStats.unsatisfiedReservations + sFreeOrCachedPagesTarget
			- (pageStats.totalFreePages + pageStats.cachedPages));

	// Walk the inactive list and transfer pages to the cached and modified
	// queues.
	full_scan_inactive_pages(pageStats, despairLevel);

	// Free cached pages. Also wake up reservation waiters.
	get_page_stats(pageStats);
	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
		- (pageStats.totalFreePages);
	if (pagesToFree > 0) {
		uint32 freed = free_cached_pages(pagesToFree, true);
		if (freed > 0)
			unreserve_pages(freed);
	}

	// Walk the active list and move pages to the inactive queue.
	get_page_stats(pageStats);
	full_scan_active_pages(pageStats, despairLevel);
}


static status_t
page_daemon(void* /*unused*/)
{
	int32 despairLevel = 0;

	while (true) {
		sPageDaemonCondition.ClearActivated();

		// evaluate the free pages situation
		page_stats pageStats;
		get_page_stats(pageStats);

		if (!do_active_paging(pageStats)) {
			// Things look good -- just maintain statistics and keep the pool
			// of actually free pages full enough.
			despairLevel = 0;
			page_daemon_idle_scan(pageStats);
			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
		} else {
			// Not enough free pages. We need to do some real work.
			despairLevel = std::min(despairLevel + 1, (int32)3);
			page_daemon_full_scan(pageStats, despairLevel);

			// Don't wait after the first full scan, but rather immediately
			// check whether we were successful in freeing enough pages and
			// re-run with increased despair level. The first scan is
			// conservative with respect to moving inactive modified pages to
			// the modified list to avoid thrashing. The second scan, however,
			// will not hold back.
2517 if (despairLevel > 1) 2518 snooze(kBusyScanWaitInterval); 2519 } 2520 } 2521 2522 return B_OK; 2523 } 2524 2525 2526 /*! Returns how many pages could *not* be reserved. 2527 */ 2528 static uint32 2529 reserve_pages(uint32 count, int priority, bool dontWait) 2530 { 2531 int32 dontTouch = kPageReserveForPriority[priority]; 2532 2533 while (true) { 2534 count -= reserve_some_pages(count, dontTouch); 2535 if (count == 0) 2536 return 0; 2537 2538 if (sUnsatisfiedPageReservations == 0) { 2539 count -= free_cached_pages(count, dontWait); 2540 if (count == 0) 2541 return count; 2542 } 2543 2544 if (dontWait) 2545 return count; 2546 2547 // we need to wait for pages to become available 2548 2549 MutexLocker pageDeficitLocker(sPageDeficitLock); 2550 2551 bool notifyDaemon = sUnsatisfiedPageReservations == 0; 2552 sUnsatisfiedPageReservations += count; 2553 2554 if (sUnreservedFreePages > dontTouch) { 2555 // the situation changed 2556 sUnsatisfiedPageReservations -= count; 2557 continue; 2558 } 2559 2560 PageReservationWaiter waiter; 2561 waiter.dontTouch = dontTouch; 2562 waiter.missing = count; 2563 waiter.thread = thread_get_current_thread(); 2564 waiter.threadPriority = waiter.thread->priority; 2565 2566 // insert ordered (i.e. after all waiters with higher or equal priority) 2567 PageReservationWaiter* otherWaiter = NULL; 2568 for (PageReservationWaiterList::Iterator it 2569 = sPageReservationWaiters.GetIterator(); 2570 (otherWaiter = it.Next()) != NULL;) { 2571 if (waiter < *otherWaiter) 2572 break; 2573 } 2574 2575 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter); 2576 2577 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER, 2578 "waiting for pages"); 2579 2580 if (notifyDaemon) 2581 sPageDaemonCondition.WakeUp(); 2582 2583 pageDeficitLocker.Unlock(); 2584 2585 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 2586 thread_block(); 2587 2588 pageDeficitLocker.Lock(); 2589 2590 return 0; 2591 } 2592 } 2593 2594 2595 // #pragma mark - private kernel API 2596 2597 2598 /*! Writes a range of modified pages of a cache to disk. 2599 You need to hold the VMCache lock when calling this function. 2600 Note that the cache lock is released in this function. 2601 \param cache The cache. 2602 \param firstPage Offset (in page size units) of the first page in the range. 2603 \param endPage End offset (in page size units) of the page range. The page 2604 at this offset is not included. 
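	\return \c B_OK. Errors while writing individual pages are reported to the
		respective PageWriteWrapper objects and do not abort the remaining
		range.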
2605 */ 2606 status_t 2607 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage, 2608 uint32 endPage) 2609 { 2610 static const int32 kMaxPages = 256; 2611 int32 maxPages = cache->MaxPagesPerWrite(); 2612 if (maxPages < 0 || maxPages > kMaxPages) 2613 maxPages = kMaxPages; 2614 2615 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 2616 | HEAP_DONT_LOCK_KERNEL_SPACE; 2617 2618 PageWriteWrapper stackWrappers[2]; 2619 PageWriteWrapper* wrapperPool 2620 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1]; 2621 if (wrapperPool == NULL) { 2622 // don't fail, just limit our capabilities 2623 wrapperPool = stackWrappers; 2624 maxPages = 1; 2625 } 2626 2627 int32 nextWrapper = 0; 2628 2629 PageWriteWrapper* wrappers[maxPages]; 2630 int32 usedWrappers = 0; 2631 2632 PageWriteTransfer transfer; 2633 bool transferEmpty = true; 2634 2635 VMCachePagesTree::Iterator it 2636 = cache->pages.GetIterator(firstPage, true, true); 2637 2638 while (true) { 2639 vm_page* page = it.Next(); 2640 if (page == NULL || page->cache_offset >= endPage) { 2641 if (transferEmpty) 2642 break; 2643 2644 page = NULL; 2645 } 2646 2647 if (page != NULL) { 2648 if (page->busy 2649 || (page->State() != PAGE_STATE_MODIFIED 2650 && !vm_test_map_modification(page))) { 2651 page = NULL; 2652 } 2653 } 2654 2655 PageWriteWrapper* wrapper = NULL; 2656 if (page != NULL) { 2657 wrapper = &wrapperPool[nextWrapper++]; 2658 if (nextWrapper > maxPages) 2659 nextWrapper = 0; 2660 2661 DEBUG_PAGE_ACCESS_START(page); 2662 2663 wrapper->SetTo(page); 2664 2665 if (transferEmpty || transfer.AddPage(page)) { 2666 if (transferEmpty) { 2667 transfer.SetTo(NULL, page, maxPages); 2668 transferEmpty = false; 2669 } 2670 2671 DEBUG_PAGE_ACCESS_END(page); 2672 2673 wrappers[usedWrappers++] = wrapper; 2674 continue; 2675 } 2676 2677 DEBUG_PAGE_ACCESS_END(page); 2678 } 2679 2680 if (transferEmpty) 2681 continue; 2682 2683 cache->Unlock(); 2684 status_t status = transfer.Schedule(0); 2685 cache->Lock(); 2686 2687 for (int32 i = 0; i < usedWrappers; i++) 2688 wrappers[i]->Done(status); 2689 2690 usedWrappers = 0; 2691 2692 if (page != NULL) { 2693 transfer.SetTo(NULL, page, maxPages); 2694 wrappers[usedWrappers++] = wrapper; 2695 } else 2696 transferEmpty = true; 2697 } 2698 2699 if (wrapperPool != stackWrappers) 2700 delete[] wrapperPool; 2701 2702 return B_OK; 2703 } 2704 2705 2706 /*! You need to hold the VMCache lock when calling this function. 2707 Note that the cache lock is released in this function. 2708 */ 2709 status_t 2710 vm_page_write_modified_pages(VMCache *cache) 2711 { 2712 return vm_page_write_modified_page_range(cache, 0, 2713 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 2714 } 2715 2716 2717 /*! Schedules the page writer to write back the specified \a page. 2718 Note, however, that it might not do this immediately, and it can well 2719 take several seconds until the page is actually written out. 2720 */ 2721 void 2722 vm_page_schedule_write_page(vm_page *page) 2723 { 2724 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED); 2725 2726 vm_page_requeue(page, false); 2727 2728 sPageWriterCondition.WakeUp(); 2729 } 2730 2731 2732 /*! Cache must be locked. 
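	Schedules all modified, non-busy pages in the given range of \a cache to be
	written back in the same deferred fashion as vm_page_schedule_write_page():
	the pages are only requeued and the page writer is woken up, so the actual
	write-back may still take a while.
	\param firstPage Offset (in page size units) of the first page in the range.
	\param endPage End offset (in page size units) of the page range. The page
		at this offset is not included.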
2733 */ 2734 void 2735 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 2736 uint32 endPage) 2737 { 2738 uint32 modified = 0; 2739 for (VMCachePagesTree::Iterator it 2740 = cache->pages.GetIterator(firstPage, true, true); 2741 vm_page *page = it.Next();) { 2742 if (page->cache_offset >= endPage) 2743 break; 2744 2745 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) { 2746 DEBUG_PAGE_ACCESS_START(page); 2747 vm_page_requeue(page, false); 2748 modified++; 2749 DEBUG_PAGE_ACCESS_END(page); 2750 } 2751 } 2752 2753 if (modified > 0) 2754 sPageWriterCondition.WakeUp(); 2755 } 2756 2757 2758 void 2759 vm_page_init_num_pages(kernel_args *args) 2760 { 2761 // calculate the size of memory by looking at the physical_memory_range array 2762 addr_t physicalPagesEnd = 0; 2763 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 2764 2765 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 2766 physicalPagesEnd = (args->physical_memory_range[i].start 2767 + args->physical_memory_range[i].size) / B_PAGE_SIZE; 2768 } 2769 2770 TRACE(("first phys page = 0x%lx, end 0x%lx\n", sPhysicalPageOffset, 2771 physicalPagesEnd)); 2772 2773 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 2774 2775 #ifdef LIMIT_AVAILABLE_MEMORY 2776 if (sNumPages > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) 2777 sNumPages = LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE); 2778 #endif 2779 } 2780 2781 2782 status_t 2783 vm_page_init(kernel_args *args) 2784 { 2785 TRACE(("vm_page_init: entry\n")); 2786 2787 // init page queues 2788 sModifiedPageQueue.Init("modified pages queue"); 2789 sInactivePageQueue.Init("inactive pages queue"); 2790 sActivePageQueue.Init("active pages queue"); 2791 sCachedPageQueue.Init("cached pages queue"); 2792 sFreePageQueue.Init("free pages queue"); 2793 sClearPageQueue.Init("clear pages queue"); 2794 2795 new (&sPageReservationWaiters) PageReservationWaiterList; 2796 2797 // map in the new free page table 2798 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 2799 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false); 2800 2801 TRACE(("vm_init: putting free_page_table @ %p, # ents %ld (size 0x%x)\n", 2802 sPages, sNumPages, (unsigned int)(sNumPages * sizeof(vm_page)))); 2803 2804 // initialize the free page table 2805 for (uint32 i = 0; i < sNumPages; i++) { 2806 sPages[i].physical_page_number = sPhysicalPageOffset + i; 2807 sPages[i].InitState(PAGE_STATE_FREE); 2808 new(&sPages[i].mappings) vm_page_mappings(); 2809 sPages[i].wired_count = 0; 2810 sPages[i].usage_count = 0; 2811 sPages[i].busy_writing = false; 2812 sPages[i].SetCacheRef(NULL); 2813 #if DEBUG_PAGE_QUEUE 2814 sPages[i].queue = NULL; 2815 #endif 2816 #if DEBUG_PAGE_ACCESS 2817 sPages[i].accessing_thread = -1; 2818 #endif 2819 sFreePageQueue.Append(&sPages[i]); 2820 } 2821 2822 sUnreservedFreePages = sNumPages; 2823 2824 TRACE(("initialized table\n")); 2825 2826 // mark the ranges between usable physical memory unused 2827 addr_t previousEnd = 0; 2828 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 2829 addr_t base = args->physical_memory_range[i].start; 2830 addr_t size = args->physical_memory_range[i].size; 2831 if (base > previousEnd) { 2832 mark_page_range_in_use(previousEnd / B_PAGE_SIZE, 2833 (base - previousEnd) / B_PAGE_SIZE, false); 2834 } 2835 previousEnd = base + size; 2836 } 2837 2838 // mark the allocated physical page ranges wired 2839 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 2840 
mark_page_range_in_use( 2841 args->physical_allocated_range[i].start / B_PAGE_SIZE, 2842 args->physical_allocated_range[i].size / B_PAGE_SIZE, true); 2843 } 2844 2845 // The target of actually free pages. This must be at least the system 2846 // reserve, but should be a few more pages, so we don't have to extract 2847 // a cached page with each allocation. 2848 sFreePagesTarget = VM_PAGE_RESERVE_USER 2849 + std::max((uint32)32, sNumPages / 1024); 2850 2851 // The target of free + cached and inactive pages. On low-memory machines 2852 // keep things tight. free + cached is the pool of immediately allocatable 2853 // pages. We want a few inactive pages, so when we're actually paging, we 2854 // have a reasonably large set of pages to work with. 2855 if (sUnreservedFreePages < 16 * 1024) { 2856 sFreeOrCachedPagesTarget = sFreePagesTarget + 128; 2857 sInactivePagesTarget = sFreePagesTarget / 3; 2858 } else { 2859 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget; 2860 sInactivePagesTarget = sFreePagesTarget / 2; 2861 } 2862 2863 TRACE(("vm_page_init: exit\n")); 2864 2865 return B_OK; 2866 } 2867 2868 2869 status_t 2870 vm_page_init_post_area(kernel_args *args) 2871 { 2872 void *dummy; 2873 2874 dummy = sPages; 2875 create_area("page structures", &dummy, B_EXACT_ADDRESS, 2876 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 2877 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2878 2879 add_debugger_command("page_stats", &dump_page_stats, 2880 "Dump statistics about page usage"); 2881 add_debugger_command_etc("page", &dump_page, 2882 "Dump page info", 2883 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n" 2884 "Prints information for the physical page. If neither \"-p\" nor\n" 2885 "\"-v\" are given, the provided address is interpreted as address of\n" 2886 "the vm_page data structure for the page in question. If \"-p\" is\n" 2887 "given, the address is the physical address of the page. If \"-v\" is\n" 2888 "given, the address is interpreted as virtual address in the current\n" 2889 "thread's address space and for the page it is mapped to (if any)\n" 2890 "information are printed. 
If \"-m\" is specified, the command will\n" 2891 "search all known address spaces for mappings to that page and print\n" 2892 "them.\n", 0); 2893 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 2894 add_debugger_command("find_page", &find_page, 2895 "Find out which queue a page is actually in"); 2896 2897 #ifdef TRACK_PAGE_USAGE_STATS 2898 add_debugger_command_etc("page_usage", &dump_page_usage_stats, 2899 "Dumps statistics about page usage counts", 2900 "\n" 2901 "Dumps statistics about page usage counts.\n", 2902 B_KDEBUG_DONT_PARSE_ARGUMENTS); 2903 #endif 2904 2905 return B_OK; 2906 } 2907 2908 2909 status_t 2910 vm_page_init_post_thread(kernel_args *args) 2911 { 2912 new (&sFreePageCondition) ConditionVariable; 2913 sFreePageCondition.Publish(&sFreePageQueue, "free page"); 2914 2915 // create a kernel thread to clear out pages 2916 2917 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 2918 B_LOWEST_ACTIVE_PRIORITY, NULL); 2919 send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE); 2920 2921 // start page writer 2922 2923 sPageWriterCondition.Init("page writer"); 2924 2925 thread = spawn_kernel_thread(&page_writer, "page writer", 2926 B_NORMAL_PRIORITY + 1, NULL); 2927 send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE); 2928 2929 // start page daemon 2930 2931 sPageDaemonCondition.Init("page daemon"); 2932 2933 thread = spawn_kernel_thread(&page_daemon, "page daemon", 2934 B_NORMAL_PRIORITY, NULL); 2935 send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE); 2936 2937 return B_OK; 2938 } 2939 2940 2941 status_t 2942 vm_mark_page_inuse(addr_t page) 2943 { 2944 return vm_mark_page_range_inuse(page, 1); 2945 } 2946 2947 2948 status_t 2949 vm_mark_page_range_inuse(addr_t startPage, addr_t length) 2950 { 2951 return mark_page_range_in_use(startPage, length, false); 2952 } 2953 2954 2955 /*! Unreserve pages previously reserved with vm_page_reserve_pages(). 2956 */ 2957 void 2958 vm_page_unreserve_pages(vm_page_reservation* reservation) 2959 { 2960 uint32 count = reservation->count; 2961 reservation->count = 0; 2962 2963 if (count == 0) 2964 return; 2965 2966 TA(UnreservePages(count)); 2967 2968 unreserve_pages(count); 2969 } 2970 2971 2972 /*! With this call, you can reserve a number of free pages in the system. 2973 They will only be handed out to someone who has actually reserved them. 2974 This call returns as soon as the number of requested pages has been 2975 reached. 2976 The caller must not hold any cache lock or the function might deadlock. 
2977 */ 2978 void 2979 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count, 2980 int priority) 2981 { 2982 reservation->count = count; 2983 2984 if (count == 0) 2985 return; 2986 2987 TA(ReservePages(count)); 2988 2989 reserve_pages(count, priority, false); 2990 } 2991 2992 2993 bool 2994 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count, 2995 int priority) 2996 { 2997 if (count == 0) { 2998 reservation->count = count; 2999 return true; 3000 } 3001 3002 uint32 remaining = reserve_pages(count, priority, true); 3003 if (remaining == 0) { 3004 TA(ReservePages(count)); 3005 reservation->count = count; 3006 return true; 3007 } 3008 3009 unreserve_pages(count - remaining); 3010 3011 return false; 3012 } 3013 3014 3015 vm_page * 3016 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags) 3017 { 3018 uint32 pageState = flags & VM_PAGE_ALLOC_STATE; 3019 ASSERT(pageState != PAGE_STATE_FREE); 3020 ASSERT(pageState != PAGE_STATE_CLEAR); 3021 3022 ASSERT(reservation->count > 0); 3023 reservation->count--; 3024 3025 VMPageQueue* queue; 3026 VMPageQueue* otherQueue; 3027 3028 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) { 3029 queue = &sClearPageQueue; 3030 otherQueue = &sFreePageQueue; 3031 } else { 3032 queue = &sFreePageQueue; 3033 otherQueue = &sClearPageQueue; 3034 } 3035 3036 TA(AllocatePage()); 3037 3038 ReadLocker locker(sFreePageQueuesLock); 3039 3040 vm_page* page = queue->RemoveHeadUnlocked(); 3041 if (page == NULL) { 3042 // if the primary queue was empty, grab the page from the 3043 // secondary queue 3044 page = otherQueue->RemoveHeadUnlocked(); 3045 3046 if (page == NULL) { 3047 // Unlikely, but possible: the page we have reserved has moved 3048 // between the queues after we checked the first queue. Grab the 3049 // write locker to make sure this doesn't happen again. 
			locker.Unlock();
			WriteLocker writeLocker(sFreePageQueuesLock);

			page = queue->RemoveHead();
			if (page == NULL)
				page = otherQueue->RemoveHead();

			if (page == NULL) {
				panic("Had reserved page, but there is none!");
				return NULL;
			}

			// downgrade to read lock
			locker.Lock();
		}
	}

	if (page->CacheRef() != NULL)
		panic("supposed to be free page %p has cache\n", page);

	DEBUG_PAGE_ACCESS_START(page);

	int oldPageState = page->State();
	page->SetState(pageState);
	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
	page->usage_count = 0;
	page->accessed = false;
	page->modified = false;

	locker.Unlock();

	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
		sPageQueues[pageState].AppendUnlocked(page);

	// clear the page, if we had to take it from the free queue and a clear
	// page was requested
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
		clear_page(page);

	return page;
}


static vm_page*
allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
	WriteLocker& freeClearQueueLocker)
{
	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
	ASSERT(pageState != PAGE_STATE_FREE);
	ASSERT(pageState != PAGE_STATE_CLEAR);

	TA(AllocatePageRun(length));

	// pull the pages out of the appropriate queues
	VMPageQueue::PageList freePages;
	VMPageQueue::PageList clearPages;
	for (page_num_t i = 0; i < length; i++) {
		vm_page& page = sPages[start + i];
		DEBUG_PAGE_ACCESS_START(&page);
		if (page.State() == PAGE_STATE_CLEAR) {
			sClearPageQueue.Remove(&page);
			clearPages.Add(&page);
		} else {
			sFreePageQueue.Remove(&page);
			freePages.Add(&page);
		}

		page.SetState(pageState);
		page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
		page.usage_count = 0;
		page.accessed = false;
		page.modified = false;
	}

	freeClearQueueLocker.Unlock();

	// clear pages, if requested
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
				vm_page* page = it.Next();) {
			clear_page(page);
		}
	}

	// add pages to target queue
	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
		freePages.MoveFrom(&clearPages);
		sPageQueues[pageState].AppendUnlocked(freePages, length);
	}

	// Note: We don't unreserve the pages since we pulled them out of the
	// free/clear queues without adjusting sUnreservedFreePages.

	return &sPages[start];
}


vm_page *
vm_page_allocate_page_run(uint32 flags, addr_t base, addr_t length,
	int priority)
{
	uint32 start = base >> PAGE_SHIFT;

	vm_page_reservation reservation;
	if (!vm_page_try_reserve_pages(&reservation, length, priority))
		return NULL;
		// TODO: add more tries, ie. free some inactive, ...
		// no free space

	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);

	for (;;) {
		bool foundRun = true;
		if (start + length > sNumPages) {
			freeClearQueueLocker.Unlock();
			vm_page_unreserve_pages(&reservation);
			return NULL;
		}

		uint32 i;
		for (i = 0; i < length; i++) {
			if (sPages[start + i].State() != PAGE_STATE_FREE
				&& sPages[start + i].State() != PAGE_STATE_CLEAR) {
				foundRun = false;
				i++;
				break;
			}
		}

		if (foundRun)
			return allocate_page_run(start, length, flags,
				freeClearQueueLocker);

		start += i;
	}
}


vm_page *
vm_page_allocate_page_run_no_base(uint32 flags, addr_t count, int priority)
{
	VMPageQueue* queue;
	VMPageQueue* otherQueue;
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
		queue = &sClearPageQueue;
		otherQueue = &sFreePageQueue;
	} else {
		queue = &sFreePageQueue;
		otherQueue = &sClearPageQueue;
	}

	vm_page_reservation reservation;
	if (!vm_page_try_reserve_pages(&reservation, count, priority))
		return NULL;
		// TODO: add more tries, ie. free some inactive, ...
		// no free space

	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);

	for (;;) {
		VMPageQueue::Iterator it = queue->GetIterator();
		while (vm_page *page = it.Next()) {
			vm_page *current = page;
			if (current >= &sPages[sNumPages - count])
				continue;

			bool foundRun = true;
			for (uint32 i = 0; i < count; i++, current++) {
				if (current->State() != PAGE_STATE_FREE
					&& current->State() != PAGE_STATE_CLEAR) {
					foundRun = false;
					break;
				}
			}

			if (foundRun) {
				return allocate_page_run(page - sPages, count, flags,
					freeClearQueueLocker);
			}
		}

		if (queue == otherQueue) {
			// both queues have been searched without finding a run -- give up
			freeClearQueueLocker.Unlock();
			vm_page_unreserve_pages(&reservation);
			return NULL;
		}

		queue = otherQueue;
	}
}


vm_page *
vm_page_at_index(int32 index)
{
	return &sPages[index];
}


vm_page *
vm_lookup_page(addr_t pageNumber)
{
	if (pageNumber < sPhysicalPageOffset)
		return NULL;

	pageNumber -= sPhysicalPageOffset;
	if (pageNumber >= sNumPages)
		return NULL;

	return &sPages[pageNumber];
}


bool
vm_page_is_dummy(struct vm_page *page)
{
	return page < sPages || page >= sPages + sNumPages;
}


/*!	Free the page that belonged to a certain cache.
	You can use vm_page_set_state() manually if you prefer, but only
	if the page's state is not PAGE_STATE_MODIFIED.
*/
void
vm_page_free(VMCache *cache, vm_page *page)
{
	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
		&& page->State() != PAGE_STATE_CLEAR);

	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
		atomic_add(&sModifiedTemporaryPages, -1);

	free_page(page, false);
}


void
vm_page_set_state(vm_page *page, int pageState)
{
	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
		&& page->State() != PAGE_STATE_CLEAR);

	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR)
		free_page(page, pageState == PAGE_STATE_CLEAR);
	else
		set_page_state(page, pageState);
}


/*!	Moves a page to either the tail or the head of its current queue,
	depending on \a tail.
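	Wired and unused pages are left where they are; free and clear pages must
	not be passed to this function.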
3301 The page must have a cache and the cache must be locked! 3302 */ 3303 void 3304 vm_page_requeue(struct vm_page *page, bool tail) 3305 { 3306 PAGE_ASSERT(page, page->Cache() != NULL); 3307 DEBUG_PAGE_ACCESS_CHECK(page); 3308 3309 VMPageQueue *queue = NULL; 3310 3311 switch (page->State()) { 3312 case PAGE_STATE_ACTIVE: 3313 queue = &sActivePageQueue; 3314 break; 3315 case PAGE_STATE_INACTIVE: 3316 queue = &sInactivePageQueue; 3317 break; 3318 case PAGE_STATE_MODIFIED: 3319 queue = &sModifiedPageQueue; 3320 break; 3321 case PAGE_STATE_CACHED: 3322 queue = &sCachedPageQueue; 3323 break; 3324 case PAGE_STATE_FREE: 3325 case PAGE_STATE_CLEAR: 3326 panic("vm_page_requeue() called for free/clear page %p", page); 3327 return; 3328 case PAGE_STATE_WIRED: 3329 case PAGE_STATE_UNUSED: 3330 return; 3331 default: 3332 panic("vm_page_touch: vm_page %p in invalid state %d\n", 3333 page, page->State()); 3334 break; 3335 } 3336 3337 queue->RequeueUnlocked(page, tail); 3338 } 3339 3340 3341 size_t 3342 vm_page_num_pages(void) 3343 { 3344 return sNumPages; 3345 } 3346 3347 3348 /*! There is a subtle distinction between the page counts returned by 3349 this function and vm_page_num_free_pages(): 3350 The latter returns the number of pages that are completely uncommitted, 3351 whereas this one returns the number of pages that are available for 3352 use by being reclaimed as well (IOW it factors in things like cache pages 3353 as available). 3354 */ 3355 size_t 3356 vm_page_num_available_pages(void) 3357 { 3358 return vm_available_memory() / B_PAGE_SIZE; 3359 } 3360 3361 3362 size_t 3363 vm_page_num_free_pages(void) 3364 { 3365 int32 count = sUnreservedFreePages + sCachedPageQueue.Count(); 3366 return count > 0 ? count : 0; 3367 } 3368 3369 3370 size_t 3371 vm_page_num_unused_pages(void) 3372 { 3373 int32 count = sUnreservedFreePages; 3374 return count > 0 ? count : 0; 3375 } 3376 3377 3378 void 3379 vm_page_get_stats(system_info *info) 3380 { 3381 // Get free pages count -- not really exact, since we don't know how many 3382 // of the reserved pages have already been allocated, but good citizens 3383 // unreserve chunk-wise as they are allocating the pages, if they have 3384 // reserved a larger quantity. 3385 int32 free = sUnreservedFreePages; 3386 if (free < 0) 3387 free = 0; 3388 3389 // The pages used for the block cache buffers. Those should not be counted 3390 // as used but as cached pages. 3391 // TODO: We should subtract the blocks that are in use ATM, since those 3392 // can't really be freed in a low memory situation. 3393 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE; 3394 3395 info->max_pages = sNumPages; 3396 info->used_pages = gMappedPagesCount - blockCachePages; 3397 info->cached_pages = sNumPages >= (uint32)free + info->used_pages 3398 ? sNumPages - free - info->used_pages : 0; 3399 info->page_faults = vm_num_page_faults(); 3400 3401 // TODO: We don't consider pages used for page directories/tables yet. 3402 } 3403
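// The sketch below is illustrative only and not part of the original file: it
// shows how the reservation API above is typically meant to be used --
// reserve first, allocate against the reservation, free the pages when done,
// and unreserve whatever is left. The VM_PRIORITY_SYSTEM constant and the
// exact state/flag combination are assumptions based on the priority table
// and flag handling above; adjust them to the actual call site.
#if 0
static status_t
example_allocate_two_clear_pages()
{
	// Reserve two pages at system priority. This blocks until the pages are
	// available (see vm_page_reserve_pages()).
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 2, VM_PRIORITY_SYSTEM);

	// Allocate against the reservation. Each call decrements
	// reservation.count; the requested state must not be FREE or CLEAR.
	vm_page* first = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	vm_page* second = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);

	// ... map and use the pages ...

	// Return the pages to the free pool. Since neither page was inserted
	// into a cache, vm_page_set_state() is sufficient here.
	vm_page_set_state(first, PAGE_STATE_FREE);
	vm_page_set_state(second, PAGE_STATE_FREE);

	// The reservation count is already 0 after the two allocations, so this
	// is a no-op, but calling it unconditionally keeps the pairing obvious.
	vm_page_unreserve_pages(&reservation);

	return B_OK;
}
#endif	// illustrative sketch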