1 /* 2 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 * 5 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 6 * Distributed under the terms of the NewOS License. 7 */ 8 9 #include <signal.h> 10 #include <string.h> 11 #include <stdlib.h> 12 13 #include <KernelExport.h> 14 #include <OS.h> 15 16 #include <AutoDeleter.h> 17 18 #include <arch/cpu.h> 19 #include <arch/vm_translation_map.h> 20 #include <block_cache.h> 21 #include <boot/kernel_args.h> 22 #include <condition_variable.h> 23 #include <kernel.h> 24 #include <low_resource_manager.h> 25 #include <thread.h> 26 #include <tracing.h> 27 #include <util/AutoLock.h> 28 #include <vfs.h> 29 #include <vm.h> 30 #include <vm_address_space.h> 31 #include <vm_priv.h> 32 #include <vm_page.h> 33 #include <vm_cache.h> 34 35 #include "VMAnonymousCache.h" 36 #include "IORequest.h" 37 #include "PageCacheLocker.h" 38 39 40 //#define TRACE_VM_PAGE 41 #ifdef TRACE_VM_PAGE 42 # define TRACE(x) dprintf x 43 #else 44 # define TRACE(x) ; 45 #endif 46 47 #define SCRUB_SIZE 16 48 // this many pages will be cleared at once in the page scrubber thread 49 50 #define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY 51 // maximum I/O priority of the page writer 52 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000 53 // the maximum I/O priority shall be reached when this many pages need to 54 // be written 55 56 57 typedef struct page_queue { 58 vm_page *head; 59 vm_page *tail; 60 uint32 count; 61 } page_queue; 62 63 int32 gMappedPagesCount; 64 65 static page_queue sFreePageQueue; 66 static page_queue sClearPageQueue; 67 static page_queue sModifiedPageQueue; 68 static page_queue sInactivePageQueue; 69 static page_queue sActivePageQueue; 70 71 static vm_page *sPages; 72 static addr_t sPhysicalPageOffset; 73 static size_t sNumPages; 74 static size_t sReservedPages; 75 static vint32 sPageDeficit; 76 static size_t sModifiedTemporaryPages; 77 78 static ConditionVariable sFreePageCondition; 79 static spinlock sPageLock; 80 81 static sem_id sWriterWaitSem; 82 83 84 #if PAGE_ALLOCATION_TRACING 85 86 namespace PageAllocationTracing { 87 88 class ReservePages : public AbstractTraceEntry { 89 public: 90 ReservePages(uint32 count) 91 : 92 fCount(count) 93 { 94 Initialized(); 95 } 96 97 virtual void AddDump(TraceOutput& out) 98 { 99 out.Print("page reserve: %lu", fCount); 100 } 101 102 private: 103 uint32 fCount; 104 }; 105 106 107 class UnreservePages : public AbstractTraceEntry { 108 public: 109 UnreservePages(uint32 count) 110 : 111 fCount(count) 112 { 113 Initialized(); 114 } 115 116 virtual void AddDump(TraceOutput& out) 117 { 118 out.Print("page unreserve: %lu", fCount); 119 } 120 121 private: 122 uint32 fCount; 123 }; 124 125 126 class AllocatePage : public AbstractTraceEntry { 127 public: 128 AllocatePage(bool reserved) 129 : 130 fReserved(reserved) 131 { 132 Initialized(); 133 } 134 135 virtual void AddDump(TraceOutput& out) 136 { 137 out.Print("page alloc"); 138 if (fReserved) 139 out.Print(" reserved"); 140 } 141 142 private: 143 bool fReserved; 144 }; 145 146 147 class AllocatePageRun : public AbstractTraceEntry { 148 public: 149 AllocatePageRun(uint32 length) 150 : 151 fLength(length) 152 { 153 Initialized(); 154 } 155 156 virtual void AddDump(TraceOutput& out) 157 { 158 out.Print("page alloc run: length: %ld", fLength); 159 } 160 161 private: 162 uint32 fLength; 163 }; 164 165 166 class FreePage : public AbstractTraceEntry { 167 public: 168 FreePage() 169 { 170 Initialized(); 171 } 
172 173 virtual void AddDump(TraceOutput& out) 174 { 175 out.Print("page free"); 176 } 177 }; 178 179 180 class ScrubbingPages : public AbstractTraceEntry { 181 public: 182 ScrubbingPages(uint32 count) 183 : 184 fCount(count) 185 { 186 Initialized(); 187 } 188 189 virtual void AddDump(TraceOutput& out) 190 { 191 out.Print("page scrubbing: %lu", fCount); 192 } 193 194 private: 195 uint32 fCount; 196 }; 197 198 199 class ScrubbedPages : public AbstractTraceEntry { 200 public: 201 ScrubbedPages(uint32 count) 202 : 203 fCount(count) 204 { 205 Initialized(); 206 } 207 208 virtual void AddDump(TraceOutput& out) 209 { 210 out.Print("page scrubbed: %lu", fCount); 211 } 212 213 private: 214 uint32 fCount; 215 }; 216 217 218 class StolenPage : public AbstractTraceEntry { 219 public: 220 StolenPage() 221 { 222 Initialized(); 223 } 224 225 virtual void AddDump(TraceOutput& out) 226 { 227 out.Print("page stolen"); 228 } 229 }; 230 231 } // namespace PageAllocationTracing 232 233 # define T(x) new(std::nothrow) PageAllocationTracing::x 234 235 #else 236 # define T(x) 237 #endif // PAGE_ALLOCATION_TRACING 238 239 240 #if PAGE_WRITER_TRACING 241 242 namespace PageWriterTracing { 243 244 class WritePage : public AbstractTraceEntry { 245 public: 246 WritePage(vm_page* page) 247 : 248 fCache(page->cache), 249 fPage(page) 250 { 251 Initialized(); 252 } 253 254 virtual void AddDump(TraceOutput& out) 255 { 256 out.Print("page write: %p, cache: %p", fPage, fCache); 257 } 258 259 private: 260 VMCache* fCache; 261 vm_page* fPage; 262 }; 263 264 } // namespace PageWriterTracing 265 266 # define TPW(x) new(std::nothrow) PageWriterTracing::x 267 268 #else 269 # define TPW(x) 270 #endif // PAGE_WRITER_TRACING 271 272 273 /*! Dequeues a page from the head of the given queue */ 274 static vm_page * 275 dequeue_page(page_queue *queue) 276 { 277 vm_page *page; 278 279 page = queue->head; 280 if (page != NULL) { 281 if (queue->tail == page) 282 queue->tail = NULL; 283 if (page->queue_next != NULL) 284 page->queue_next->queue_prev = NULL; 285 286 queue->head = page->queue_next; 287 if (page->type != PAGE_TYPE_DUMMY) 288 queue->count--; 289 290 #if DEBUG_PAGE_QUEUE 291 if (page->queue != queue) { 292 panic("dequeue_page(queue: %p): page %p thinks it is in queue " 293 "%p", queue, page, page->queue); 294 } 295 296 page->queue = NULL; 297 #endif // DEBUG_PAGE_QUEUE 298 } 299 300 return page; 301 } 302 303 304 /*! Enqueues a page to the tail of the given queue */ 305 static void 306 enqueue_page(page_queue *queue, vm_page *page) 307 { 308 #if DEBUG_PAGE_QUEUE 309 if (page->queue != NULL) { 310 panic("enqueue_page(queue: %p, page: %p): page thinks it is " 311 "already in queue %p", queue, page, page->queue); 312 } 313 #endif // DEBUG_PAGE_QUEUE 314 315 if (queue->tail != NULL) 316 queue->tail->queue_next = page; 317 page->queue_prev = queue->tail; 318 queue->tail = page; 319 page->queue_next = NULL; 320 if (queue->head == NULL) 321 queue->head = page; 322 if (page->type != PAGE_TYPE_DUMMY) 323 queue->count++; 324 325 #if DEBUG_PAGE_QUEUE 326 page->queue = queue; 327 #endif 328 } 329 330 331 /*! 
Enqueues a page to the head of the given queue */ 332 static void 333 enqueue_page_to_head(page_queue *queue, vm_page *page) 334 { 335 #if DEBUG_PAGE_QUEUE 336 if (page->queue != NULL) { 337 panic("enqueue_page_to_head(queue: %p, page: %p): page thinks it is " 338 "already in queue %p", queue, page, page->queue); 339 } 340 #endif // DEBUG_PAGE_QUEUE 341 342 if (queue->head != NULL) 343 queue->head->queue_prev = page; 344 page->queue_next = queue->head; 345 queue->head = page; 346 page->queue_prev = NULL; 347 if (queue->tail == NULL) 348 queue->tail = page; 349 if (page->type != PAGE_TYPE_DUMMY) 350 queue->count++; 351 352 #if DEBUG_PAGE_QUEUE 353 page->queue = queue; 354 #endif 355 } 356 357 358 static void 359 remove_page_from_queue(page_queue *queue, vm_page *page) 360 { 361 #if DEBUG_PAGE_QUEUE 362 if (page->queue != queue) { 363 panic("remove_page_from_queue(queue: %p, page: %p): page thinks it " 364 "is in queue %p", queue, page, page->queue); 365 } 366 #endif // DEBUG_PAGE_QUEUE 367 368 if (page->queue_next != NULL) 369 page->queue_next->queue_prev = page->queue_prev; 370 else 371 queue->tail = page->queue_prev; 372 373 if (page->queue_prev != NULL) 374 page->queue_prev->queue_next = page->queue_next; 375 else 376 queue->head = page->queue_next; 377 378 if (page->type != PAGE_TYPE_DUMMY) 379 queue->count--; 380 381 #if DEBUG_PAGE_QUEUE 382 page->queue = NULL; 383 #endif 384 } 385 386 387 /*! Moves a page to the tail of the given queue, but only does so if 388 the page is currently in another queue. 389 */ 390 static void 391 move_page_to_queue(page_queue *fromQueue, page_queue *toQueue, vm_page *page) 392 { 393 if (fromQueue != toQueue) { 394 remove_page_from_queue(fromQueue, page); 395 enqueue_page(toQueue, page); 396 } 397 } 398 399 400 /*! Inserts \a page after the \a before page in the \a queue. 
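	If \a before is \c NULL, the page is simply appended to the tail of
	the queue.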
*/ 401 static void 402 insert_page_after(page_queue *queue, vm_page *before, vm_page *page) 403 { 404 #if DEBUG_PAGE_QUEUE 405 if (page->queue != NULL) { 406 panic("enqueue_page(queue: %p, page: %p): page thinks it is " 407 "already in queue %p", queue, page, page->queue); 408 } 409 #endif // DEBUG_PAGE_QUEUE 410 411 if (before == NULL) { 412 enqueue_page(queue, page); 413 return; 414 } 415 416 page->queue_next = before->queue_next; 417 if (page->queue_next != NULL) 418 page->queue_next->queue_prev = page; 419 page->queue_prev = before; 420 before->queue_next = page; 421 422 if (queue->tail == before) 423 queue->tail = page; 424 425 if (page->type != PAGE_TYPE_DUMMY) 426 queue->count++; 427 428 #if DEBUG_PAGE_QUEUE 429 page->queue = queue; 430 #endif 431 } 432 433 434 static int 435 find_page(int argc, char **argv) 436 { 437 struct vm_page *page; 438 addr_t address; 439 int32 index = 1; 440 int i; 441 442 struct { 443 const char* name; 444 page_queue* queue; 445 } pageQueueInfos[] = { 446 { "free", &sFreePageQueue }, 447 { "clear", &sClearPageQueue }, 448 { "modified", &sModifiedPageQueue }, 449 { "active", &sActivePageQueue }, 450 { NULL, NULL } 451 }; 452 453 if (argc < 2 454 || strlen(argv[index]) <= 2 455 || argv[index][0] != '0' 456 || argv[index][1] != 'x') { 457 kprintf("usage: find_page <address>\n"); 458 return 0; 459 } 460 461 address = strtoul(argv[index], NULL, 0); 462 page = (vm_page*)address; 463 464 for (i = 0; pageQueueInfos[i].name; i++) { 465 vm_page* p = pageQueueInfos[i].queue->head; 466 while (p) { 467 if (p == page) { 468 kprintf("found page %p in queue %p (%s)\n", page, 469 pageQueueInfos[i].queue, pageQueueInfos[i].name); 470 return 0; 471 } 472 p = p->queue_next; 473 } 474 } 475 476 kprintf("page %p isn't in any queue\n", page); 477 478 return 0; 479 } 480 481 482 const char * 483 page_state_to_string(int state) 484 { 485 switch(state) { 486 case PAGE_STATE_ACTIVE: 487 return "active"; 488 case PAGE_STATE_INACTIVE: 489 return "inactive"; 490 case PAGE_STATE_BUSY: 491 return "busy"; 492 case PAGE_STATE_MODIFIED: 493 return "modified"; 494 case PAGE_STATE_FREE: 495 return "free"; 496 case PAGE_STATE_CLEAR: 497 return "clear"; 498 case PAGE_STATE_WIRED: 499 return "wired"; 500 case PAGE_STATE_UNUSED: 501 return "unused"; 502 default: 503 return "unknown"; 504 } 505 } 506 507 508 static int 509 dump_page(int argc, char **argv) 510 { 511 struct vm_page *page; 512 addr_t address; 513 bool physical = false; 514 int32 index = 1; 515 516 if (argc > 2) { 517 if (!strcmp(argv[1], "-p")) { 518 physical = true; 519 index++; 520 } else if (!strcmp(argv[1], "-v")) 521 index++; 522 } 523 524 if (argc < 2 525 || strlen(argv[index]) <= 2 526 || argv[index][0] != '0' 527 || argv[index][1] != 'x') { 528 kprintf("usage: page [-p|-v] <address>\n" 529 " -v looks up a virtual address for the page, -p a physical address.\n" 530 " Default is to look for the page structure address directly.\n"); 531 return 0; 532 } 533 534 address = strtoul(argv[index], NULL, 0); 535 536 if (index == 2) { 537 if (!physical) { 538 vm_address_space *addressSpace = vm_kernel_address_space(); 539 uint32 flags; 540 541 if (thread_get_current_thread()->team->address_space != NULL) 542 addressSpace = thread_get_current_thread()->team->address_space; 543 544 addressSpace->translation_map.ops->query_interrupt( 545 &addressSpace->translation_map, address, &address, &flags); 546 } 547 page = vm_lookup_page(address / B_PAGE_SIZE); 548 } else 549 page = (struct vm_page *)address; 550 551 kprintf("PAGE: %p\n", page); 552 
kprintf("queue_next,prev: %p, %p\n", page->queue_next, page->queue_prev); 553 kprintf("physical_number: %lx\n", page->physical_page_number); 554 kprintf("cache: %p\n", page->cache); 555 kprintf("cache_offset: %ld\n", page->cache_offset); 556 kprintf("cache_next: %p\n", page->cache_next); 557 kprintf("type: %d\n", page->type); 558 kprintf("state: %s\n", page_state_to_string(page->state)); 559 kprintf("wired_count: %d\n", page->wired_count); 560 kprintf("usage_count: %d\n", page->usage_count); 561 kprintf("busy_writing: %d\n", page->busy_writing); 562 #if DEBUG_PAGE_QUEUE 563 kprintf("queue: %p\n", page->queue); 564 #endif 565 #if DEBUG_PAGE_CACHE_TRANSITIONS 566 kprintf("debug_flags: 0x%lx\n", page->debug_flags); 567 kprintf("collided page: %p\n", page->collided_page); 568 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 569 kprintf("area mappings:\n"); 570 571 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 572 vm_page_mapping *mapping; 573 while ((mapping = iterator.Next()) != NULL) { 574 kprintf(" %p (%#lx)\n", mapping->area, mapping->area->id); 575 mapping = mapping->page_link.next; 576 } 577 578 return 0; 579 } 580 581 582 static int 583 dump_page_queue(int argc, char **argv) 584 { 585 struct page_queue *queue; 586 587 if (argc < 2) { 588 kprintf("usage: page_queue <address/name> [list]\n"); 589 return 0; 590 } 591 592 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x') 593 queue = (struct page_queue *)strtoul(argv[1], NULL, 16); 594 if (!strcmp(argv[1], "free")) 595 queue = &sFreePageQueue; 596 else if (!strcmp(argv[1], "clear")) 597 queue = &sClearPageQueue; 598 else if (!strcmp(argv[1], "modified")) 599 queue = &sModifiedPageQueue; 600 else if (!strcmp(argv[1], "active")) 601 queue = &sActivePageQueue; 602 else if (!strcmp(argv[1], "inactive")) 603 queue = &sInactivePageQueue; 604 else { 605 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]); 606 return 0; 607 } 608 609 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %ld\n", 610 queue, queue->head, queue->tail, queue->count); 611 612 if (argc == 3) { 613 struct vm_page *page = queue->head; 614 const char *type = "none"; 615 int i; 616 617 if (page->cache != NULL) { 618 switch (page->cache->type) { 619 case CACHE_TYPE_RAM: 620 type = "RAM"; 621 break; 622 case CACHE_TYPE_DEVICE: 623 type = "device"; 624 break; 625 case CACHE_TYPE_VNODE: 626 type = "vnode"; 627 break; 628 case CACHE_TYPE_NULL: 629 type = "null"; 630 break; 631 default: 632 type = "???"; 633 break; 634 } 635 } 636 637 kprintf("page cache type state wired usage\n"); 638 for (i = 0; page; i++, page = page->queue_next) { 639 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->cache, 640 type, page_state_to_string(page->state), 641 page->wired_count, page->usage_count); 642 } 643 } 644 return 0; 645 } 646 647 648 static int 649 dump_page_stats(int argc, char **argv) 650 { 651 page_num_t swappableModified = 0; 652 page_num_t swappableModifiedInactive = 0; 653 uint32 counter[8]; 654 addr_t i; 655 656 memset(counter, 0, sizeof(counter)); 657 658 for (i = 0; i < sNumPages; i++) { 659 if (sPages[i].state > 7) 660 panic("page %li at %p has invalid state!\n", i, &sPages[i]); 661 662 counter[sPages[i].state]++; 663 664 if (sPages[i].state == PAGE_STATE_MODIFIED && sPages[i].cache != NULL 665 && sPages[i].cache->temporary && sPages[i].wired_count == 0) { 666 swappableModified++; 667 if (sPages[i].usage_count < 0) 668 swappableModifiedInactive++; 669 } 670 } 671 672 kprintf("page stats:\n"); 673 kprintf("total: %lu\n", sNumPages); 
674 kprintf("active: %lu\ninactive: %lu\nbusy: %lu\nunused: %lu\n", 675 counter[PAGE_STATE_ACTIVE], counter[PAGE_STATE_INACTIVE], 676 counter[PAGE_STATE_BUSY], counter[PAGE_STATE_UNUSED]); 677 kprintf("wired: %lu\nmodified: %lu\nfree: %lu\nclear: %lu\n", 678 counter[PAGE_STATE_WIRED], counter[PAGE_STATE_MODIFIED], 679 counter[PAGE_STATE_FREE], counter[PAGE_STATE_CLEAR]); 680 kprintf("reserved pages: %lu\n", sReservedPages); 681 kprintf("page deficit: %lu\n", sPageDeficit); 682 kprintf("mapped pages: %lu\n", gMappedPagesCount); 683 684 kprintf("\nfree queue: %p, count = %ld\n", &sFreePageQueue, 685 sFreePageQueue.count); 686 kprintf("clear queue: %p, count = %ld\n", &sClearPageQueue, 687 sClearPageQueue.count); 688 kprintf("modified queue: %p, count = %ld (%ld temporary, %lu swappable, " 689 "inactive: %lu)\n", &sModifiedPageQueue, sModifiedPageQueue.count, 690 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive); 691 kprintf("active queue: %p, count = %ld\n", &sActivePageQueue, 692 sActivePageQueue.count); 693 kprintf("inactive queue: %p, count = %ld\n", &sInactivePageQueue, 694 sInactivePageQueue.count); 695 return 0; 696 } 697 698 699 static inline size_t 700 free_page_queue_count(void) 701 { 702 return sFreePageQueue.count + sClearPageQueue.count; 703 } 704 705 706 static status_t 707 set_page_state_nolock(vm_page *page, int pageState) 708 { 709 if (pageState == page->state) 710 return B_OK; 711 712 page_queue *fromQueue = NULL; 713 page_queue *toQueue = NULL; 714 715 switch (page->state) { 716 case PAGE_STATE_BUSY: 717 case PAGE_STATE_ACTIVE: 718 case PAGE_STATE_WIRED: 719 case PAGE_STATE_UNUSED: 720 fromQueue = &sActivePageQueue; 721 break; 722 case PAGE_STATE_INACTIVE: 723 fromQueue = &sInactivePageQueue; 724 break; 725 case PAGE_STATE_MODIFIED: 726 fromQueue = &sModifiedPageQueue; 727 break; 728 case PAGE_STATE_FREE: 729 fromQueue = &sFreePageQueue; 730 break; 731 case PAGE_STATE_CLEAR: 732 fromQueue = &sClearPageQueue; 733 break; 734 default: 735 panic("vm_page_set_state: vm_page %p in invalid state %d\n", 736 page, page->state); 737 break; 738 } 739 740 if (page->state == PAGE_STATE_CLEAR || page->state == PAGE_STATE_FREE) { 741 if (page->cache != NULL) 742 panic("free page %p has cache", page); 743 } 744 745 switch (pageState) { 746 case PAGE_STATE_BUSY: 747 case PAGE_STATE_ACTIVE: 748 case PAGE_STATE_WIRED: 749 case PAGE_STATE_UNUSED: 750 toQueue = &sActivePageQueue; 751 break; 752 case PAGE_STATE_INACTIVE: 753 toQueue = &sInactivePageQueue; 754 break; 755 case PAGE_STATE_MODIFIED: 756 toQueue = &sModifiedPageQueue; 757 break; 758 case PAGE_STATE_FREE: 759 toQueue = &sFreePageQueue; 760 break; 761 case PAGE_STATE_CLEAR: 762 toQueue = &sClearPageQueue; 763 break; 764 default: 765 panic("vm_page_set_state: invalid target state %d\n", pageState); 766 } 767 768 if (pageState == PAGE_STATE_CLEAR || pageState == PAGE_STATE_FREE 769 || pageState == PAGE_STATE_INACTIVE) { 770 if (sPageDeficit > 0) 771 sFreePageCondition.NotifyOne(); 772 773 if (pageState != PAGE_STATE_INACTIVE && page->cache != NULL) 774 panic("to be freed page %p has cache", page); 775 } 776 if (page->cache != NULL && page->cache->temporary) { 777 if (pageState == PAGE_STATE_MODIFIED) 778 sModifiedTemporaryPages++; 779 else if (page->state == PAGE_STATE_MODIFIED) 780 sModifiedTemporaryPages--; 781 } 782 783 #ifdef PAGE_ALLOCATION_TRACING 784 if ((pageState == PAGE_STATE_CLEAR || pageState == PAGE_STATE_FREE) 785 && page->state != PAGE_STATE_CLEAR && page->state != PAGE_STATE_FREE) { 786 T(FreePage()); 
787 } 788 #endif // PAGE_ALLOCATION_TRACING 789 790 page->state = pageState; 791 move_page_to_queue(fromQueue, toQueue, page); 792 793 return B_OK; 794 } 795 796 797 /*! Moves a modified page into either the active or inactive page queue 798 depending on its usage count and wiring. 799 */ 800 static void 801 move_page_to_active_or_inactive_queue(vm_page *page, bool dequeued) 802 { 803 // Note, this logic must be in sync with what the page daemon does 804 int32 state; 805 if (!page->mappings.IsEmpty() || page->usage_count >= 0 806 || page->wired_count) 807 state = PAGE_STATE_ACTIVE; 808 else 809 state = PAGE_STATE_INACTIVE; 810 811 if (dequeued) { 812 page->state = state; 813 enqueue_page(state == PAGE_STATE_ACTIVE 814 ? &sActivePageQueue : &sInactivePageQueue, page); 815 if (page->cache->temporary) 816 sModifiedTemporaryPages--; 817 } else 818 set_page_state_nolock(page, state); 819 } 820 821 822 static void 823 clear_page(struct vm_page *page) 824 { 825 vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0, 826 B_PAGE_SIZE); 827 } 828 829 830 /*! 831 This is a background thread that wakes up every now and then (every 100ms) 832 and moves some pages from the free queue over to the clear queue. 833 Given enough time, it will clear out all pages from the free queue - we 834 could probably slow it down after having reached a certain threshold. 835 */ 836 static int32 837 page_scrubber(void *unused) 838 { 839 (void)(unused); 840 841 TRACE(("page_scrubber starting...\n")); 842 843 for (;;) { 844 snooze(100000); // 100ms 845 846 if (sFreePageQueue.count > 0) { 847 vm_page *page[SCRUB_SIZE]; 848 int32 i, scrubCount; 849 850 // get some pages from the free queue 851 852 InterruptsSpinLocker locker(sPageLock); 853 854 // Since we temporarily remove pages from the free pages reserve, 855 // we must make sure we don't cause a violation of the page 856 // reservation warranty. The following is usually stricter than 857 // necessary, because we don't have information on how many of the 858 // reserved pages have already been allocated. 
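			// For example, with SCRUB_SIZE == 16, freeCount == 20, and
			// sReservedPages == 16, at most 4 pages may be scrubbed in this
			// round; if freeCount dropped below the reserve, none would be.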
859 scrubCount = SCRUB_SIZE; 860 uint32 freeCount = free_page_queue_count(); 861 if (freeCount < sReservedPages) 862 scrubCount = 0; 863 else if ((uint32)scrubCount > freeCount - sReservedPages) 864 scrubCount = freeCount - sReservedPages; 865 866 for (i = 0; i < scrubCount; i++) { 867 page[i] = dequeue_page(&sFreePageQueue); 868 if (page[i] == NULL) 869 break; 870 page[i]->state = PAGE_STATE_BUSY; 871 } 872 873 scrubCount = i; 874 875 if (scrubCount > 0) { 876 T(ScrubbingPages(scrubCount)); 877 } 878 879 locker.Unlock(); 880 881 // clear them 882 883 for (i = 0; i < scrubCount; i++) { 884 clear_page(page[i]); 885 } 886 887 locker.Lock(); 888 889 // and put them into the clear queue 890 891 for (i = 0; i < scrubCount; i++) { 892 page[i]->state = PAGE_STATE_CLEAR; 893 enqueue_page(&sClearPageQueue, page[i]); 894 } 895 896 if (scrubCount > 0) { 897 T(ScrubbedPages(scrubCount)); 898 } 899 } 900 } 901 902 return 0; 903 } 904 905 906 static status_t 907 write_page(vm_page *page, uint32 flags, AsyncIOCallback* callback) 908 { 909 TRACE(("write_page(page = %p): offset = %Ld\n", page, (off_t)page->cache_offset << PAGE_SHIFT)); 910 911 off_t offset = (off_t)page->cache_offset << PAGE_SHIFT; 912 913 iovec vecs[1]; 914 vecs->iov_base = (void*)(addr_t)(page->physical_page_number * B_PAGE_SIZE); 915 vecs->iov_len = B_PAGE_SIZE; 916 917 if (callback != NULL) { 918 // asynchronous I/O 919 return page->cache->WriteAsync(offset, vecs, 1, B_PAGE_SIZE, 920 flags | B_PHYSICAL_IO_REQUEST, callback); 921 } 922 923 // synchronous I/0 924 size_t length = B_PAGE_SIZE; 925 status_t status = page->cache->Write(offset, vecs, 1, 926 flags | B_PHYSICAL_IO_REQUEST, &length); 927 928 if (status == B_OK && length == 0) 929 status = B_ERROR; 930 931 return status; 932 } 933 934 935 static inline bool 936 is_marker_page(struct vm_page *page) 937 { 938 return page->type == PAGE_TYPE_DUMMY; 939 } 940 941 942 static void 943 remove_page_marker(struct vm_page &marker) 944 { 945 if (marker.state == PAGE_STATE_UNUSED) 946 return; 947 948 page_queue *queue; 949 950 switch (marker.state) { 951 case PAGE_STATE_ACTIVE: 952 queue = &sActivePageQueue; 953 break; 954 case PAGE_STATE_INACTIVE: 955 queue = &sInactivePageQueue; 956 break; 957 case PAGE_STATE_MODIFIED: 958 queue = &sModifiedPageQueue; 959 break; 960 961 default: 962 return; 963 } 964 965 InterruptsSpinLocker locker(sPageLock); 966 remove_page_from_queue(queue, &marker); 967 968 marker.state = PAGE_STATE_UNUSED; 969 } 970 971 972 static vm_page * 973 next_modified_page(struct vm_page &marker) 974 { 975 InterruptsSpinLocker locker(sPageLock); 976 vm_page *page; 977 978 if (marker.state == PAGE_STATE_MODIFIED) { 979 page = marker.queue_next; 980 remove_page_from_queue(&sModifiedPageQueue, &marker); 981 marker.state = PAGE_STATE_UNUSED; 982 } else 983 page = sModifiedPageQueue.head; 984 985 for (; page != NULL; page = page->queue_next) { 986 if (!is_marker_page(page) && page->state != PAGE_STATE_BUSY) { 987 // insert marker 988 marker.state = PAGE_STATE_MODIFIED; 989 insert_page_after(&sModifiedPageQueue, page, &marker); 990 return page; 991 } 992 } 993 994 return NULL; 995 } 996 997 998 class PageWriterCallback; 999 1000 1001 class PageWriterRun { 1002 public: 1003 status_t Init(uint32 maxPages); 1004 1005 void PrepareNextRun(); 1006 void AddPage(vm_page* page); 1007 void Go(); 1008 1009 void PageWritten(PageWriterCallback* callback, status_t status, 1010 bool partialTransfer, size_t bytesTransferred); 1011 1012 private: 1013 uint32 fMaxPages; 1014 uint32 fPageCount; 1015 
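	// Number of pages whose I/O has not completed yet; the last
	// IOFinished() notification wakes up Go() via fAllFinishedCondition.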
vint32 fPendingPages; 1016 PageWriterCallback* fCallbacks; 1017 ConditionVariable fAllFinishedCondition; 1018 }; 1019 1020 1021 class PageWriterCallback : public AsyncIOCallback { 1022 public: 1023 void SetTo(PageWriterRun* run, vm_page* page) 1024 { 1025 fRun = run; 1026 fPage = page; 1027 fCache = page->cache; 1028 fStatus = B_OK; 1029 fBusyCondition.Publish(page, "page"); 1030 } 1031 1032 vm_page* Page() const { return fPage; } 1033 VMCache* Cache() const { return fCache; } 1034 status_t Status() const { return fStatus; } 1035 1036 ConditionVariable& BusyCondition() { return fBusyCondition; } 1037 1038 virtual void IOFinished(status_t status, bool partialTransfer, 1039 size_t bytesTransferred) 1040 { 1041 fStatus = status == B_OK && bytesTransferred == 0 ? B_ERROR : status; 1042 fRun->PageWritten(this, status, partialTransfer, bytesTransferred); 1043 } 1044 1045 private: 1046 PageWriterRun* fRun; 1047 vm_page* fPage; 1048 VMCache* fCache; 1049 status_t fStatus; 1050 ConditionVariable fBusyCondition; 1051 }; 1052 1053 1054 status_t 1055 PageWriterRun::Init(uint32 maxPages) 1056 { 1057 fMaxPages = maxPages; 1058 fPageCount = 0; 1059 fPendingPages = 0; 1060 1061 fCallbacks = new(std::nothrow) PageWriterCallback[maxPages]; 1062 if (fCallbacks == NULL) 1063 return B_NO_MEMORY; 1064 1065 return B_OK; 1066 } 1067 1068 1069 void 1070 PageWriterRun::PrepareNextRun() 1071 { 1072 fPageCount = 0; 1073 fPendingPages = 0; 1074 } 1075 1076 1077 void 1078 PageWriterRun::AddPage(vm_page* page) 1079 { 1080 page->state = PAGE_STATE_BUSY; 1081 page->busy_writing = true; 1082 1083 fCallbacks[fPageCount].SetTo(this, page); 1084 fPageCount++; 1085 } 1086 1087 1088 void 1089 PageWriterRun::Go() 1090 { 1091 fPendingPages = fPageCount; 1092 1093 fAllFinishedCondition.Init(this, "page writer wait for I/O"); 1094 ConditionVariableEntry waitEntry; 1095 fAllFinishedCondition.Add(&waitEntry); 1096 1097 // schedule writes 1098 for (uint32 i = 0; i < fPageCount; i++) { 1099 PageWriterCallback& callback = fCallbacks[i]; 1100 write_page(callback.Page(), B_VIP_IO_REQUEST, &callback); 1101 } 1102 1103 // wait until all pages have been written 1104 waitEntry.Wait(); 1105 1106 // mark pages depending on whether they could be written or not 1107 1108 for (uint32 i = 0; i < fPageCount; i++) { 1109 PageWriterCallback& callback = fCallbacks[i]; 1110 vm_page* page = callback.Page(); 1111 vm_cache* cache = callback.Cache(); 1112 cache->Lock(); 1113 1114 if (callback.Status() == B_OK) { 1115 // put it into the active queue 1116 InterruptsSpinLocker locker(sPageLock); 1117 move_page_to_active_or_inactive_queue(page, true); 1118 page->busy_writing = false; 1119 } else { 1120 // Writing failed, so move the page back to the modified queue. 1121 { 1122 InterruptsSpinLocker locker(sPageLock); 1123 page->state = PAGE_STATE_MODIFIED; 1124 enqueue_page_to_head(&sModifiedPageQueue, page); 1125 // Enqueue to the head, so we don't put it behind the 1126 // page writer's marker again. 1127 } 1128 1129 if (!page->busy_writing) { 1130 // Someone has cleared the busy_writing flag which tells 1131 // us our page has gone invalid. We need to remove it from the 1132 // cache and free it completely. 
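				// (The cache is still locked at this point, so removing and
				// freeing the page here should be safe.)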
1133 vm_remove_all_page_mappings(page, NULL); 1134 cache->RemovePage(page); 1135 vm_page_free(cache, page); 1136 } else 1137 page->busy_writing = false; 1138 } 1139 1140 callback.BusyCondition().Unpublish(); 1141 1142 cache->Unlock(); 1143 } 1144 1145 for (uint32 i = 0; i < fPageCount; i++) { 1146 vm_cache* cache = fCallbacks[i].Cache(); 1147 1148 // We release the cache references after all pages were made 1149 // unbusy again - otherwise releasing a vnode could deadlock. 1150 cache->ReleaseStoreRef(); 1151 cache->ReleaseRef(); 1152 } 1153 } 1154 1155 1156 void 1157 PageWriterRun::PageWritten(PageWriterCallback* callback, status_t status, 1158 bool partialTransfer, size_t bytesTransferred) 1159 { 1160 if (atomic_add(&fPendingPages, -1) == 1) 1161 fAllFinishedCondition.NotifyAll(); 1162 } 1163 1164 1165 /*! The page writer continuously takes some pages from the modified 1166 queue, writes them back, and moves them back to the active queue. 1167 It runs in its own thread, and is only there to keep the number 1168 of modified pages low, so that more pages can be reused with 1169 fewer costs. 1170 */ 1171 status_t 1172 page_writer(void* /*unused*/) 1173 { 1174 const uint32 kNumPages = 256; 1175 uint32 writtenPages = 0; 1176 bigtime_t lastWrittenTime = 0; 1177 bigtime_t pageCollectionTime = 0; 1178 bigtime_t pageWritingTime = 0; 1179 1180 PageWriterRun run; 1181 if (run.Init(kNumPages) != B_OK) { 1182 panic("page writer: Failed to init PageWriterRun!"); 1183 return B_ERROR; 1184 } 1185 1186 vm_page marker; 1187 marker.type = PAGE_TYPE_DUMMY; 1188 marker.cache = NULL; 1189 marker.state = PAGE_STATE_UNUSED; 1190 1191 while (true) { 1192 if (sModifiedPageQueue.count - sModifiedTemporaryPages < 1024) { 1193 int32 count = 0; 1194 get_sem_count(sWriterWaitSem, &count); 1195 if (count == 0) 1196 count = 1; 1197 1198 acquire_sem_etc(sWriterWaitSem, count, B_RELATIVE_TIMEOUT, 3000000); 1199 // all 3 seconds when no one triggers us 1200 } 1201 1202 // depending on how urgent it becomes to get pages to disk, we adjust 1203 // our I/O priority 1204 page_num_t modifiedPages = sModifiedPageQueue.count 1205 - sModifiedTemporaryPages; 1206 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES); 1207 int32 ioPriority = B_IDLE_PRIORITY; 1208 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL 1209 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) { 1210 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY; 1211 } else { 1212 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages 1213 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD; 1214 } 1215 1216 thread_set_io_priority(ioPriority); 1217 1218 uint32 numPages = 0; 1219 run.PrepareNextRun(); 1220 1221 // TODO: make this laptop friendly, too (ie. only start doing 1222 // something if someone else did something or there is really 1223 // enough to do). 1224 1225 // collect pages to be written 1226 #if ENABLE_SWAP_SUPPORT 1227 bool lowOnPages = lowPagesState != B_NO_LOW_RESOURCE; 1228 #endif 1229 1230 pageCollectionTime -= system_time(); 1231 1232 while (numPages < kNumPages) { 1233 vm_page *page = next_modified_page(marker); 1234 if (page == NULL) 1235 break; 1236 1237 PageCacheLocker cacheLocker(page, false); 1238 if (!cacheLocker.IsLocked()) 1239 continue; 1240 1241 vm_cache *cache = page->cache; 1242 1243 // Don't write back wired (locked) pages and don't write RAM pages 1244 // until we're low on pages. Also avoid writing temporary pages that 1245 // are active. 
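			// (Temporary caches are presumably swap-backed; with swap support
			// enabled, CanWritePage() below checks whether the cache could
			// actually write the page out.)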
1246 if (page->wired_count > 0 1247 || (cache->temporary 1248 #if ENABLE_SWAP_SUPPORT 1249 && (!lowOnPages /*|| page->usage_count > 0*/ 1250 || !cache->CanWritePage( 1251 (off_t)page->cache_offset << PAGE_SHIFT)) 1252 #endif 1253 )) { 1254 continue; 1255 } 1256 1257 // we need our own reference to the store, as it might 1258 // currently be destructed 1259 if (cache->AcquireUnreferencedStoreRef() != B_OK) { 1260 cacheLocker.Unlock(); 1261 thread_yield(true); 1262 continue; 1263 } 1264 1265 InterruptsSpinLocker locker(sPageLock); 1266 1267 // state might have change while we were locking the cache 1268 if (page->state != PAGE_STATE_MODIFIED) { 1269 // release the cache reference 1270 locker.Unlock(); 1271 cache->ReleaseStoreRef(); 1272 continue; 1273 } 1274 1275 remove_page_from_queue(&sModifiedPageQueue, page); 1276 1277 run.AddPage(page); 1278 1279 locker.Unlock(); 1280 1281 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count); 1282 TPW(WritePage(page)); 1283 1284 vm_clear_map_flags(page, PAGE_MODIFIED); 1285 cache->AcquireRefLocked(); 1286 numPages++; 1287 } 1288 1289 pageCollectionTime += system_time(); 1290 1291 if (numPages == 0) 1292 continue; 1293 1294 // write pages to disk and do all the cleanup 1295 pageWritingTime -= system_time(); 1296 run.Go(); 1297 pageWritingTime += system_time(); 1298 1299 // debug output only... 1300 writtenPages += numPages; 1301 if (writtenPages >= 1024) { 1302 bigtime_t now = system_time(); 1303 TRACE(("page writer: wrote 1024 pages (total: %llu ms, " 1304 "collect: %llu ms, write: %llu ms)\n", 1305 (now - lastWrittenTime) / 1000, 1306 pageCollectionTime / 1000, pageWritingTime / 1000)); 1307 writtenPages -= 1024; 1308 lastWrittenTime = now; 1309 pageCollectionTime = 0; 1310 pageWritingTime = 0; 1311 } 1312 } 1313 1314 remove_page_marker(marker); 1315 return B_OK; 1316 } 1317 1318 1319 static vm_page * 1320 find_page_candidate(struct vm_page &marker, bool stealActive) 1321 { 1322 InterruptsSpinLocker locker(sPageLock); 1323 page_queue *queue; 1324 vm_page *page; 1325 1326 if (marker.state == PAGE_STATE_UNUSED) { 1327 // Get the first free pages of the (in)active queue 1328 queue = &sInactivePageQueue; 1329 page = sInactivePageQueue.head; 1330 if (page == NULL && stealActive) { 1331 queue = &sActivePageQueue; 1332 page = sActivePageQueue.head; 1333 } 1334 } else { 1335 // Get the next page of the current queue 1336 if (marker.state == PAGE_STATE_INACTIVE) 1337 queue = &sInactivePageQueue; 1338 else if (marker.state == PAGE_STATE_ACTIVE) 1339 queue = &sActivePageQueue; 1340 else { 1341 panic("invalid marker %p state", &marker); 1342 queue = NULL; 1343 } 1344 1345 page = marker.queue_next; 1346 remove_page_from_queue(queue, &marker); 1347 marker.state = PAGE_STATE_UNUSED; 1348 } 1349 1350 while (page != NULL) { 1351 if (!is_marker_page(page) 1352 && (page->state == PAGE_STATE_INACTIVE 1353 || (stealActive && page->state == PAGE_STATE_ACTIVE 1354 && page->wired_count == 0))) { 1355 // we found a candidate, insert marker 1356 marker.state = queue == &sActivePageQueue 1357 ? 
PAGE_STATE_ACTIVE : PAGE_STATE_INACTIVE; 1358 insert_page_after(queue, page, &marker); 1359 return page; 1360 } 1361 1362 page = page->queue_next; 1363 if (page == NULL && stealActive && queue != &sActivePageQueue) { 1364 queue = &sActivePageQueue; 1365 page = sActivePageQueue.head; 1366 } 1367 } 1368 1369 return NULL; 1370 } 1371 1372 1373 static bool 1374 steal_page(vm_page *page, bool stealActive) 1375 { 1376 // try to lock the page's cache 1377 if (vm_cache_acquire_locked_page_cache(page, false) == NULL) 1378 return false; 1379 1380 AutoLocker<VMCache> cacheLocker(page->cache, true, false); 1381 MethodDeleter<VMCache> _2(page->cache, &VMCache::ReleaseRefLocked); 1382 1383 // check again if that page is still a candidate 1384 if (page->state != PAGE_STATE_INACTIVE 1385 && (!stealActive || page->state != PAGE_STATE_ACTIVE 1386 || page->wired_count != 0)) 1387 return false; 1388 1389 // recheck eventual last minute changes 1390 uint32 flags; 1391 vm_remove_all_page_mappings(page, &flags); 1392 if ((flags & PAGE_MODIFIED) != 0) { 1393 // page was modified, don't steal it 1394 vm_page_set_state(page, PAGE_STATE_MODIFIED); 1395 return false; 1396 } else if ((flags & PAGE_ACCESSED) != 0) { 1397 // page is in active use, don't steal it 1398 vm_page_set_state(page, PAGE_STATE_ACTIVE); 1399 return false; 1400 } 1401 1402 // we can now steal this page 1403 1404 //dprintf(" steal page %p from cache %p%s\n", page, page->cache, 1405 // page->state == PAGE_STATE_INACTIVE ? "" : " (ACTIVE)"); 1406 1407 page->cache->RemovePage(page); 1408 1409 InterruptsSpinLocker _(sPageLock); 1410 remove_page_from_queue(page->state == PAGE_STATE_ACTIVE 1411 ? &sActivePageQueue : &sInactivePageQueue, page); 1412 return true; 1413 } 1414 1415 1416 static size_t 1417 steal_pages(vm_page **pages, size_t count, bool reserve) 1418 { 1419 size_t maxCount = count; 1420 1421 while (true) { 1422 vm_page marker; 1423 marker.type = PAGE_TYPE_DUMMY; 1424 marker.cache = NULL; 1425 marker.state = PAGE_STATE_UNUSED; 1426 1427 bool tried = false; 1428 size_t stolen = 0; 1429 1430 while (count > 0) { 1431 vm_page *page = find_page_candidate(marker, false); 1432 if (page == NULL) 1433 break; 1434 1435 if (steal_page(page, false)) { 1436 if (reserve || stolen >= maxCount) { 1437 InterruptsSpinLocker _(sPageLock); 1438 enqueue_page(&sFreePageQueue, page); 1439 page->state = PAGE_STATE_FREE; 1440 1441 T(StolenPage()); 1442 } else if (stolen < maxCount) { 1443 pages[stolen] = page; 1444 } 1445 stolen++; 1446 count--; 1447 } else 1448 tried = true; 1449 } 1450 1451 remove_page_marker(marker); 1452 1453 InterruptsSpinLocker locker(sPageLock); 1454 1455 if ((reserve && sReservedPages <= free_page_queue_count()) 1456 || count == 0 1457 || ((!reserve && (sInactivePageQueue.count > 0)) 1458 || free_page_queue_count() > sReservedPages)) 1459 return stolen; 1460 1461 if (stolen && !tried && sInactivePageQueue.count > 0) { 1462 count++; 1463 continue; 1464 } 1465 1466 // we need to wait for pages to become inactive 1467 1468 ConditionVariableEntry freeConditionEntry; 1469 sPageDeficit++; 1470 freeConditionEntry.Add(&sFreePageQueue); 1471 locker.Unlock(); 1472 1473 if (tried) { 1474 // We tried all potential pages, but one or more couldn't be stolen 1475 // at that time (likely because their cache was locked). No one 1476 // else will have any better luck, so we'll just retry a little 1477 // later. 
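			// Retry after at most 10 ms, or earlier if a page gets freed and
			// the condition variable is notified.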
1478 freeConditionEntry.Wait(B_RELATIVE_TIMEOUT, 10000); 1479 } else { 1480 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0); 1481 //snooze(50000); 1482 // sleep for 50ms 1483 1484 freeConditionEntry.Wait(); 1485 } 1486 1487 locker.Lock(); 1488 sPageDeficit--; 1489 1490 if (reserve && sReservedPages <= free_page_queue_count()) 1491 return stolen; 1492 } 1493 } 1494 1495 1496 // #pragma mark - private kernel API 1497 1498 1499 /*! Writes a range of modified pages of a cache to disk. 1500 You need to hold the vm_cache lock when calling this function. 1501 Note that the cache lock is released in this function. 1502 \param cache The cache. 1503 \param firstPage Offset (in page size units) of the first page in the range. 1504 \param endPage End offset (in page size units) of the page range. The page 1505 at this offset is not included. 1506 */ 1507 status_t 1508 vm_page_write_modified_page_range(struct VMCache *cache, uint32 firstPage, 1509 uint32 endPage) 1510 { 1511 // TODO: join adjacent pages into one vec list 1512 1513 for (VMCachePagesTree::Iterator it 1514 = cache->pages.GetIterator(firstPage, true, true); 1515 vm_page *page = it.Next();) { 1516 bool dequeuedPage = false; 1517 1518 if (page->cache_offset >= endPage) 1519 break; 1520 1521 if (page->state == PAGE_STATE_MODIFIED) { 1522 InterruptsSpinLocker locker(&sPageLock); 1523 remove_page_from_queue(&sModifiedPageQueue, page); 1524 dequeuedPage = true; 1525 } else if (page->state == PAGE_STATE_BUSY 1526 || !vm_test_map_modification(page)) { 1527 continue; 1528 } 1529 1530 int oldPageState = page->state; 1531 page->state = PAGE_STATE_BUSY; 1532 page->busy_writing = true; 1533 1534 ConditionVariable busyCondition; 1535 busyCondition.Publish(page, "page"); 1536 1537 // We have a modified page - however, while we're writing it back, 1538 // the page is still mapped. In order not to lose any changes to the 1539 // page, we mark it clean before actually writing it back; if writing 1540 // the page fails for some reason, we just keep it in the modified page 1541 // list, but that should happen only rarely. 1542 1543 // If the page is changed after we cleared the dirty flag, but before we 1544 // had the chance to write it back, then we'll write it again later - 1545 // that will probably not happen that often, though. 1546 1547 // clear the modified flag 1548 vm_clear_map_flags(page, PAGE_MODIFIED); 1549 1550 cache->Unlock(); 1551 status_t status = write_page(page, 0, NULL); 1552 cache->Lock(); 1553 1554 // Before disabling interrupts handle part of the special case that 1555 // writing the page failed due the cache having been shrunk. We need to 1556 // remove the page from the cache and free it. 1557 if (status != B_OK && !page->busy_writing) { 1558 vm_remove_all_page_mappings(page, NULL); 1559 cache->RemovePage(page); 1560 } 1561 1562 InterruptsSpinLocker locker(&sPageLock); 1563 1564 if (status == B_OK) { 1565 // put it into the active/inactive queue 1566 move_page_to_active_or_inactive_queue(page, dequeuedPage); 1567 page->busy_writing = false; 1568 } else { 1569 // Writing the page failed -- move to the modified queue. If we 1570 // dequeued it from there, just enqueue it again, otherwise set the 1571 // page set explicitly, which will take care of moving between the 1572 // queues. 
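		// (Restoring the old state first lets set_page_state_nolock() pick
		// the correct source queue.)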
1573 if (dequeuedPage) { 1574 page->state = PAGE_STATE_MODIFIED; 1575 enqueue_page(&sModifiedPageQueue, page); 1576 } else { 1577 page->state = oldPageState; 1578 set_page_state_nolock(page, PAGE_STATE_MODIFIED); 1579 } 1580 1581 if (!page->busy_writing) { 1582 // The busy_writing flag was cleared. That means the cache has 1583 // been shrunk while we were trying to write the page and we 1584 // have to free it now. 1585 1586 // Adjust temporary modified pages count, if necessary. 1587 if (dequeuedPage && cache->temporary) 1588 sModifiedTemporaryPages--; 1589 1590 // free the page 1591 set_page_state_nolock(page, PAGE_STATE_FREE); 1592 } else 1593 page->busy_writing = false; 1594 } 1595 1596 busyCondition.Unpublish(); 1597 } 1598 1599 return B_OK; 1600 } 1601 1602 1603 /*! You need to hold the vm_cache lock when calling this function. 1604 Note that the cache lock is released in this function. 1605 */ 1606 status_t 1607 vm_page_write_modified_pages(vm_cache *cache) 1608 { 1609 return vm_page_write_modified_page_range(cache, 0, 1610 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT); 1611 } 1612 1613 1614 /*! Schedules the page writer to write back the specified \a page. 1615 Note, however, that it might not do this immediately, and it can well 1616 take several seconds until the page is actually written out. 1617 */ 1618 void 1619 vm_page_schedule_write_page(vm_page *page) 1620 { 1621 ASSERT(page->state == PAGE_STATE_MODIFIED); 1622 1623 vm_page_requeue(page, false); 1624 1625 release_sem_etc(sWriterWaitSem, 1, B_DO_NOT_RESCHEDULE); 1626 } 1627 1628 1629 /*! Cache must be locked. 1630 */ 1631 void 1632 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage, 1633 uint32 endPage) 1634 { 1635 uint32 modified = 0; 1636 for (VMCachePagesTree::Iterator it 1637 = cache->pages.GetIterator(firstPage, true, true); 1638 vm_page *page = it.Next();) { 1639 if (page->cache_offset >= endPage) 1640 break; 1641 1642 if (page->state == PAGE_STATE_MODIFIED) { 1643 vm_page_requeue(page, false); 1644 modified++; 1645 } 1646 } 1647 1648 if (modified > 0) 1649 release_sem_etc(sWriterWaitSem, 1, B_DO_NOT_RESCHEDULE); 1650 } 1651 1652 1653 void 1654 vm_page_init_num_pages(kernel_args *args) 1655 { 1656 uint32 i; 1657 1658 // calculate the size of memory by looking at the physical_memory_range array 1659 addr_t physicalPagesEnd = 0; 1660 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE; 1661 1662 for (i = 0; i < args->num_physical_memory_ranges; i++) { 1663 physicalPagesEnd = (args->physical_memory_range[i].start 1664 + args->physical_memory_range[i].size) / B_PAGE_SIZE; 1665 } 1666 1667 TRACE(("first phys page = 0x%lx, end 0x%lx\n", sPhysicalPageOffset, 1668 physicalPagesEnd)); 1669 1670 sNumPages = physicalPagesEnd - sPhysicalPageOffset; 1671 1672 #ifdef LIMIT_AVAILABLE_MEMORY 1673 if (sNumPages > LIMIT_AVAILABLE_MEMORY * 256) 1674 sNumPages = LIMIT_AVAILABLE_MEMORY * 256; 1675 #endif 1676 } 1677 1678 1679 status_t 1680 vm_page_init(kernel_args *args) 1681 { 1682 TRACE(("vm_page_init: entry\n")); 1683 1684 // map in the new free page table 1685 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page), 1686 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 1687 1688 TRACE(("vm_init: putting free_page_table @ %p, # ents %ld (size 0x%x)\n", 1689 sPages, sNumPages, (unsigned int)(sNumPages * sizeof(vm_page)))); 1690 1691 // initialize the free page table 1692 for (uint32 i = 0; i < sNumPages; i++) { 1693 sPages[i].physical_page_number = sPhysicalPageOffset + 
i; 1694 sPages[i].type = PAGE_TYPE_PHYSICAL; 1695 sPages[i].state = PAGE_STATE_FREE; 1696 new(&sPages[i].mappings) vm_page_mappings(); 1697 sPages[i].wired_count = 0; 1698 sPages[i].usage_count = 0; 1699 sPages[i].busy_writing = false; 1700 sPages[i].merge_swap = false; 1701 sPages[i].cache = NULL; 1702 #if DEBUG_PAGE_QUEUE 1703 sPages[i].queue = NULL; 1704 #endif 1705 #if DEBUG_PAGE_CACHE_TRANSITIONS 1706 sPages[i].debug_flags = 0; 1707 sPages[i].collided_page = NULL; 1708 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 1709 enqueue_page(&sFreePageQueue, &sPages[i]); 1710 } 1711 1712 TRACE(("initialized table\n")); 1713 1714 // mark some of the page ranges inuse 1715 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 1716 vm_mark_page_range_inuse(args->physical_allocated_range[i].start / B_PAGE_SIZE, 1717 args->physical_allocated_range[i].size / B_PAGE_SIZE); 1718 } 1719 1720 TRACE(("vm_page_init: exit\n")); 1721 1722 return B_OK; 1723 } 1724 1725 1726 status_t 1727 vm_page_init_post_area(kernel_args *args) 1728 { 1729 void *dummy; 1730 1731 dummy = sPages; 1732 create_area("page structures", &dummy, B_EXACT_ADDRESS, 1733 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED, 1734 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 1735 1736 add_debugger_command("page_stats", &dump_page_stats, "Dump statistics about page usage"); 1737 add_debugger_command("page", &dump_page, "Dump page info"); 1738 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue"); 1739 add_debugger_command("find_page", &find_page, 1740 "Find out which queue a page is actually in"); 1741 1742 return B_OK; 1743 } 1744 1745 1746 status_t 1747 vm_page_init_post_thread(kernel_args *args) 1748 { 1749 new (&sFreePageCondition) ConditionVariable; 1750 sFreePageCondition.Publish(&sFreePageQueue, "free page"); 1751 1752 // create a kernel thread to clear out pages 1753 1754 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber", 1755 B_LOWEST_ACTIVE_PRIORITY, NULL); 1756 send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE); 1757 1758 // start page writer 1759 1760 sWriterWaitSem = create_sem(0, "page writer"); 1761 1762 thread = spawn_kernel_thread(&page_writer, "page writer", 1763 B_NORMAL_PRIORITY + 1, NULL); 1764 send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE); 1765 1766 return B_OK; 1767 } 1768 1769 1770 status_t 1771 vm_mark_page_inuse(addr_t page) 1772 { 1773 return vm_mark_page_range_inuse(page, 1); 1774 } 1775 1776 1777 status_t 1778 vm_mark_page_range_inuse(addr_t startPage, addr_t length) 1779 { 1780 TRACE(("vm_mark_page_range_inuse: start 0x%lx, len 0x%lx\n", 1781 startPage, length)); 1782 1783 if (sPhysicalPageOffset > startPage) { 1784 TRACE(("vm_mark_page_range_inuse: start page %ld is before free list\n", 1785 startPage)); 1786 return B_BAD_VALUE; 1787 } 1788 startPage -= sPhysicalPageOffset; 1789 if (startPage + length > sNumPages) { 1790 TRACE(("vm_mark_page_range_inuse: range would extend past free list\n")); 1791 return B_BAD_VALUE; 1792 } 1793 1794 cpu_status state = disable_interrupts(); 1795 acquire_spinlock(&sPageLock); 1796 1797 for (addr_t i = 0; i < length; i++) { 1798 vm_page *page = &sPages[startPage + i]; 1799 switch (page->state) { 1800 case PAGE_STATE_FREE: 1801 case PAGE_STATE_CLEAR: 1802 set_page_state_nolock(page, PAGE_STATE_UNUSED); 1803 break; 1804 case PAGE_STATE_WIRED: 1805 break; 1806 case PAGE_STATE_ACTIVE: 1807 case PAGE_STATE_INACTIVE: 1808 case PAGE_STATE_BUSY: 1809 case PAGE_STATE_MODIFIED: 1810 case PAGE_STATE_UNUSED: 1811 default: 1812 // 
uh
				dprintf("vm_mark_page_range_inuse: page 0x%lx in non-free state %d!\n",
					startPage + i, page->state);
				break;
		}
	}

	release_spinlock(&sPageLock);
	restore_interrupts(state);

	return B_OK;
}


/*!	Unreserves pages previously reserved with vm_page_reserve_pages().
	Note, you specify the same \a count here that you specified when
	reserving the pages - you don't need to keep track of how many pages
	you actually needed from that upfront allocation.
*/
void
vm_page_unreserve_pages(uint32 count)
{
	if (count == 0)
		return;

	InterruptsSpinLocker locker(sPageLock);
	ASSERT(sReservedPages >= count);

	T(UnreservePages(count));

	sReservedPages -= count;

	if (sPageDeficit > 0)
		sFreePageCondition.NotifyAll();
}


/*!	With this call, you can reserve a number of free pages in the system.
	They will only be handed out to someone who has actually reserved them.
	This call returns as soon as the number of requested pages has been
	reached.
*/
void
vm_page_reserve_pages(uint32 count)
{
	if (count == 0)
		return;

	InterruptsSpinLocker locker(sPageLock);

	T(ReservePages(count));

	sReservedPages += count;
	size_t freePages = free_page_queue_count();
	if (sReservedPages <= freePages)
		return;

	locker.Unlock();

	steal_pages(NULL, count + 1, true);
		// we get one more, just in case we can do something someone
		// else can't
}


bool
vm_page_try_reserve_pages(uint32 count)
{
	if (count == 0)
		return true;

	InterruptsSpinLocker locker(sPageLock);

	T(ReservePages(count));

	size_t freePages = free_page_queue_count();
	if (sReservedPages + count > freePages)
		return false;

	sReservedPages += count;
	return true;
}


vm_page *
vm_page_allocate_page(int pageState, bool reserved)
{
	page_queue *queue;
	page_queue *otherQueue;

	switch (pageState) {
		case PAGE_STATE_FREE:
			queue = &sFreePageQueue;
			otherQueue = &sClearPageQueue;
			break;
		case PAGE_STATE_CLEAR:
			queue = &sClearPageQueue;
			otherQueue = &sFreePageQueue;
			break;
		default:
			return NULL; // invalid
	}

	InterruptsSpinLocker locker(sPageLock);

	T(AllocatePage(reserved));

	vm_page *page = NULL;
	while (true) {
		if (reserved || sReservedPages < free_page_queue_count()) {
			page = dequeue_page(queue);
			if (page == NULL) {
#if DEBUG_PAGE_QUEUE
				if (queue->count != 0)
					panic("queue %p corrupted, count = %d\n", queue, queue->count);
#endif

				// if the primary queue was empty, grab the page from the
				// secondary queue
				page = dequeue_page(otherQueue);
			}
		}

		if (page != NULL)
			break;

		if (reserved)
			panic("Had reserved page, but there is none!");

		// steal one from the inactive list
		locker.Unlock();
		size_t stolen = steal_pages(&page, 1, false);
		locker.Lock();

		if (stolen > 0)
			break;
	}

	if (page->cache != NULL)
		panic("supposed to be free page %p has cache\n", page);

	int oldPageState = page->state;
	page->state = PAGE_STATE_BUSY;
	page->usage_count = 2;

	enqueue_page(&sActivePageQueue, page);

	locker.Unlock();

	// if needed, take the page from the free queue and zero it out
	if (pageState == PAGE_STATE_CLEAR && oldPageState != PAGE_STATE_CLEAR)
		clear_page(page);

	return page;
}


/*!	Allocates a number of pages and puts their pointers into the provided
	array. All pages are marked busy.
	Returns B_OK on success, and B_NO_MEMORY when there aren't any free
	pages left to allocate.
*/
status_t
vm_page_allocate_pages(int pageState, vm_page **pages, uint32 numPages)
{
	uint32 i;

	for (i = 0; i < numPages; i++) {
		pages[i] = vm_page_allocate_page(pageState, false);
		if (pages[i] == NULL) {
			// allocation failed, we need to free what we already have
			while (i-- > 0)
				vm_page_set_state(pages[i], pageState);

			return B_NO_MEMORY;
		}
	}

	return B_OK;
}


vm_page *
vm_page_allocate_page_run(int pageState, addr_t base, addr_t length)
{
	vm_page *firstPage = NULL;
	uint32 start = base >> PAGE_SHIFT;

	InterruptsSpinLocker locker(sPageLock);

	if (sFreePageQueue.count + sClearPageQueue.count - sReservedPages
			< length) {
		// TODO: add more tries, ie. free some inactive, ...
		// no free space
		return NULL;
	}

	for (;;) {
		bool foundRun = true;
		if (start + length > sNumPages)
			break;

		uint32 i;
		for (i = 0; i < length; i++) {
			if (sPages[start + i].state != PAGE_STATE_FREE
				&& sPages[start + i].state != PAGE_STATE_CLEAR) {
				foundRun = false;
				i++;
				break;
			}
		}
		if (foundRun) {
			// pull the pages out of the appropriate queues
			for (i = 0; i < length; i++) {
				sPages[start + i].is_cleared
					= sPages[start + i].state == PAGE_STATE_CLEAR;
				set_page_state_nolock(&sPages[start + i], PAGE_STATE_BUSY);
				sPages[start + i].usage_count = 2;
			}
			firstPage = &sPages[start];
			break;
		} else {
			start += i;
		}
	}

	T(AllocatePageRun(length));

	locker.Unlock();

	if (firstPage != NULL && pageState == PAGE_STATE_CLEAR) {
		for (uint32 i = 0; i < length; i++) {
			if (!sPages[start + i].is_cleared)
				clear_page(&sPages[start + i]);
		}
	}

	return firstPage;
}


vm_page *
vm_page_at_index(int32 index)
{
	return &sPages[index];
}


vm_page *
vm_lookup_page(addr_t pageNumber)
{
	if (pageNumber < sPhysicalPageOffset)
		return NULL;

	pageNumber -= sPhysicalPageOffset;
	if (pageNumber >= sNumPages)
		return NULL;

	return &sPages[pageNumber];
}


/*!	Frees the page that belonged to a certain cache.
	You can use vm_page_set_state() manually if you prefer, but only
	if the page's state is not PAGE_STATE_MODIFIED.
*/
void
vm_page_free(vm_cache *cache, vm_page *page)
{
	InterruptsSpinLocker _(sPageLock);

	if (page->cache == NULL && page->state == PAGE_STATE_MODIFIED
		&& cache->temporary) {
		sModifiedTemporaryPages--;
	}

	set_page_state_nolock(page, PAGE_STATE_FREE);
}


status_t
vm_page_set_state(vm_page *page, int pageState)
{
	InterruptsSpinLocker _(sPageLock);

	return set_page_state_nolock(page, pageState);
}


/*!	Moves a page to either the tail or the head of its current queue,
	depending on \a tail.
*/
void
vm_page_requeue(struct vm_page *page, bool tail)
{
	InterruptsSpinLocker _(sPageLock);
	page_queue *queue = NULL;

	switch (page->state) {
		case PAGE_STATE_BUSY:
		case PAGE_STATE_ACTIVE:
		case PAGE_STATE_WIRED:
		case PAGE_STATE_UNUSED:
			queue = &sActivePageQueue;
			break;
		case PAGE_STATE_INACTIVE:
			queue = &sInactivePageQueue;
			break;
		case PAGE_STATE_MODIFIED:
			queue = &sModifiedPageQueue;
			break;
		case PAGE_STATE_FREE:
			queue = &sFreePageQueue;
			break;
		case PAGE_STATE_CLEAR:
			queue = &sClearPageQueue;
			break;
		default:
			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
				page, page->state);
			break;
	}

	remove_page_from_queue(queue, page);

	if (tail)
		enqueue_page(queue, page);
	else
		enqueue_page_to_head(queue, page);
}


size_t
vm_page_num_pages(void)
{
	return sNumPages;
}


/*!	There is a subtle distinction between the page counts returned by
	this function and vm_page_num_free_pages():
	The latter returns the number of pages that are completely uncommitted,
	whereas this one returns the number of pages that are available for
	use by being reclaimed as well (IOW it factors in things like cache pages
	as available).
*/
size_t
vm_page_num_available_pages(void)
{
	return vm_available_memory() / B_PAGE_SIZE;
}


size_t
vm_page_num_free_pages(void)
{
	size_t reservedPages = sReservedPages;
	size_t count = free_page_queue_count() + sInactivePageQueue.count;
	if (reservedPages > count)
		return 0;

	return count - reservedPages;
}


void
vm_page_get_stats(system_info *info)
{
	// Get free pages count -- not really exact, since we don't know how many
	// of the reserved pages have already been allocated, but good citizens
	// unreserve chunk-wise as they are allocating the pages, if they have
	// reserved a larger quantity.
	page_num_t reserved = sReservedPages;
	page_num_t free = free_page_queue_count();
	free = free > reserved ? free - reserved : 0;

	// The pages used for the block cache buffers. Those should not be counted
	// as used but as cached pages.
	// TODO: We should subtract the blocks that are in use ATM, since those
	// can't really be freed in a low memory situation.
	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;

	info->max_pages = sNumPages;
	info->used_pages = gMappedPagesCount - blockCachePages;
	info->cached_pages = sNumPages >= free + info->used_pages
		? sNumPages - free - info->used_pages : 0;
	info->page_faults = vm_num_page_faults();

	// TODO: We don't consider pages used for page directories/tables yet.
}
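
// The following is a minimal usage sketch, kept out of compilation, that
// shows how the reservation and allocation calls above are meant to be
// combined. The helper name and the fixed PAGE_STATE_CLEAR choice are made
// up for illustration only; real callers pick state and count as needed.
#if 0
static void
example_allocate_reserved_pages(vm_page* pages[], uint32 count)
{
	// Reserve first; an allocation with "reserved == true" may then take
	// pages even when the free queues are running low.
	vm_page_reserve_pages(count);

	for (uint32 i = 0; i < count; i++)
		pages[i] = vm_page_allocate_page(PAGE_STATE_CLEAR, true);

	// The pages are allocated now, so the reservation is no longer needed.
	vm_page_unreserve_pages(count);
}
#endif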