/*
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

#include <signal.h>
#include <string.h>
#include <stdlib.h>

#include <KernelExport.h>
#include <OS.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm_translation_map.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <condition_variable.h>
#include <kernel.h>
#include <low_resource_manager.h>
#include <thread.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm.h>
#include <vm_address_space.h>
#include <vm_priv.h>
#include <vm_page.h>
#include <vm_cache.h>

#include "PageCacheLocker.h"
#include "io_requests.h"


//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#   define TRACE(x) dprintf x
#else
#   define TRACE(x) ;
#endif

#define SCRUB_SIZE 16
    // this many pages will be cleared at once in the page scrubber thread

// for debugging: limit the amount of available RAM (in MB)
//#define LIMIT_AVAILABLE_MEMORY 256


typedef struct page_queue {
    vm_page *head;
    vm_page *tail;
    uint32 count;
} page_queue;

int32 gMappedPagesCount;

static page_queue sFreePageQueue;
static page_queue sClearPageQueue;
static page_queue sModifiedPageQueue;
static page_queue sInactivePageQueue;
static page_queue sActivePageQueue;

static vm_page *sPages;
static addr_t sPhysicalPageOffset;
static size_t sNumPages;
static size_t sReservedPages;
static vint32 sPageDeficit;
static size_t sModifiedTemporaryPages;

static ConditionVariable sFreePageCondition;
static spinlock sPageLock;

static sem_id sWriterWaitSem;


#if PAGE_ALLOCATION_TRACING

namespace PageAllocationTracing {

class ReservePages : public AbstractTraceEntry {
    public:
        ReservePages(uint32 count)
            :
            fCount(count)
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page reserve: %lu", fCount);
        }

    private:
        uint32 fCount;
};


class UnreservePages : public AbstractTraceEntry {
    public:
        UnreservePages(uint32 count)
            :
            fCount(count)
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page unreserve: %lu", fCount);
        }

    private:
        uint32 fCount;
};


class AllocatePage : public AbstractTraceEntry {
    public:
        AllocatePage(bool reserved)
            :
            fReserved(reserved)
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page alloc");
            if (fReserved)
                out.Print(" reserved");
        }

    private:
        bool fReserved;
};


class AllocatePageRun : public AbstractTraceEntry {
    public:
        AllocatePageRun(uint32 length)
            :
            fLength(length)
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page alloc run: length: %ld", fLength);
        }

    private:
        uint32 fLength;
};


class FreePage : public AbstractTraceEntry {
    public:
        FreePage()
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page free");
        }
};


class ScrubbingPages : public AbstractTraceEntry {
    public:
        ScrubbingPages(uint32 count)
            :
            fCount(count)
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page scrubbing: %lu", fCount);
        }

    private:
        uint32 fCount;
};


class ScrubbedPages : public AbstractTraceEntry {
    public:
        ScrubbedPages(uint32 count)
            :
            fCount(count)
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page scrubbed: %lu", fCount);
        }

    private:
        uint32 fCount;
};


class StolenPage : public AbstractTraceEntry {
    public:
        StolenPage()
        {
            Initialized();
        }

        virtual void AddDump(TraceOutput& out)
        {
            out.Print("page stolen");
        }
};

}   // namespace PageAllocationTracing

#   define T(x) new(std::nothrow) PageAllocationTracing::x

#else
#   define T(x)
#endif  // PAGE_ALLOCATION_TRACING


/*! Dequeues a page from the head of the given queue */
static vm_page *
dequeue_page(page_queue *queue)
{
    vm_page *page;

    page = queue->head;
    if (page != NULL) {
        if (queue->tail == page)
            queue->tail = NULL;
        if (page->queue_next != NULL)
            page->queue_next->queue_prev = NULL;

        queue->head = page->queue_next;
        if (page->type != PAGE_TYPE_DUMMY)
            queue->count--;

#ifdef DEBUG_PAGE_QUEUE
        if (page->queue != queue) {
            panic("dequeue_page(queue: %p): page %p thinks it is in queue "
                "%p", queue, page, page->queue);
        }

        page->queue = NULL;
#endif  // DEBUG_PAGE_QUEUE
    }

    return page;
}


/*! Enqueues a page to the tail of the given queue */
static void
enqueue_page(page_queue *queue, vm_page *page)
{
#ifdef DEBUG_PAGE_QUEUE
    if (page->queue != NULL) {
        panic("enqueue_page(queue: %p, page: %p): page thinks it is "
            "already in queue %p", queue, page, page->queue);
    }
#endif  // DEBUG_PAGE_QUEUE

    if (queue->tail != NULL)
        queue->tail->queue_next = page;
    page->queue_prev = queue->tail;
    queue->tail = page;
    page->queue_next = NULL;
    if (queue->head == NULL)
        queue->head = page;
    if (page->type != PAGE_TYPE_DUMMY)
        queue->count++;

#ifdef DEBUG_PAGE_QUEUE
    page->queue = queue;
#endif
}

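
// A minimal usage sketch (illustrative only, not code used elsewhere in this
// file): the queue helpers above are meant to be called with sPageLock held
// and interrupts disabled. Moving one page from the free queue over to the
// clear queue would look roughly like this:
#if 0
    {
        InterruptsSpinLocker locker(sPageLock);

        vm_page *page = dequeue_page(&sFreePageQueue);
        if (page != NULL) {
            page->state = PAGE_STATE_CLEAR;
            enqueue_page(&sClearPageQueue, page);
        }
    }
#endif
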

/*! Enqueues a page to the head of the given queue */
static void
enqueue_page_to_head(page_queue *queue, vm_page *page)
{
#ifdef DEBUG_PAGE_QUEUE
    if (page->queue != NULL) {
        panic("enqueue_page_to_head(queue: %p, page: %p): page thinks it is "
            "already in queue %p", queue, page, page->queue);
    }
#endif  // DEBUG_PAGE_QUEUE

    if (queue->head != NULL)
        queue->head->queue_prev = page;
    page->queue_next = queue->head;
    queue->head = page;
    page->queue_prev = NULL;
    if (queue->tail == NULL)
        queue->tail = page;
    if (page->type != PAGE_TYPE_DUMMY)
        queue->count++;

#ifdef DEBUG_PAGE_QUEUE
    page->queue = queue;
#endif
}


static void
remove_page_from_queue(page_queue *queue, vm_page *page)
{
#ifdef DEBUG_PAGE_QUEUE
    if (page->queue != queue) {
        panic("remove_page_from_queue(queue: %p, page: %p): page thinks it "
            "is in queue %p", queue, page, page->queue);
    }
#endif  // DEBUG_PAGE_QUEUE

    if (page->queue_next != NULL)
        page->queue_next->queue_prev = page->queue_prev;
    else
        queue->tail = page->queue_prev;

    if (page->queue_prev != NULL)
        page->queue_prev->queue_next = page->queue_next;
    else
        queue->head = page->queue_next;

    if (page->type != PAGE_TYPE_DUMMY)
        queue->count--;

#ifdef DEBUG_PAGE_QUEUE
    page->queue = NULL;
#endif
}


/*! Moves a page to the tail of the given queue, but only does so if
    the page is currently in another queue.
*/
static void
move_page_to_queue(page_queue *fromQueue, page_queue *toQueue, vm_page *page)
{
    if (fromQueue != toQueue) {
        remove_page_from_queue(fromQueue, page);
        enqueue_page(toQueue, page);
    }
}


/*! Inserts \a page after the \a before page in the \a queue.
*/
static void
insert_page_after(page_queue *queue, vm_page *before, vm_page *page)
{
#ifdef DEBUG_PAGE_QUEUE
    if (page->queue != NULL) {
        panic("insert_page_after(queue: %p, page: %p): page thinks it is "
            "already in queue %p", queue, page, page->queue);
    }
#endif  // DEBUG_PAGE_QUEUE

    if (before == NULL) {
        enqueue_page(queue, page);
        return;
    }

    page->queue_next = before->queue_next;
    if (page->queue_next != NULL)
        page->queue_next->queue_prev = page;
    page->queue_prev = before;
    before->queue_next = page;

    if (queue->tail == before)
        queue->tail = page;

    if (page->type != PAGE_TYPE_DUMMY)
        queue->count++;

#ifdef DEBUG_PAGE_QUEUE
    page->queue = queue;
#endif
}


static int
find_page(int argc, char **argv)
{
    struct vm_page *page;
    addr_t address;
    int32 index = 1;
    int i;

    struct {
        const char* name;
        page_queue* queue;
    } pageQueueInfos[] = {
        { "free",       &sFreePageQueue },
        { "clear",      &sClearPageQueue },
        { "modified",   &sModifiedPageQueue },
        { "active",     &sActivePageQueue },
        { "inactive",   &sInactivePageQueue },
        { NULL, NULL }
    };

    if (argc < 2
        || strlen(argv[index]) <= 2
        || argv[index][0] != '0'
        || argv[index][1] != 'x') {
        kprintf("usage: find_page <address>\n");
        return 0;
    }

    address = strtoul(argv[index], NULL, 0);
    page = (vm_page*)address;

    for (i = 0; pageQueueInfos[i].name; i++) {
        vm_page* p = pageQueueInfos[i].queue->head;
        while (p) {
            if (p == page) {
                kprintf("found page %p in queue %p (%s)\n", page,
                    pageQueueInfos[i].queue, pageQueueInfos[i].name);
                return 0;
            }
            p = p->queue_next;
        }
    }

    kprintf("page %p isn't in any queue\n", page);

    return 0;
}


const char *
page_state_to_string(int state)
{
    switch (state) {
        case PAGE_STATE_ACTIVE:
            return "active";
        case PAGE_STATE_INACTIVE:
            return "inactive";
        case PAGE_STATE_BUSY:
            return "busy";
        case PAGE_STATE_MODIFIED:
            return "modified";
        case PAGE_STATE_FREE:
            return "free";
        case PAGE_STATE_CLEAR:
            return "clear";
        case PAGE_STATE_WIRED:
            return "wired";
        case PAGE_STATE_UNUSED:
            return "unused";
        default:
            return "unknown";
    }
}


static int
dump_page(int argc, char **argv)
{
    struct vm_page *page;
    addr_t address;
    bool physical = false;
    int32 index = 1;

    if (argc > 2) {
        if (!strcmp(argv[1], "-p")) {
            physical = true;
            index++;
        } else if (!strcmp(argv[1], "-v"))
            index++;
    }

    if (argc < 2
        || strlen(argv[index]) <= 2
        || argv[index][0] != '0'
        || argv[index][1] != 'x') {
        kprintf("usage: page [-p|-v] <address>\n"
            "  -v looks up a virtual address for the page, -p a physical address.\n"
            "  Default is to look for the page structure address directly.\n");
        return 0;
    }

    address = strtoul(argv[index], NULL, 0);

    if (index == 2) {
        if (!physical) {
            vm_address_space *addressSpace = vm_kernel_address_space();
            uint32 flags;

            if (thread_get_current_thread()->team->address_space != NULL)
                addressSpace = thread_get_current_thread()->team->address_space;

            addressSpace->translation_map.ops->query_interrupt(
                &addressSpace->translation_map, address, &address, &flags);
        }
        page = vm_lookup_page(address / B_PAGE_SIZE);
    } else
        page = (struct vm_page *)address;

    kprintf("PAGE: %p\n", page);
    kprintf("queue_next,prev: %p, %p\n", page->queue_next, page->queue_prev);
    kprintf("physical_number: %lx\n", page->physical_page_number);
    kprintf("cache: %p\n", page->cache);
    kprintf("cache_offset: %ld\n", page->cache_offset);
    kprintf("cache_next: %p\n", page->cache_next);
    kprintf("type: %d\n", page->type);
    kprintf("state: %s\n", page_state_to_string(page->state));
    kprintf("wired_count: %d\n", page->wired_count);
    kprintf("usage_count: %d\n", page->usage_count);
    kprintf("busy_writing: %d\n", page->busy_writing);
#ifdef DEBUG_PAGE_QUEUE
    kprintf("queue: %p\n", page->queue);
#endif
#ifdef DEBUG_PAGE_CACHE_TRANSITIONS
    kprintf("debug_flags: 0x%lx\n", page->debug_flags);
    kprintf("collided page: %p\n", page->collided_page);
#endif  // DEBUG_PAGE_CACHE_TRANSITIONS
    kprintf("area mappings:\n");

    vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
    vm_page_mapping *mapping;
    while ((mapping = iterator.Next()) != NULL)
        kprintf("  %p (%#lx)\n", mapping->area, mapping->area->id);

    return 0;
}


static int
dump_page_queue(int argc, char **argv)
{
    struct page_queue *queue;

    if (argc < 2) {
        kprintf("usage: page_queue <address/name> [list]\n");
        return 0;
    }

    if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
        queue = (struct page_queue *)strtoul(argv[1], NULL, 16);
    else if (!strcmp(argv[1], "free"))
        queue = &sFreePageQueue;
    else if (!strcmp(argv[1], "clear"))
        queue = &sClearPageQueue;
    else if (!strcmp(argv[1], "modified"))
        queue = &sModifiedPageQueue;
    else if (!strcmp(argv[1], "active"))
        queue = &sActivePageQueue;
    else if (!strcmp(argv[1], "inactive"))
        queue = &sInactivePageQueue;
    else {
        kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
        return 0;
    }

    kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %ld\n",
        queue, queue->head, queue->tail, queue->count);

    if (argc == 3) {
        struct vm_page *page = queue->head;
        int i;

        kprintf("page      cache      type     state      wired  usage\n");
        for (i = 0; page; i++, page = page->queue_next) {
            // determine the type of the page's cache (if it has one)
            const char *type = "none";
            if (page->cache != NULL) {
                switch (page->cache->type) {
                    case CACHE_TYPE_RAM:
                        type = "RAM";
                        break;
                    case CACHE_TYPE_DEVICE:
                        type = "device";
                        break;
                    case CACHE_TYPE_VNODE:
                        type = "vnode";
                        break;
                    case CACHE_TYPE_NULL:
                        type = "null";
                        break;
                    default:
                        type = "???";
                        break;
                }
            }

            kprintf("%p %p %-7s %8s %5d %5d\n", page, page->cache,
                type, page_state_to_string(page->state),
                page->wired_count, page->usage_count);
        }
    }
    return 0;
}


static int
dump_page_stats(int argc, char **argv)
{
    page_num_t swappableModified = 0;
    page_num_t swappableModifiedInactive = 0;
    uint32 counter[8];
    addr_t i;

    memset(counter, 0, sizeof(counter));

    for (i = 0; i < sNumPages; i++) {
        if (sPages[i].state > 7)
            panic("page %li at %p has invalid state!\n", i, &sPages[i]);

        counter[sPages[i].state]++;

        if (sPages[i].state == PAGE_STATE_MODIFIED && sPages[i].cache != NULL
            && sPages[i].cache->temporary && sPages[i].wired_count == 0) {
            swappableModified++;
            if (sPages[i].usage_count < 0)
                swappableModifiedInactive++;
        }
    }

    kprintf("page stats:\n");
    kprintf("total: %lu\n", sNumPages);
    kprintf("active: %lu\ninactive: %lu\nbusy: %lu\nunused: %lu\n",
        counter[PAGE_STATE_ACTIVE], counter[PAGE_STATE_INACTIVE],
        counter[PAGE_STATE_BUSY], counter[PAGE_STATE_UNUSED]);
    kprintf("wired: %lu\nmodified: %lu\nfree: %lu\nclear: %lu\n",
        counter[PAGE_STATE_WIRED], counter[PAGE_STATE_MODIFIED],
        counter[PAGE_STATE_FREE], counter[PAGE_STATE_CLEAR]);
    kprintf("reserved pages: %lu\n", sReservedPages);
    kprintf("page deficit: %lu\n", sPageDeficit);
    kprintf("mapped pages: %lu\n", gMappedPagesCount);

    kprintf("\nfree queue: %p, count = %ld\n", &sFreePageQueue,
        sFreePageQueue.count);
    kprintf("clear queue: %p, count = %ld\n", &sClearPageQueue,
        sClearPageQueue.count);
    kprintf("modified queue: %p, count = %ld (%ld temporary, %lu swappable, "
        "inactive: %lu)\n", &sModifiedPageQueue, sModifiedPageQueue.count,
        sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
    kprintf("active queue: %p, count = %ld\n", &sActivePageQueue,
        sActivePageQueue.count);
    kprintf("inactive queue: %p, count = %ld\n", &sInactivePageQueue,
        sInactivePageQueue.count);
    return 0;
}


static inline size_t
free_page_queue_count(void)
{
    return sFreePageQueue.count + sClearPageQueue.count;
}


static status_t
set_page_state_nolock(vm_page *page, int pageState)
{
    if (pageState == page->state)
        return B_OK;

    page_queue *fromQueue = NULL;
    page_queue *toQueue = NULL;

    switch (page->state) {
        case PAGE_STATE_BUSY:
        case PAGE_STATE_ACTIVE:
        case PAGE_STATE_WIRED:
        case PAGE_STATE_UNUSED:
            fromQueue = &sActivePageQueue;
            break;
        case PAGE_STATE_INACTIVE:
            fromQueue = &sInactivePageQueue;
            break;
        case PAGE_STATE_MODIFIED:
            fromQueue = &sModifiedPageQueue;
            break;
        case PAGE_STATE_FREE:
            fromQueue = &sFreePageQueue;
            break;
        case PAGE_STATE_CLEAR:
            fromQueue = &sClearPageQueue;
            break;
        default:
            panic("vm_page_set_state: vm_page %p in invalid state %d\n",
                page, page->state);
            break;
    }

    if (page->state == PAGE_STATE_CLEAR || page->state == PAGE_STATE_FREE) {
        if (page->cache != NULL)
            panic("free page %p has cache", page);
    }

    switch (pageState) {
        case PAGE_STATE_BUSY:
        case PAGE_STATE_ACTIVE:
        case PAGE_STATE_WIRED:
        case PAGE_STATE_UNUSED:
            toQueue = &sActivePageQueue;
            break;
        case PAGE_STATE_INACTIVE:
            toQueue = &sInactivePageQueue;
            break;
        case PAGE_STATE_MODIFIED:
            toQueue = &sModifiedPageQueue;
            break;
        case PAGE_STATE_FREE:
            toQueue = &sFreePageQueue;
            break;
        case PAGE_STATE_CLEAR:
            toQueue = &sClearPageQueue;
            break;
        default:
            panic("vm_page_set_state: invalid target state %d\n", pageState);
    }

    if (pageState == PAGE_STATE_CLEAR || pageState == PAGE_STATE_FREE
        || pageState == PAGE_STATE_INACTIVE) {
        if (sPageDeficit > 0)
            sFreePageCondition.NotifyOne();

        if (pageState != PAGE_STATE_INACTIVE && page->cache != NULL)
            panic("to be freed page %p has cache", page);
    }
    if (page->cache != NULL && page->cache->temporary) {
        if (pageState == PAGE_STATE_MODIFIED)
            sModifiedTemporaryPages++;
        else if (page->state == PAGE_STATE_MODIFIED)
            sModifiedTemporaryPages--;
    }

#ifdef PAGE_ALLOCATION_TRACING
    if ((pageState == PAGE_STATE_CLEAR || pageState == PAGE_STATE_FREE)
        && page->state != PAGE_STATE_CLEAR && page->state != PAGE_STATE_FREE) {
        T(FreePage());
    }
#endif  // PAGE_ALLOCATION_TRACING

    page->state = pageState;
    move_page_to_queue(fromQueue, toQueue, page);

    return B_OK;
}


/*! Moves a modified page into either the active or inactive page queue
    depending on its usage count and wiring.
*/
static void
move_page_to_active_or_inactive_queue(vm_page *page, bool dequeued)
{
    // Note, this logic must be in sync with what the page daemon does
    int32 state;
    if (!page->mappings.IsEmpty() || page->usage_count >= 0
        || page->wired_count)
        state = PAGE_STATE_ACTIVE;
    else
        state = PAGE_STATE_INACTIVE;

    if (dequeued) {
        page->state = state;
        enqueue_page(state == PAGE_STATE_ACTIVE
            ? &sActivePageQueue : &sInactivePageQueue, page);
        if (page->cache->temporary)
            sModifiedTemporaryPages--;
    } else
        set_page_state_nolock(page, state);
}


static void
clear_page(struct vm_page *page)
{
    addr_t virtualAddress;
    vm_get_physical_page(page->physical_page_number << PAGE_SHIFT,
        &virtualAddress, PHYSICAL_PAGE_CAN_WAIT);

    memset((void *)virtualAddress, 0, B_PAGE_SIZE);

    vm_put_physical_page(virtualAddress);
}


/*! This is a background thread that wakes up every now and then (every 100ms)
    and moves some pages from the free queue over to the clear queue.
    Given enough time, it will clear out all pages from the free queue - we
    could probably slow it down after having reached a certain threshold.
*/
static int32
page_scrubber(void *unused)
{
    (void)(unused);

    TRACE(("page_scrubber starting...\n"));

    for (;;) {
        snooze(100000); // 100ms

        if (sFreePageQueue.count > 0) {
            vm_page *page[SCRUB_SIZE];
            int32 i, scrubCount;

            // get some pages from the free queue

            InterruptsSpinLocker locker(sPageLock);

            // Since we temporarily remove pages from the free pages reserve,
            // we must make sure we don't cause a violation of the page
            // reservation warranty. The following is usually stricter than
            // necessary, because we don't have information on how many of the
            // reserved pages have already been allocated.
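            // For example (hypothetical numbers): with SCRUB_SIZE == 16,
            // freeCount == 20 and sReservedPages == 12, at most
            // 20 - 12 == 8 pages may be taken, so scrubCount is clamped from
            // 16 down to 8; if freeCount dropped below sReservedPages, no
            // pages would be scrubbed at all.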
            scrubCount = SCRUB_SIZE;
            uint32 freeCount = free_page_queue_count();
            if (freeCount < sReservedPages)
                scrubCount = 0;
            else if ((uint32)scrubCount > freeCount - sReservedPages)
                scrubCount = freeCount - sReservedPages;

            for (i = 0; i < scrubCount; i++) {
                page[i] = dequeue_page(&sFreePageQueue);
                if (page[i] == NULL)
                    break;
                page[i]->state = PAGE_STATE_BUSY;
            }

            scrubCount = i;

            if (scrubCount > 0) {
                T(ScrubbingPages(scrubCount));
            }

            locker.Unlock();

            // clear them

            for (i = 0; i < scrubCount; i++) {
                clear_page(page[i]);
            }

            locker.Lock();

            // and put them into the clear queue

            for (i = 0; i < scrubCount; i++) {
                page[i]->state = PAGE_STATE_CLEAR;
                enqueue_page(&sClearPageQueue, page[i]);
            }

            if (scrubCount > 0) {
                T(ScrubbedPages(scrubCount));
            }
        }
    }

    return 0;
}


static status_t
write_page(vm_page *page, uint32 flags, AsyncIOCallback* callback)
{
    TRACE(("write_page(page = %p): offset = %Ld\n", page,
        (off_t)page->cache_offset << PAGE_SHIFT));

    off_t offset = (off_t)page->cache_offset << PAGE_SHIFT;

    iovec vecs[1];
    vecs->iov_base = (void*)(addr_t)(page->physical_page_number * B_PAGE_SIZE);
    vecs->iov_len = B_PAGE_SIZE;

    if (callback != NULL) {
        // asynchronous I/O
        return page->cache->WriteAsync(offset, vecs, 1, B_PAGE_SIZE,
            flags | B_PHYSICAL_IO_REQUEST, callback);
    }

    // synchronous I/O
    size_t length = B_PAGE_SIZE;
    status_t status = page->cache->Write(offset, vecs, 1,
        flags | B_PHYSICAL_IO_REQUEST, &length);

    if (status == B_OK && length == 0)
        status = B_ERROR;

    return status;
}


static inline bool
is_marker_page(struct vm_page *page)
{
    return page->type == PAGE_TYPE_DUMMY;
}


static void
remove_page_marker(struct vm_page &marker)
{
    if (marker.state == PAGE_STATE_UNUSED)
        return;

    page_queue *queue;

    switch (marker.state) {
        case PAGE_STATE_ACTIVE:
            queue = &sActivePageQueue;
            break;
        case PAGE_STATE_INACTIVE:
            queue = &sInactivePageQueue;
            break;
        case PAGE_STATE_MODIFIED:
            queue = &sModifiedPageQueue;
            break;

        default:
            return;
    }

    InterruptsSpinLocker locker(sPageLock);
    remove_page_from_queue(queue, &marker);

    marker.state = PAGE_STATE_UNUSED;
}


static vm_page *
next_modified_page(struct vm_page &marker)
{
    InterruptsSpinLocker locker(sPageLock);
    vm_page *page;

    if (marker.state == PAGE_STATE_MODIFIED) {
        page = marker.queue_next;
        remove_page_from_queue(&sModifiedPageQueue, &marker);
        marker.state = PAGE_STATE_UNUSED;
    } else
        page = sModifiedPageQueue.head;

    for (; page != NULL; page = page->queue_next) {
        if (!is_marker_page(page) && page->state != PAGE_STATE_BUSY) {
            // insert marker
            marker.state = PAGE_STATE_MODIFIED;
            insert_page_after(&sModifiedPageQueue, page, &marker);
            return page;
        }
    }

    return NULL;
}


class PageWriterCallback;


class PageWriterRun {
public:
    status_t Init(uint32 maxPages);

    void PrepareNextRun();
    void AddPage(vm_page* page);
    void Go();

    void PageWritten(PageWriterCallback* callback, status_t status,
        bool partialTransfer, size_t bytesTransferred);

private:
    uint32              fMaxPages;
    uint32              fPageCount;
    vint32              fPendingPages;
    PageWriterCallback* fCallbacks;
    ConditionVariable   fAllFinishedCondition;
};


class PageWriterCallback : public AsyncIOCallback {
public:
    void SetTo(PageWriterRun* run, vm_page* page)
    {
        fRun = run;
        fPage = page;
        fCache = page->cache;
        fStatus = B_OK;
        fBusyCondition.Publish(page, "page");
    }

    vm_page* Page() const { return fPage; }
    VMCache* Cache() const { return fCache; }
    status_t Status() const { return fStatus; }

    ConditionVariable& BusyCondition() { return fBusyCondition; }

    virtual void IOFinished(status_t status, bool partialTransfer,
        size_t bytesTransferred)
    {
        fStatus = status == B_OK && bytesTransferred == 0 ? B_ERROR : status;
        fRun->PageWritten(this, status, partialTransfer, bytesTransferred);
    }

private:
    PageWriterRun*      fRun;
    vm_page*            fPage;
    VMCache*            fCache;
    status_t            fStatus;
    ConditionVariable   fBusyCondition;
};


status_t
PageWriterRun::Init(uint32 maxPages)
{
    fMaxPages = maxPages;
    fPageCount = 0;
    fPendingPages = 0;

    fCallbacks = new(std::nothrow) PageWriterCallback[maxPages];
    if (fCallbacks == NULL)
        return B_NO_MEMORY;

    return B_OK;
}


void
PageWriterRun::PrepareNextRun()
{
    fPageCount = 0;
    fPendingPages = 0;
}


void
PageWriterRun::AddPage(vm_page* page)
{
    page->state = PAGE_STATE_BUSY;
    page->busy_writing = true;

    fCallbacks[fPageCount].SetTo(this, page);
    fPageCount++;
}


void
PageWriterRun::Go()
{
    fPendingPages = fPageCount;

    fAllFinishedCondition.Init(this, "page writer wait for I/O");
    ConditionVariableEntry waitEntry;
    fAllFinishedCondition.Add(&waitEntry);

    // schedule writes
    for (uint32 i = 0; i < fPageCount; i++) {
        PageWriterCallback& callback = fCallbacks[i];
        write_page(callback.Page(), B_VIP_IO_REQUEST, &callback);
    }

    // wait until all pages have been written
    waitEntry.Wait();

    // mark pages depending on whether they could be written or not

    for (uint32 i = 0; i < fPageCount; i++) {
        PageWriterCallback& callback = fCallbacks[i];
        vm_page* page = callback.Page();
        vm_cache* cache = callback.Cache();
        cache->Lock();

        if (callback.Status() == B_OK) {
            // put it into the active queue
            InterruptsSpinLocker locker(sPageLock);
            move_page_to_active_or_inactive_queue(page, true);
            page->busy_writing = false;
        } else {
            // We don't have to put the PAGE_MODIFIED bit back, as it's
            // still in the modified pages list.
            {
                InterruptsSpinLocker locker(sPageLock);
                page->state = PAGE_STATE_MODIFIED;
                enqueue_page(&sModifiedPageQueue, page);
            }
            if (!page->busy_writing) {
                // someone has cleared the busy_writing flag which tells
                // us our page has gone invalid
                cache->RemovePage(page);
            } else
                page->busy_writing = false;
        }

        callback.BusyCondition().Unpublish();

        cache->Unlock();
    }

    for (uint32 i = 0; i < fPageCount; i++) {
        vm_cache* cache = fCallbacks[i].Cache();

        // We release the cache references after all pages were made
        // unbusy again - otherwise releasing a vnode could deadlock.
        cache->ReleaseStoreRef();
        cache->ReleaseRef();
    }
}


void
PageWriterRun::PageWritten(PageWriterCallback* callback, status_t status,
    bool partialTransfer, size_t bytesTransferred)
{
    if (atomic_add(&fPendingPages, -1) == 1)
        fAllFinishedCondition.NotifyAll();
}


/*! The page writer continuously takes some pages from the modified
    queue, writes them back, and moves them back to the active queue.
    It runs in its own thread, and is only there to keep the number
    of modified pages low, so that more pages can be reused at lower
    cost.
*/
status_t
page_writer(void* /*unused*/)
{
    const uint32 kNumPages = 256;
    uint32 writtenPages = 0;
    bigtime_t lastWrittenTime = 0;
    bigtime_t pageCollectionTime = 0;
    bigtime_t pageWritingTime = 0;

    PageWriterRun run;
    if (run.Init(kNumPages) != B_OK) {
        panic("page writer: Failed to init PageWriterRun!");
        return B_ERROR;
    }

    vm_page marker;
    marker.type = PAGE_TYPE_DUMMY;
    marker.cache = NULL;
    marker.state = PAGE_STATE_UNUSED;

    while (true) {
        if (sModifiedPageQueue.count - sModifiedTemporaryPages < 1024) {
            int32 count = 0;
            get_sem_count(sWriterWaitSem, &count);
            if (count == 0)
                count = 1;

            acquire_sem_etc(sWriterWaitSem, count, B_RELATIVE_TIMEOUT, 3000000);
                // every 3 seconds when no one triggers us
        }

        uint32 numPages = 0;
        run.PrepareNextRun();

        // TODO: make this laptop friendly, too (i.e. only start doing
        // something if someone else did something or there is really
        // enough to do).

        // collect pages to be written
#if ENABLE_SWAP_SUPPORT
        bool lowOnPages = low_resource_state(B_KERNEL_RESOURCE_PAGES)
            != B_NO_LOW_RESOURCE;
#endif

        pageCollectionTime -= system_time();

        while (numPages < kNumPages) {
            vm_page *page = next_modified_page(marker);
            if (page == NULL)
                break;

            PageCacheLocker cacheLocker(page, false);
            if (!cacheLocker.IsLocked())
                continue;

            vm_cache *cache = page->cache;

            // Don't write back wired (locked) pages and don't write RAM pages
            // until we're low on pages. Also avoid writing temporary pages
            // that are active.
            if (page->wired_count > 0
                || cache->temporary
#if ENABLE_SWAP_SUPPORT
                    && (!lowOnPages /*|| page->usage_count > 0*/)
#endif
                ) {
                continue;
            }

            // we need our own reference to the store, as it might
            // currently be destructed
            if (cache->AcquireUnreferencedStoreRef() != B_OK) {
                cacheLocker.Unlock();
                thread_yield(true);
                continue;
            }

            InterruptsSpinLocker locker(sPageLock);

            // state might have changed while we were locking the cache
            if (page->state != PAGE_STATE_MODIFIED) {
                // release the cache reference
                locker.Unlock();
                cache->ReleaseStoreRef();
                continue;
            }

            remove_page_from_queue(&sModifiedPageQueue, page);

            run.AddPage(page);

            locker.Unlock();

            //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
            vm_clear_map_flags(page, PAGE_MODIFIED);
            cache->AcquireRefLocked();
            numPages++;
        }

        pageCollectionTime += system_time();

        if (numPages == 0)
            continue;

        // write pages to disk and do all the cleanup
        pageWritingTime -= system_time();
        run.Go();
        pageWritingTime += system_time();

        // debug output only...
        writtenPages += numPages;
        if (writtenPages >= 1024) {
            bigtime_t now = system_time();
            dprintf("page writer: wrote 1024 pages (total: %llu ms, "
                "collect: %llu ms, write: %llu ms)\n",
                (now - lastWrittenTime) / 1000,
                pageCollectionTime / 1000, pageWritingTime / 1000);
            writtenPages -= 1024;
            lastWrittenTime = now;
            pageCollectionTime = 0;
            pageWritingTime = 0;
        }
    }

    remove_page_marker(marker);
    return B_OK;
}


static vm_page *
find_page_candidate(struct vm_page &marker, bool stealActive)
{
    InterruptsSpinLocker locker(sPageLock);
    page_queue *queue;
    vm_page *page;

    if (marker.state == PAGE_STATE_UNUSED) {
        // Get the first page of the (in)active queue
        queue = &sInactivePageQueue;
        page = sInactivePageQueue.head;
        if (page == NULL && stealActive) {
            queue = &sActivePageQueue;
            page = sActivePageQueue.head;
        }
    } else {
        // Get the next page of the current queue
        if (marker.state == PAGE_STATE_INACTIVE)
            queue = &sInactivePageQueue;
        else if (marker.state == PAGE_STATE_ACTIVE)
            queue = &sActivePageQueue;
        else {
            panic("invalid marker %p state", &marker);
            queue = NULL;
        }

        page = marker.queue_next;
        remove_page_from_queue(queue, &marker);
        marker.state = PAGE_STATE_UNUSED;
    }

    while (page != NULL) {
        if (!is_marker_page(page)
            && (page->state == PAGE_STATE_INACTIVE
                || (stealActive && page->state == PAGE_STATE_ACTIVE
                    && page->wired_count == 0))) {
            // we found a candidate, insert marker
            marker.state = queue == &sActivePageQueue
                ? PAGE_STATE_ACTIVE : PAGE_STATE_INACTIVE;
            insert_page_after(queue, page, &marker);
            return page;
        }

        page = page->queue_next;
        if (page == NULL && stealActive && queue != &sActivePageQueue) {
            queue = &sActivePageQueue;
            page = sActivePageQueue.head;
        }
    }

    return NULL;
}


static bool
steal_page(vm_page *page, bool stealActive)
{
    // try to lock the page's cache
    if (vm_cache_acquire_locked_page_cache(page, false) == NULL)
        return false;

    AutoLocker<VMCache> cacheLocker(page->cache, true, false);
    MethodDeleter<VMCache> _2(page->cache, &VMCache::ReleaseRefLocked);

    // check again if that page is still a candidate
    if (page->state != PAGE_STATE_INACTIVE
        && (!stealActive || page->state != PAGE_STATE_ACTIVE
            || page->wired_count != 0))
        return false;

    // check for any last-minute changes
    uint32 flags;
    vm_remove_all_page_mappings(page, &flags);
    if ((flags & PAGE_MODIFIED) != 0) {
        // page was modified, don't steal it
        vm_page_set_state(page, PAGE_STATE_MODIFIED);
        return false;
    } else if ((flags & PAGE_ACCESSED) != 0) {
        // page is in active use, don't steal it
        vm_page_set_state(page, PAGE_STATE_ACTIVE);
        return false;
    }

    // we can now steal this page

    //dprintf("  steal page %p from cache %p%s\n", page, page->cache,
    //  page->state == PAGE_STATE_INACTIVE ? "" : " (ACTIVE)");

    page->cache->RemovePage(page);

    InterruptsSpinLocker _(sPageLock);
    remove_page_from_queue(page->state == PAGE_STATE_ACTIVE
        ? &sActivePageQueue : &sInactivePageQueue, page);
    return true;
}


static size_t
steal_pages(vm_page **pages, size_t count, bool reserve)
{
    size_t maxCount = count;

    while (true) {
        vm_page marker;
        marker.type = PAGE_TYPE_DUMMY;
        marker.cache = NULL;
        marker.state = PAGE_STATE_UNUSED;

        bool tried = false;
        size_t stolen = 0;

        while (count > 0) {
            vm_page *page = find_page_candidate(marker, false);
            if (page == NULL)
                break;

            if (steal_page(page, false)) {
                if (reserve || stolen >= maxCount) {
                    InterruptsSpinLocker _(sPageLock);
                    enqueue_page(&sFreePageQueue, page);
                    page->state = PAGE_STATE_FREE;

                    T(StolenPage());
                } else if (stolen < maxCount) {
                    pages[stolen] = page;
                }
                stolen++;
                count--;
            } else
                tried = true;
        }

        remove_page_marker(marker);

        InterruptsSpinLocker locker(sPageLock);

        if ((reserve && sReservedPages <= free_page_queue_count())
            || count == 0
            || (!reserve && (sInactivePageQueue.count > 0
                || free_page_queue_count() > sReservedPages)))
            return stolen;

        if (stolen && !tried && sInactivePageQueue.count > 0) {
            count++;
            continue;
        }

        // we need to wait for pages to become inactive

        ConditionVariableEntry freeConditionEntry;
        sPageDeficit++;
        freeConditionEntry.Add(&sFreePageQueue);
        locker.Unlock();

        if (tried) {
            // We tried all potential pages, but one or more couldn't be stolen
            // at that time (likely because their cache was locked). No one
            // else will have any better luck, so we'll just retry a little
            // later.
            freeConditionEntry.Wait(B_RELATIVE_TIMEOUT, 10000);
        } else {
            low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
            //snooze(50000);
                // sleep for 50ms

            freeConditionEntry.Wait();
        }

        locker.Lock();
        sPageDeficit--;

        if (reserve && sReservedPages <= free_page_queue_count())
            return stolen;
    }
}


// #pragma mark - private kernel API


/*! Writes a range of modified pages of a cache to disk.
    You need to hold the vm_cache lock when calling this function.
    Note that the cache lock is released in this function (it is temporarily
    dropped while a page is being written and reacquired afterwards).
    \param cache The cache.
    \param firstPage Offset (in page size units) of the first page in the range.
    \param endPage End offset (in page size units) of the page range. The page
        at this offset is not included.
*/
status_t
vm_page_write_modified_page_range(struct VMCache *cache, uint32 firstPage,
    uint32 endPage)
{
    // TODO: join adjacent pages into one vec list

    for (VMCachePagesTree::Iterator it
            = cache->pages.GetIterator(firstPage, true, true);
            vm_page *page = it.Next();) {
        bool dequeuedPage = false;

        if (page->cache_offset >= endPage)
            break;

        if (page->state == PAGE_STATE_MODIFIED) {
            InterruptsSpinLocker locker(&sPageLock);
            remove_page_from_queue(&sModifiedPageQueue, page);
            dequeuedPage = true;
        } else if (page->state == PAGE_STATE_BUSY
            || !vm_test_map_modification(page)) {
            continue;
        }

        page->state = PAGE_STATE_BUSY;
        page->busy_writing = true;

        ConditionVariable busyCondition;
        busyCondition.Publish(page, "page");

        // We have a modified page - however, while we're writing it back,
        // the page is still mapped. In order not to lose any changes to the
        // page, we mark it clean before actually writing it back; if writing
        // the page fails for some reason, we just keep it in the modified page
        // list, but that should happen only rarely.

        // If the page is changed after we cleared the dirty flag, but before we
        // had the chance to write it back, then we'll write it again later -
        // that will probably not happen that often, though.

        // clear the modified flag
        vm_clear_map_flags(page, PAGE_MODIFIED);

        cache->Unlock();
        status_t status = write_page(page, 0, NULL);
        cache->Lock();

        InterruptsSpinLocker locker(&sPageLock);

        if (status == B_OK) {
            // put it into the active/inactive queue
            move_page_to_active_or_inactive_queue(page, dequeuedPage);
            page->busy_writing = false;
        } else {
            // We don't have to put the PAGE_MODIFIED bit back, as it's still
            // in the modified pages list.
            if (dequeuedPage) {
                page->state = PAGE_STATE_MODIFIED;
                enqueue_page(&sModifiedPageQueue, page);
            }

            if (!page->busy_writing) {
                // someone has cleared the busy_writing flag which tells
                // us our page has gone invalid
                cache->RemovePage(page);
            } else {
                if (!dequeuedPage)
                    set_page_state_nolock(page, PAGE_STATE_MODIFIED);

                page->busy_writing = false;
            }
        }

        busyCondition.Unpublish();
    }

    return B_OK;
}

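
// Illustrative sketch of a typical call site (an assumed caller, not code
// from this file): a component that wants a range of a locked cache written
// back could do roughly
//
//     cache->Lock();
//     vm_page_write_modified_page_range(cache, firstPage, endPage);
//     cache->Unlock();
//
// The cache lock is only dropped temporarily around the actual I/O; the
// function returns with the cache locked again.
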

/*! You need to hold the vm_cache lock when calling this function.
    Note that the cache lock is released in this function.
*/
status_t
vm_page_write_modified_pages(vm_cache *cache)
{
    return vm_page_write_modified_page_range(cache, 0,
        (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
}


/*! Schedules the page writer to write back the specified \a page.
    Note, however, that it might not do this immediately, and it can well
    take several seconds until the page is actually written out.
*/
void
vm_page_schedule_write_page(vm_page *page)
{
    ASSERT(page->state == PAGE_STATE_MODIFIED);

    vm_page_requeue(page, false);

    release_sem_etc(sWriterWaitSem, 1, B_DO_NOT_RESCHEDULE);
}


/*! Cache must be locked.
*/
void
vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
    uint32 endPage)
{
    uint32 modified = 0;
    for (VMCachePagesTree::Iterator it
            = cache->pages.GetIterator(firstPage, true, true);
            vm_page *page = it.Next();) {
        if (page->cache_offset >= endPage)
            break;

        if (page->state == PAGE_STATE_MODIFIED) {
            vm_page_requeue(page, false);
            modified++;
        }
    }

    if (modified > 0)
        release_sem_etc(sWriterWaitSem, 1, B_DO_NOT_RESCHEDULE);
}


void
vm_page_init_num_pages(kernel_args *args)
{
    uint32 i;

    // calculate the size of memory by looking at the physical_memory_range array
    addr_t physicalPagesEnd = 0;
    sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;

    for (i = 0; i < args->num_physical_memory_ranges; i++) {
        physicalPagesEnd = (args->physical_memory_range[i].start
            + args->physical_memory_range[i].size) / B_PAGE_SIZE;
    }

    TRACE(("first phys page = 0x%lx, end 0x%lx\n", sPhysicalPageOffset,
        physicalPagesEnd));

    sNumPages = physicalPagesEnd - sPhysicalPageOffset;

#ifdef LIMIT_AVAILABLE_MEMORY
    if (sNumPages > LIMIT_AVAILABLE_MEMORY * 256)
        sNumPages = LIMIT_AVAILABLE_MEMORY * 256;
#endif
}


status_t
vm_page_init(kernel_args *args)
{
    TRACE(("vm_page_init: entry\n"));

    // map in the new free page table
    sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
        ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);

    TRACE(("vm_init: putting free_page_table @ %p, # ents %d (size 0x%x)\n",
        sPages, sNumPages, (unsigned int)(sNumPages * sizeof(vm_page))));

    // initialize the free page table
    for (uint32 i = 0; i < sNumPages; i++) {
        sPages[i].physical_page_number = sPhysicalPageOffset + i;
        sPages[i].type = PAGE_TYPE_PHYSICAL;
        sPages[i].state = PAGE_STATE_FREE;
        new(&sPages[i].mappings) vm_page_mappings();
        sPages[i].wired_count = 0;
        sPages[i].usage_count = 0;
        sPages[i].busy_writing = false;
        sPages[i].merge_swap = false;
        sPages[i].cache = NULL;
#ifdef DEBUG_PAGE_QUEUE
        sPages[i].queue = NULL;
#endif
#ifdef DEBUG_PAGE_CACHE_TRANSITIONS
        sPages[i].debug_flags = 0;
        sPages[i].collided_page = NULL;
#endif  // DEBUG_PAGE_CACHE_TRANSITIONS
        enqueue_page(&sFreePageQueue, &sPages[i]);
    }

    TRACE(("initialized table\n"));

    // mark some of the page ranges inuse
    for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
        vm_mark_page_range_inuse(args->physical_allocated_range[i].start / B_PAGE_SIZE,
            args->physical_allocated_range[i].size / B_PAGE_SIZE);
    }

    TRACE(("vm_page_init: exit\n"));

    return B_OK;
}


status_t
vm_page_init_post_area(kernel_args *args)
{
    void *dummy;

    dummy = sPages;
    create_area("page structures", &dummy, B_EXACT_ADDRESS,
        PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
        B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);

    add_debugger_command("page_stats", &dump_page_stats, "Dump statistics about page usage");
    add_debugger_command("page", &dump_page, "Dump page info");
    add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
    add_debugger_command("find_page", &find_page,
        "Find out which queue a page is actually in");

    return B_OK;
}


status_t
vm_page_init_post_thread(kernel_args *args)
{
    new (&sFreePageCondition) ConditionVariable;
    sFreePageCondition.Publish(&sFreePageQueue, "free page");

    // create a kernel thread to clear out pages

    thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
        B_LOWEST_ACTIVE_PRIORITY, NULL);
    send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE);

    // start page writer

    sWriterWaitSem = create_sem(0, "page writer");

    thread = spawn_kernel_thread(&page_writer, "page writer",
        B_NORMAL_PRIORITY + 1, NULL);
    send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE);

    return B_OK;
}


status_t
vm_mark_page_inuse(addr_t page)
{
    return vm_mark_page_range_inuse(page, 1);
}


status_t
vm_mark_page_range_inuse(addr_t startPage, addr_t length)
{
    TRACE(("vm_mark_page_range_inuse: start 0x%lx, len 0x%lx\n",
        startPage, length));

    if (sPhysicalPageOffset > startPage) {
        TRACE(("vm_mark_page_range_inuse: start page %ld is before free list\n",
            startPage));
        return B_BAD_VALUE;
    }
    startPage -= sPhysicalPageOffset;
    if (startPage + length > sNumPages) {
        TRACE(("vm_mark_page_range_inuse: range would extend past free list\n"));
        return B_BAD_VALUE;
    }

    cpu_status state = disable_interrupts();
    acquire_spinlock(&sPageLock);

    for (addr_t i = 0; i < length; i++) {
        vm_page *page = &sPages[startPage + i];
        switch (page->state) {
            case PAGE_STATE_FREE:
            case PAGE_STATE_CLEAR:
                set_page_state_nolock(page, PAGE_STATE_UNUSED);
                break;
            case PAGE_STATE_WIRED:
                break;
            case PAGE_STATE_ACTIVE:
            case PAGE_STATE_INACTIVE:
            case PAGE_STATE_BUSY:
            case PAGE_STATE_MODIFIED:
            case PAGE_STATE_UNUSED:
            default:
                // uh
                dprintf("vm_mark_page_range_inuse: page 0x%lx in non-free state %d!\n",
                    startPage + i, page->state);
                break;
        }
    }

    release_spinlock(&sPageLock);
    restore_interrupts(state);

    return B_OK;
}


/*! Unreserve pages previously reserved with vm_page_reserve_pages().
    Note that you specify the same \a count here that you specified when
    reserving the pages - you don't need to keep track of how many pages
    you actually needed of that upfront allocation.
*/
void
vm_page_unreserve_pages(uint32 count)
{
    if (count == 0)
        return;

    InterruptsSpinLocker locker(sPageLock);
    ASSERT(sReservedPages >= count);

    T(UnreservePages(count));

    sReservedPages -= count;

    if (sPageDeficit > 0)
        sFreePageCondition.NotifyAll();
}


/*!
    With this call, you can reserve a number of free pages in the system.
    They will only be handed out to someone who has actually reserved them.
    This call returns as soon as the number of requested pages has been
    reached.
*/
void
vm_page_reserve_pages(uint32 count)
{
    if (count == 0)
        return;

    InterruptsSpinLocker locker(sPageLock);

    T(ReservePages(count));

    sReservedPages += count;
    size_t freePages = free_page_queue_count();
    if (sReservedPages <= freePages)
        return;

    locker.Unlock();

    steal_pages(NULL, count + 1, true);
        // we get one more, just in case we can do something someone
        // else can't
}


bool
vm_page_try_reserve_pages(uint32 count)
{
    if (count == 0)
        return true;

    InterruptsSpinLocker locker(sPageLock);

    T(ReservePages(count));

    size_t freePages = free_page_queue_count();
    if (sReservedPages + count > freePages)
        return false;

    sReservedPages += count;
    return true;
}


vm_page *
vm_page_allocate_page(int pageState, bool reserved)
{
    page_queue *queue;
    page_queue *otherQueue;

    switch (pageState) {
        case PAGE_STATE_FREE:
            queue = &sFreePageQueue;
            otherQueue = &sClearPageQueue;
            break;
        case PAGE_STATE_CLEAR:
            queue = &sClearPageQueue;
            otherQueue = &sFreePageQueue;
            break;
        default:
            return NULL; // invalid
    }

    InterruptsSpinLocker locker(sPageLock);

    T(AllocatePage(reserved));

    vm_page *page = NULL;
    while (true) {
        if (reserved || sReservedPages < free_page_queue_count()) {
            page = dequeue_page(queue);
            if (page == NULL) {
#ifdef DEBUG
                if (queue->count != 0)
                    panic("queue %p corrupted, count = %d\n", queue, queue->count);
#endif

                // if the primary queue was empty, grab the page from the
                // secondary queue
                page = dequeue_page(otherQueue);
            }
        }

        if (page != NULL)
            break;

        if (reserved)
            panic("Had reserved page, but there is none!");

        // steal one from the inactive list
        locker.Unlock();
        size_t stolen = steal_pages(&page, 1, false);
        locker.Lock();

        if (stolen > 0)
            break;
    }

    if (page->cache != NULL)
        panic("supposed to be free page %p has cache\n", page);

    int oldPageState = page->state;
    page->state = PAGE_STATE_BUSY;
    page->usage_count = 2;

    enqueue_page(&sActivePageQueue, page);

    locker.Unlock();

    // if a clear page was requested but the page came from the free queue,
    // zero it out now
    if (pageState == PAGE_STATE_CLEAR && oldPageState != PAGE_STATE_CLEAR)
        clear_page(page);

    return page;
}

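
// Illustrative sketch (an assumed caller, not code from this file): pairing
// vm_page_reserve_pages() with "reserved" allocations guarantees that
// vm_page_allocate_page() finds a page, e.g.:
#if 0
    {
        vm_page_reserve_pages(4);

        vm_page *pages[4];
        for (uint32 i = 0; i < 4; i++) {
            pages[i] = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
            // pages come back in PAGE_STATE_BUSY and on the active queue
        }

        // ... map and use the pages ...

        vm_page_unreserve_pages(4);
    }
#endif
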

/*! Allocates a number of pages and puts their pointers into the provided
    array. All pages are marked busy.
    Returns B_OK on success, and B_NO_MEMORY when there aren't any free
    pages left to allocate.
*/
status_t
vm_page_allocate_pages(int pageState, vm_page **pages, uint32 numPages)
{
    uint32 i;

    for (i = 0; i < numPages; i++) {
        pages[i] = vm_page_allocate_page(pageState, false);
        if (pages[i] == NULL) {
            // allocation failed, we need to free what we already have
            while (i-- > 0)
                vm_page_set_state(pages[i], pageState);

            return B_NO_MEMORY;
        }
    }

    return B_OK;
}


vm_page *
vm_page_allocate_page_run(int pageState, addr_t base, addr_t length)
{
    vm_page *firstPage = NULL;
    uint32 start = base >> PAGE_SHIFT;

    InterruptsSpinLocker locker(sPageLock);

    if (sFreePageQueue.count + sClearPageQueue.count - sReservedPages
        < length) {
        // TODO: add more tries, i.e. free some inactive, ...
        // no free space
        return NULL;
    }

    for (;;) {
        bool foundRun = true;
        if (start + length > sNumPages)
            break;

        uint32 i;
        for (i = 0; i < length; i++) {
            if (sPages[start + i].state != PAGE_STATE_FREE
                && sPages[start + i].state != PAGE_STATE_CLEAR) {
                foundRun = false;
                i++;
                break;
            }
        }
        if (foundRun) {
            // pull the pages out of the appropriate queues
            for (i = 0; i < length; i++) {
                sPages[start + i].is_cleared
                    = sPages[start + i].state == PAGE_STATE_CLEAR;
                set_page_state_nolock(&sPages[start + i], PAGE_STATE_BUSY);
                sPages[start + i].usage_count = 2;
            }
            firstPage = &sPages[start];
            break;
        } else {
            start += i;
        }
    }

    T(AllocatePageRun(length));

    locker.Unlock();

    if (firstPage != NULL && pageState == PAGE_STATE_CLEAR) {
        for (uint32 i = 0; i < length; i++) {
            if (!sPages[start + i].is_cleared)
                clear_page(&sPages[start + i]);
        }
    }

    return firstPage;
}


vm_page *
vm_page_at_index(int32 index)
{
    return &sPages[index];
}


vm_page *
vm_lookup_page(addr_t pageNumber)
{
    if (pageNumber < sPhysicalPageOffset)
        return NULL;

    pageNumber -= sPhysicalPageOffset;
    if (pageNumber >= sNumPages)
        return NULL;

    return &sPages[pageNumber];
}


/*! Free the page that belonged to a certain cache.
    You can use vm_page_set_state() manually if you prefer, but only
    if the page's state is not PAGE_STATE_MODIFIED.
*/
void
vm_page_free(vm_cache *cache, vm_page *page)
{
    InterruptsSpinLocker _(sPageLock);

    if (page->cache == NULL && page->state == PAGE_STATE_MODIFIED
        && cache->temporary) {
        sModifiedTemporaryPages--;
    }

    set_page_state_nolock(page, PAGE_STATE_FREE);
}


status_t
vm_page_set_state(vm_page *page, int pageState)
{
    InterruptsSpinLocker _(sPageLock);

    return set_page_state_nolock(page, pageState);
}


/*! Moves a page to either the tail or the head of its current queue,
    depending on \a tail.
*/
void
vm_page_requeue(struct vm_page *page, bool tail)
{
    InterruptsSpinLocker _(sPageLock);
    page_queue *queue = NULL;

    switch (page->state) {
        case PAGE_STATE_BUSY:
        case PAGE_STATE_ACTIVE:
        case PAGE_STATE_WIRED:
        case PAGE_STATE_UNUSED:
            queue = &sActivePageQueue;
            break;
        case PAGE_STATE_INACTIVE:
            queue = &sInactivePageQueue;
            break;
        case PAGE_STATE_MODIFIED:
            queue = &sModifiedPageQueue;
            break;
        case PAGE_STATE_FREE:
            queue = &sFreePageQueue;
            break;
        case PAGE_STATE_CLEAR:
            queue = &sClearPageQueue;
            break;
        default:
            panic("vm_page_requeue: vm_page %p in invalid state %d\n",
                page, page->state);
            break;
    }

    remove_page_from_queue(queue, page);

    if (tail)
        enqueue_page(queue, page);
    else
        enqueue_page_to_head(queue, page);
}


size_t
vm_page_num_pages(void)
{
    return sNumPages;
}


/*! There is a subtle distinction between the page counts returned by
    this function and vm_page_num_free_pages():
    The latter returns the number of pages that are completely uncommitted,
    whereas this one returns the number of pages that are available for
    use by being reclaimed as well (IOW it factors in things like cache pages
    as available).
*/
size_t
vm_page_num_available_pages(void)
{
    return vm_available_memory() / B_PAGE_SIZE;
}


size_t
vm_page_num_free_pages(void)
{
    size_t reservedPages = sReservedPages;
    size_t count = free_page_queue_count() + sInactivePageQueue.count;
    if (reservedPages > count)
        return 0;

    return count - reservedPages;
}


void
vm_page_get_stats(system_info *info)
{
    // Get free pages count -- not really exact, since we don't know how many
    // of the reserved pages have already been allocated, but good citizens
    // unreserve chunk-wise as they are allocating the pages, if they have
    // reserved a larger quantity.
    page_num_t reserved = sReservedPages;
    page_num_t free = free_page_queue_count();
    free = free > reserved ? free - reserved : 0;

    // The pages used for the block cache buffers. Those should not be counted
    // as used but as cached pages.
    // TODO: We should subtract the blocks that are in use ATM, since those
    // can't really be freed in a low memory situation.
    page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;

    info->max_pages = sNumPages;
    info->used_pages = gMappedPagesCount - blockCachePages;
    info->cached_pages = sNumPages >= free + info->used_pages
        ? sNumPages - free - info->used_pages : 0;

    // TODO: We don't consider pages used for page directories/tables yet.
}
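

// Illustrative sketch (hypothetical helper, not part of this file) of the
// distinction documented above: vm_page_num_free_pages() only counts
// free/clear and inactive pages minus the current reservations, while
// vm_page_num_available_pages() also counts memory that could be reclaimed,
// e.g. cache pages. A caller probing for immediate memory pressure would
// therefore rather use the former:
#if 0
static bool
memory_is_tight()
{
    // less than 1 MB of immediately usable pages left?
    return vm_page_num_free_pages() < 1024 * 1024 / B_PAGE_SIZE;
}
#endif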