1 /*
2 * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
5 *
6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7 * Distributed under the terms of the NewOS License.
8 */
9
10
11 #include <string.h>
12 #include <stdlib.h>
13
14 #include <algorithm>
15
16 #include <KernelExport.h>
17 #include <OS.h>
18
19 #include <AutoDeleter.h>
20
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45
46
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 # define TRACE(x) dprintf x
50 #else
51 # define TRACE(x) ;
52 #endif
53
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60
61 //#define TRACK_PAGE_USAGE_STATS 1
62
63 #define PAGE_ASSERT(page, condition) \
64 ASSERT_PRINT((condition), "page: %p", (page))
65
66 #define SCRUB_SIZE 32
67 // this many pages will be cleared at once in the page scrubber thread
68
69 #define MAX_PAGE_WRITER_IO_PRIORITY B_URGENT_DISPLAY_PRIORITY
70 // maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD 10000
72 // the maximum I/O priority shall be reached when this many pages need to
73 // be written
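// (In other words, the page writer's I/O priority is meant to scale with the
// modified-page backlog, saturating at B_URGENT_DISPLAY_PRIORITY once roughly
// 10000 pages are waiting to be written.)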
74
75
76 // The page reserve that an allocation of a certain priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 VM_PAGE_RESERVE_USER, // user
79 VM_PAGE_RESERVE_SYSTEM, // system
80 0 // VIP
81 };
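// The array is indexed by VM priority: an allocation with VM_PRIORITY_USER must
// leave VM_PAGE_RESERVE_USER free pages untouched, one with VM_PRIORITY_SYSTEM
// only VM_PAGE_RESERVE_SYSTEM, while a VIP allocation may use the reserve
// entirely.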
82
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL; // 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL; // 0.5 sec
91
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The vm_page::usage_count increase an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The vm_page::usage_count decrease an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
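// Roughly speaking: a page found accessed during a scan gains kPageUsageAdvance
// (up to kPageUsageMax), while an unaccessed page loses kPageUsageDecline;
// pages whose count drops low enough become candidates for deactivation.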
102
103 int32 gMappedPagesCount;
104
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 // pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 // pages of physical memory ignored by the boot loader (and thus not
121 // available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 = RW_LOCK_INITIALIZER("free/clear page queues");
135
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143
144
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146
147 struct caller_info {
148 addr_t caller;
149 size_t count;
150 };
151
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155
156 static caller_info* get_caller_info(addr_t caller);
157
158
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160
161 static const addr_t kVMPageCodeAddressRange[] = {
162 RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164
165 #endif
166
167
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169
170
171 struct page_stats {
172 int32 totalFreePages;
173 int32 unsatisfiedReservations;
174 int32 cachedPages;
175 };
176
177
178 struct PageReservationWaiter
179 : public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 Thread* thread;
181 uint32 dontTouch; // reserve not to touch
182 uint32 missing; // pages missing for the reservation
183 int32 threadPriority;
184
185 bool operator<(const PageReservationWaiter& other) const
186 {
187 // Implies an order by descending VM priority (ascending dontTouch)
188 // and (secondarily) descending thread priority.
189 if (dontTouch != other.dontTouch)
190 return dontTouch < other.dontTouch;
191 return threadPriority > other.threadPriority;
192 }
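// Example: a VIP waiter (dontTouch == 0) orders before a system waiter, which
// in turn orders before a user waiter; among waiters with the same reserve,
// the higher thread priority wins.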
193 };
194
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197
198
199 struct DaemonCondition {
200 void Init(const char* name)
201 {
202 mutex_init(&fLock, "daemon condition");
203 fCondition.Init(this, name);
204 fActivated = false;
205 }
206
207 bool Lock()
208 {
209 return mutex_lock(&fLock) == B_OK;
210 }
211
212 void Unlock()
213 {
214 mutex_unlock(&fLock);
215 }
216
217 bool Wait(bigtime_t timeout, bool clearActivated)
218 {
219 MutexLocker locker(fLock);
220 if (clearActivated)
221 fActivated = false;
222 else if (fActivated)
223 return true;
224
225 ConditionVariableEntry entry;
226 fCondition.Add(&entry);
227
228 locker.Unlock();
229
230 return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 }
232
233 void WakeUp()
234 {
235 if (fActivated)
236 return;
237
238 MutexLocker locker(fLock);
239 fActivated = true;
240 fCondition.NotifyOne();
241 }
242
243 void ClearActivated()
244 {
245 MutexLocker locker(fLock);
246 fActivated = false;
247 }
248
249 private:
250 mutex fLock;
251 ConditionVariable fCondition;
252 bool fActivated;
253 };
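// Rough usage pattern (see the page writer and page daemon further below): the
// daemon thread calls Wait() with its scan interval at the top of its loop,
// while other threads call WakeUp() to request an early run; WakeUp() returns
// immediately if the condition is already activated.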
254
255
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258
259
260 #if PAGE_ALLOCATION_TRACING
261
262 namespace PageAllocationTracing {
263
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 ReservePages(uint32 count)
267 :
268 fCount(count)
269 {
270 Initialized();
271 }
272
273 virtual void AddDump(TraceOutput& out)
274 {
275 out.Print("page reserve: %" B_PRIu32, fCount);
276 }
277
278 private:
279 uint32 fCount;
280 };
281
282
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 UnreservePages(uint32 count)
286 :
287 fCount(count)
288 {
289 Initialized();
290 }
291
292 virtual void AddDump(TraceOutput& out)
293 {
294 out.Print("page unreserve: %" B_PRId32, fCount);
295 }
296
297 private:
298 uint32 fCount;
299 };
300
301
302 class AllocatePage
303 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 AllocatePage(page_num_t pageNumber)
306 :
307 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 fPageNumber(pageNumber)
309 {
310 Initialized();
311 }
312
313 virtual void AddDump(TraceOutput& out)
314 {
315 out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 }
317
318 private:
319 page_num_t fPageNumber;
320 };
321
322
323 class AllocatePageRun
324 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 AllocatePageRun(page_num_t startPage, uint32 length)
327 :
328 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 fStartPage(startPage),
330 fLength(length)
331 {
332 Initialized();
333 }
334
335 virtual void AddDump(TraceOutput& out)
336 {
337 out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 B_PRIu32, fStartPage, fLength);
339 }
340
341 private:
342 page_num_t fStartPage;
343 uint32 fLength;
344 };
345
346
347 class FreePage
348 : public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 FreePage(page_num_t pageNumber)
351 :
352 TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 fPageNumber(pageNumber)
354 {
355 Initialized();
356 }
357
358 virtual void AddDump(TraceOutput& out)
359 {
360 out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 }
362
363 private:
364 page_num_t fPageNumber;
365 };
366
367
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 ScrubbingPages(uint32 count)
371 :
372 fCount(count)
373 {
374 Initialized();
375 }
376
377 virtual void AddDump(TraceOutput& out)
378 {
379 out.Print("page scrubbing: %" B_PRId32, fCount);
380 }
381
382 private:
383 uint32 fCount;
384 };
385
386
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 ScrubbedPages(uint32 count)
390 :
391 fCount(count)
392 {
393 Initialized();
394 }
395
396 virtual void AddDump(TraceOutput& out)
397 {
398 out.Print("page scrubbed: %" B_PRId32, fCount);
399 }
400
401 private:
402 uint32 fCount;
403 };
404
405
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 StolenPage()
409 {
410 Initialized();
411 }
412
413 virtual void AddDump(TraceOutput& out)
414 {
415 out.Print("page stolen");
416 }
417 };
418
419 } // namespace PageAllocationTracing
420
421 # define TA(x) new(std::nothrow) PageAllocationTracing::x
422
423 #else
424 # define TA(x)
425 #endif // PAGE_ALLOCATION_TRACING
426
427
428 #if PAGE_DAEMON_TRACING
429
430 namespace PageDaemonTracing {
431
432 class ActivatePage : public AbstractTraceEntry {
433 public:
434 ActivatePage(vm_page* page)
435 :
436 fCache(page->cache),
437 fPage(page)
438 {
439 Initialized();
440 }
441
442 virtual void AddDump(TraceOutput& out)
443 {
444 out.Print("page activated: %p, cache: %p", fPage, fCache);
445 }
446
447 private:
448 VMCache* fCache;
449 vm_page* fPage;
450 };
451
452
453 class DeactivatePage : public AbstractTraceEntry {
454 public:
455 DeactivatePage(vm_page* page)
456 :
457 fCache(page->cache),
458 fPage(page)
459 {
460 Initialized();
461 }
462
463 virtual void AddDump(TraceOutput& out)
464 {
465 out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 }
467
468 private:
469 VMCache* fCache;
470 vm_page* fPage;
471 };
472
473
474 class FreedPageSwap : public AbstractTraceEntry {
475 public:
476 FreedPageSwap(vm_page* page)
477 :
478 fCache(page->cache),
479 fPage(page)
480 {
481 Initialized();
482 }
483
484 virtual void AddDump(TraceOutput& out)
485 {
486 out.Print("page swap freed: %p, cache: %p", fPage, fCache);
487 }
488
489 private:
490 VMCache* fCache;
491 vm_page* fPage;
492 };
493
494 } // namespace PageDaemonTracing
495
496 # define TD(x) new(std::nothrow) PageDaemonTracing::x
497
498 #else
499 # define TD(x)
500 #endif // PAGE_DAEMON_TRACING
501
502
503 #if PAGE_WRITER_TRACING
504
505 namespace PageWriterTracing {
506
507 class WritePage : public AbstractTraceEntry {
508 public:
509 WritePage(vm_page* page)
510 :
511 fCache(page->Cache()),
512 fPage(page)
513 {
514 Initialized();
515 }
516
517 virtual void AddDump(TraceOutput& out)
518 {
519 out.Print("page write: %p, cache: %p", fPage, fCache);
520 }
521
522 private:
523 VMCache* fCache;
524 vm_page* fPage;
525 };
526
527 } // namespace PageWriterTracing
528
529 # define TPW(x) new(std::nothrow) PageWriterTracing::x
530
531 #else
532 # define TPW(x)
533 #endif // PAGE_WRITER_TRACING
534
535
536 #if PAGE_STATE_TRACING
537
538 namespace PageStateTracing {
539
540 class SetPageState : public AbstractTraceEntry {
541 public:
542 SetPageState(vm_page* page, uint8 newState)
543 :
544 fPage(page),
545 fOldState(page->State()),
546 fNewState(newState),
547 fBusy(page->busy),
548 fWired(page->WiredCount() > 0),
549 fMapped(!page->mappings.IsEmpty()),
550 fAccessed(page->accessed),
551 fModified(page->modified)
552 {
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 fStackTrace = capture_tracing_stack_trace(
555 PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 // Don't capture userland stack trace to avoid potential
557 // deadlocks.
558 #endif
559 Initialized();
560 }
561
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 virtual void DumpStackTrace(TraceOutput& out)
564 {
565 out.PrintStackTrace(fStackTrace);
566 }
567 #endif
568
569 virtual void AddDump(TraceOutput& out)
570 {
571 out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 fBusy ? 'b' : '-',
573 fWired ? 'w' : '-',
574 fMapped ? 'm' : '-',
575 fAccessed ? 'a' : '-',
576 fModified ? 'm' : '-',
577 page_state_to_string(fOldState),
578 page_state_to_string(fNewState));
579 }
580
581 private:
582 vm_page* fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 tracing_stack_trace* fStackTrace;
585 #endif
586 uint8 fOldState;
587 uint8 fNewState;
588 bool fBusy : 1;
589 bool fWired : 1;
590 bool fMapped : 1;
591 bool fAccessed : 1;
592 bool fModified : 1;
593 };
594
595 } // namespace PageStateTracing
596
597 # define TPS(x) new(std::nothrow) PageStateTracing::x
598
599 #else
600 # define TPS(x)
601 #endif // PAGE_STATE_TRACING
602
603
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605
606 namespace BKernel {
607
608 class AllocationTrackingCallback {
609 public:
610 virtual ~AllocationTrackingCallback();
611
612 virtual bool ProcessTrackingInfo(
613 AllocationTrackingInfo* info,
614 page_num_t pageNumber) = 0;
615 };
616
617 }
618
619 using BKernel::AllocationTrackingCallback;
620
621
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 AllocationCollectorCallback(bool resetInfos)
625 :
626 fResetInfos(resetInfos)
627 {
628 }
629
630 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 page_num_t pageNumber)
632 {
633 if (!info->IsInitialized())
634 return true;
635
636 addr_t caller = 0;
637 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638
639 if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 caller = tracing_find_caller_in_stack_trace(
641 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 }
643
644 caller_info* callerInfo = get_caller_info(caller);
645 if (callerInfo == NULL) {
646 kprintf("out of space for caller infos\n");
647 return false;
648 }
649
650 callerInfo->count++;
651
652 if (fResetInfos)
653 info->Clear();
654
655 return true;
656 }
657
658 private:
659 bool fResetInfos;
660 };
661
662
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 team_id teamFilter, thread_id threadFilter)
667 :
668 fPrintStackTrace(printStackTrace),
669 fPageFilter(pageFilter),
670 fTeamFilter(teamFilter),
671 fThreadFilter(threadFilter)
672 {
673 }
674
675 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 page_num_t pageNumber)
677 {
678 if (!info->IsInitialized())
679 return true;
680
681 if (fPageFilter != 0 && pageNumber != fPageFilter)
682 return true;
683
684 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 if (traceEntry != NULL && !info->IsTraceEntryValid())
686 traceEntry = NULL;
687
688 if (traceEntry != NULL) {
689 if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 return true;
691 if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 return true;
693 } else {
694 // we need the info if we have filters set
695 if (fTeamFilter != -1 || fThreadFilter != -1)
696 return true;
697 }
698
699 kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700
701 if (traceEntry != NULL) {
702 kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 ", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 traceEntry->ThreadID(), traceEntry->Time());
705
706 if (fPrintStackTrace)
707 tracing_print_stack_trace(traceEntry->StackTrace());
708 } else
709 kprintf("\n");
710
711 return true;
712 }
713
714 private:
715 bool fPrintStackTrace;
716 page_num_t fPageFilter;
717 team_id fTeamFilter;
718 thread_id fThreadFilter;
719 };
720
721
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 AllocationDetailPrinterCallback(addr_t caller)
725 :
726 fCaller(caller)
727 {
728 }
729
730 virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 page_num_t pageNumber)
732 {
733 if (!info->IsInitialized())
734 return true;
735
736 addr_t caller = 0;
737 AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 if (traceEntry != NULL && !info->IsTraceEntryValid())
739 traceEntry = NULL;
740
741 if (traceEntry != NULL) {
742 caller = tracing_find_caller_in_stack_trace(
743 traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 }
745
746 if (caller != fCaller)
747 return true;
748
749 kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 if (traceEntry != NULL)
751 tracing_print_stack_trace(traceEntry->StackTrace());
752
753 return true;
754 }
755
756 private:
757 addr_t fCaller;
758 };
759
760 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761
762
763 static void
764 list_page(vm_page* page)
765 {
766 kprintf("0x%08" B_PRIxADDR " ",
767 (addr_t)(page->physical_page_number * B_PAGE_SIZE));
768 switch (page->State()) {
769 case PAGE_STATE_ACTIVE: kprintf("A"); break;
770 case PAGE_STATE_INACTIVE: kprintf("I"); break;
771 case PAGE_STATE_MODIFIED: kprintf("M"); break;
772 case PAGE_STATE_CACHED: kprintf("C"); break;
773 case PAGE_STATE_FREE: kprintf("F"); break;
774 case PAGE_STATE_CLEAR: kprintf("L"); break;
775 case PAGE_STATE_WIRED: kprintf("W"); break;
776 case PAGE_STATE_UNUSED: kprintf("-"); break;
777 }
778 kprintf(" ");
779 if (page->busy) kprintf("B"); else kprintf("-");
780 if (page->busy_writing) kprintf("W"); else kprintf("-");
781 if (page->accessed) kprintf("A"); else kprintf("-");
782 if (page->modified) kprintf("M"); else kprintf("-");
783 kprintf("-");
784
785 kprintf(" usage:%3u", page->usage_count);
786 kprintf(" wired:%5u", page->WiredCount());
787
788 bool first = true;
789 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
790 vm_page_mapping* mapping;
791 while ((mapping = iterator.Next()) != NULL) {
792 if (first) {
793 kprintf(": ");
794 first = false;
795 } else
796 kprintf(", ");
797
798 kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
799 mapping = mapping->page_link.next;
800 }
801 }
802
803
804 static int
805 dump_page_list(int argc, char **argv)
806 {
807 kprintf("page table:\n");
808 for (page_num_t i = 0; i < sNumPages; i++) {
809 if (sPages[i].State() != PAGE_STATE_UNUSED) {
810 list_page(&sPages[i]);
811 kprintf("\n");
812 }
813 }
814 kprintf("end of page table\n");
815
816 return 0;
817 }
818
819
820 static int
821 find_page(int argc, char **argv)
822 {
823 struct vm_page *page;
824 addr_t address;
825 int32 index = 1;
826 int i;
827
828 struct {
829 const char* name;
830 VMPageQueue* queue;
831 } pageQueueInfos[] = {
832 { "free", &sFreePageQueue },
833 { "clear", &sClearPageQueue },
834 { "modified", &sModifiedPageQueue },
835 { "active", &sActivePageQueue },
836 { "inactive", &sInactivePageQueue },
837 { "cached", &sCachedPageQueue },
838 { NULL, NULL }
839 };
840
841 if (argc < 2
842 || strlen(argv[index]) <= 2
843 || argv[index][0] != '0'
844 || argv[index][1] != 'x') {
845 kprintf("usage: find_page <address>\n");
846 return 0;
847 }
848
849 address = strtoul(argv[index], NULL, 0);
850 page = (vm_page*)address;
851
852 for (i = 0; pageQueueInfos[i].name; i++) {
853 VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
854 while (vm_page* p = it.Next()) {
855 if (p == page) {
856 kprintf("found page %p in queue %p (%s)\n", page,
857 pageQueueInfos[i].queue, pageQueueInfos[i].name);
858 return 0;
859 }
860 }
861 }
862
863 kprintf("page %p isn't in any queue\n", page);
864
865 return 0;
866 }
867
868
869 const char *
870 page_state_to_string(int state)
871 {
872 switch(state) {
873 case PAGE_STATE_ACTIVE:
874 return "active";
875 case PAGE_STATE_INACTIVE:
876 return "inactive";
877 case PAGE_STATE_MODIFIED:
878 return "modified";
879 case PAGE_STATE_CACHED:
880 return "cached";
881 case PAGE_STATE_FREE:
882 return "free";
883 case PAGE_STATE_CLEAR:
884 return "clear";
885 case PAGE_STATE_WIRED:
886 return "wired";
887 case PAGE_STATE_UNUSED:
888 return "unused";
889 default:
890 return "unknown";
891 }
892 }
893
894
895 static int
896 dump_page_long(int argc, char **argv)
897 {
898 bool addressIsPointer = true;
899 bool physical = false;
900 bool searchMappings = false;
901 int32 index = 1;
902
903 while (index < argc) {
904 if (argv[index][0] != '-')
905 break;
906
907 if (!strcmp(argv[index], "-p")) {
908 addressIsPointer = false;
909 physical = true;
910 } else if (!strcmp(argv[index], "-v")) {
911 addressIsPointer = false;
912 } else if (!strcmp(argv[index], "-m")) {
913 searchMappings = true;
914 } else {
915 print_debugger_command_usage(argv[0]);
916 return 0;
917 }
918
919 index++;
920 }
921
922 if (index + 1 != argc) {
923 print_debugger_command_usage(argv[0]);
924 return 0;
925 }
926
927 uint64 value;
928 if (!evaluate_debug_expression(argv[index], &value, false))
929 return 0;
930
931 uint64 pageAddress = value;
932 struct vm_page* page;
933
934 if (addressIsPointer) {
935 page = (struct vm_page *)(addr_t)pageAddress;
936 } else {
937 if (!physical) {
938 VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
939
940 if (debug_get_debugged_thread()->team->address_space != NULL)
941 addressSpace = debug_get_debugged_thread()->team->address_space;
942
943 uint32 flags = 0;
944 phys_addr_t physicalAddress;
945 if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
946 &physicalAddress, &flags) != B_OK
947 || (flags & PAGE_PRESENT) == 0) {
948 kprintf("Virtual address not mapped to a physical page in this "
949 "address space.\n");
950 return 0;
951 }
952 pageAddress = physicalAddress;
953 }
954
955 page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
956 }
957
958 if (page == NULL) {
959 kprintf("Page not found.\n");
960 return 0;
961 }
962
963 kprintf("PAGE: %p\n", page);
964
965 const off_t pageOffset = (addr_t)page - (addr_t)sPages;
966 const off_t pageIndex = pageOffset / (off_t)sizeof(vm_page);
967 if (pageIndex < 0) {
968 kprintf("\taddress is before start of page array!"
969 " (offset %" B_PRIdOFF ")\n", pageOffset);
970 } else if ((page_num_t)pageIndex >= sNumPages) {
971 kprintf("\taddress is after end of page array!"
972 " (offset %" B_PRIdOFF ")\n", pageOffset);
973 } else if ((pageIndex * (off_t)sizeof(vm_page)) != pageOffset) {
974 kprintf("\taddress isn't a multiple of page structure size!"
975 " (offset %" B_PRIdOFF ", expected align %" B_PRIuSIZE ")\n",
976 pageOffset, sizeof(vm_page));
977 }
978
979 kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
980 page->queue_link.previous);
981 kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", page->physical_page_number);
982 kprintf("cache: %p\n", page->Cache());
983 kprintf("cache_offset: %" B_PRIuPHYSADDR "\n", page->cache_offset);
984 kprintf("cache_next: %p\n", page->cache_next);
985 kprintf("state: %s\n", page_state_to_string(page->State()));
986 kprintf("wired_count: %d\n", page->WiredCount());
987 kprintf("usage_count: %d\n", page->usage_count);
988 kprintf("busy: %d\n", page->busy);
989 kprintf("busy_writing: %d\n", page->busy_writing);
990 kprintf("accessed: %d\n", page->accessed);
991 kprintf("modified: %d\n", page->modified);
992 #if DEBUG_PAGE_QUEUE
993 kprintf("queue: %p\n", page->queue);
994 #endif
995 #if DEBUG_PAGE_ACCESS
996 kprintf("accessor: %" B_PRId32 "\n", page->accessing_thread);
997 #endif
998
999 if (pageIndex < 0 || (page_num_t)pageIndex >= sNumPages) {
1000 // Don't try to read the mappings.
1001 return 0;
1002 }
1003
1004 kprintf("area mappings:\n");
1005 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
1006 vm_page_mapping *mapping;
1007 while ((mapping = iterator.Next()) != NULL) {
1008 kprintf(" %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
1009 mapping = mapping->page_link.next;
1010 }
1011
1012 if (searchMappings) {
1013 struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
1014 VMAddressSpace* fAddressSpace;
1015
1016 virtual bool HandleVirtualAddress(addr_t virtualAddress)
1017 {
1018 phys_addr_t physicalAddress;
1019 uint32 flags = 0;
1020 if (fAddressSpace->TranslationMap()->QueryInterrupt(virtualAddress,
1021 &physicalAddress, &flags) != B_OK) {
1022 kprintf(" aspace %" B_PRId32 ": %#" B_PRIxADDR " (querying failed)\n",
1023 fAddressSpace->ID(), virtualAddress);
1024 return false;
1025 }
1026 VMArea* area = fAddressSpace->LookupArea(virtualAddress);
1027 kprintf(" aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
1028 B_PRIxADDR " (%c%c%s%s)\n", fAddressSpace->ID(),
1029 area != NULL ? area->id : -1, virtualAddress,
1030 (flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
1031 (flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
1032 (flags & PAGE_MODIFIED) != 0 ? " modified" : "",
1033 (flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
1034 return false;
1035 }
1036 } callback;
1037
1038 kprintf("all mappings:\n");
1039 VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
1040 while (addressSpace != NULL) {
1041 callback.fAddressSpace = addressSpace;
1042 addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
1043 page->physical_page_number * B_PAGE_SIZE, callback);
1044 addressSpace = VMAddressSpace::DebugNext(addressSpace);
1045 }
1046 }
1047
1048 set_debug_variable("_cache", (addr_t)page->Cache());
1049 #if DEBUG_PAGE_ACCESS
1050 set_debug_variable("_accessor", page->accessing_thread);
1051 #endif
1052
1053 return 0;
1054 }
1055
1056
1057 static int
1058 dump_page_queue(int argc, char **argv)
1059 {
1060 struct VMPageQueue *queue;
1061
1062 if (argc < 2) {
1063 kprintf("usage: page_queue <address/name> [list]\n");
1064 return 0;
1065 }
1066
1067 if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
1068 queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
1069 else if (!strcmp(argv[1], "free"))
1070 queue = &sFreePageQueue;
1071 else if (!strcmp(argv[1], "clear"))
1072 queue = &sClearPageQueue;
1073 else if (!strcmp(argv[1], "modified"))
1074 queue = &sModifiedPageQueue;
1075 else if (!strcmp(argv[1], "active"))
1076 queue = &sActivePageQueue;
1077 else if (!strcmp(argv[1], "inactive"))
1078 queue = &sInactivePageQueue;
1079 else if (!strcmp(argv[1], "cached"))
1080 queue = &sCachedPageQueue;
1081 else {
1082 kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
1083 return 0;
1084 }
1085
1086 kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
1087 B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
1088 queue->Count());
1089
1090 if (argc == 3) {
1091 struct vm_page *page = queue->Head();
1092
1093 kprintf("page cache type state wired usage\n");
1094 for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1095 kprintf("%p %p %-7s %8s %5d %5d\n", page, page->Cache(),
1096 vm_cache_type_to_string(page->Cache()->type),
1097 page_state_to_string(page->State()),
1098 page->WiredCount(), page->usage_count);
1099 }
1100 }
1101 return 0;
1102 }
1103
1104
1105 static int
1106 dump_page_stats(int argc, char **argv)
1107 {
1108 page_num_t swappableModified = 0;
1109 page_num_t swappableModifiedInactive = 0;
1110
1111 size_t counter[8];
1112 size_t busyCounter[8];
1113 memset(counter, 0, sizeof(counter));
1114 memset(busyCounter, 0, sizeof(busyCounter));
1115
1116 struct page_run {
1117 page_num_t start;
1118 page_num_t end;
1119
1120 page_num_t Length() const { return end - start; }
1121 };
1122
1123 page_run currentFreeRun = { 0, 0 };
1124 page_run currentCachedRun = { 0, 0 };
1125 page_run longestFreeRun = { 0, 0 };
1126 page_run longestCachedRun = { 0, 0 };
1127
1128 for (page_num_t i = 0; i < sNumPages; i++) {
1129 if (sPages[i].State() > 7) {
1130 panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1131 &sPages[i]);
1132 }
1133
1134 uint32 pageState = sPages[i].State();
1135
1136 counter[pageState]++;
1137 if (sPages[i].busy)
1138 busyCounter[pageState]++;
1139
1140 if (pageState == PAGE_STATE_MODIFIED
1141 && sPages[i].Cache() != NULL
1142 && sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1143 swappableModified++;
1144 if (sPages[i].usage_count == 0)
1145 swappableModifiedInactive++;
1146 }
1147
1148 // track free and cached pages runs
1149 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1150 currentFreeRun.end = i + 1;
1151 currentCachedRun.end = i + 1;
1152 } else {
1153 if (currentFreeRun.Length() > longestFreeRun.Length())
1154 longestFreeRun = currentFreeRun;
1155 currentFreeRun.start = currentFreeRun.end = i + 1;
1156
1157 if (pageState == PAGE_STATE_CACHED) {
1158 currentCachedRun.end = i + 1;
1159 } else {
1160 if (currentCachedRun.Length() > longestCachedRun.Length())
1161 longestCachedRun = currentCachedRun;
1162 currentCachedRun.start = currentCachedRun.end = i + 1;
1163 }
1164 }
1165 }
1166
1167 kprintf("page stats:\n");
1168 kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1169
1170 kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1171 counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1172 kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1173 counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1174 kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1175 counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1176 kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1177 counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1178 kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1179 counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1180 kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1181 counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1182 kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1183 kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1184
1185 kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1186 kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1187 sUnsatisfiedPageReservations);
1188 kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1189 kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1190 B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1191 sPages[longestFreeRun.start].physical_page_number);
1192 kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1193 B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1194 sPages[longestCachedRun.start].physical_page_number);
1195
1196 kprintf("waiting threads:\n");
1197 for (PageReservationWaiterList::Iterator it
1198 = sPageReservationWaiters.GetIterator();
1199 PageReservationWaiter* waiter = it.Next();) {
1200 kprintf(" %6" B_PRId32 ": missing: %6" B_PRIu32
1201 ", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1202 waiter->missing, waiter->dontTouch);
1203 }
1204
1205 kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1206 sFreePageQueue.Count());
1207 kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1208 sClearPageQueue.Count());
1209 kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1210 " temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1211 B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1212 sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1213 kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1214 &sActivePageQueue, sActivePageQueue.Count());
1215 kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1216 &sInactivePageQueue, sInactivePageQueue.Count());
1217 kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1218 &sCachedPageQueue, sCachedPageQueue.Count());
1219 return 0;
1220 }
1221
1222
1223 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1224
1225 static caller_info*
1226 get_caller_info(addr_t caller)
1227 {
1228 // find the caller info
1229 for (int32 i = 0; i < sCallerInfoCount; i++) {
1230 if (caller == sCallerInfoTable[i].caller)
1231 return &sCallerInfoTable[i];
1232 }
1233
1234 // not found, add a new entry, if there are free slots
1235 if (sCallerInfoCount >= kCallerInfoTableSize)
1236 return NULL;
1237
1238 caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1239 info->caller = caller;
1240 info->count = 0;
1241
1242 return info;
1243 }
1244
1245
1246 static int
1247 caller_info_compare_count(const void* _a, const void* _b)
1248 {
1249 const caller_info* a = (const caller_info*)_a;
1250 const caller_info* b = (const caller_info*)_b;
1251 return (int)(b->count - a->count);
1252 }
1253
1254
1255 static int
1256 dump_page_allocations_per_caller(int argc, char** argv)
1257 {
1258 bool resetAllocationInfos = false;
1259 bool printDetails = false;
1260 addr_t caller = 0;
1261
1262 for (int32 i = 1; i < argc; i++) {
1263 if (strcmp(argv[i], "-d") == 0) {
1264 uint64 callerAddress;
1265 if (++i >= argc
1266 || !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1267 print_debugger_command_usage(argv[0]);
1268 return 0;
1269 }
1270
1271 caller = callerAddress;
1272 printDetails = true;
1273 } else if (strcmp(argv[i], "-r") == 0) {
1274 resetAllocationInfos = true;
1275 } else {
1276 print_debugger_command_usage(argv[0]);
1277 return 0;
1278 }
1279 }
1280
1281 sCallerInfoCount = 0;
1282
1283 AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1284 AllocationDetailPrinterCallback detailsCallback(caller);
1285 AllocationTrackingCallback& callback = printDetails
1286 ? (AllocationTrackingCallback&)detailsCallback
1287 : (AllocationTrackingCallback&)collectorCallback;
1288
1289 for (page_num_t i = 0; i < sNumPages; i++)
1290 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1291
1292 if (printDetails)
1293 return 0;
1294
1295 // sort the array
1296 qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1297 &caller_info_compare_count);
1298
1299 kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1300
1301 size_t totalAllocationCount = 0;
1302
1303 kprintf(" count caller\n");
1304 kprintf("----------------------------------\n");
1305 for (int32 i = 0; i < sCallerInfoCount; i++) {
1306 caller_info& info = sCallerInfoTable[i];
1307 kprintf("%10" B_PRIuSIZE " %p", info.count, (void*)info.caller);
1308
1309 const char* symbol;
1310 const char* imageName;
1311 bool exactMatch;
1312 addr_t baseAddress;
1313
1314 if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1315 &imageName, &exactMatch) == B_OK) {
1316 kprintf(" %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1317 info.caller - baseAddress, imageName,
1318 exactMatch ? "" : " (nearest)");
1319 } else
1320 kprintf("\n");
1321
1322 totalAllocationCount += info.count;
1323 }
1324
1325 kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1326 totalAllocationCount);
1327
1328 return 0;
1329 }
1330
1331
1332 static int
1333 dump_page_allocation_infos(int argc, char** argv)
1334 {
1335 page_num_t pageFilter = 0;
1336 team_id teamFilter = -1;
1337 thread_id threadFilter = -1;
1338 bool printStackTraces = false;
1339
1340 for (int32 i = 1; i < argc; i++) {
1341 if (strcmp(argv[i], "--stacktrace") == 0)
1342 printStackTraces = true;
1343 else if (strcmp(argv[i], "-p") == 0) {
1344 uint64 pageNumber;
1345 if (++i >= argc
1346 || !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1347 print_debugger_command_usage(argv[0]);
1348 return 0;
1349 }
1350
1351 pageFilter = pageNumber;
1352 } else if (strcmp(argv[i], "--team") == 0) {
1353 uint64 team;
1354 if (++i >= argc
1355 || !evaluate_debug_expression(argv[i], &team, true)) {
1356 print_debugger_command_usage(argv[0]);
1357 return 0;
1358 }
1359
1360 teamFilter = team;
1361 } else if (strcmp(argv[i], "--thread") == 0) {
1362 uint64 thread;
1363 if (++i >= argc
1364 || !evaluate_debug_expression(argv[i], &thread, true)) {
1365 print_debugger_command_usage(argv[0]);
1366 return 0;
1367 }
1368
1369 threadFilter = thread;
1370 } else {
1371 print_debugger_command_usage(argv[0]);
1372 return 0;
1373 }
1374 }
1375
1376 AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1377 teamFilter, threadFilter);
1378
1379 for (page_num_t i = 0; i < sNumPages; i++)
1380 callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1381
1382 return 0;
1383 }
1384
1385 #endif // VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1386
1387
1388 #ifdef TRACK_PAGE_USAGE_STATS
1389
1390 static void
1391 track_page_usage(vm_page* page)
1392 {
1393 if (page->WiredCount() == 0) {
1394 sNextPageUsage[(int32)page->usage_count + 128]++;
1395 sNextPageUsagePageCount++;
1396 }
1397 }
1398
1399
1400 static void
1401 update_page_usage_stats()
1402 {
1403 std::swap(sPageUsage, sNextPageUsage);
1404 sPageUsagePageCount = sNextPageUsagePageCount;
1405
1406 memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1407 sNextPageUsagePageCount = 0;
1408
1409 // compute average
1410 if (sPageUsagePageCount > 0) {
1411 int64 sum = 0;
1412 for (int32 i = 0; i < 256; i++)
1413 sum += (int64)sPageUsage[i] * (i - 128);
1414
1415 TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1416 (float)sum / sPageUsagePageCount, sPageUsagePageCount);
1417 }
1418 }
1419
1420
1421 static int
1422 dump_page_usage_stats(int argc, char** argv)
1423 {
1424 kprintf("distribution of page usage counts (%lu pages):",
1425 sPageUsagePageCount);
1426
1427 int64 sum = 0;
1428 for (int32 i = 0; i < 256; i++) {
1429 if (i % 8 == 0)
1430 kprintf("\n%4ld:", i - 128);
1431
1432 int64 count = sPageUsage[i];
1433 sum += count * (i - 128);
1434
1435 kprintf(" %9llu", count);
1436 }
1437
1438 kprintf("\n\n");
1439
1440 kprintf("average usage count: %f\n",
1441 sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1442
1443 return 0;
1444 }
1445
1446 #endif // TRACK_PAGE_USAGE_STATS
1447
1448
1449 // #pragma mark - vm_page
1450
1451
1452 inline void
1453 vm_page::InitState(uint8 newState)
1454 {
1455 state = newState;
1456 }
1457
1458
1459 inline void
1460 vm_page::SetState(uint8 newState)
1461 {
1462 TPS(SetPageState(this, newState));
1463
1464 state = newState;
1465 }
1466
1467
1468 // #pragma mark -
1469
1470
1471 static void
1472 get_page_stats(page_stats& _pageStats)
1473 {
1474 _pageStats.totalFreePages = sUnreservedFreePages;
1475 _pageStats.cachedPages = sCachedPageQueue.Count();
1476 _pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1477 // TODO: We don't get an actual snapshot here!
1478 }
1479
1480
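// Whether the page daemon needs to actively free memory: true when free plus
// cached pages no longer cover the unsatisfied reservations plus the
// free/cached pages target.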
1481 static bool
1482 do_active_paging(const page_stats& pageStats)
1483 {
1484 return pageStats.totalFreePages + pageStats.cachedPages
1485 < pageStats.unsatisfiedReservations
1486 + (int32)sFreeOrCachedPagesTarget;
1487 }
1488
1489
1490 /*! Reserves as many pages as possible from \c sUnreservedFreePages up to
1491 \a count. Doesn't touch the last \a dontTouch pages of
1492 \c sUnreservedFreePages, though.
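For example, with \c sUnreservedFreePages == 1000 and \a dontTouch == 200,
at most 800 pages will be reserved in a single call.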
1493 \return The number of actually reserved pages.
1494 */
1495 static uint32
1496 reserve_some_pages(uint32 count, uint32 dontTouch)
1497 {
1498 while (true) {
1499 int32 freePages = atomic_get(&sUnreservedFreePages);
1500 if (freePages <= (int32)dontTouch)
1501 return 0;
1502
1503 int32 toReserve = std::min(count, freePages - dontTouch);
1504 if (atomic_test_and_set(&sUnreservedFreePages,
1505 freePages - toReserve, freePages)
1506 == freePages) {
1507 return toReserve;
1508 }
1509
1510 // the count changed in the meantime -- retry
1511 }
1512 }
1513
1514
1515 static void
1516 wake_up_page_reservation_waiters()
1517 {
1518 MutexLocker pageDeficitLocker(sPageDeficitLock);
1519
1520 // TODO: If this is a low priority thread, we might want to disable
1521 // interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1522 // high priority threads will be kept waiting while a medium priority thread
1523 // prevents us from running.
1524
1525 while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1526 int32 reserved = reserve_some_pages(waiter->missing,
1527 waiter->dontTouch);
1528 if (reserved == 0)
1529 return;
1530
1531 atomic_add(&sUnsatisfiedPageReservations, -reserved);
1532 waiter->missing -= reserved;
1533
1534 if (waiter->missing > 0)
1535 return;
1536
1537 sPageReservationWaiters.Remove(waiter);
1538
1539 thread_unblock(waiter->thread, B_OK);
1540 }
1541 }
1542
1543
1544 static inline void
1545 unreserve_pages(uint32 count)
1546 {
1547 atomic_add(&sUnreservedFreePages, count);
1548 if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1549 wake_up_page_reservation_waiters();
1550 }
1551
1552
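// Returns \a page to the free or clear queue (depending on \a clear). The page
// must no longer be mapped or belong to a cache; freeing into the free queue
// also wakes up threads waiting for free pages.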
1553 static void
1554 free_page(vm_page* page, bool clear)
1555 {
1556 DEBUG_PAGE_ACCESS_CHECK(page);
1557
1558 PAGE_ASSERT(page, !page->IsMapped());
1559
1560 VMPageQueue* fromQueue;
1561
1562 switch (page->State()) {
1563 case PAGE_STATE_ACTIVE:
1564 fromQueue = &sActivePageQueue;
1565 break;
1566 case PAGE_STATE_INACTIVE:
1567 fromQueue = &sInactivePageQueue;
1568 break;
1569 case PAGE_STATE_MODIFIED:
1570 fromQueue = &sModifiedPageQueue;
1571 break;
1572 case PAGE_STATE_CACHED:
1573 fromQueue = &sCachedPageQueue;
1574 break;
1575 case PAGE_STATE_FREE:
1576 case PAGE_STATE_CLEAR:
1577 panic("free_page(): page %p already free", page);
1578 return;
1579 case PAGE_STATE_WIRED:
1580 case PAGE_STATE_UNUSED:
1581 fromQueue = NULL;
1582 break;
1583 default:
1584 panic("free_page(): page %p in invalid state %d",
1585 page, page->State());
1586 return;
1587 }
1588
1589 if (page->CacheRef() != NULL)
1590 panic("to be freed page %p has cache", page);
1591 if (page->IsMapped())
1592 panic("to be freed page %p has mappings", page);
1593
1594 if (fromQueue != NULL)
1595 fromQueue->RemoveUnlocked(page);
1596
1597 TA(FreePage(page->physical_page_number));
1598
1599 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1600 page->allocation_tracking_info.Clear();
1601 #endif
1602
1603 ReadLocker locker(sFreePageQueuesLock);
1604
1605 DEBUG_PAGE_ACCESS_END(page);
1606
1607 if (clear) {
1608 page->SetState(PAGE_STATE_CLEAR);
1609 sClearPageQueue.PrependUnlocked(page);
1610 } else {
1611 page->SetState(PAGE_STATE_FREE);
1612 sFreePageQueue.PrependUnlocked(page);
1613 sFreePageCondition.NotifyAll();
1614 }
1615
1616 locker.Unlock();
1617 }
1618
1619
1620 /*! The caller must make sure that no-one else tries to change the page's state
1621 while the function is called. If the page has a cache, this can be done by
1622 locking the cache.
1623 */
1624 static void
1625 set_page_state(vm_page *page, int pageState)
1626 {
1627 DEBUG_PAGE_ACCESS_CHECK(page);
1628
1629 if (pageState == page->State())
1630 return;
1631
1632 VMPageQueue* fromQueue;
1633
1634 switch (page->State()) {
1635 case PAGE_STATE_ACTIVE:
1636 fromQueue = &sActivePageQueue;
1637 break;
1638 case PAGE_STATE_INACTIVE:
1639 fromQueue = &sInactivePageQueue;
1640 break;
1641 case PAGE_STATE_MODIFIED:
1642 fromQueue = &sModifiedPageQueue;
1643 break;
1644 case PAGE_STATE_CACHED:
1645 fromQueue = &sCachedPageQueue;
1646 break;
1647 case PAGE_STATE_FREE:
1648 case PAGE_STATE_CLEAR:
1649 panic("set_page_state(): page %p is free/clear", page);
1650 return;
1651 case PAGE_STATE_WIRED:
1652 case PAGE_STATE_UNUSED:
1653 fromQueue = NULL;
1654 break;
1655 default:
1656 panic("set_page_state(): page %p in invalid state %d",
1657 page, page->State());
1658 return;
1659 }
1660
1661 VMPageQueue* toQueue;
1662
1663 switch (pageState) {
1664 case PAGE_STATE_ACTIVE:
1665 toQueue = &sActivePageQueue;
1666 break;
1667 case PAGE_STATE_INACTIVE:
1668 toQueue = &sInactivePageQueue;
1669 break;
1670 case PAGE_STATE_MODIFIED:
1671 toQueue = &sModifiedPageQueue;
1672 break;
1673 case PAGE_STATE_CACHED:
1674 PAGE_ASSERT(page, !page->IsMapped());
1675 PAGE_ASSERT(page, !page->modified);
1676 toQueue = &sCachedPageQueue;
1677 break;
1678 case PAGE_STATE_FREE:
1679 case PAGE_STATE_CLEAR:
1680 panic("set_page_state(): target state is free/clear");
1681 return;
1682 case PAGE_STATE_WIRED:
1683 case PAGE_STATE_UNUSED:
1684 toQueue = NULL;
1685 break;
1686 default:
1687 panic("set_page_state(): invalid target state %d", pageState);
1688 return;
1689 }
1690
1691 VMCache* cache = page->Cache();
1692 if (cache != NULL && cache->temporary) {
1693 if (pageState == PAGE_STATE_MODIFIED)
1694 atomic_add(&sModifiedTemporaryPages, 1);
1695 else if (page->State() == PAGE_STATE_MODIFIED)
1696 atomic_add(&sModifiedTemporaryPages, -1);
1697 }
1698
1699 // move the page
1700 if (toQueue == fromQueue) {
1701 // Note: Theoretically we are required to lock when changing the page
1702 // state, even if we don't change the queue. We actually don't have to
1703 // do this, though, since only for the active queue there are different
1704 // page states and active pages have a cache that must be locked at
1705 // this point. So we rely on the fact that everyone must lock the cache
1706 // before trying to change/interpret the page state.
1707 PAGE_ASSERT(page, cache != NULL);
1708 cache->AssertLocked();
1709 page->SetState(pageState);
1710 } else {
1711 if (fromQueue != NULL)
1712 fromQueue->RemoveUnlocked(page);
1713
1714 page->SetState(pageState);
1715
1716 if (toQueue != NULL)
1717 toQueue->AppendUnlocked(page);
1718 }
1719 }
1720
1721
1722 /*! Moves a previously modified page into a now appropriate queue.
1723 The page queues must not be locked.
1724 */
1725 static void
1726 move_page_to_appropriate_queue(vm_page *page)
1727 {
1728 DEBUG_PAGE_ACCESS_CHECK(page);
1729
1730 // Note, this logic must be in sync with what the page daemon does.
1731 int32 state;
1732 if (page->IsMapped())
1733 state = PAGE_STATE_ACTIVE;
1734 else if (page->modified)
1735 state = PAGE_STATE_MODIFIED;
1736 else
1737 state = PAGE_STATE_CACHED;
1738
1739 // TODO: If free + cached pages are low, we might directly want to free the
1740 // page.
1741 set_page_state(page, state);
1742 }
1743
1744
1745 static void
1746 clear_page(struct vm_page *page)
1747 {
1748 vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1749 B_PAGE_SIZE);
1750 }
1751
1752
1753 static status_t
1754 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1755 {
1756 TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1757 B_PRIxPHYSADDR "\n", startPage, length));
1758
1759 if (sPhysicalPageOffset > startPage) {
1760 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1761 "): start page is before free list\n", startPage, length);
1762 if (sPhysicalPageOffset - startPage >= length)
1763 return B_OK;
1764 length -= sPhysicalPageOffset - startPage;
1765 startPage = sPhysicalPageOffset;
1766 }
1767
1768 startPage -= sPhysicalPageOffset;
1769
1770 if (startPage + length > sNumPages) {
1771 dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1772 "): range would extend past free list\n", startPage, length);
1773 if (startPage >= sNumPages)
1774 return B_OK;
1775 length = sNumPages - startPage;
1776 }
1777
1778 WriteLocker locker(sFreePageQueuesLock);
1779
1780 for (page_num_t i = 0; i < length; i++) {
1781 vm_page *page = &sPages[startPage + i];
1782 switch (page->State()) {
1783 case PAGE_STATE_FREE:
1784 case PAGE_STATE_CLEAR:
1785 {
1786 // This violates the page reservation policy, since we remove pages
1787 // from the free/clear queues without having reserved them before.
1788 // This should happen in the early boot process only, though.
1789 ASSERT(gKernelStartup);
1790
1791 DEBUG_PAGE_ACCESS_START(page);
1792 VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1793 ? sFreePageQueue : sClearPageQueue;
1794 queue.Remove(page);
1795 page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1796 page->busy = false;
1797 atomic_add(&sUnreservedFreePages, -1);
1798 DEBUG_PAGE_ACCESS_END(page);
1799 break;
1800 }
1801 case PAGE_STATE_WIRED:
1802 case PAGE_STATE_UNUSED:
1803 break;
1804 case PAGE_STATE_ACTIVE:
1805 case PAGE_STATE_INACTIVE:
1806 case PAGE_STATE_MODIFIED:
1807 case PAGE_STATE_CACHED:
1808 default:
1809 // uh
1810 panic("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1811 " in non-free state %d!\n", startPage + i, page->State());
1812 break;
1813 }
1814 }
1815
1816 return B_OK;
1817 }
1818
1819
1820 /*!
1821 This is a background thread that wakes up when its condition is notified
1822 and moves some pages from the free queue over to the clear queue.
1823 Given enough time, it will clear out all pages from the free queue - we
1824 could probably slow it down after having reached a certain threshold.
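Each run clears at most SCRUB_SIZE pages and then sleeps for at least 100ms
before the next run.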
1825 */
1826 static int32
1827 page_scrubber(void *unused)
1828 {
1829 (void)(unused);
1830
1831 TRACE(("page_scrubber starting...\n"));
1832
1833 ConditionVariableEntry entry;
1834 for (;;) {
1835 while (sFreePageQueue.Count() == 0
1836 || atomic_get(&sUnreservedFreePages)
1837 < (int32)sFreePagesTarget) {
1838 sFreePageCondition.Add(&entry);
1839 entry.Wait();
1840 }
1841
1842 // Since we temporarily remove pages from the free pages reserve,
1843 // we must make sure we don't cause a violation of the page
1844 // reservation guarantee. The following is usually stricter than
1845 // necessary, because we don't have information on how many of the
1846 // reserved pages have already been allocated.
1847 int32 reserved = reserve_some_pages(SCRUB_SIZE,
1848 kPageReserveForPriority[VM_PRIORITY_USER]);
1849 if (reserved == 0)
1850 continue;
1851
1852 // get some pages from the free queue, mostly sorted
1853 ReadLocker locker(sFreePageQueuesLock);
1854
1855 vm_page *page[SCRUB_SIZE];
1856 int32 scrubCount = 0;
1857 for (int32 i = 0; i < reserved; i++) {
1858 page[i] = sFreePageQueue.RemoveHeadUnlocked();
1859 if (page[i] == NULL)
1860 break;
1861
1862 DEBUG_PAGE_ACCESS_START(page[i]);
1863
1864 page[i]->SetState(PAGE_STATE_ACTIVE);
1865 page[i]->busy = true;
1866 scrubCount++;
1867 }
1868
1869 locker.Unlock();
1870
1871 if (scrubCount == 0) {
1872 unreserve_pages(reserved);
1873 continue;
1874 }
1875
1876 TA(ScrubbingPages(scrubCount));
1877
1878 // clear them
1879 for (int32 i = 0; i < scrubCount; i++)
1880 clear_page(page[i]);
1881
1882 locker.Lock();
1883
1884 // and put them into the clear queue
1885 // process the array reversed when prepending to preserve sequential order
1886 for (int32 i = scrubCount - 1; i >= 0; i--) {
1887 page[i]->SetState(PAGE_STATE_CLEAR);
1888 page[i]->busy = false;
1889 DEBUG_PAGE_ACCESS_END(page[i]);
1890 sClearPageQueue.PrependUnlocked(page[i]);
1891 }
1892
1893 locker.Unlock();
1894
1895 unreserve_pages(reserved);
1896
1897 TA(ScrubbedPages(scrubCount));
1898
1899 // wait at least 100ms between runs
1900 snooze(100 * 1000);
1901 }
1902
1903 return 0;
1904 }
1905
1906
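// Sets up a dummy "marker" page used to remember a position while iterating a
// page queue; the marker is flagged busy and belongs to no cache.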
1907 static void
1908 init_page_marker(vm_page &marker)
1909 {
1910 marker.SetCacheRef(NULL);
1911 marker.InitState(PAGE_STATE_UNUSED);
1912 marker.busy = true;
1913 #if DEBUG_PAGE_QUEUE
1914 marker.queue = NULL;
1915 #endif
1916 #if DEBUG_PAGE_ACCESS
1917 marker.accessing_thread = thread_get_current_thread_id();
1918 #endif
1919 }
1920
1921
1922 static void
1923 remove_page_marker(struct vm_page &marker)
1924 {
1925 DEBUG_PAGE_ACCESS_CHECK(&marker);
1926
1927 if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1928 sPageQueues[marker.State()].RemoveUnlocked(&marker);
1929
1930 marker.SetState(PAGE_STATE_UNUSED);
1931 }
1932
1933
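// Returns the next non-busy page from the head of the modified queue, requeuing
// each inspected page to the tail; gives up after looking at maxPagesToSee
// pages (the counter is decremented accordingly).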
1934 static vm_page*
1935 next_modified_page(page_num_t& maxPagesToSee)
1936 {
1937 InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1938
1939 while (maxPagesToSee > 0) {
1940 vm_page* page = sModifiedPageQueue.Head();
1941 if (page == NULL)
1942 return NULL;
1943
1944 sModifiedPageQueue.Requeue(page, true);
1945
1946 maxPagesToSee--;
1947
1948 if (!page->busy)
1949 return page;
1950 }
1951
1952 return NULL;
1953 }
1954
1955
1956 // #pragma mark -
1957
1958
1959 class PageWriteTransfer;
1960 class PageWriteWrapper;
1961
1962
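// The page writer gathers modified pages into a PageWriterRun. A run is split
// into PageWriteTransfer objects -- pages of one cache that can be combined
// into a single asynchronous I/O request -- while each individual page is
// tracked by a PageWriteWrapper, which finalizes the page's state once its
// write has completed (or failed).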
1963 class PageWriterRun {
1964 public:
1965 status_t Init(uint32 maxPages);
1966
1967 void PrepareNextRun();
1968 void AddPage(vm_page* page);
1969 uint32 Go();
1970
1971 void PageWritten(PageWriteTransfer* transfer, status_t status,
1972 bool partialTransfer, size_t bytesTransferred);
1973
1974 private:
1975 uint32 fMaxPages;
1976 uint32 fWrapperCount;
1977 uint32 fTransferCount;
1978 int32 fPendingTransfers;
1979 PageWriteWrapper* fWrappers;
1980 PageWriteTransfer* fTransfers;
1981 ConditionVariable fAllFinishedCondition;
1982 };
1983
1984
1985 class PageWriteTransfer : public AsyncIOCallback {
1986 public:
1987 void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1988 bool AddPage(vm_page* page);
1989
1990 status_t Schedule(uint32 flags);
1991
1992 void SetStatus(status_t status, size_t transferred);
1993
1994 status_t Status() const { return fStatus; }
1995 struct VMCache* Cache() const { return fCache; }
1996 uint32 PageCount() const { return fPageCount; }
1997
1998 virtual void IOFinished(status_t status, bool partialTransfer,
1999 generic_size_t bytesTransferred);
2000
2001 private:
2002 PageWriterRun* fRun;
2003 struct VMCache* fCache;
2004 off_t fOffset;
2005 uint32 fPageCount;
2006 int32 fMaxPages;
2007 status_t fStatus;
2008 uint32 fVecCount;
2009 generic_io_vec fVecs[32]; // TODO: make dynamic/configurable
2010 };
2011
2012
2013 class PageWriteWrapper {
2014 public:
2015 PageWriteWrapper();
2016 ~PageWriteWrapper();
2017 void SetTo(vm_page* page);
2018 bool Done(status_t result);
2019
2020 private:
2021 vm_page* fPage;
2022 struct VMCache* fCache;
2023 bool fIsActive;
2024 };
2025
2026
2027 PageWriteWrapper::PageWriteWrapper()
2028 :
2029 fIsActive(false)
2030 {
2031 }
2032
2033
2034 PageWriteWrapper::~PageWriteWrapper()
2035 {
2036 if (fIsActive)
2037 panic("page write wrapper going out of scope but isn't completed");
2038 }
2039
2040
2041 /*! The page's cache must be locked.
2042 */
2043 void
2044 PageWriteWrapper::SetTo(vm_page* page)
2045 {
2046 DEBUG_PAGE_ACCESS_CHECK(page);
2047
2048 if (page->busy)
2049 panic("setting page write wrapper to busy page");
2050
2051 if (fIsActive)
2052 panic("re-setting page write wrapper that isn't completed");
2053
2054 fPage = page;
2055 fCache = page->Cache();
2056 fIsActive = true;
2057
2058 fPage->busy = true;
2059 fPage->busy_writing = true;
2060
2061 // We have a modified page -- however, while we're writing it back,
2062 // the page might still be mapped. In order not to lose any changes to the
2063 // page, we mark it clean before actually writing it back; if
2064 // writing the page fails for some reason, we'll just keep it in the
2065 // modified page list, but that should happen only rarely.
2066
2067 // If the page is changed after we cleared the dirty flag, but before we
2068 // had the chance to write it back, then we'll write it again later -- that
2069 // will probably not happen that often, though.
2070
2071 vm_clear_map_flags(fPage, PAGE_MODIFIED);
2072 }
2073
2074
2075 /*! The page's cache must be locked.
2076 The page queues must not be locked.
2077 \return \c true if the page was written successfully or could otherwise
2078 be handled, \c false otherwise.
2079 */
2080 bool
2081 PageWriteWrapper::Done(status_t result)
2082 {
2083 if (!fIsActive)
2084 panic("completing page write wrapper that is not active");
2085
2086 DEBUG_PAGE_ACCESS_START(fPage);
2087
2088 fPage->busy = false;
2089 // Set unbusy and notify later by hand, since we might free the page.
2090
2091 bool success = true;
2092
2093 if (result == B_OK) {
2094 // put it into the active/inactive queue
2095 move_page_to_appropriate_queue(fPage);
2096 fPage->busy_writing = false;
2097 DEBUG_PAGE_ACCESS_END(fPage);
2098 } else {
2099 // Writing the page failed. One reason would be that the cache has been
2100 // shrunk and the page no longer belongs to the file. Otherwise the
2101 // actual I/O failed, in which case we'll simply keep the page modified.
2102
2103 if (!fPage->busy_writing) {
2104 // The busy_writing flag was cleared. That means the cache has been
2105 // shrunk while we were trying to write the page and we have to free
2106 // it now.
2107 vm_remove_all_page_mappings(fPage);
2108 // TODO: Unmapping should already happen when resizing the cache!
2109 fCache->RemovePage(fPage);
2110 free_page(fPage, false);
2111 unreserve_pages(1);
2112 } else {
2113 // Writing the page failed -- mark the page modified and move it to
2114 // an appropriate queue other than the modified queue, so we don't
2115 // keep trying to write it over and over again. We keep
2116 // non-temporary pages in the modified queue, though, so they don't
2117 // get lost in the inactive queue.
2118 dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2119 strerror(result));
2120
2121 fPage->modified = true;
2122 if (!fCache->temporary)
2123 set_page_state(fPage, PAGE_STATE_MODIFIED);
2124 else if (fPage->IsMapped())
2125 set_page_state(fPage, PAGE_STATE_ACTIVE);
2126 else
2127 set_page_state(fPage, PAGE_STATE_INACTIVE);
2128
2129 fPage->busy_writing = false;
2130 DEBUG_PAGE_ACCESS_END(fPage);
2131
2132 success = false;
2133 }
2134 }
2135
2136 fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2137 fIsActive = false;
2138
2139 return success;
2140 }
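/*	Editorial note -- illustrative sketch, not part of the original source:
	the intended PageWriteWrapper life cycle, as driven by the page writer and
	by vm_page_write_modified_page_range() below. "cache" and the actual write
	step are placeholders.

		PageWriteWrapper wrapper;

		cache->Lock();
		wrapper.SetTo(page);
			// marks the page busy/busy_writing and clears PAGE_MODIFIED
		cache->Unlock();

		status_t status = B_OK;
			// result of writing the page back, e.g. via PageWriteTransfer

		cache->Lock();
		bool handled = wrapper.Done(status);
			// requeues or frees the page and notifies PAGE_EVENT_NOT_BUSY waiters
		cache->Unlock();
*/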
2141
2142
2143 /*! The page's cache must be locked.
2144 */
2145 void
2146 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2147 {
2148 fRun = run;
2149 fCache = page->Cache();
2150 fOffset = page->cache_offset;
2151 fPageCount = 1;
2152 fMaxPages = maxPages;
2153 fStatus = B_OK;
2154
2155 fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2156 fVecs[0].length = B_PAGE_SIZE;
2157 fVecCount = 1;
2158 }
2159
2160
2161 /*! The page's cache must be locked.
2162 */
2163 bool
2164 PageWriteTransfer::AddPage(vm_page* page)
2165 {
2166 if (page->Cache() != fCache
2167 || (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2168 return false;
2169
2170 phys_addr_t nextBase = fVecs[fVecCount - 1].base
2171 + fVecs[fVecCount - 1].length;
2172
2173 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2174 && (off_t)page->cache_offset == fOffset + fPageCount) {
2175 // append to last iovec
2176 fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2177 fPageCount++;
2178 return true;
2179 }
2180
2181 nextBase = fVecs[0].base - B_PAGE_SIZE;
2182 if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2183 && (off_t)page->cache_offset == fOffset - 1) {
2184 // prepend to first iovec and adjust offset
2185 fVecs[0].base = nextBase;
2186 fVecs[0].length += B_PAGE_SIZE;
2187 fOffset = page->cache_offset;
2188 fPageCount++;
2189 return true;
2190 }
2191
2192 if (((off_t)page->cache_offset == fOffset + fPageCount
2193 || (off_t)page->cache_offset == fOffset - 1)
2194 && fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2195 // not physically contiguous or not in the right order
2196 uint32 vectorIndex;
2197 if ((off_t)page->cache_offset < fOffset) {
2198 // we are pre-pending another vector, move the other vecs
2199 for (uint32 i = fVecCount; i > 0; i--)
2200 fVecs[i] = fVecs[i - 1];
2201
2202 fOffset = page->cache_offset;
2203 vectorIndex = 0;
2204 } else
2205 vectorIndex = fVecCount;
2206
2207 fVecs[vectorIndex].base
2208 = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2209 fVecs[vectorIndex].length = B_PAGE_SIZE;
2210
2211 fVecCount++;
2212 fPageCount++;
2213 return true;
2214 }
2215
2216 return false;
2217 }
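/*	Editorial note -- worked example of the coalescing above, with invented
	physical addresses and 4 KiB pages. Starting from a transfer whose only
	page is at physical 0x10000 with cache_offset 16 (fVecs[0] = {0x10000,
	0x1000}, fOffset = 16):
	  - a page at physical 0x11000, cache_offset 17, is appended to fVecs[0]
	    (its length becomes 0x2000);
	  - a page at physical 0xf000, cache_offset 15, is prepended to fVecs[0]
	    and fOffset becomes 15;
	  - a page at physical 0x30000, cache_offset 18, is adjacent in the cache
	    but not physically contiguous, so it starts a new iovec;
	  - a page with cache_offset 20 matches neither end of the cache range and
	    is rejected -- it will go into the next transfer.
*/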
2218
2219
2220 status_t
2221 PageWriteTransfer::Schedule(uint32 flags)
2222 {
2223 off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2224 generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2225
2226 if (fRun != NULL) {
2227 return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2228 flags | B_PHYSICAL_IO_REQUEST, this);
2229 }
2230
2231 status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2232 flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2233
2234 SetStatus(status, writeLength);
2235 return fStatus;
2236 }
2237
2238
2239 void
2240 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2241 {
2242 // only succeed if all pages up to the last one have been written fully
2243 // and the last page has at least been written partially
2244 if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2245 status = B_ERROR;
2246
2247 fStatus = status;
2248 }
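/*	Editorial note -- worked example of the criterion above: with
	fPageCount == 4 and 4 KiB pages, transferred must exceed
	3 * B_PAGE_SIZE == 12288 bytes for the transfer to be considered
	successful; 12288 bytes or less means the last page was not written at
	all, so the whole transfer is reported as B_ERROR.
*/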
2249
2250
2251 void
2252 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2253 generic_size_t bytesTransferred)
2254 {
2255 SetStatus(status, bytesTransferred);
2256 fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2257 }
2258
2259
2260 status_t
2261 PageWriterRun::Init(uint32 maxPages)
2262 {
2263 fMaxPages = maxPages;
2264 fWrapperCount = 0;
2265 fTransferCount = 0;
2266 fPendingTransfers = 0;
2267
2268 fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2269 fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2270 if (fWrappers == NULL || fTransfers == NULL)
2271 return B_NO_MEMORY;
2272
2273 return B_OK;
2274 }
2275
2276
2277 void
2278 PageWriterRun::PrepareNextRun()
2279 {
2280 fWrapperCount = 0;
2281 fTransferCount = 0;
2282 fPendingTransfers = 0;
2283 }
2284
2285
2286 /*! The page's cache must be locked.
2287 */
2288 void
2289 PageWriterRun::AddPage(vm_page* page)
2290 {
2291 fWrappers[fWrapperCount++].SetTo(page);
2292
2293 if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2294 fTransfers[fTransferCount++].SetTo(this, page,
2295 page->Cache()->MaxPagesPerAsyncWrite());
2296 }
2297 }
2298
2299
2300 /*! Writes all pages previously added.
2301 \return The number of pages that could not be written or otherwise handled.
2302 */
2303 uint32
2304 PageWriterRun::Go()
2305 {
2306 atomic_set(&fPendingTransfers, fTransferCount);
2307
2308 fAllFinishedCondition.Init(this, "page writer wait for I/O");
2309 ConditionVariableEntry waitEntry;
2310 fAllFinishedCondition.Add(&waitEntry);
2311
2312 // schedule writes
2313 for (uint32 i = 0; i < fTransferCount; i++)
2314 fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2315
2316 // wait until all pages have been written
2317 waitEntry.Wait();
2318
2319 // mark pages depending on whether they could be written or not
2320
2321 uint32 failedPages = 0;
2322 uint32 wrapperIndex = 0;
2323 for (uint32 i = 0; i < fTransferCount; i++) {
2324 PageWriteTransfer& transfer = fTransfers[i];
2325 transfer.Cache()->Lock();
2326
2327 for (uint32 j = 0; j < transfer.PageCount(); j++) {
2328 if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2329 failedPages++;
2330 }
2331
2332 transfer.Cache()->Unlock();
2333 }
2334
2335 ASSERT(wrapperIndex == fWrapperCount);
2336
2337 for (uint32 i = 0; i < fTransferCount; i++) {
2338 PageWriteTransfer& transfer = fTransfers[i];
2339 struct VMCache* cache = transfer.Cache();
2340
2341 // We've acquired a reference for each page.
2342 for (uint32 j = 0; j < transfer.PageCount(); j++) {
2343 // We release the cache references after all pages were made
2344 // unbusy again - otherwise releasing a vnode could deadlock.
2345 cache->ReleaseStoreRef();
2346 cache->ReleaseRef();
2347 }
2348 }
2349
2350 return failedPages;
2351 }
2352
2353
2354 void
2355 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2356 bool partialTransfer, size_t bytesTransferred)
2357 {
2358 if (atomic_add(&fPendingTransfers, -1) == 1)
2359 fAllFinishedCondition.NotifyAll();
2360 }
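/*	Editorial note -- illustrative sketch, not part of the original source:
	how the page writer below drives a PageWriterRun. Error handling and the
	cache/store reference management around AddPage() are omitted.

		PageWriterRun run;
		if (run.Init(256) != B_OK)
			return B_ERROR;

		while (true) {
			run.PrepareNextRun();

			// collect up to 256 modified, non-busy pages; AddPage() must be
			// called with the respective page's cache locked
			// run.AddPage(page);

			uint32 failedPages = run.Go();
				// schedules the asynchronous writes and blocks until all
				// transfers have finished
		}
*/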
2361
2362
2363 /*! The page writer continuously takes some pages from the modified
2364 queue, writes them back, and moves them back to the active queue.
2365 It runs in its own thread, and is only there to keep the number
2366 of modified pages low, so that more pages can be reused at
2367 little cost.
2368 */
2369 status_t
2370 page_writer(void* /*unused*/)
2371 {
2372 const uint32 kNumPages = 256;
2373 #ifdef TRACE_VM_PAGE
2374 uint32 writtenPages = 0;
2375 bigtime_t lastWrittenTime = 0;
2376 bigtime_t pageCollectionTime = 0;
2377 bigtime_t pageWritingTime = 0;
2378 #endif
2379
2380 PageWriterRun run;
2381 if (run.Init(kNumPages) != B_OK) {
2382 panic("page writer: Failed to init PageWriterRun!");
2383 return B_ERROR;
2384 }
2385
2386 page_num_t pagesSinceLastSuccessfulWrite = 0;
2387
2388 while (true) {
2389 // TODO: Maybe wait shorter when memory is low!
2390 if (sModifiedPageQueue.Count() < kNumPages) {
2391 sPageWriterCondition.Wait(3000000, true);
2392 // every 3 seconds when no one triggers us
2393 }
2394
2395 page_num_t modifiedPages = sModifiedPageQueue.Count();
2396 if (modifiedPages == 0)
2397 continue;
2398
2399 if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2400 // We ran through the whole queue without being able to write a
2401 // single page. Take a break.
2402 snooze(500000);
2403 pagesSinceLastSuccessfulWrite = 0;
2404 }
2405
2406 #if ENABLE_SWAP_SUPPORT
2407 page_stats pageStats;
2408 get_page_stats(pageStats);
2409 bool activePaging = do_active_paging(pageStats);
2410 #endif
2411
2412 // depending on how urgent it becomes to get pages to disk, we adjust
2413 // our I/O priority
2414 uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2415 int32 ioPriority = B_IDLE_PRIORITY;
2416 if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2417 || modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2418 ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2419 } else {
2420 ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2421 / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2422 }
2423
2424 thread_set_io_priority(ioPriority);
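// Editorial note -- worked example of the scaling above: below the critical
// low-resource state the I/O priority grows linearly with the number of
// modified pages, e.g. 2500 modified pages with a threshold of 10000 yield
// roughly a quarter of MAX_PAGE_WRITER_IO_PRIORITY; at or above the
// threshold (or when pages are critically low) the writer runs at the
// maximum priority.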
2425
2426 uint32 numPages = 0;
2427 run.PrepareNextRun();
2428
2429 // TODO: make this laptop friendly, too (ie. only start doing
2430 // something if someone else did something or there is really
2431 // enough to do).
2432
2433 // collect pages to be written
2434 #ifdef TRACE_VM_PAGE
2435 pageCollectionTime -= system_time();
2436 #endif
2437
2438 page_num_t maxPagesToSee = modifiedPages;
2439
2440 while (numPages < kNumPages && maxPagesToSee > 0) {
2441 vm_page *page = next_modified_page(maxPagesToSee);
2442 if (page == NULL)
2443 break;
2444
2445 PageCacheLocker cacheLocker(page, false);
2446 if (!cacheLocker.IsLocked())
2447 continue;
2448
2449 VMCache *cache = page->Cache();
2450
2451 // If the page is busy or its state has changed while we were
2452 // locking the cache, just ignore it.
2453 if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2454 continue;
2455
2456 DEBUG_PAGE_ACCESS_START(page);
2457
2458 // Don't write back wired (locked) pages.
2459 if (page->WiredCount() > 0) {
2460 set_page_state(page, PAGE_STATE_ACTIVE);
2461 DEBUG_PAGE_ACCESS_END(page);
2462 continue;
2463 }
2464
2465 // Write back temporary pages only when we're actively paging.
2466 if (cache->temporary
2467 #if ENABLE_SWAP_SUPPORT
2468 && (!activePaging
2469 || !cache->CanWritePage(
2470 (off_t)page->cache_offset << PAGE_SHIFT))
2471 #endif
2472 ) {
2473 // We can't/don't want to do anything with this page, so move it
2474 // to one of the other queues.
2475 if (page->mappings.IsEmpty())
2476 set_page_state(page, PAGE_STATE_INACTIVE);
2477 else
2478 set_page_state(page, PAGE_STATE_ACTIVE);
2479
2480 DEBUG_PAGE_ACCESS_END(page);
2481 continue;
2482 }
2483
2484 // We need our own reference to the store, as it might currently be
2485 // destroyed.
2486 if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2487 DEBUG_PAGE_ACCESS_END(page);
2488 cacheLocker.Unlock();
2489 thread_yield();
2490 continue;
2491 }
2492
2493 run.AddPage(page);
2494 // TODO: We're possibly adding pages of different caches and
2495 // thus maybe of different underlying file systems here. This
2496 // is a potential problem for loop file systems/devices, since
2497 // we could mark a page busy that would need to be accessed
2498 // when writing back another page, thus causing a deadlock.
2499
2500 DEBUG_PAGE_ACCESS_END(page);
2501
2502 //dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2503 TPW(WritePage(page));
2504
2505 cache->AcquireRefLocked();
2506 numPages++;
2507 }
2508
2509 #ifdef TRACE_VM_PAGE
2510 pageCollectionTime += system_time();
2511 #endif
2512 if (numPages == 0)
2513 continue;
2514
2515 // write pages to disk and do all the cleanup
2516 #ifdef TRACE_VM_PAGE
2517 pageWritingTime -= system_time();
2518 #endif
2519 uint32 failedPages = run.Go();
2520 #ifdef TRACE_VM_PAGE
2521 pageWritingTime += system_time();
2522
2523 // debug output only...
2524 writtenPages += numPages;
2525 if (writtenPages >= 1024) {
2526 bigtime_t now = system_time();
2527 TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2528 "collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2529 (now - lastWrittenTime) / 1000,
2530 pageCollectionTime / 1000, pageWritingTime / 1000));
2531 lastWrittenTime = now;
2532
2533 writtenPages -= 1024;
2534 pageCollectionTime = 0;
2535 pageWritingTime = 0;
2536 }
2537 #endif
2538
2539 if (failedPages == numPages)
2540 pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2541 else
2542 pagesSinceLastSuccessfulWrite = 0;
2543 }
2544
2545 return B_OK;
2546 }
2547
2548
2549 // #pragma mark -
2550
2551
2552 // TODO: This should be done in the page daemon!
2553 #if 0
2554 #if ENABLE_SWAP_SUPPORT
2555 static bool
2556 free_page_swap_space(int32 index)
2557 {
2558 vm_page *page = vm_page_at_index(index);
2559 PageCacheLocker locker(page);
2560 if (!locker.IsLocked())
2561 return false;
2562
2563 DEBUG_PAGE_ACCESS_START(page);
2564
2565 VMCache* cache = page->Cache();
2566 if (cache->temporary && page->WiredCount() == 0
2567 && cache->HasPage(page->cache_offset << PAGE_SHIFT)
2568 && page->usage_count > 0) {
2569 // TODO: how to judge a page is highly active?
2570 if (swap_free_page_swap_space(page)) {
2571 // We need to mark the page modified, since otherwise it could be
2572 // stolen and we'd lose its data.
2573 vm_page_set_state(page, PAGE_STATE_MODIFIED);
2574 TD(FreedPageSwap(page));
2575 DEBUG_PAGE_ACCESS_END(page);
2576 return true;
2577 }
2578 }
2579 DEBUG_PAGE_ACCESS_END(page);
2580 return false;
2581 }
2582 #endif
2583 #endif // 0
2584
2585
2586 static vm_page *
2587 find_cached_page_candidate(struct vm_page &marker)
2588 {
2589 DEBUG_PAGE_ACCESS_CHECK(&marker);
2590
2591 InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2592 vm_page *page;
2593
2594 if (marker.State() == PAGE_STATE_UNUSED) {
2595 // Get the first page of the cached queue
2596 page = sCachedPageQueue.Head();
2597 } else {
2598 // Get the next page of the current queue
2599 if (marker.State() != PAGE_STATE_CACHED) {
2600 panic("invalid marker %p state", &marker);
2601 return NULL;
2602 }
2603
2604 page = sCachedPageQueue.Next(&marker);
2605 sCachedPageQueue.Remove(&marker);
2606 marker.SetState(PAGE_STATE_UNUSED);
2607 }
2608
2609 while (page != NULL) {
2610 if (!page->busy) {
2611 // we found a candidate, insert marker
2612 marker.SetState(PAGE_STATE_CACHED);
2613 sCachedPageQueue.InsertAfter(page, &marker);
2614 return page;
2615 }
2616
2617 page = sCachedPageQueue.Next(page);
2618 }
2619
2620 return NULL;
2621 }
2622
2623
2624 static bool
2625 free_cached_page(vm_page *page, bool dontWait)
2626 {
2627 // try to lock the page's cache
2628 if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2629 return false;
2630 VMCache* cache = page->Cache();
2631
2632 AutoLocker<VMCache> cacheLocker(cache, true);
2633 MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2634
2635 // check again if that page is still a candidate
2636 if (page->busy || page->State() != PAGE_STATE_CACHED)
2637 return false;
2638
2639 DEBUG_PAGE_ACCESS_START(page);
2640
2641 PAGE_ASSERT(page, !page->IsMapped());
2642 PAGE_ASSERT(page, !page->modified);
2643
2644 // we can now steal this page
2645
2646 cache->RemovePage(page);
2647 // Now the page doesn't have a cache anymore, so no one else (e.g.
2648 // vm_page_allocate_page_run()) can pick it up, since they would be
2649 // required to lock the cache first, which would fail.
2650
2651 sCachedPageQueue.RemoveUnlocked(page);
2652 return true;
2653 }
2654
2655
2656 static uint32
2657 free_cached_pages(uint32 pagesToFree, bool dontWait)
2658 {
2659 vm_page marker;
2660 init_page_marker(marker);
2661
2662 uint32 pagesFreed = 0;
2663
2664 while (pagesFreed < pagesToFree) {
2665 vm_page *page = find_cached_page_candidate(marker);
2666 if (page == NULL)
2667 break;
2668
2669 if (free_cached_page(page, dontWait)) {
2670 ReadLocker locker(sFreePageQueuesLock);
2671 page->SetState(PAGE_STATE_FREE);
2672 DEBUG_PAGE_ACCESS_END(page);
2673 sFreePageQueue.PrependUnlocked(page);
2674 locker.Unlock();
2675
2676 TA(StolenPage());
2677
2678 pagesFreed++;
2679 }
2680 }
2681
2682 remove_page_marker(marker);
2683
2684 sFreePageCondition.NotifyAll();
2685
2686 return pagesFreed;
2687 }
2688
2689
2690 static void
2691 idle_scan_active_pages(page_stats& pageStats)
2692 {
2693 VMPageQueue& queue = sActivePageQueue;
2694
2695 // We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2696 uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2697
2698 while (maxToScan > 0) {
2699 maxToScan--;
2700
2701 // Get the next page. Note that we don't bother to lock here. We go with
2702 // the assumption that on all architectures reading/writing pointers is
2703 // atomic. Beyond that it doesn't really matter. We have to unlock the
2704 // queue anyway to lock the page's cache, and we'll recheck afterwards.
2705 vm_page* page = queue.Head();
2706 if (page == NULL)
2707 break;
2708
2709 // lock the page's cache
2710 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2711 if (cache == NULL)
2712 continue;
2713
2714 if (page->State() != PAGE_STATE_ACTIVE) {
2715 // page is no longer in the cache or in this queue
2716 cache->ReleaseRefAndUnlock();
2717 continue;
2718 }
2719
2720 if (page->busy) {
2721 // page is busy -- requeue at the end
2722 vm_page_requeue(page, true);
2723 cache->ReleaseRefAndUnlock();
2724 continue;
2725 }
2726
2727 DEBUG_PAGE_ACCESS_START(page);
2728
2729 // Get the page active/modified flags and update the page's usage count.
2730 // We completely unmap inactive temporary pages. This saves us from having
2731 // to iterate through the inactive list as well, since we'll be notified
2732 // via page fault whenever such an inactive page is used again.
2733 // We don't remove the mappings of non-temporary pages, since we
2734 // wouldn't notice when those would become unused and could thus be
2735 // moved to the cached list.
2736 int32 usageCount;
2737 if (page->WiredCount() > 0 || page->usage_count > 0
2738 || !cache->temporary) {
2739 usageCount = vm_clear_page_mapping_accessed_flags(page);
2740 } else
2741 usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2742
2743 if (usageCount > 0) {
2744 usageCount += page->usage_count + kPageUsageAdvance;
2745 if (usageCount > kPageUsageMax)
2746 usageCount = kPageUsageMax;
2747 // TODO: This would probably also be the place to reclaim swap space.
2748 } else {
2749 usageCount += page->usage_count - (int32)kPageUsageDecline;
2750 if (usageCount < 0) {
2751 usageCount = 0;
2752 set_page_state(page, PAGE_STATE_INACTIVE);
2753 }
2754 }
2755
2756 page->usage_count = usageCount;
2757
2758 DEBUG_PAGE_ACCESS_END(page);
2759
2760 cache->ReleaseRefAndUnlock();
2761 }
2762 }
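/*	Editorial note -- worked example of the usage_count aging applied above
	and in the full scans below: a page that was accessed since the last scan
	gains kPageUsageAdvance (3) plus the number of mappings in which its
	accessed flag was set, capped at kPageUsageMax (64); an unaccessed page
	loses kPageUsageDecline (1) per scan. A page with usage_count 2 that is
	not touched for three consecutive idle scans thus drops below zero and is
	moved to the inactive queue.
*/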
2763
2764
2765 static void
2766 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2767 {
2768 int32 pagesToFree = pageStats.unsatisfiedReservations
2769 + sFreeOrCachedPagesTarget
2770 - (pageStats.totalFreePages + pageStats.cachedPages);
2771 if (pagesToFree <= 0)
2772 return;
2773
2774 bigtime_t time = system_time();
2775 uint32 pagesScanned = 0;
2776 uint32 pagesToCached = 0;
2777 uint32 pagesToModified = 0;
2778 uint32 pagesToActive = 0;
2779
2780 // Determine how many pages at maximum to send to the modified queue. Since
2781 // it is relatively expensive to page out pages, we do that on a grander
2782 // scale only when things get desperate.
2783 uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2784
2785 vm_page marker;
2786 init_page_marker(marker);
2787
2788 VMPageQueue& queue = sInactivePageQueue;
2789 InterruptsSpinLocker queueLocker(queue.GetLock());
2790 uint32 maxToScan = queue.Count();
2791
2792 vm_page* nextPage = queue.Head();
2793
2794 while (pagesToFree > 0 && maxToScan > 0) {
2795 maxToScan--;
2796
2797 // get the next page
2798 vm_page* page = nextPage;
2799 if (page == NULL)
2800 break;
2801 nextPage = queue.Next(page);
2802
2803 if (page->busy)
2804 continue;
2805
2806 // mark the position
2807 queue.InsertAfter(page, &marker);
2808 queueLocker.Unlock();
2809
2810 // lock the page's cache
2811 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2812 if (cache == NULL || page->busy
2813 || page->State() != PAGE_STATE_INACTIVE) {
2814 if (cache != NULL)
2815 cache->ReleaseRefAndUnlock();
2816 queueLocker.Lock();
2817 nextPage = queue.Next(&marker);
2818 queue.Remove(&marker);
2819 continue;
2820 }
2821
2822 pagesScanned++;
2823
2824 DEBUG_PAGE_ACCESS_START(page);
2825
2826 // Get the accessed count, clear the accessed/modified flags and
2827 // unmap the page, if it hasn't been accessed.
2828 int32 usageCount;
2829 if (page->WiredCount() > 0)
2830 usageCount = vm_clear_page_mapping_accessed_flags(page);
2831 else
2832 usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2833
2834 // update usage count
2835 if (usageCount > 0) {
2836 usageCount += page->usage_count + kPageUsageAdvance;
2837 if (usageCount > kPageUsageMax)
2838 usageCount = kPageUsageMax;
2839 } else {
2840 usageCount += page->usage_count - (int32)kPageUsageDecline;
2841 if (usageCount < 0)
2842 usageCount = 0;
2843 }
2844
2845 page->usage_count = usageCount;
2846
2847 // Move to fitting queue or requeue:
2848 // * Active mapped pages go to the active queue.
2849 // * Inactive mapped (i.e. wired) pages are requeued.
2850 // * The remaining pages are cachable. Thus, if unmodified they go to
2851 // the cached queue, otherwise to the modified queue (up to a limit).
2852 // Note that, unlike in the idle scanning, we don't exempt pages of
2853 // temporary caches. Apparently we really need memory, so we better
2854 // page out memory as well.
2855 bool isMapped = page->IsMapped();
2856 if (usageCount > 0) {
2857 if (isMapped) {
2858 set_page_state(page, PAGE_STATE_ACTIVE);
2859 pagesToActive++;
2860 } else
2861 vm_page_requeue(page, true);
2862 } else if (isMapped) {
2863 vm_page_requeue(page, true);
2864 } else if (!page->modified) {
2865 set_page_state(page, PAGE_STATE_CACHED);
2866 pagesToFree--;
2867 pagesToCached++;
2868 } else if (maxToFlush > 0) {
2869 set_page_state(page, PAGE_STATE_MODIFIED);
2870 maxToFlush--;
2871 pagesToModified++;
2872 } else
2873 vm_page_requeue(page, true);
2874
2875 DEBUG_PAGE_ACCESS_END(page);
2876
2877 cache->ReleaseRefAndUnlock();
2878
2879 // remove the marker
2880 queueLocker.Lock();
2881 nextPage = queue.Next(&marker);
2882 queue.Remove(&marker);
2883 }
2884
2885 queueLocker.Unlock();
2886
2887 time = system_time() - time;
2888 TRACE_DAEMON(" -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2889 ", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2890 B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2891 pagesToModified, pagesToActive);
2892
2893 // wake up the page writer, if we tossed it some pages
2894 if (pagesToModified > 0)
2895 sPageWriterCondition.WakeUp();
2896 }
2897
2898
2899 static void
2900 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2901 {
2902 vm_page marker;
2903 init_page_marker(marker);
2904
2905 VMPageQueue& queue = sActivePageQueue;
2906 InterruptsSpinLocker queueLocker(queue.GetLock());
2907 uint32 maxToScan = queue.Count();
2908
2909 int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2910 + sFreeOrCachedPagesTarget
2911 - (pageStats.totalFreePages + pageStats.cachedPages)
2912 + std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2913 if (pagesToDeactivate <= 0)
2914 return;
2915
2916 bigtime_t time = system_time();
2917 uint32 pagesAccessed = 0;
2918 uint32 pagesToInactive = 0;
2919 uint32 pagesScanned = 0;
2920
2921 vm_page* nextPage = queue.Head();
2922
2923 while (pagesToDeactivate > 0 && maxToScan > 0) {
2924 maxToScan--;
2925
2926 // get the next page
2927 vm_page* page = nextPage;
2928 if (page == NULL)
2929 break;
2930 nextPage = queue.Next(page);
2931
2932 if (page->busy)
2933 continue;
2934
2935 // mark the position
2936 queue.InsertAfter(page, &marker);
2937 queueLocker.Unlock();
2938
2939 // lock the page's cache
2940 VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2941 if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2942 if (cache != NULL)
2943 cache->ReleaseRefAndUnlock();
2944 queueLocker.Lock();
2945 nextPage = queue.Next(&marker);
2946 queue.Remove(&marker);
2947 continue;
2948 }
2949
2950 pagesScanned++;
2951
2952 DEBUG_PAGE_ACCESS_START(page);
2953
2954 // Get the page active/modified flags and update the page's usage count.
2955 int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2956
2957 if (usageCount > 0) {
2958 usageCount += page->usage_count + kPageUsageAdvance;
2959 if (usageCount > kPageUsageMax)
2960 usageCount = kPageUsageMax;
2961 pagesAccessed++;
2962 // TODO: This would probably also be the place to reclaim swap space.
2963 } else {
2964 usageCount += page->usage_count - (int32)kPageUsageDecline;
2965 if (usageCount <= 0) {
2966 usageCount = 0;
2967 set_page_state(page, PAGE_STATE_INACTIVE);
2968 pagesToInactive++;
2969 }
2970 }
2971
2972 page->usage_count = usageCount;
2973
2974 DEBUG_PAGE_ACCESS_END(page);
2975
2976 cache->ReleaseRefAndUnlock();
2977
2978 // remove the marker
2979 queueLocker.Lock();
2980 nextPage = queue.Next(&marker);
2981 queue.Remove(&marker);
2982 }
2983
2984 time = system_time() - time;
2985 TRACE_DAEMON(" -> active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2986 ", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2987 " ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2988 }
2989
2990
2991 static void
2992 page_daemon_idle_scan(page_stats& pageStats)
2993 {
2994 TRACE_DAEMON("page daemon: idle run\n");
2995
2996 if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2997 // We want more actually free pages, so free some from the cached
2998 // ones.
2999 uint32 freed = free_cached_pages(
3000 sFreePagesTarget - pageStats.totalFreePages, false);
3001 if (freed > 0)
3002 unreserve_pages(freed);
3003 get_page_stats(pageStats);
3004 }
3005
3006 // Walk the active list and move pages to the inactive queue.
3007 get_page_stats(pageStats);
3008 idle_scan_active_pages(pageStats);
3009 }
3010
3011
3012 static void
3013 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
3014 {
3015 TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
3016 B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
3017 pageStats.cachedPages, pageStats.unsatisfiedReservations
3018 + sFreeOrCachedPagesTarget
3019 - (pageStats.totalFreePages + pageStats.cachedPages));
3020
3021 // Walk the inactive list and transfer pages to the cached and modified
3022 // queues.
3023 full_scan_inactive_pages(pageStats, despairLevel);
3024
3025 // Free cached pages. Also wake up reservation waiters.
3026 get_page_stats(pageStats);
3027 int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
3028 - (pageStats.totalFreePages);
3029 if (pagesToFree > 0) {
3030 uint32 freed = free_cached_pages(pagesToFree, true);
3031 if (freed > 0)
3032 unreserve_pages(freed);
3033 }
3034
3035 // Walk the active list and move pages to the inactive queue.
3036 get_page_stats(pageStats);
3037 full_scan_active_pages(pageStats, despairLevel);
3038 }
3039
3040
3041 static status_t
3042 page_daemon(void* /*unused*/)
3043 {
3044 int32 despairLevel = 0;
3045
3046 while (true) {
3047 sPageDaemonCondition.ClearActivated();
3048
3049 // evaluate the free pages situation
3050 page_stats pageStats;
3051 get_page_stats(pageStats);
3052
3053 if (!do_active_paging(pageStats)) {
3054 // Things look good -- just maintain statistics and keep the pool
3055 // of actually free pages full enough.
3056 despairLevel = 0;
3057 page_daemon_idle_scan(pageStats);
3058 sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
3059 } else {
3060 // Not enough free pages. We need to do some real work.
3061 despairLevel = std::min(despairLevel + 1, (int32)3);
3062 page_daemon_full_scan(pageStats, despairLevel);
3063
3064 // Don't wait after the first full scan, but rather immediately
3065 // check whether we were successful in freeing enough pages and
3066 // re-run with increased despair level. The first scan is
3067 // conservative with respect to moving inactive modified pages to
3068 // the modified list to avoid thrashing. The second scan, however,
3069 // will not hold back.
3070 if (despairLevel > 1)
3071 snooze(kBusyScanWaitInterval);
3072 }
3073 }
3074
3075 return B_OK;
3076 }
3077
3078
3079 /*! Returns how many pages could *not* be reserved.
3080 */
3081 static uint32
3082 reserve_pages(uint32 count, int priority, bool dontWait)
3083 {
3084 int32 dontTouch = kPageReserveForPriority[priority];
3085
3086 while (true) {
3087 count -= reserve_some_pages(count, dontTouch);
3088 if (count == 0)
3089 return 0;
3090
3091 if (sUnsatisfiedPageReservations == 0) {
3092 count -= free_cached_pages(count, dontWait);
3093 if (count == 0)
3094 return count;
3095 }
3096
3097 if (dontWait)
3098 return count;
3099
3100 // we need to wait for pages to become available
3101
3102 MutexLocker pageDeficitLocker(sPageDeficitLock);
3103
3104 bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3105 sUnsatisfiedPageReservations += count;
3106
3107 if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3108 // the situation changed
3109 sUnsatisfiedPageReservations -= count;
3110 continue;
3111 }
3112
3113 PageReservationWaiter waiter;
3114 waiter.dontTouch = dontTouch;
3115 waiter.missing = count;
3116 waiter.thread = thread_get_current_thread();
3117 waiter.threadPriority = waiter.thread->priority;
3118
3119 // insert ordered (i.e. after all waiters with higher or equal priority)
3120 PageReservationWaiter* otherWaiter = NULL;
3121 for (PageReservationWaiterList::Iterator it
3122 = sPageReservationWaiters.GetIterator();
3123 (otherWaiter = it.Next()) != NULL;) {
3124 if (waiter < *otherWaiter)
3125 break;
3126 }
3127
3128 sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3129
3130 thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3131 "waiting for pages");
3132
3133 if (notifyDaemon)
3134 sPageDaemonCondition.WakeUp();
3135
3136 pageDeficitLocker.Unlock();
3137
3138 low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3139 thread_block();
3140
3141 pageDeficitLocker.Lock();
3142
3143 return 0;
3144 }
3145 }
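// Editorial note: the dontTouch limit above is what implements the
// per-priority reserve -- a lower-priority reservation refuses to dig into
// the pages kept back for higher-priority allocations and blocks instead,
// while higher-priority (VIP) requests may reach deeper into the pool.
// Waiters are kept sorted so that higher-priority requests are satisfied
// first once pages become available again.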
3146
3147
3148 // #pragma mark - private kernel API
3149
3150
3151 /*! Writes a range of modified pages of a cache to disk.
3152 You need to hold the VMCache lock when calling this function.
3153 Note that the cache lock is temporarily released while the pages are written.
3154 \param cache The cache.
3155 \param firstPage Offset (in page size units) of the first page in the range.
3156 \param endPage End offset (in page size units) of the page range. The page
3157 at this offset is not included.
3158 */
3159 status_t
3160 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3161 uint32 endPage)
3162 {
3163 static const int32 kMaxPages = 256;
3164 int32 maxPages = cache->MaxPagesPerWrite();
3165 if (maxPages < 0 || maxPages > kMaxPages)
3166 maxPages = kMaxPages;
3167
3168 const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3169 | HEAP_DONT_LOCK_KERNEL_SPACE;
3170
3171 PageWriteWrapper stackWrappersPool[2];
3172 PageWriteWrapper* stackWrappers[1];
3173 PageWriteWrapper* wrapperPool
3174 = new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3175 PageWriteWrapper** wrappers
3176 = new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3177 if (wrapperPool == NULL || wrappers == NULL) {
3178 // don't fail, just limit our capabilities
3179 delete[] wrapperPool;
3180 delete[] wrappers;
3181 wrapperPool = stackWrappersPool;
3182 wrappers = stackWrappers;
3183 maxPages = 1;
3184 }
3185
3186 int32 nextWrapper = 0;
3187 int32 usedWrappers = 0;
3188
3189 PageWriteTransfer transfer;
3190 bool transferEmpty = true;
3191
3192 VMCachePagesTree::Iterator it
3193 = cache->pages.GetIterator(firstPage, true, true);
3194
3195 while (true) {
3196 vm_page* page = it.Next();
3197 if (page == NULL || page->cache_offset >= endPage) {
3198 if (transferEmpty)
3199 break;
3200
3201 page = NULL;
3202 }
3203
3204 if (page != NULL) {
3205 if (page->busy
3206 || (page->State() != PAGE_STATE_MODIFIED
3207 && !vm_test_map_modification(page))) {
3208 page = NULL;
3209 }
3210 }
3211
3212 PageWriteWrapper* wrapper = NULL;
3213 if (page != NULL) {
3214 wrapper = &wrapperPool[nextWrapper++];
3215 if (nextWrapper > maxPages)
3216 nextWrapper = 0;
3217
3218 DEBUG_PAGE_ACCESS_START(page);
3219
3220 wrapper->SetTo(page);
3221
3222 if (transferEmpty || transfer.AddPage(page)) {
3223 if (transferEmpty) {
3224 transfer.SetTo(NULL, page, maxPages);
3225 transferEmpty = false;
3226 }
3227
3228 DEBUG_PAGE_ACCESS_END(page);
3229
3230 wrappers[usedWrappers++] = wrapper;
3231 continue;
3232 }
3233
3234 DEBUG_PAGE_ACCESS_END(page);
3235 }
3236
3237 if (transferEmpty)
3238 continue;
3239
3240 cache->Unlock();
3241 status_t status = transfer.Schedule(0);
3242 cache->Lock();
3243
3244 for (int32 i = 0; i < usedWrappers; i++)
3245 wrappers[i]->Done(status);
3246
3247 usedWrappers = 0;
3248
3249 if (page != NULL) {
3250 transfer.SetTo(NULL, page, maxPages);
3251 wrappers[usedWrappers++] = wrapper;
3252 } else
3253 transferEmpty = true;
3254 }
3255
3256 if (wrapperPool != stackWrappersPool) {
3257 delete[] wrapperPool;
3258 delete[] wrappers;
3259 }
3260
3261 return B_OK;
3262 }
3263
3264
3265 /*! You need to hold the VMCache lock when calling this function.
3266 Note that the cache lock is temporarily released while the pages are written.
3267 */
3268 status_t
3269 vm_page_write_modified_pages(VMCache *cache)
3270 {
3271 return vm_page_write_modified_page_range(cache, 0,
3272 (cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3273 }
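/*	Editorial note -- illustrative sketch, not part of the original source:
	typical use of the two functions above by code that wants a cache's dirty
	pages flushed (e.g. during a sync). The cache lock is temporarily dropped
	internally while the writes are scheduled, but is held again on return.

		AutoLocker<VMCache> locker(cache);
		status_t status = vm_page_write_modified_pages(cache);
*/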
3274
3275
3276 /*! Schedules the page writer to write back the specified \a page.
3277 Note, however, that it might not do this immediately, and it can well
3278 take several seconds until the page is actually written out.
3279 */
3280 void
3281 vm_page_schedule_write_page(vm_page *page)
3282 {
3283 PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3284
3285 vm_page_requeue(page, false);
3286
3287 sPageWriterCondition.WakeUp();
3288 }
3289
3290
3291 /*! Cache must be locked.
3292 */
3293 void
3294 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3295 uint32 endPage)
3296 {
3297 uint32 modified = 0;
3298 for (VMCachePagesTree::Iterator it
3299 = cache->pages.GetIterator(firstPage, true, true);
3300 vm_page *page = it.Next();) {
3301 if (page->cache_offset >= endPage)
3302 break;
3303
3304 if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3305 DEBUG_PAGE_ACCESS_START(page);
3306 vm_page_requeue(page, false);
3307 modified++;
3308 DEBUG_PAGE_ACCESS_END(page);
3309 }
3310 }
3311
3312 if (modified > 0)
3313 sPageWriterCondition.WakeUp();
3314 }
3315
3316
3317 void
3318 vm_page_init_num_pages(kernel_args *args)
3319 {
3320 // calculate the size of memory by looking at the physical_memory_range array
3321 sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3322 page_num_t physicalPagesEnd = sPhysicalPageOffset
3323 + args->physical_memory_range[0].size / B_PAGE_SIZE;
3324
3325 sNonExistingPages = 0;
3326 sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3327
3328 for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3329 page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3330 if (start > physicalPagesEnd)
3331 sNonExistingPages += start - physicalPagesEnd;
3332 physicalPagesEnd = start
3333 + args->physical_memory_range[i].size / B_PAGE_SIZE;
3334
3335 #ifdef LIMIT_AVAILABLE_MEMORY
3336 page_num_t available
3337 = physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3338 if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3339 physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3340 + LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3341 break;
3342 }
3343 #endif
3344 }
3345
3346 TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3347 sPhysicalPageOffset, physicalPagesEnd));
3348
3349 sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3350 }
3351
3352
3353 status_t
3354 vm_page_init(kernel_args *args)
3355 {
3356 TRACE(("vm_page_init: entry\n"));
3357
3358 // init page queues
3359 sModifiedPageQueue.Init("modified pages queue");
3360 sInactivePageQueue.Init("inactive pages queue");
3361 sActivePageQueue.Init("active pages queue");
3362 sCachedPageQueue.Init("cached pages queue");
3363 sFreePageQueue.Init("free pages queue");
3364 sClearPageQueue.Init("clear pages queue");
3365
3366 new (&sPageReservationWaiters) PageReservationWaiterList;
3367
3368 // map in the new free page table
3369 sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3370 ~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3371
3372 TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3373 " (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3374 (phys_addr_t)(sNumPages * sizeof(vm_page))));
3375
3376 // initialize the free page table
3377 for (uint32 i = 0; i < sNumPages; i++) {
3378 sPages[i].Init(sPhysicalPageOffset + i);
3379 sFreePageQueue.Append(&sPages[i]);
3380
3381 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3382 sPages[i].allocation_tracking_info.Clear();
3383 #endif
3384 }
3385
3386 sUnreservedFreePages = sNumPages;
3387
3388 TRACE(("initialized table\n"));
3389
3390 // mark the ranges between usable physical memory unused
3391 phys_addr_t previousEnd = 0;
3392 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3393 phys_addr_t base = args->physical_memory_range[i].start;
3394 phys_size_t size = args->physical_memory_range[i].size;
3395 if (base > previousEnd) {
3396 mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3397 (base - previousEnd) / B_PAGE_SIZE, false);
3398 }
3399 previousEnd = base + size;
3400 }
3401
3402 // mark the allocated physical page ranges wired
3403 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3404 mark_page_range_in_use(
3405 args->physical_allocated_range[i].start / B_PAGE_SIZE,
3406 args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3407 }
3408
3409 // prevent future allocations from the kernel args ranges
3410 args->num_physical_allocated_ranges = 0;
3411
3412 // The target of actually free pages. This must be at least the system
3413 // reserve, but should be a few more pages, so we don't have to extract
3414 // a cached page with each allocation.
3415 sFreePagesTarget = VM_PAGE_RESERVE_USER
3416 + std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3417
3418 // The target of free + cached and inactive pages. On low-memory machines
3419 // keep things tight. free + cached is the pool of immediately allocatable
3420 // pages. We want a few inactive pages, so when we're actually paging, we
3421 // have a reasonably large set of pages to work with.
3422 if (sUnreservedFreePages < 16 * 1024) {
3423 sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3424 sInactivePagesTarget = sFreePagesTarget / 3;
3425 } else {
3426 sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3427 sInactivePagesTarget = sFreePagesTarget / 2;
3428 }
3429
3430 TRACE(("vm_page_init: exit\n"));
3431
3432 return B_OK;
3433 }
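// Editorial note -- worked example of the targets computed above, assuming
// 4 KiB pages and no non-existing/ignored pages: a machine with 512 MiB of
// RAM has 131072 pages, so sFreePagesTarget = VM_PAGE_RESERVE_USER + 128.
// Since more than 16 * 1024 pages are free at this point,
// sFreeOrCachedPagesTarget becomes twice that value and sInactivePagesTarget
// half of it.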
3434
3435
3436 status_t
3437 vm_page_init_post_area(kernel_args *args)
3438 {
3439 void *dummy;
3440
3441 dummy = sPages;
3442 create_area("page structures", &dummy, B_EXACT_ADDRESS,
3443 PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3444 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3445
3446 add_debugger_command("list_pages", &dump_page_list,
3447 "List physical pages");
3448 add_debugger_command("page_stats", &dump_page_stats,
3449 "Dump statistics about page usage");
3450 add_debugger_command_etc("page", &dump_page_long,
3451 "Dump page info",
3452 "[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3453 "Prints information for the physical page. If neither \"-p\" nor\n"
3454 "\"-v\" are given, the provided address is interpreted as address of\n"
3455 "the vm_page data structure for the page in question. If \"-p\" is\n"
3456 "given, the address is the physical address of the page. If \"-v\" is\n"
3457 "given, the address is interpreted as virtual address in the current\n"
3458 "thread's address space and for the page it is mapped to (if any)\n"
3459 "information are printed. If \"-m\" is specified, the command will\n"
3460 "search all known address spaces for mappings to that page and print\n"
3461 "them.\n", 0);
3462 add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3463 add_debugger_command("find_page", &find_page,
3464 "Find out which queue a page is actually in");
3465
3466 #ifdef TRACK_PAGE_USAGE_STATS
3467 add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3468 "Dumps statistics about page usage counts",
3469 "\n"
3470 "Dumps statistics about page usage counts.\n",
3471 B_KDEBUG_DONT_PARSE_ARGUMENTS);
3472 #endif
3473
3474 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3475 add_debugger_command_etc("page_allocations_per_caller",
3476 &dump_page_allocations_per_caller,
3477 "Dump current page allocations summed up per caller",
3478 "[ -d <caller> ] [ -r ]\n"
3479 "The current allocations will by summed up by caller (their count)\n"
3480 "printed in decreasing order by count.\n"
3481 "If \"-d\" is given, each allocation for caller <caller> is printed\n"
3482 "including the respective stack trace.\n"
3483 "If \"-r\" is given, the allocation infos are reset after gathering\n"
3484 "the information, so the next command invocation will only show the\n"
3485 "allocations made after the reset.\n", 0);
3486 add_debugger_command_etc("page_allocation_infos",
3487 &dump_page_allocation_infos,
3488 "Dump current page allocations",
3489 "[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3490 "[ --thread <thread ID> ]\n"
3491 "The current allocations filtered by optional values will be printed.\n"
3492 "The optional \"-p\" page number filters for a specific page,\n"
3493 "with \"--team\" and \"--thread\" allocations by specific teams\n"
3494 "and/or threads can be filtered (these only work if a corresponding\n"
3495 "tracing entry is still available).\n"
3496 "If \"--stacktrace\" is given, then stack traces of the allocation\n"
3497 "callers are printed, where available\n", 0);
3498 #endif
3499
3500 return B_OK;
3501 }
3502
3503
3504 status_t
3505 vm_page_init_post_thread(kernel_args *args)
3506 {
3507 new (&sFreePageCondition) ConditionVariable;
3508
3509 // create a kernel thread to clear out pages
3510
3511 thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3512 B_LOWEST_ACTIVE_PRIORITY, NULL);
3513 resume_thread(thread);
3514
3515 // start page writer
3516
3517 sPageWriterCondition.Init("page writer");
3518
3519 thread = spawn_kernel_thread(&page_writer, "page writer",
3520 B_NORMAL_PRIORITY + 1, NULL);
3521 resume_thread(thread);
3522
3523 // start page daemon
3524
3525 sPageDaemonCondition.Init("page daemon");
3526
3527 thread = spawn_kernel_thread(&page_daemon, "page daemon",
3528 B_NORMAL_PRIORITY, NULL);
3529 resume_thread(thread);
3530
3531 return B_OK;
3532 }
3533
3534
3535 status_t
3536 vm_mark_page_inuse(page_num_t page)
3537 {
3538 return vm_mark_page_range_inuse(page, 1);
3539 }
3540
3541
3542 status_t
3543 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3544 {
3545 return mark_page_range_in_use(startPage, length, false);
3546 }
3547
3548
3549 /*! Unreserve pages previously reserved with vm_page_reserve_pages().
3550 */
3551 void
3552 vm_page_unreserve_pages(vm_page_reservation* reservation)
3553 {
3554 uint32 count = reservation->count;
3555 reservation->count = 0;
3556
3557 if (count == 0)
3558 return;
3559
3560 TA(UnreservePages(count));
3561
3562 unreserve_pages(count);
3563 }
3564
3565
3566 /*! With this call, you can reserve a number of free pages in the system.
3567 They will only be handed out to someone who has actually reserved them.
3568 This call returns as soon as the number of requested pages has been
3569 reached.
3570 The caller must not hold any cache lock or the function might deadlock.
3571 */
3572 void
3573 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3574 int priority)
3575 {
3576 reservation->count = count;
3577
3578 if (count == 0)
3579 return;
3580
3581 TA(ReservePages(count));
3582
3583 reserve_pages(count, priority, false);
3584 }
3585
3586
3587 bool
3588 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3589 int priority)
3590 {
3591 if (count == 0) {
3592 reservation->count = count;
3593 return true;
3594 }
3595
3596 uint32 remaining = reserve_pages(count, priority, true);
3597 if (remaining == 0) {
3598 TA(ReservePages(count));
3599 reservation->count = count;
3600 return true;
3601 }
3602
3603 unreserve_pages(count - remaining);
3604
3605 return false;
3606 }
3607
3608
3609 vm_page *
3610 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3611 {
3612 uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3613 ASSERT(pageState != PAGE_STATE_FREE);
3614 ASSERT(pageState != PAGE_STATE_CLEAR);
3615
3616 ASSERT(reservation->count > 0);
3617 reservation->count--;
3618
3619 VMPageQueue* queue;
3620 VMPageQueue* otherQueue;
3621
3622 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3623 queue = &sClearPageQueue;
3624 otherQueue = &sFreePageQueue;
3625 } else {
3626 queue = &sFreePageQueue;
3627 otherQueue = &sClearPageQueue;
3628 }
3629
3630 ReadLocker locker(sFreePageQueuesLock);
3631
3632 vm_page* page = queue->RemoveHeadUnlocked();
3633 if (page == NULL) {
3634 // if the primary queue was empty, grab the page from the
3635 // secondary queue
3636 page = otherQueue->RemoveHeadUnlocked();
3637
3638 if (page == NULL) {
3639 // Unlikely, but possible: the page we have reserved has moved
3640 // between the queues after we checked the first queue. Grab the
3641 // write locker to make sure this doesn't happen again.
3642 locker.Unlock();
3643 WriteLocker writeLocker(sFreePageQueuesLock);
3644
3645 page = queue->RemoveHead();
3646 if (page == NULL)
3647 page = otherQueue->RemoveHead();
3648
3649 if (page == NULL) {
3650 panic("Had reserved page, but there is none!");
3651 return NULL;
3652 }
3653
3654 // downgrade to read lock
3655 locker.Lock();
3656 }
3657 }
3658
3659 if (page->CacheRef() != NULL)
3660 panic("supposed to be free page %p has cache @! page %p; cache _cache", page, page);
3661
3662 DEBUG_PAGE_ACCESS_START(page);
3663
3664 int oldPageState = page->State();
3665 page->SetState(pageState);
3666 page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3667 page->usage_count = 0;
3668 page->accessed = false;
3669 page->modified = false;
3670
3671 locker.Unlock();
3672
3673 if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3674 sPageQueues[pageState].AppendUnlocked(page);
3675
3676 // clear the page, if we had to take it from the free queue and a clear
3677 // page was requested
3678 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3679 clear_page(page);
3680
3681 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3682 page->allocation_tracking_info.Init(
3683 TA(AllocatePage(page->physical_page_number)));
3684 #else
3685 TA(AllocatePage(page->physical_page_number));
3686 #endif
3687
3688 return page;
3689 }
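/*	Editorial note -- illustrative sketch, not part of the original source:
	how the reservation API and vm_page_allocate_page() above are meant to be
	used together. The count, priority, and page state are example values.

		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, 4, VM_PRIORITY_USER);
			// may block until 4 pages can be reserved

		for (int i = 0; i < 4; i++) {
			vm_page* page = vm_page_allocate_page(&reservation,
				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
			// ... insert the page into a cache and/or map it ...
		}

		vm_page_unreserve_pages(&reservation);
			// a no-op here, since all reserved pages were consumed
*/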
3690
3691
3692 static void
3693 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3694 VMPageQueue::PageList& clearPages)
3695 {
3696 // Page lists are sorted, so remove tails before prepending to the respective queue.
3697
3698 while (vm_page* page = freePages.RemoveTail()) {
3699 page->busy = false;
3700 page->SetState(PAGE_STATE_FREE);
3701 DEBUG_PAGE_ACCESS_END(page);
3702 sFreePageQueue.PrependUnlocked(page);
3703 }
3704
3705 while (vm_page* page = clearPages.RemoveTail()) {
3706 page->busy = false;
3707 page->SetState(PAGE_STATE_CLEAR);
3708 DEBUG_PAGE_ACCESS_END(page);
3709 sClearPageQueue.PrependUnlocked(page);
3710 }
3711
3712 sFreePageCondition.NotifyAll();
3713 }
3714
3715
3716 /*! Tries to allocate a contiguous run of \a length pages starting at
3717 index \a start.
3718
3719 The caller must have write-locked the free/clear page queues. The function
3720 will unlock regardless of whether it succeeds or fails.
3721
3722 If the function fails, it cleans up after itself, i.e. it will free all
3723 pages it managed to allocate.
3724
3725 \param start The start index (into \c sPages) of the run.
3726 \param length The number of pages to allocate.
3727 \param flags Page allocation flags. Encodes the state the function shall
3728 set the allocated pages to, whether the pages shall be marked busy
3729 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3730 (VM_PAGE_ALLOC_CLEAR).
3731 \param freeClearQueueLocker WriteLocker for the free/clear page queues,
3732 in locked state. Will be unlocked by the function.
3733 \return The index of the first page that could not be allocated. \a length
3734 is returned when the function was successful.
3735 */
3736 static page_num_t
3737 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3738 WriteLocker& freeClearQueueLocker)
3739 {
3740 uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3741 ASSERT(pageState != PAGE_STATE_FREE);
3742 ASSERT(pageState != PAGE_STATE_CLEAR);
3743 ASSERT(start + length <= sNumPages);
3744
3745 // Pull the free/clear pages out of their respective queues. Cached pages
3746 // are allocated later.
3747 page_num_t cachedPages = 0;
3748 VMPageQueue::PageList freePages;
3749 VMPageQueue::PageList clearPages;
3750 page_num_t i = 0;
3751 for (; i < length; i++) {
3752 bool pageAllocated = true;
3753 bool noPage = false;
3754 vm_page& page = sPages[start + i];
3755 switch (page.State()) {
3756 case PAGE_STATE_CLEAR:
3757 DEBUG_PAGE_ACCESS_START(&page);
3758 sClearPageQueue.Remove(&page);
3759 clearPages.Add(&page);
3760 break;
3761 case PAGE_STATE_FREE:
3762 DEBUG_PAGE_ACCESS_START(&page);
3763 sFreePageQueue.Remove(&page);
3764 freePages.Add(&page);
3765 break;
3766 case PAGE_STATE_CACHED:
3767 // We allocate cached pages later.
3768 cachedPages++;
3769 pageAllocated = false;
3770 break;
3771
3772 default:
3773 // Probably a page was cached when our caller checked. Now it's
3774 // gone and we have to abort.
3775 noPage = true;
3776 break;
3777 }
3778
3779 if (noPage)
3780 break;
3781
3782 if (pageAllocated) {
3783 page.SetState(flags & VM_PAGE_ALLOC_STATE);
3784 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3785 page.usage_count = 0;
3786 page.accessed = false;
3787 page.modified = false;
3788 }
3789 }
3790
3791 if (i < length) {
3792 // failed to allocate a page -- free all that we've got
3793 allocate_page_run_cleanup(freePages, clearPages);
3794 return i;
3795 }
3796
3797 freeClearQueueLocker.Unlock();
3798
3799 if (cachedPages > 0) {
3800 // allocate the pages that weren't free but cached
3801 page_num_t freedCachedPages = 0;
3802 page_num_t nextIndex = start;
3803 vm_page* freePage = freePages.Head();
3804 vm_page* clearPage = clearPages.Head();
3805 while (cachedPages > 0) {
3806 // skip, if we've already got the page
3807 if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3808 freePage = freePages.GetNext(freePage);
3809 nextIndex++;
3810 continue;
3811 }
3812 if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3813 clearPage = clearPages.GetNext(clearPage);
3814 nextIndex++;
3815 continue;
3816 }
3817
3818 // free the page, if it is still cached
3819 vm_page& page = sPages[nextIndex];
3820 if (!free_cached_page(&page, false)) {
3821 // TODO: if the page turns out to have been freed already,
3822 // there would be no need to fail
3823 break;
3824 }
3825
3826 page.SetState(flags & VM_PAGE_ALLOC_STATE);
3827 page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3828 page.usage_count = 0;
3829 page.accessed = false;
3830 page.modified = false;
3831
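			// inserting before the next free page keeps freePages sorted by
			// page index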
3832 freePages.InsertBefore(freePage, &page);
3833 freedCachedPages++;
3834 cachedPages--;
3835 nextIndex++;
3836 }
3837
3838 // If we have freed cached pages, we need to balance things.
3839 if (freedCachedPages > 0)
3840 unreserve_pages(freedCachedPages);
3841
3842 if (nextIndex - start < length) {
3843 // failed to allocate all cached pages -- free all that we've got
3844 freeClearQueueLocker.Lock();
3845 allocate_page_run_cleanup(freePages, clearPages);
3846 freeClearQueueLocker.Unlock();
3847
3848 return nextIndex - start;
3849 }
3850 }
3851
3852 // clear pages, if requested
3853 if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3854 for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3855 vm_page* page = it.Next();) {
3856 clear_page(page);
3857 }
3858 }
3859
3860 // add pages to target queue
3861 if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3862 freePages.MoveFrom(&clearPages);
3863 sPageQueues[pageState].AppendUnlocked(freePages, length);
3864 }
3865
3866 // Note: We don't unreserve the pages since we pulled them out of the
3867 // free/clear queues without adjusting sUnreservedFreePages.
3868
3869 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3870 AbstractTraceEntryWithStackTrace* traceEntry
3871 = TA(AllocatePageRun(start, length));
3872
3873 for (page_num_t i = start; i < start + length; i++)
3874 sPages[i].allocation_tracking_info.Init(traceEntry);
3875 #else
3876 TA(AllocatePageRun(start, length));
3877 #endif
3878
3879 return length;
3880 }
3881
3882
3883 /*! Allocate a physically contiguous range of pages.
3884
3885 \param flags Page allocation flags. Encodes the state the function shall
3886 set the allocated pages to, whether the pages shall be marked busy
3887 (VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3888 (VM_PAGE_ALLOC_CLEAR).
3889 \param length The number of contiguous pages to allocate.
3890 \param restrictions Restrictions to the physical addresses of the page run
3891 to allocate, including \c low_address, the first acceptable physical
3892 address where the page run may start, \c high_address, the last
3893 acceptable physical address where the page run may end (i.e. it must
3894 		hold \code runStartAddress + length * B_PAGE_SIZE <= high_address \endcode),
3895 \c alignment, the alignment of the page run start address, and
3896 \c boundary, multiples of which the page run must not cross.
3897 Values set to \c 0 are ignored.
3898 \param priority The page reservation priority (as passed to
3899 vm_page_reserve_pages()).
3900 \return The first page of the allocated page run on success; \c NULL
3901 when the allocation failed.
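
	A hypothetical usage sketch (not taken from an actual caller; the flag
	combination and the \c VM_PRIORITY_SYSTEM priority are assumptions):
	allocate 16 contiguous, cleared, wired pages below 16 MB, aligned to
	64 KB:
	\code
	physical_address_restrictions restrictions = {};
	restrictions.high_address = 16 * 1024 * 1024;
	restrictions.alignment = 64 * 1024;
	vm_page* firstPage = vm_page_allocate_page_run(
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR, 16, &restrictions,
		VM_PRIORITY_SYSTEM);
	if (firstPage == NULL) {
		// the contiguous run could not be allocated
	}
	\endcode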
3902 */
3903 vm_page*
3904 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3905 const physical_address_restrictions* restrictions, int priority)
3906 {
3907 // compute start and end page index
3908 page_num_t requestedStart
3909 = std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3910 - sPhysicalPageOffset;
3911 page_num_t start = requestedStart;
3912 page_num_t end;
3913 if (restrictions->high_address > 0) {
3914 end = std::max(restrictions->high_address / B_PAGE_SIZE,
3915 sPhysicalPageOffset)
3916 - sPhysicalPageOffset;
3917 end = std::min(end, sNumPages);
3918 } else
3919 end = sNumPages;
3920
3921 // compute alignment mask
3922 page_num_t alignmentMask
3923 = std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3924 ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3925 // alignment must be a power of 2
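		// E.g. a requested alignment of 64 KB with 4 KB pages yields an
		// alignmentMask of 0xf (16 pages); an alignment of 0 or of a single
		// page yields a mask of 0, i.e. no constraint.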
3926
3927 // compute the boundary mask
3928 uint32 boundaryMask = 0;
3929 if (restrictions->boundary != 0) {
3930 page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3931 // boundary must be a power of two and not less than alignment and
3932 // length
3933 ASSERT(((boundary - 1) & boundary) == 0);
3934 ASSERT(boundary >= alignmentMask + 1);
3935 ASSERT(boundary >= length);
3936
3937 boundaryMask = -boundary;
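			// Since boundary is a power of two, -boundary == ~(boundary - 1),
			// i.e. all bits from the boundary bit upward are set. Two page
			// indices lie in the same boundary-sized block iff they agree in
			// all of these bits.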
3938 }
3939
3940 vm_page_reservation reservation;
3941 vm_page_reserve_pages(&reservation, length, priority);
3942
3943 WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3944
3945 // First we try to get a run with free pages only. If that fails, we also
3946 	// consider cached pages. If there are only a few free pages and many cached
3947 // ones, the odds are that we won't find enough contiguous ones, so we skip
3948 // the first iteration in this case.
3949 int32 freePages = sUnreservedFreePages;
3950 int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3951
3952 for (;;) {
3953 if (alignmentMask != 0 || boundaryMask != 0) {
3954 page_num_t offsetStart = start + sPhysicalPageOffset;
3955
3956 // enforce alignment
3957 if ((offsetStart & alignmentMask) != 0)
3958 offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3959
3960 // enforce boundary
3961 if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3962 + length - 1)) & boundaryMask) != 0) {
3963 offsetStart = (offsetStart + length - 1) & boundaryMask;
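					// The run crosses a boundary: clearing the low bits of
					// the run's last page index moves offsetStart up to the
					// next boundary multiple.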
3964 }
3965
3966 start = offsetStart - sPhysicalPageOffset;
3967 }
3968
3969 if (start + length > end) {
3970 if (useCached == 0) {
3971 // The first iteration with free pages only was unsuccessful.
3972 // Try again also considering cached pages.
3973 useCached = 1;
3974 start = requestedStart;
3975 continue;
3976 }
3977
3978 dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3979 "length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3980 B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3981 " boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3982 end, restrictions->alignment, restrictions->boundary);
3983
3984 freeClearQueueLocker.Unlock();
3985 vm_page_unreserve_pages(&reservation);
3986 return NULL;
3987 }
3988
3989 bool foundRun = true;
3990 page_num_t i;
3991 for (i = 0; i < length; i++) {
3992 uint32 pageState = sPages[start + i].State();
3993 if (pageState != PAGE_STATE_FREE
3994 && pageState != PAGE_STATE_CLEAR
3995 && (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3996 foundRun = false;
3997 break;
3998 }
3999 }
4000
4001 if (foundRun) {
4002 i = allocate_page_run(start, length, flags, freeClearQueueLocker);
4003 if (i == length) {
4004 reservation.count = 0;
4005 return &sPages[start];
4006 }
4007
4008 // apparently a cached page couldn't be allocated -- skip it and
4009 // continue
4010 freeClearQueueLocker.Lock();
4011 }
4012
4013 start += i + 1;
4014 }
4015 }
4016
4017
4018 vm_page *
4019 vm_page_at_index(int32 index)
4020 {
4021 return &sPages[index];
4022 }
4023
4024
4025 vm_page *
4026 vm_lookup_page(page_num_t pageNumber)
4027 {
4028 if (pageNumber < sPhysicalPageOffset)
4029 return NULL;
4030
4031 pageNumber -= sPhysicalPageOffset;
4032 if (pageNumber >= sNumPages)
4033 return NULL;
4034
4035 return &sPages[pageNumber];
4036 }
4037
4038
4039 bool
4040 vm_page_is_dummy(struct vm_page *page)
4041 {
4042 return page < sPages || page >= sPages + sNumPages;
4043 }
4044
4045
4046 /*! Free the page that belonged to a certain cache.
4047 You can use vm_page_set_state() manually if you prefer, but only
4048 	if the page's state is not PAGE_STATE_MODIFIED.
4049
4050 \param cache The cache the page was previously owned by or NULL. The page
4051 must have been removed from its cache before calling this method in
4052 either case.
4053 \param page The page to free.
4054 	\param reservation If not NULL, the page count of the reservation will be
4055 		incremented, thus allowing another page to be allocated in place of the
4056 		freed one at a later time.
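
	A minimal sketch of the usual pattern, assuming the cache is locked and
	the page is no longer mapped:
	\code
	cache->RemovePage(page);
	vm_page_free_etc(cache, page, NULL);
	\endcode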
4057 */
4058 void
4059 vm_page_free_etc(VMCache* cache, vm_page* page,
4060 vm_page_reservation* reservation)
4061 {
4062 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4063 && page->State() != PAGE_STATE_CLEAR);
4064
4065 if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
4066 atomic_add(&sModifiedTemporaryPages, -1);
4067
4068 free_page(page, false);
4069 if (reservation == NULL)
4070 unreserve_pages(1);
4071 }
4072
4073
4074 void
4075 vm_page_set_state(vm_page *page, int pageState)
4076 {
4077 PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4078 && page->State() != PAGE_STATE_CLEAR);
4079
4080 if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
4081 free_page(page, pageState == PAGE_STATE_CLEAR);
4082 unreserve_pages(1);
4083 } else
4084 set_page_state(page, pageState);
4085 }
4086
4087
4088 /*!	Moves a page to either the tail or the head of its current queue,
4089 depending on \a tail.
4090 The page must have a cache and the cache must be locked!
4091 */
4092 void
4093 vm_page_requeue(struct vm_page *page, bool tail)
4094 {
4095 PAGE_ASSERT(page, page->Cache() != NULL);
4096 page->Cache()->AssertLocked();
4097 // DEBUG_PAGE_ACCESS_CHECK(page);
4098 // TODO: This assertion cannot be satisfied by idle_scan_active_pages()
4099 // when it requeues busy pages. The reason is that vm_soft_fault()
4100 // (respectively fault_get_page()) and the file cache keep newly
4101 // allocated pages accessed while they are reading them from disk. It
4102 // would probably be better to change that code and reenable this
4103 // check.
4104
4105 VMPageQueue *queue = NULL;
4106
4107 switch (page->State()) {
4108 case PAGE_STATE_ACTIVE:
4109 queue = &sActivePageQueue;
4110 break;
4111 case PAGE_STATE_INACTIVE:
4112 queue = &sInactivePageQueue;
4113 break;
4114 case PAGE_STATE_MODIFIED:
4115 queue = &sModifiedPageQueue;
4116 break;
4117 case PAGE_STATE_CACHED:
4118 queue = &sCachedPageQueue;
4119 break;
4120 case PAGE_STATE_FREE:
4121 case PAGE_STATE_CLEAR:
4122 panic("vm_page_requeue() called for free/clear page %p", page);
4123 return;
4124 case PAGE_STATE_WIRED:
4125 case PAGE_STATE_UNUSED:
4126 return;
4127 default:
4128 panic("vm_page_touch: vm_page %p in invalid state %d\n",
4129 page, page->State());
4130 break;
4131 }
4132
4133 queue->RequeueUnlocked(page, tail);
4134 }
4135
4136
4137 page_num_t
4138 vm_page_num_pages(void)
4139 {
4140 return sNumPages - sNonExistingPages;
4141 }
4142
4143
4144 /*! There is a subtle distinction between the page counts returned by
4145 this function and vm_page_num_free_pages():
4146 The latter returns the number of pages that are completely uncommitted,
4147 	whereas this one also counts pages that can be made available for use
4148 	by being reclaimed (IOW it factors in things like cached pages as
4149 	available).
4150 */
4151 page_num_t
4152 vm_page_num_available_pages(void)
4153 {
4154 return vm_available_memory() / B_PAGE_SIZE;
4155 }
4156
4157
4158 page_num_t
4159 vm_page_num_free_pages(void)
4160 {
4161 int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4162 return count > 0 ? count : 0;
4163 }
4164
4165
4166 page_num_t
4167 vm_page_num_unused_pages(void)
4168 {
4169 int32 count = sUnreservedFreePages;
4170 return count > 0 ? count : 0;
4171 }
4172
4173
4174 void
4175 vm_page_get_stats(system_info *info)
4176 {
4177 // Note: there's no locking protecting any of the queues or counters here,
4178 // so we run the risk of getting bogus values when evaluating them
4179 // throughout this function. As these stats are for informational purposes
4180 // only, it is not really worth introducing such locking. Therefore we just
4181 // ensure that we don't under- or overflow any of the values.
4182
4183 // The pages used for the block cache buffers. Those should not be counted
4184 // as used but as cached pages.
4185 // TODO: We should subtract the blocks that are in use ATM, since those
4186 // can't really be freed in a low memory situation.
4187 page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4188 info->block_cache_pages = blockCachePages;
4189
4190 	// Non-temporary modified pages are special as they represent pages that
4191 	// can be written back, so they could be freed if necessary, which
4192 	// basically makes them cached pages with a higher overhead. The
4193 // modified queue count is therefore split into temporary and non-temporary
4194 // counts that are then added to the corresponding number.
4195 page_num_t modifiedNonTemporaryPages
4196 = (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4197
4198 info->max_pages = vm_page_num_pages();
4199 info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4200 + blockCachePages;
4201
4202 // max_pages is composed of:
4203 // active + inactive + unused + wired + modified + cached + free + clear
4204 // So taking out the cached (including modified non-temporary), free and
4205 // clear ones leaves us with all used pages.
4206 uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4207 + sClearPageQueue.Count();
4208 info->used_pages = subtractPages > info->max_pages
4209 ? 0 : info->max_pages - subtractPages;
4210
4211 if (info->used_pages + info->cached_pages > info->max_pages) {
4212 // Something was shuffled around while we were summing up the counts.
4213 // Make the values sane, preferring the worse case of more used pages.
4214 info->cached_pages = info->max_pages - info->used_pages;
4215 }
4216
4217 info->page_faults = vm_num_page_faults();
4218 info->ignored_pages = sIgnoredPages;
4219
4220 // TODO: We don't consider pages used for page directories/tables yet.
4221 }
4222
4223
4224 /*! Returns the greatest address within the last page of accessible physical
4225 memory.
4226 The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4227 	means that the last page ends at exactly 4 GB.
4228 */
4229 phys_addr_t
4230 vm_page_max_address()
4231 {
4232 return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4233 }
4234
4235
4236 RANGE_MARKER_FUNCTION_END(vm_page)
4237