xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 106388ddbfdd00f4409c86bd3fe8d581bae532ec)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 32
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserve that an allocation of a certain priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
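// For illustration: an allocation at VM_PRIORITY_USER must leave the last
// VM_PAGE_RESERVE_USER unreserved pages untouched, a VM_PRIORITY_SYSTEM
// allocation must leave VM_PAGE_RESERVE_SYSTEM pages, while a VM_PRIORITY_VIP
// allocation may use the reserve completely (dontTouch == 0).
// reserve_some_pages() further below enforces this by refusing to reserve
// anything once sUnreservedFreePages would drop to or below the caller's
// dontTouch value.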
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The vm_page::usage_count increase an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The vm_page::usage_count decrease an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
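/*!	Simple wake-up condition used for the page writer and page daemon threads
	(sPageWriterCondition and sPageDaemonCondition below). A daemon blocks in
	Wait(); other threads call WakeUp() to activate it. The fActivated flag
	records a wake-up that arrives while the daemon isn't waiting, so that
	Wait() with \c clearActivated == \c false returns immediately in that case
	instead of losing the notification. A rough usage sketch (values are
	illustrative only):

		// daemon loop:
		sPageDaemonCondition.Wait(kIdleScanWaitInterval, true);
		// ... scan pages ...

		// some other thread that wants the daemon to run:
		sPageDaemonCondition.WakeUp();
*/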
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRIu32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRIu32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRIu32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static void
764 list_page(vm_page* page)
765 {
766 	kprintf("0x%08" B_PRIxADDR " ",
767 		(addr_t)(page->physical_page_number * B_PAGE_SIZE));
768 	switch (page->State()) {
769 		case PAGE_STATE_ACTIVE:   kprintf("A"); break;
770 		case PAGE_STATE_INACTIVE: kprintf("I"); break;
771 		case PAGE_STATE_MODIFIED: kprintf("M"); break;
772 		case PAGE_STATE_CACHED:   kprintf("C"); break;
773 		case PAGE_STATE_FREE:     kprintf("F"); break;
774 		case PAGE_STATE_CLEAR:    kprintf("L"); break;
775 		case PAGE_STATE_WIRED:    kprintf("W"); break;
776 		case PAGE_STATE_UNUSED:   kprintf("-"); break;
777 	}
778 	kprintf(" ");
779 	if (page->busy)         kprintf("B"); else kprintf("-");
780 	if (page->busy_writing) kprintf("W"); else kprintf("-");
781 	if (page->accessed)     kprintf("A"); else kprintf("-");
782 	if (page->modified)     kprintf("M"); else kprintf("-");
783 	if (page->unused)       kprintf("U"); else kprintf("-");
784 
785 	kprintf(" usage:%3u", page->usage_count);
786 	kprintf(" wired:%5u", page->WiredCount());
787 
788 	bool first = true;
789 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
790 	vm_page_mapping* mapping;
791 	while ((mapping = iterator.Next()) != NULL) {
792 		if (first) {
793 			kprintf(": ");
794 			first = false;
795 		} else
796 			kprintf(", ");
797 
798 		kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
799 		mapping = mapping->page_link.next;
800 	}
801 }
802 
803 
804 static int
805 dump_page_list(int argc, char **argv)
806 {
807 	kprintf("page table:\n");
808 	for (page_num_t i = 0; i < sNumPages; i++) {
809 		if (sPages[i].State() != PAGE_STATE_UNUSED) {
810 			list_page(&sPages[i]);
811 			kprintf("\n");
812 		}
813 	}
814 	kprintf("end of page table\n");
815 
816 	return 0;
817 }
818 
819 
820 static int
821 find_page(int argc, char **argv)
822 {
823 	struct vm_page *page;
824 	addr_t address;
825 	int32 index = 1;
826 	int i;
827 
828 	struct {
829 		const char*	name;
830 		VMPageQueue*	queue;
831 	} pageQueueInfos[] = {
832 		{ "free",		&sFreePageQueue },
833 		{ "clear",		&sClearPageQueue },
834 		{ "modified",	&sModifiedPageQueue },
835 		{ "active",		&sActivePageQueue },
836 		{ "inactive",	&sInactivePageQueue },
837 		{ "cached",		&sCachedPageQueue },
838 		{ NULL, NULL }
839 	};
840 
841 	if (argc < 2
842 		|| strlen(argv[index]) <= 2
843 		|| argv[index][0] != '0'
844 		|| argv[index][1] != 'x') {
845 		kprintf("usage: find_page <address>\n");
846 		return 0;
847 	}
848 
849 	address = strtoul(argv[index], NULL, 0);
850 	page = (vm_page*)address;
851 
852 	for (i = 0; pageQueueInfos[i].name; i++) {
853 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
854 		while (vm_page* p = it.Next()) {
855 			if (p == page) {
856 				kprintf("found page %p in queue %p (%s)\n", page,
857 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
858 				return 0;
859 			}
860 		}
861 	}
862 
863 	kprintf("page %p isn't in any queue\n", page);
864 
865 	return 0;
866 }
867 
868 
869 const char *
870 page_state_to_string(int state)
871 {
872 	switch(state) {
873 		case PAGE_STATE_ACTIVE:
874 			return "active";
875 		case PAGE_STATE_INACTIVE:
876 			return "inactive";
877 		case PAGE_STATE_MODIFIED:
878 			return "modified";
879 		case PAGE_STATE_CACHED:
880 			return "cached";
881 		case PAGE_STATE_FREE:
882 			return "free";
883 		case PAGE_STATE_CLEAR:
884 			return "clear";
885 		case PAGE_STATE_WIRED:
886 			return "wired";
887 		case PAGE_STATE_UNUSED:
888 			return "unused";
889 		default:
890 			return "unknown";
891 	}
892 }
893 
894 
895 static int
896 dump_page_long(int argc, char **argv)
897 {
898 	bool addressIsPointer = true;
899 	bool physical = false;
900 	bool searchMappings = false;
901 	int32 index = 1;
902 
903 	while (index < argc) {
904 		if (argv[index][0] != '-')
905 			break;
906 
907 		if (!strcmp(argv[index], "-p")) {
908 			addressIsPointer = false;
909 			physical = true;
910 		} else if (!strcmp(argv[index], "-v")) {
911 			addressIsPointer = false;
912 		} else if (!strcmp(argv[index], "-m")) {
913 			searchMappings = true;
914 		} else {
915 			print_debugger_command_usage(argv[0]);
916 			return 0;
917 		}
918 
919 		index++;
920 	}
921 
922 	if (index + 1 != argc) {
923 		print_debugger_command_usage(argv[0]);
924 		return 0;
925 	}
926 
927 	uint64 value;
928 	if (!evaluate_debug_expression(argv[index], &value, false))
929 		return 0;
930 
931 	uint64 pageAddress = value;
932 	struct vm_page* page;
933 
934 	if (addressIsPointer) {
935 		page = (struct vm_page *)(addr_t)pageAddress;
936 	} else {
937 		if (!physical) {
938 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
939 
940 			if (debug_get_debugged_thread()->team->address_space != NULL)
941 				addressSpace = debug_get_debugged_thread()->team->address_space;
942 
943 			uint32 flags = 0;
944 			phys_addr_t physicalAddress;
945 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
946 					&physicalAddress, &flags) != B_OK
947 				|| (flags & PAGE_PRESENT) == 0) {
948 				kprintf("Virtual address not mapped to a physical page in this "
949 					"address space.\n");
950 				return 0;
951 			}
952 			pageAddress = physicalAddress;
953 		}
954 
955 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
956 	}
957 
958 	kprintf("PAGE: %p\n", page);
959 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
960 		page->queue_link.previous);
961 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
962 		page->physical_page_number);
963 	kprintf("cache:           %p\n", page->Cache());
964 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
965 	kprintf("cache_next:      %p\n", page->cache_next);
966 	kprintf("state:           %s\n", page_state_to_string(page->State()));
967 	kprintf("wired_count:     %d\n", page->WiredCount());
968 	kprintf("usage_count:     %d\n", page->usage_count);
969 	kprintf("busy:            %d\n", page->busy);
970 	kprintf("busy_writing:    %d\n", page->busy_writing);
971 	kprintf("accessed:        %d\n", page->accessed);
972 	kprintf("modified:        %d\n", page->modified);
973 	#if DEBUG_PAGE_QUEUE
974 		kprintf("queue:           %p\n", page->queue);
975 	#endif
976 	#if DEBUG_PAGE_ACCESS
977 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
978 	#endif
979 	kprintf("area mappings:\n");
980 
981 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
982 	vm_page_mapping *mapping;
983 	while ((mapping = iterator.Next()) != NULL) {
984 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
985 		mapping = mapping->page_link.next;
986 	}
987 
988 	if (searchMappings) {
989 		kprintf("all mappings:\n");
990 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
991 		while (addressSpace != NULL) {
992 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
993 			for (addr_t address = addressSpace->Base(); pageCount != 0;
994 					address += B_PAGE_SIZE, pageCount--) {
995 				phys_addr_t physicalAddress;
996 				uint32 flags = 0;
997 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
998 						&physicalAddress, &flags) == B_OK
999 					&& (flags & PAGE_PRESENT) != 0
1000 					&& physicalAddress / B_PAGE_SIZE
1001 						== page->physical_page_number) {
1002 					VMArea* area = addressSpace->LookupArea(address);
1003 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
1004 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
1005 						area != NULL ? area->id : -1, address,
1006 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
1007 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
1008 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
1009 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
1010 				}
1011 			}
1012 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
1013 		}
1014 	}
1015 
1016 	set_debug_variable("_cache", (addr_t)page->Cache());
1017 	#if DEBUG_PAGE_ACCESS
1018 		set_debug_variable("_accessor", page->accessing_thread);
1019 	#endif
1020 
1021 	return 0;
1022 }
1023 
1024 
1025 static int
1026 dump_page_queue(int argc, char **argv)
1027 {
1028 	struct VMPageQueue *queue;
1029 
1030 	if (argc < 2) {
1031 		kprintf("usage: page_queue <address/name> [list]\n");
1032 		return 0;
1033 	}
1034 
1035 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
1036 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
1037 	else if (!strcmp(argv[1], "free"))
1038 		queue = &sFreePageQueue;
1039 	else if (!strcmp(argv[1], "clear"))
1040 		queue = &sClearPageQueue;
1041 	else if (!strcmp(argv[1], "modified"))
1042 		queue = &sModifiedPageQueue;
1043 	else if (!strcmp(argv[1], "active"))
1044 		queue = &sActivePageQueue;
1045 	else if (!strcmp(argv[1], "inactive"))
1046 		queue = &sInactivePageQueue;
1047 	else if (!strcmp(argv[1], "cached"))
1048 		queue = &sCachedPageQueue;
1049 	else {
1050 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
1051 		return 0;
1052 	}
1053 
1054 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
1055 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
1056 		queue->Count());
1057 
1058 	if (argc == 3) {
1059 		struct vm_page *page = queue->Head();
1060 
1061 		kprintf("page        cache       type       state  wired  usage\n");
1062 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1063 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1064 				vm_cache_type_to_string(page->Cache()->type),
1065 				page_state_to_string(page->State()),
1066 				page->WiredCount(), page->usage_count);
1067 		}
1068 	}
1069 	return 0;
1070 }
1071 
1072 
1073 static int
1074 dump_page_stats(int argc, char **argv)
1075 {
1076 	page_num_t swappableModified = 0;
1077 	page_num_t swappableModifiedInactive = 0;
1078 
1079 	size_t counter[8];
1080 	size_t busyCounter[8];
1081 	memset(counter, 0, sizeof(counter));
1082 	memset(busyCounter, 0, sizeof(busyCounter));
1083 
1084 	struct page_run {
1085 		page_num_t	start;
1086 		page_num_t	end;
1087 
1088 		page_num_t Length() const	{ return end - start; }
1089 	};
1090 
1091 	page_run currentFreeRun = { 0, 0 };
1092 	page_run currentCachedRun = { 0, 0 };
1093 	page_run longestFreeRun = { 0, 0 };
1094 	page_run longestCachedRun = { 0, 0 };
1095 
1096 	for (page_num_t i = 0; i < sNumPages; i++) {
1097 		if (sPages[i].State() > 7) {
1098 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1099 				&sPages[i]);
1100 		}
1101 
1102 		uint32 pageState = sPages[i].State();
1103 
1104 		counter[pageState]++;
1105 		if (sPages[i].busy)
1106 			busyCounter[pageState]++;
1107 
1108 		if (pageState == PAGE_STATE_MODIFIED
1109 			&& sPages[i].Cache() != NULL
1110 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1111 			swappableModified++;
1112 			if (sPages[i].usage_count == 0)
1113 				swappableModifiedInactive++;
1114 		}
1115 
1116 		// track free and cached pages runs
1117 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1118 			currentFreeRun.end = i + 1;
1119 			currentCachedRun.end = i + 1;
1120 		} else {
1121 			if (currentFreeRun.Length() > longestFreeRun.Length())
1122 				longestFreeRun = currentFreeRun;
1123 			currentFreeRun.start = currentFreeRun.end = i + 1;
1124 
1125 			if (pageState == PAGE_STATE_CACHED) {
1126 				currentCachedRun.end = i + 1;
1127 			} else {
1128 				if (currentCachedRun.Length() > longestCachedRun.Length())
1129 					longestCachedRun = currentCachedRun;
1130 				currentCachedRun.start = currentCachedRun.end = i + 1;
1131 			}
1132 		}
1133 	}
1134 
1135 	kprintf("page stats:\n");
1136 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1137 
1138 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1139 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1140 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1141 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1142 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1143 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1144 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1145 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1146 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1147 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1148 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1149 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1150 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1151 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1152 
1153 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1154 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1155 		sUnsatisfiedPageReservations);
1156 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1157 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1158 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1159 		sPages[longestFreeRun.start].physical_page_number);
1160 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1161 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1162 		sPages[longestCachedRun.start].physical_page_number);
1163 
1164 	kprintf("waiting threads:\n");
1165 	for (PageReservationWaiterList::Iterator it
1166 			= sPageReservationWaiters.GetIterator();
1167 		PageReservationWaiter* waiter = it.Next();) {
1168 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1169 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1170 			waiter->missing, waiter->dontTouch);
1171 	}
1172 
1173 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1174 		sFreePageQueue.Count());
1175 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1176 		sClearPageQueue.Count());
1177 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1178 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1179 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1180 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1181 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1182 		&sActivePageQueue, sActivePageQueue.Count());
1183 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1184 		&sInactivePageQueue, sInactivePageQueue.Count());
1185 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1186 		&sCachedPageQueue, sCachedPageQueue.Count());
1187 	return 0;
1188 }
1189 
1190 
1191 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1192 
1193 static caller_info*
1194 get_caller_info(addr_t caller)
1195 {
1196 	// find the caller info
1197 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1198 		if (caller == sCallerInfoTable[i].caller)
1199 			return &sCallerInfoTable[i];
1200 	}
1201 
1202 	// not found, add a new entry, if there are free slots
1203 	if (sCallerInfoCount >= kCallerInfoTableSize)
1204 		return NULL;
1205 
1206 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1207 	info->caller = caller;
1208 	info->count = 0;
1209 
1210 	return info;
1211 }
1212 
1213 
1214 static int
1215 caller_info_compare_count(const void* _a, const void* _b)
1216 {
1217 	const caller_info* a = (const caller_info*)_a;
1218 	const caller_info* b = (const caller_info*)_b;
1219 	return (int)(b->count - a->count);
1220 }
1221 
1222 
1223 static int
1224 dump_page_allocations_per_caller(int argc, char** argv)
1225 {
1226 	bool resetAllocationInfos = false;
1227 	bool printDetails = false;
1228 	addr_t caller = 0;
1229 
1230 	for (int32 i = 1; i < argc; i++) {
1231 		if (strcmp(argv[i], "-d") == 0) {
1232 			uint64 callerAddress;
1233 			if (++i >= argc
1234 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1235 				print_debugger_command_usage(argv[0]);
1236 				return 0;
1237 			}
1238 
1239 			caller = callerAddress;
1240 			printDetails = true;
1241 		} else if (strcmp(argv[i], "-r") == 0) {
1242 			resetAllocationInfos = true;
1243 		} else {
1244 			print_debugger_command_usage(argv[0]);
1245 			return 0;
1246 		}
1247 	}
1248 
1249 	sCallerInfoCount = 0;
1250 
1251 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1252 	AllocationDetailPrinterCallback detailsCallback(caller);
1253 	AllocationTrackingCallback& callback = printDetails
1254 		? (AllocationTrackingCallback&)detailsCallback
1255 		: (AllocationTrackingCallback&)collectorCallback;
1256 
1257 	for (page_num_t i = 0; i < sNumPages; i++)
1258 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1259 
1260 	if (printDetails)
1261 		return 0;
1262 
1263 	// sort the array
1264 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1265 		&caller_info_compare_count);
1266 
1267 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1268 
1269 	size_t totalAllocationCount = 0;
1270 
1271 	kprintf("     count      caller\n");
1272 	kprintf("----------------------------------\n");
1273 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1274 		caller_info& info = sCallerInfoTable[i];
1275 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1276 
1277 		const char* symbol;
1278 		const char* imageName;
1279 		bool exactMatch;
1280 		addr_t baseAddress;
1281 
1282 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1283 				&imageName, &exactMatch) == B_OK) {
1284 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1285 				info.caller - baseAddress, imageName,
1286 				exactMatch ? "" : " (nearest)");
1287 		} else
1288 			kprintf("\n");
1289 
1290 		totalAllocationCount += info.count;
1291 	}
1292 
1293 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1294 		totalAllocationCount);
1295 
1296 	return 0;
1297 }
1298 
1299 
1300 static int
1301 dump_page_allocation_infos(int argc, char** argv)
1302 {
1303 	page_num_t pageFilter = 0;
1304 	team_id teamFilter = -1;
1305 	thread_id threadFilter = -1;
1306 	bool printStackTraces = false;
1307 
1308 	for (int32 i = 1; i < argc; i++) {
1309 		if (strcmp(argv[i], "--stacktrace") == 0)
1310 			printStackTraces = true;
1311 		else if (strcmp(argv[i], "-p") == 0) {
1312 			uint64 pageNumber;
1313 			if (++i >= argc
1314 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1315 				print_debugger_command_usage(argv[0]);
1316 				return 0;
1317 			}
1318 
1319 			pageFilter = pageNumber;
1320 		} else if (strcmp(argv[i], "--team") == 0) {
1321 			uint64 team;
1322 			if (++i >= argc
1323 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1324 				print_debugger_command_usage(argv[0]);
1325 				return 0;
1326 			}
1327 
1328 			teamFilter = team;
1329 		} else if (strcmp(argv[i], "--thread") == 0) {
1330 			uint64 thread;
1331 			if (++i >= argc
1332 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1333 				print_debugger_command_usage(argv[0]);
1334 				return 0;
1335 			}
1336 
1337 			threadFilter = thread;
1338 		} else {
1339 			print_debugger_command_usage(argv[0]);
1340 			return 0;
1341 		}
1342 	}
1343 
1344 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1345 		teamFilter, threadFilter);
1346 
1347 	for (page_num_t i = 0; i < sNumPages; i++)
1348 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1349 
1350 	return 0;
1351 }
1352 
1353 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1354 
1355 
1356 #ifdef TRACK_PAGE_USAGE_STATS
1357 
1358 static void
1359 track_page_usage(vm_page* page)
1360 {
1361 	if (page->WiredCount() == 0) {
1362 		sNextPageUsage[(int32)page->usage_count + 128]++;
1363 		sNextPageUsagePageCount++;
1364 	}
1365 }
1366 
1367 
1368 static void
1369 update_page_usage_stats()
1370 {
1371 	std::swap(sPageUsage, sNextPageUsage);
1372 	sPageUsagePageCount = sNextPageUsagePageCount;
1373 
1374 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1375 	sNextPageUsagePageCount = 0;
1376 
1377 	// compute average
1378 	if (sPageUsagePageCount > 0) {
1379 		int64 sum = 0;
1380 		for (int32 i = 0; i < 256; i++)
1381 			sum += (int64)sPageUsage[i] * (i - 128);
1382 
1383 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1384 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1385 	}
1386 }
1387 
1388 
1389 static int
1390 dump_page_usage_stats(int argc, char** argv)
1391 {
1392 	kprintf("distribution of page usage counts (%lu pages):",
1393 		sPageUsagePageCount);
1394 
1395 	int64 sum = 0;
1396 	for (int32 i = 0; i < 256; i++) {
1397 		if (i % 8 == 0)
1398 			kprintf("\n%4ld:", i - 128);
1399 
1400 		int64 count = sPageUsage[i];
1401 		sum += count * (i - 128);
1402 
1403 		kprintf("  %9llu", count);
1404 	}
1405 
1406 	kprintf("\n\n");
1407 
1408 	kprintf("average usage count: %f\n",
1409 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1410 
1411 	return 0;
1412 }
1413 
1414 #endif	// TRACK_PAGE_USAGE_STATS
1415 
1416 
1417 // #pragma mark - vm_page
1418 
1419 
1420 inline void
1421 vm_page::InitState(uint8 newState)
1422 {
1423 	state = newState;
1424 }
1425 
1426 
1427 inline void
1428 vm_page::SetState(uint8 newState)
1429 {
1430 	TPS(SetPageState(this, newState));
1431 
1432 	state = newState;
1433 }
1434 
1435 
1436 // #pragma mark -
1437 
1438 
1439 static void
1440 get_page_stats(page_stats& _pageStats)
1441 {
1442 	_pageStats.totalFreePages = sUnreservedFreePages;
1443 	_pageStats.cachedPages = sCachedPageQueue.Count();
1444 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1445 	// TODO: We don't get an actual snapshot here!
1446 }
1447 
1448 
1449 static bool
1450 do_active_paging(const page_stats& pageStats)
1451 {
1452 	return pageStats.totalFreePages + pageStats.cachedPages
1453 		< pageStats.unsatisfiedReservations
1454 			+ (int32)sFreeOrCachedPagesTarget;
1455 }
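// Example with illustrative numbers: if sFreeOrCachedPagesTarget is 1024 and
// there are currently 300 free pages, 500 cached pages, and reservations
// waiting for 400 more pages, then 300 + 500 < 400 + 1024, so the system is
// considered to be actively paging and the page daemon must produce more
// free/cached pages.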
1456 
1457 
1458 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1459 	\a count. Doesn't touch the last \a dontTouch pages of
1460 	\c sUnreservedFreePages, though.
1461 	\return The number of actually reserved pages.
1462 */
1463 static uint32
1464 reserve_some_pages(uint32 count, uint32 dontTouch)
1465 {
1466 	while (true) {
1467 		int32 freePages = atomic_get(&sUnreservedFreePages);
1468 		if (freePages <= (int32)dontTouch)
1469 			return 0;
1470 
1471 		int32 toReserve = std::min(count, freePages - dontTouch);
1472 		if (atomic_test_and_set(&sUnreservedFreePages,
1473 					freePages - toReserve, freePages)
1474 				== freePages) {
1475 			return toReserve;
1476 		}
1477 
1478 		// the count changed in the meantime -- retry
1479 	}
1480 }
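// The loop above is a lock-free compare-and-swap retry: atomic_test_and_set()
// only stores the new value if sUnreservedFreePages still equals the
// freePages snapshot the computation was based on; otherwise we recompute.
// A typical call, as used by the page scrubber below, is
//
//	int32 reserved = reserve_some_pages(SCRUB_SIZE,
//		kPageReserveForPriority[VM_PRIORITY_USER]);
//
// where a result of 0 means the reserve must not be touched at that priority.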
1481 
1482 
1483 static void
1484 wake_up_page_reservation_waiters()
1485 {
1486 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1487 
1488 	// TODO: If this is a low priority thread, we might want to disable
1489 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1490 	// high priority threads might be kept waiting while a medium priority thread
1491 	// prevents us from running.
1492 
1493 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1494 		int32 reserved = reserve_some_pages(waiter->missing,
1495 			waiter->dontTouch);
1496 		if (reserved == 0)
1497 			return;
1498 
1499 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1500 		waiter->missing -= reserved;
1501 
1502 		if (waiter->missing > 0)
1503 			return;
1504 
1505 		sPageReservationWaiters.Remove(waiter);
1506 
1507 		thread_unblock(waiter->thread, B_OK);
1508 	}
1509 }
1510 
1511 
1512 static inline void
1513 unreserve_pages(uint32 count)
1514 {
1515 	atomic_add(&sUnreservedFreePages, count);
1516 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1517 		wake_up_page_reservation_waiters();
1518 }
1519 
1520 
1521 static void
1522 free_page(vm_page* page, bool clear)
1523 {
1524 	DEBUG_PAGE_ACCESS_CHECK(page);
1525 
1526 	PAGE_ASSERT(page, !page->IsMapped());
1527 
1528 	VMPageQueue* fromQueue;
1529 
1530 	switch (page->State()) {
1531 		case PAGE_STATE_ACTIVE:
1532 			fromQueue = &sActivePageQueue;
1533 			break;
1534 		case PAGE_STATE_INACTIVE:
1535 			fromQueue = &sInactivePageQueue;
1536 			break;
1537 		case PAGE_STATE_MODIFIED:
1538 			fromQueue = &sModifiedPageQueue;
1539 			break;
1540 		case PAGE_STATE_CACHED:
1541 			fromQueue = &sCachedPageQueue;
1542 			break;
1543 		case PAGE_STATE_FREE:
1544 		case PAGE_STATE_CLEAR:
1545 			panic("free_page(): page %p already free", page);
1546 			return;
1547 		case PAGE_STATE_WIRED:
1548 		case PAGE_STATE_UNUSED:
1549 			fromQueue = NULL;
1550 			break;
1551 		default:
1552 			panic("free_page(): page %p in invalid state %d",
1553 				page, page->State());
1554 			return;
1555 	}
1556 
1557 	if (page->CacheRef() != NULL)
1558 		panic("to be freed page %p has cache", page);
1559 	if (page->IsMapped())
1560 		panic("to be freed page %p has mappings", page);
1561 
1562 	if (fromQueue != NULL)
1563 		fromQueue->RemoveUnlocked(page);
1564 
1565 	TA(FreePage(page->physical_page_number));
1566 
1567 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1568 	page->allocation_tracking_info.Clear();
1569 #endif
1570 
1571 	ReadLocker locker(sFreePageQueuesLock);
1572 
1573 	DEBUG_PAGE_ACCESS_END(page);
1574 
1575 	if (clear) {
1576 		page->SetState(PAGE_STATE_CLEAR);
1577 		sClearPageQueue.PrependUnlocked(page);
1578 	} else {
1579 		page->SetState(PAGE_STATE_FREE);
1580 		sFreePageQueue.PrependUnlocked(page);
1581 		sFreePageCondition.NotifyAll();
1582 	}
1583 
1584 	locker.Unlock();
1585 }
1586 
1587 
1588 /*!	The caller must make sure that no-one else tries to change the page's state
1589 	while the function is called. If the page has a cache, this can be done by
1590 	locking the cache.
1591 */
1592 static void
1593 set_page_state(vm_page *page, int pageState)
1594 {
1595 	DEBUG_PAGE_ACCESS_CHECK(page);
1596 
1597 	if (pageState == page->State())
1598 		return;
1599 
1600 	VMPageQueue* fromQueue;
1601 
1602 	switch (page->State()) {
1603 		case PAGE_STATE_ACTIVE:
1604 			fromQueue = &sActivePageQueue;
1605 			break;
1606 		case PAGE_STATE_INACTIVE:
1607 			fromQueue = &sInactivePageQueue;
1608 			break;
1609 		case PAGE_STATE_MODIFIED:
1610 			fromQueue = &sModifiedPageQueue;
1611 			break;
1612 		case PAGE_STATE_CACHED:
1613 			fromQueue = &sCachedPageQueue;
1614 			break;
1615 		case PAGE_STATE_FREE:
1616 		case PAGE_STATE_CLEAR:
1617 			panic("set_page_state(): page %p is free/clear", page);
1618 			return;
1619 		case PAGE_STATE_WIRED:
1620 		case PAGE_STATE_UNUSED:
1621 			fromQueue = NULL;
1622 			break;
1623 		default:
1624 			panic("set_page_state(): page %p in invalid state %d",
1625 				page, page->State());
1626 			return;
1627 	}
1628 
1629 	VMPageQueue* toQueue;
1630 
1631 	switch (pageState) {
1632 		case PAGE_STATE_ACTIVE:
1633 			toQueue = &sActivePageQueue;
1634 			break;
1635 		case PAGE_STATE_INACTIVE:
1636 			toQueue = &sInactivePageQueue;
1637 			break;
1638 		case PAGE_STATE_MODIFIED:
1639 			toQueue = &sModifiedPageQueue;
1640 			break;
1641 		case PAGE_STATE_CACHED:
1642 			PAGE_ASSERT(page, !page->IsMapped());
1643 			PAGE_ASSERT(page, !page->modified);
1644 			toQueue = &sCachedPageQueue;
1645 			break;
1646 		case PAGE_STATE_FREE:
1647 		case PAGE_STATE_CLEAR:
1648 			panic("set_page_state(): target state is free/clear");
1649 			return;
1650 		case PAGE_STATE_WIRED:
1651 		case PAGE_STATE_UNUSED:
1652 			toQueue = NULL;
1653 			break;
1654 		default:
1655 			panic("set_page_state(): invalid target state %d", pageState);
1656 			return;
1657 	}
1658 
1659 	VMCache* cache = page->Cache();
1660 	if (cache != NULL && cache->temporary) {
1661 		if (pageState == PAGE_STATE_MODIFIED)
1662 			atomic_add(&sModifiedTemporaryPages, 1);
1663 		else if (page->State() == PAGE_STATE_MODIFIED)
1664 			atomic_add(&sModifiedTemporaryPages, -1);
1665 	}
1666 
1667 	// move the page
1668 	if (toQueue == fromQueue) {
1669 		// Note: Theoretically we are required to lock when changing the page
1670 		// state, even if we don't change the queue. We actually don't have to
1671 		// do this, though, since different page states exist only for the active
1672 		// queue, and active pages have a cache that must be locked at
1673 		// this point. So we rely on the fact that everyone must lock the cache
1674 		// before trying to change/interpret the page state.
1675 		PAGE_ASSERT(page, cache != NULL);
1676 		cache->AssertLocked();
1677 		page->SetState(pageState);
1678 	} else {
1679 		if (fromQueue != NULL)
1680 			fromQueue->RemoveUnlocked(page);
1681 
1682 		page->SetState(pageState);
1683 
1684 		if (toQueue != NULL)
1685 			toQueue->AppendUnlocked(page);
1686 	}
1687 }
1688 
1689 
1690 /*! Moves a previously modified page into a now appropriate queue.
1691 	The page queues must not be locked.
1692 */
1693 static void
1694 move_page_to_appropriate_queue(vm_page *page)
1695 {
1696 	DEBUG_PAGE_ACCESS_CHECK(page);
1697 
1698 	// Note, this logic must be in sync with what the page daemon does.
1699 	int32 state;
1700 	if (page->IsMapped())
1701 		state = PAGE_STATE_ACTIVE;
1702 	else if (page->modified)
1703 		state = PAGE_STATE_MODIFIED;
1704 	else
1705 		state = PAGE_STATE_CACHED;
1706 
1707 // TODO: If free + cached pages are low, we might directly want to free the
1708 // page.
1709 	set_page_state(page, state);
1710 }
1711 
1712 
1713 static void
1714 clear_page(struct vm_page *page)
1715 {
1716 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1717 		B_PAGE_SIZE);
1718 }
1719 
1720 
1721 static status_t
1722 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1723 {
1724 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1725 		B_PRIxPHYSADDR "\n", startPage, length));
1726 
1727 	if (sPhysicalPageOffset > startPage) {
1728 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1729 			"): start page is before free list\n", startPage, length);
1730 		if (sPhysicalPageOffset - startPage >= length)
1731 			return B_OK;
1732 		length -= sPhysicalPageOffset - startPage;
1733 		startPage = sPhysicalPageOffset;
1734 	}
1735 
1736 	startPage -= sPhysicalPageOffset;
1737 
1738 	if (startPage + length > sNumPages) {
1739 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1740 			"): range would extend past free list\n", startPage, length);
1741 		if (startPage >= sNumPages)
1742 			return B_OK;
1743 		length = sNumPages - startPage;
1744 	}
1745 
1746 	WriteLocker locker(sFreePageQueuesLock);
1747 
1748 	for (page_num_t i = 0; i < length; i++) {
1749 		vm_page *page = &sPages[startPage + i];
1750 		switch (page->State()) {
1751 			case PAGE_STATE_FREE:
1752 			case PAGE_STATE_CLEAR:
1753 			{
1754 // TODO: This violates the page reservation policy, since we remove pages from
1755 // the free/clear queues without having reserved them before. This should happen
1756 // in the early boot process only, though.
1757 				DEBUG_PAGE_ACCESS_START(page);
1758 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1759 					? sFreePageQueue : sClearPageQueue;
1760 				queue.Remove(page);
1761 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1762 				page->busy = false;
1763 				atomic_add(&sUnreservedFreePages, -1);
1764 				DEBUG_PAGE_ACCESS_END(page);
1765 				break;
1766 			}
1767 			case PAGE_STATE_WIRED:
1768 			case PAGE_STATE_UNUSED:
1769 				break;
1770 			case PAGE_STATE_ACTIVE:
1771 			case PAGE_STATE_INACTIVE:
1772 			case PAGE_STATE_MODIFIED:
1773 			case PAGE_STATE_CACHED:
1774 			default:
1775 				// uh
1776 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1777 					" in non-free state %d!\n", startPage + i, page->State());
1778 				break;
1779 		}
1780 	}
1781 
1782 	return B_OK;
1783 }
1784 
1785 
1786 /*!
1787 	This is a background thread that wakes up when its condition is notified
1788 	and moves some pages from the free queue over to the clear queue.
1789 	Given enough time, it will clear out all pages from the free queue - we
1790 	could probably slow it down after having reached a certain threshold.
1791 */
1792 static int32
1793 page_scrubber(void *unused)
1794 {
1795 	(void)(unused);
1796 
1797 	TRACE(("page_scrubber starting...\n"));
1798 
1799 	ConditionVariableEntry entry;
1800 	for (;;) {
1801 		while (sFreePageQueue.Count() == 0
1802 				|| atomic_get(&sUnreservedFreePages)
1803 					< (int32)sFreePagesTarget) {
1804 			sFreePageCondition.Add(&entry);
1805 			entry.Wait();
1806 		}
1807 
1808 		// Since we temporarily remove pages from the free pages reserve,
1809 		// we must make sure we don't cause a violation of the page
1810 		// reservation guarantee. The following is usually stricter than
1811 		// necessary, because we don't have information on how many of the
1812 		// reserved pages have already been allocated.
1813 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1814 			kPageReserveForPriority[VM_PRIORITY_USER]);
1815 		if (reserved == 0)
1816 			continue;
1817 
1818 		// get some pages from the free queue, mostly sorted
1819 		ReadLocker locker(sFreePageQueuesLock);
1820 
1821 		vm_page *page[SCRUB_SIZE];
1822 		int32 scrubCount = 0;
1823 		for (int32 i = 0; i < reserved; i++) {
1824 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1825 			if (page[i] == NULL)
1826 				break;
1827 
1828 			DEBUG_PAGE_ACCESS_START(page[i]);
1829 
1830 			page[i]->SetState(PAGE_STATE_ACTIVE);
1831 			page[i]->busy = true;
1832 			scrubCount++;
1833 		}
1834 
1835 		locker.Unlock();
1836 
1837 		if (scrubCount == 0) {
1838 			unreserve_pages(reserved);
1839 			continue;
1840 		}
1841 
1842 		TA(ScrubbingPages(scrubCount));
1843 
1844 		// clear them
1845 		for (int32 i = 0; i < scrubCount; i++)
1846 			clear_page(page[i]);
1847 
1848 		locker.Lock();
1849 
1850 		// and put them into the clear queue
1851 		// process the array reversed when prepending to preserve sequential order
1852 		for (int32 i = scrubCount - 1; i >= 0; i--) {
1853 			page[i]->SetState(PAGE_STATE_CLEAR);
1854 			page[i]->busy = false;
1855 			DEBUG_PAGE_ACCESS_END(page[i]);
1856 			sClearPageQueue.PrependUnlocked(page[i]);
1857 		}
1858 
1859 		locker.Unlock();
1860 
1861 		unreserve_pages(reserved);
1862 
1863 		TA(ScrubbedPages(scrubCount));
1864 
1865 		// wait at least 100ms between runs
1866 		snooze(100 * 1000);
1867 	}
1868 
1869 	return 0;
1870 }
1871 
1872 
1873 static void
1874 init_page_marker(vm_page &marker)
1875 {
1876 	marker.SetCacheRef(NULL);
1877 	marker.InitState(PAGE_STATE_UNUSED);
1878 	marker.busy = true;
1879 #if DEBUG_PAGE_QUEUE
1880 	marker.queue = NULL;
1881 #endif
1882 #if DEBUG_PAGE_ACCESS
1883 	marker.accessing_thread = thread_get_current_thread_id();
1884 #endif
1885 }
1886 
1887 
1888 static void
1889 remove_page_marker(struct vm_page &marker)
1890 {
1891 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1892 
1893 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1894 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1895 
1896 	marker.SetState(PAGE_STATE_UNUSED);
1897 }
1898 
1899 
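/*!	Returns the next non-busy page from the modified queue, moving every page
	it inspects to the back of the queue so that successive calls cycle
	through the entire queue. \a maxPagesToSee limits how many pages are
	inspected in total (it is decremented accordingly), which guarantees
	termination even if all remaining pages are busy. Returns \c NULL if the
	queue is empty or the budget is used up.
*/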
1900 static vm_page*
1901 next_modified_page(page_num_t& maxPagesToSee)
1902 {
1903 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1904 
1905 	while (maxPagesToSee > 0) {
1906 		vm_page* page = sModifiedPageQueue.Head();
1907 		if (page == NULL)
1908 			return NULL;
1909 
1910 		sModifiedPageQueue.Requeue(page, true);
1911 
1912 		maxPagesToSee--;
1913 
1914 		if (!page->busy)
1915 			return page;
1916 	}
1917 
1918 	return NULL;
1919 }
1920 
1921 
1922 // #pragma mark -
1923 
1924 
1925 class PageWriteTransfer;
1926 class PageWriteWrapper;
1927 
1928 
1929 class PageWriterRun {
1930 public:
1931 	status_t Init(uint32 maxPages);
1932 
1933 	void PrepareNextRun();
1934 	void AddPage(vm_page* page);
1935 	uint32 Go();
1936 
1937 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1938 		bool partialTransfer, size_t bytesTransferred);
1939 
1940 private:
1941 	uint32				fMaxPages;
1942 	uint32				fWrapperCount;
1943 	uint32				fTransferCount;
1944 	int32				fPendingTransfers;
1945 	PageWriteWrapper*	fWrappers;
1946 	PageWriteTransfer*	fTransfers;
1947 	ConditionVariable	fAllFinishedCondition;
1948 };
1949 
1950 
1951 class PageWriteTransfer : public AsyncIOCallback {
1952 public:
1953 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1954 	bool AddPage(vm_page* page);
1955 
1956 	status_t Schedule(uint32 flags);
1957 
1958 	void SetStatus(status_t status, size_t transferred);
1959 
1960 	status_t Status() const	{ return fStatus; }
1961 	struct VMCache* Cache() const { return fCache; }
1962 	uint32 PageCount() const { return fPageCount; }
1963 
1964 	virtual void IOFinished(status_t status, bool partialTransfer,
1965 		generic_size_t bytesTransferred);
1966 private:
1967 	PageWriterRun*		fRun;
1968 	struct VMCache*		fCache;
1969 	off_t				fOffset;
1970 	uint32				fPageCount;
1971 	int32				fMaxPages;
1972 	status_t			fStatus;
1973 	uint32				fVecCount;
1974 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1975 };
1976 
1977 
1978 class PageWriteWrapper {
1979 public:
1980 	PageWriteWrapper();
1981 	~PageWriteWrapper();
1982 	void SetTo(vm_page* page);
1983 	bool Done(status_t result);
1984 
1985 private:
1986 	vm_page*			fPage;
1987 	struct VMCache*		fCache;
1988 	bool				fIsActive;
1989 };
1990 
1991 
1992 PageWriteWrapper::PageWriteWrapper()
1993 	:
1994 	fIsActive(false)
1995 {
1996 }
1997 
1998 
1999 PageWriteWrapper::~PageWriteWrapper()
2000 {
2001 	if (fIsActive)
2002 		panic("page write wrapper going out of scope but isn't completed");
2003 }
2004 
2005 
2006 /*!	The page's cache must be locked.
2007 */
2008 void
2009 PageWriteWrapper::SetTo(vm_page* page)
2010 {
2011 	DEBUG_PAGE_ACCESS_CHECK(page);
2012 
2013 	if (page->busy)
2014 		panic("setting page write wrapper to busy page");
2015 
2016 	if (fIsActive)
2017 		panic("re-setting page write wrapper that isn't completed");
2018 
2019 	fPage = page;
2020 	fCache = page->Cache();
2021 	fIsActive = true;
2022 
2023 	fPage->busy = true;
2024 	fPage->busy_writing = true;
2025 
2026 	// We have a modified page -- however, while we're writing it back,
2027 	// the page might still be mapped. In order not to lose any changes to the
2028 	// page, we mark it clean before actually writing it back; if
2029 	// writing the page fails for some reason, we'll just keep it in the
2030 	// modified page list, but that should happen only rarely.
2031 
2032 	// If the page is changed after we cleared the dirty flag, but before we
2033 	// had the chance to write it back, then we'll write it again later -- that
2034 	// will probably not happen that often, though.
2035 
2036 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
2037 }
2038 
2039 
2040 /*!	The page's cache must be locked.
2041 	The page queues must not be locked.
2042 	\return \c true if the page was written successfully or could otherwise be
2043 		handled, \c false otherwise.
2044 */
2045 bool
2046 PageWriteWrapper::Done(status_t result)
2047 {
2048 	if (!fIsActive)
2049 		panic("completing page write wrapper that is not active");
2050 
2051 	DEBUG_PAGE_ACCESS_START(fPage);
2052 
2053 	fPage->busy = false;
2054 		// Set unbusy and notify later by hand, since we might free the page.
2055 
2056 	bool success = true;
2057 
2058 	if (result == B_OK) {
2059 		// put it into the active/inactive queue
2060 		move_page_to_appropriate_queue(fPage);
2061 		fPage->busy_writing = false;
2062 		DEBUG_PAGE_ACCESS_END(fPage);
2063 	} else {
2064 		// Writing the page failed. One reason would be that the cache has been
2065 		// shrunk and the page no longer belongs to the file. Otherwise the
2066 		// actual I/O failed, in which case we'll simply keep the page modified.
2067 
2068 		if (!fPage->busy_writing) {
2069 			// The busy_writing flag was cleared. That means the cache has been
2070 			// shrunk while we were trying to write the page and we have to free
2071 			// it now.
2072 			vm_remove_all_page_mappings(fPage);
2073 // TODO: Unmapping should already happen when resizing the cache!
2074 			fCache->RemovePage(fPage);
2075 			free_page(fPage, false);
2076 			unreserve_pages(1);
2077 		} else {
2078 			// Writing the page failed -- mark the page modified and move it to
2079 			// an appropriate queue other than the modified queue, so we don't
2080 			// keep trying to write it over and over again. We keep
2081 			// non-temporary pages in the modified queue, though, so they don't
2082 			// get lost in the inactive queue.
2083 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2084 				strerror(result));
2085 
2086 			fPage->modified = true;
2087 			if (!fCache->temporary)
2088 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2089 			else if (fPage->IsMapped())
2090 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2091 			else
2092 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2093 
2094 			fPage->busy_writing = false;
2095 			DEBUG_PAGE_ACCESS_END(fPage);
2096 
2097 			success = false;
2098 		}
2099 	}
2100 
2101 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2102 	fIsActive = false;
2103 
2104 	return success;
2105 }
2106 
2107 
2108 /*!	The page's cache must be locked.
2109 */
2110 void
2111 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2112 {
2113 	fRun = run;
2114 	fCache = page->Cache();
2115 	fOffset = page->cache_offset;
2116 	fPageCount = 1;
2117 	fMaxPages = maxPages;
2118 	fStatus = B_OK;
2119 
2120 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2121 	fVecs[0].length = B_PAGE_SIZE;
2122 	fVecCount = 1;
2123 }
2124 
2125 
2126 /*!	The page's cache must be locked.
2127 */
2128 bool
2129 PageWriteTransfer::AddPage(vm_page* page)
2130 {
2131 	if (page->Cache() != fCache
2132 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2133 		return false;
2134 
2135 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2136 		+ fVecs[fVecCount - 1].length;
2137 
2138 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2139 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2140 		// append to last iovec
2141 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2142 		fPageCount++;
2143 		return true;
2144 	}
2145 
2146 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2147 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2148 		&& (off_t)page->cache_offset == fOffset - 1) {
2149 		// prepend to first iovec and adjust offset
2150 		fVecs[0].base = nextBase;
2151 		fVecs[0].length += B_PAGE_SIZE;
2152 		fOffset = page->cache_offset;
2153 		fPageCount++;
2154 		return true;
2155 	}
2156 
2157 	if (((off_t)page->cache_offset == fOffset + fPageCount
2158 			|| (off_t)page->cache_offset == fOffset - 1)
2159 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2160 		// not physically contiguous or not in the right order
2161 		uint32 vectorIndex;
2162 		if ((off_t)page->cache_offset < fOffset) {
2163 			// we are pre-pending another vector, move the other vecs
2164 			for (uint32 i = fVecCount; i > 0; i--)
2165 				fVecs[i] = fVecs[i - 1];
2166 
2167 			fOffset = page->cache_offset;
2168 			vectorIndex = 0;
2169 		} else
2170 			vectorIndex = fVecCount;
2171 
2172 		fVecs[vectorIndex].base
2173 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2174 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2175 
2176 		fVecCount++;
2177 		fPageCount++;
2178 		return true;
2179 	}
2180 
2181 	return false;
2182 }
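
// A small worked example of the coalescing above: assume the transfer was set
// up with a page at cache offset 5 backed by physical frame 12. A page at
// offset 6/frame 13 extends fVecs[0] to two pages; a page at offset 7/frame 20
// is contiguous in the cache but not physically, so it gets its own fVecs[1];
// a page at offset 9 is rejected (not adjacent to the current run) and will
// end up in a separate transfer.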
2183 
2184 
2185 status_t
2186 PageWriteTransfer::Schedule(uint32 flags)
2187 {
2188 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2189 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2190 
2191 	if (fRun != NULL) {
2192 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2193 			flags | B_PHYSICAL_IO_REQUEST, this);
2194 	}
2195 
2196 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2197 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2198 
2199 	SetStatus(status, writeLength);
2200 	return fStatus;
2201 }
2202 
2203 
2204 void
2205 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2206 {
2207 	// only succeed if all pages up to the last one have been written fully
2208 	// and the last page has at least been written partially
2209 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2210 		status = B_ERROR;
2211 
2212 	fStatus = status;
2213 }
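
// For example, a transfer of 4 pages only counts as successful if more than
// 3 * B_PAGE_SIZE bytes were transferred, i.e. the last page was written at
// least partially; otherwise the whole transfer is reported to the wrappers
// as B_ERROR.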
2214 
2215 
2216 void
2217 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2218 	generic_size_t bytesTransferred)
2219 {
2220 	SetStatus(status, bytesTransferred);
2221 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2222 }
2223 
2224 
2225 status_t
2226 PageWriterRun::Init(uint32 maxPages)
2227 {
2228 	fMaxPages = maxPages;
2229 	fWrapperCount = 0;
2230 	fTransferCount = 0;
2231 	fPendingTransfers = 0;
2232 
2233 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2234 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2235 	if (fWrappers == NULL || fTransfers == NULL)
2236 		return B_NO_MEMORY;
2237 
2238 	return B_OK;
2239 }
2240 
2241 
2242 void
2243 PageWriterRun::PrepareNextRun()
2244 {
2245 	fWrapperCount = 0;
2246 	fTransferCount = 0;
2247 	fPendingTransfers = 0;
2248 }
2249 
2250 
2251 /*!	The page's cache must be locked.
2252 */
2253 void
2254 PageWriterRun::AddPage(vm_page* page)
2255 {
2256 	fWrappers[fWrapperCount++].SetTo(page);
2257 
2258 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2259 		fTransfers[fTransferCount++].SetTo(this, page,
2260 			page->Cache()->MaxPagesPerAsyncWrite());
2261 	}
2262 }
2263 
2264 
2265 /*!	Writes all pages previously added.
2266 	\return The number of pages that could not be written or otherwise handled.
2267 */
2268 uint32
2269 PageWriterRun::Go()
2270 {
2271 	atomic_set(&fPendingTransfers, fTransferCount);
2272 
2273 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2274 	ConditionVariableEntry waitEntry;
2275 	fAllFinishedCondition.Add(&waitEntry);
2276 
2277 	// schedule writes
2278 	for (uint32 i = 0; i < fTransferCount; i++)
2279 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2280 
2281 	// wait until all pages have been written
2282 	waitEntry.Wait();
2283 
2284 	// mark pages depending on whether they could be written or not
2285 
2286 	uint32 failedPages = 0;
2287 	uint32 wrapperIndex = 0;
2288 	for (uint32 i = 0; i < fTransferCount; i++) {
2289 		PageWriteTransfer& transfer = fTransfers[i];
2290 		transfer.Cache()->Lock();
2291 
2292 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2293 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2294 				failedPages++;
2295 		}
2296 
2297 		transfer.Cache()->Unlock();
2298 	}
2299 
2300 	ASSERT(wrapperIndex == fWrapperCount);
2301 
2302 	for (uint32 i = 0; i < fTransferCount; i++) {
2303 		PageWriteTransfer& transfer = fTransfers[i];
2304 		struct VMCache* cache = transfer.Cache();
2305 
2306 		// We've acquired a reference for each page.
2307 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2308 			// We release the cache references after all pages were made
2309 			// unbusy again - otherwise releasing a vnode could deadlock.
2310 			cache->ReleaseStoreRef();
2311 			cache->ReleaseRef();
2312 		}
2313 	}
2314 
2315 	return failedPages;
2316 }
2317 
2318 
2319 void
2320 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2321 	bool partialTransfer, size_t bytesTransferred)
2322 {
2323 	if (atomic_add(&fPendingTransfers, -1) == 1)
2324 		fAllFinishedCondition.NotifyAll();
2325 }
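
// PageWritten() is the only part of the run that is called from the I/O
// completion context (via PageWriteTransfer::IOFinished()), which is why it
// limits itself to an atomic counter decrement and the condition variable
// notification. All per-page bookkeeping happens afterwards in Go(), with the
// respective caches locked.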
2326 
2327 
2328 /*!	The page writer continuously takes some pages from the modified
2329 	queue, writes them back, and moves them back to the active queue.
2330 	It runs in its own thread, and is only there to keep the number
2331 	of modified pages low, so that more pages can be reused at a
2332 	lower cost.
2333 */
2334 status_t
2335 page_writer(void* /*unused*/)
2336 {
2337 	const uint32 kNumPages = 256;
2338 #ifdef TRACE_VM_PAGE
2339 	uint32 writtenPages = 0;
2340 	bigtime_t lastWrittenTime = 0;
2341 	bigtime_t pageCollectionTime = 0;
2342 	bigtime_t pageWritingTime = 0;
2343 #endif
2344 
2345 	PageWriterRun run;
2346 	if (run.Init(kNumPages) != B_OK) {
2347 		panic("page writer: Failed to init PageWriterRun!");
2348 		return B_ERROR;
2349 	}
2350 
2351 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2352 
2353 	while (true) {
2354 // TODO: Maybe wait shorter when memory is low!
2355 		if (sModifiedPageQueue.Count() < kNumPages) {
2356 			sPageWriterCondition.Wait(3000000, true);
2357 				// every 3 seconds if no one triggers us
2358 		}
2359 
2360 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2361 		if (modifiedPages == 0)
2362 			continue;
2363 
2364 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2365 			// We ran through the whole queue without being able to write a
2366 			// single page. Take a break.
2367 			snooze(500000);
2368 			pagesSinceLastSuccessfulWrite = 0;
2369 		}
2370 
2371 #if ENABLE_SWAP_SUPPORT
2372 		page_stats pageStats;
2373 		get_page_stats(pageStats);
2374 		bool activePaging = do_active_paging(pageStats);
2375 #endif
2376 
2377 		// depending on how urgent it becomes to get pages to disk, we adjust
2378 		// our I/O priority
2379 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2380 		int32 ioPriority = B_IDLE_PRIORITY;
2381 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2382 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2383 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2384 		} else {
2385 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2386 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2387 		}
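		// For example, with the threshold of 10000 pages, 2500 modified pages
		// yield a quarter of MAX_PAGE_WRITER_IO_PRIORITY, while a critical
		// low-pages state (or more than 10000 modified pages) immediately
		// selects the maximum.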
2388 
2389 		thread_set_io_priority(ioPriority);
2390 
2391 		uint32 numPages = 0;
2392 		run.PrepareNextRun();
2393 
2394 		// TODO: make this laptop friendly, too (ie. only start doing
2395 		// something if someone else did something or there is really
2396 		// enough to do).
2397 
2398 		// collect pages to be written
2399 #ifdef TRACE_VM_PAGE
2400 		pageCollectionTime -= system_time();
2401 #endif
2402 
2403 		page_num_t maxPagesToSee = modifiedPages;
2404 
2405 		while (numPages < kNumPages && maxPagesToSee > 0) {
2406 			vm_page *page = next_modified_page(maxPagesToSee);
2407 			if (page == NULL)
2408 				break;
2409 
2410 			PageCacheLocker cacheLocker(page, false);
2411 			if (!cacheLocker.IsLocked())
2412 				continue;
2413 
2414 			VMCache *cache = page->Cache();
2415 
2416 			// If the page is busy or its state has changed while we were
2417 			// locking the cache, just ignore it.
2418 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2419 				continue;
2420 
2421 			DEBUG_PAGE_ACCESS_START(page);
2422 
2423 			// Don't write back wired (locked) pages.
2424 			if (page->WiredCount() > 0) {
2425 				set_page_state(page, PAGE_STATE_ACTIVE);
2426 				DEBUG_PAGE_ACCESS_END(page);
2427 				continue;
2428 			}
2429 
2430 			// Write back temporary pages only when we're actively paging.
2431 			if (cache->temporary
2432 #if ENABLE_SWAP_SUPPORT
2433 				&& (!activePaging
2434 					|| !cache->CanWritePage(
2435 							(off_t)page->cache_offset << PAGE_SHIFT))
2436 #endif
2437 				) {
2438 				// We can't/don't want to do anything with this page, so move it
2439 				// to one of the other queues.
2440 				if (page->mappings.IsEmpty())
2441 					set_page_state(page, PAGE_STATE_INACTIVE);
2442 				else
2443 					set_page_state(page, PAGE_STATE_ACTIVE);
2444 
2445 				DEBUG_PAGE_ACCESS_END(page);
2446 				continue;
2447 			}
2448 
2449 			// We need our own reference to the store, as it might currently be
2450 			// in the process of being destroyed.
2451 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2452 				DEBUG_PAGE_ACCESS_END(page);
2453 				cacheLocker.Unlock();
2454 				thread_yield();
2455 				continue;
2456 			}
2457 
2458 			run.AddPage(page);
2459 				// TODO: We're possibly adding pages of different caches and
2460 				// thus maybe of different underlying file systems here. This
2461 				// is a potential problem for loop file systems/devices, since
2462 				// we could mark a page busy that would need to be accessed
2463 				// when writing back another page, thus causing a deadlock.
2464 
2465 			DEBUG_PAGE_ACCESS_END(page);
2466 
2467 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2468 			TPW(WritePage(page));
2469 
2470 			cache->AcquireRefLocked();
2471 			numPages++;
2472 		}
2473 
2474 #ifdef TRACE_VM_PAGE
2475 		pageCollectionTime += system_time();
2476 #endif
2477 		if (numPages == 0)
2478 			continue;
2479 
2480 		// write pages to disk and do all the cleanup
2481 #ifdef TRACE_VM_PAGE
2482 		pageWritingTime -= system_time();
2483 #endif
2484 		uint32 failedPages = run.Go();
2485 #ifdef TRACE_VM_PAGE
2486 		pageWritingTime += system_time();
2487 
2488 		// debug output only...
2489 		writtenPages += numPages;
2490 		if (writtenPages >= 1024) {
2491 			bigtime_t now = system_time();
2492 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2493 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2494 				(now - lastWrittenTime) / 1000,
2495 				pageCollectionTime / 1000, pageWritingTime / 1000));
2496 			lastWrittenTime = now;
2497 
2498 			writtenPages -= 1024;
2499 			pageCollectionTime = 0;
2500 			pageWritingTime = 0;
2501 		}
2502 #endif
2503 
2504 		if (failedPages == numPages)
2505 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2506 		else
2507 			pagesSinceLastSuccessfulWrite = 0;
2508 	}
2509 
2510 	return B_OK;
2511 }
2512 
2513 
2514 // #pragma mark -
2515 
2516 
2517 // TODO: This should be done in the page daemon!
2518 #if 0
2519 #if ENABLE_SWAP_SUPPORT
2520 static bool
2521 free_page_swap_space(int32 index)
2522 {
2523 	vm_page *page = vm_page_at_index(index);
2524 	PageCacheLocker locker(page);
2525 	if (!locker.IsLocked())
2526 		return false;
2527 
2528 	DEBUG_PAGE_ACCESS_START(page);
2529 
2530 	VMCache* cache = page->Cache();
2531 	if (cache->temporary && page->WiredCount() == 0
2532 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2533 			&& page->usage_count > 0) {
2534 		// TODO: how to judge whether a page is highly active?
2535 		if (swap_free_page_swap_space(page)) {
2536 			// We need to mark the page modified, since otherwise it could be
2537 			// stolen and we'd lose its data.
2538 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2539 			TD(FreedPageSwap(page));
2540 			DEBUG_PAGE_ACCESS_END(page);
2541 			return true;
2542 		}
2543 	}
2544 	DEBUG_PAGE_ACCESS_END(page);
2545 	return false;
2546 }
2547 #endif
2548 #endif	// 0
2549 
2550 
2551 static vm_page *
2552 find_cached_page_candidate(struct vm_page &marker)
2553 {
2554 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2555 
2556 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2557 	vm_page *page;
2558 
2559 	if (marker.State() == PAGE_STATE_UNUSED) {
2560 		// Get the first page of the cached queue
2561 		page = sCachedPageQueue.Head();
2562 	} else {
2563 		// Get the next page of the current queue
2564 		if (marker.State() != PAGE_STATE_CACHED) {
2565 			panic("invalid marker %p state", &marker);
2566 			return NULL;
2567 		}
2568 
2569 		page = sCachedPageQueue.Next(&marker);
2570 		sCachedPageQueue.Remove(&marker);
2571 		marker.SetState(PAGE_STATE_UNUSED);
2572 	}
2573 
2574 	while (page != NULL) {
2575 		if (!page->busy) {
2576 			// we found a candidate, insert marker
2577 			marker.SetState(PAGE_STATE_CACHED);
2578 			sCachedPageQueue.InsertAfter(page, &marker);
2579 			return page;
2580 		}
2581 
2582 		page = sCachedPageQueue.Next(page);
2583 	}
2584 
2585 	return NULL;
2586 }
2587 
2588 
2589 static bool
2590 free_cached_page(vm_page *page, bool dontWait)
2591 {
2592 	// try to lock the page's cache
2593 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2594 		return false;
2595 	VMCache* cache = page->Cache();
2596 
2597 	AutoLocker<VMCache> cacheLocker(cache, true);
2598 	MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2599 
2600 	// check again if that page is still a candidate
2601 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2602 		return false;
2603 
2604 	DEBUG_PAGE_ACCESS_START(page);
2605 
2606 	PAGE_ASSERT(page, !page->IsMapped());
2607 	PAGE_ASSERT(page, !page->modified);
2608 
2609 	// we can now steal this page
2610 
2611 	cache->RemovePage(page);
2612 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2613 		// vm_page_allocate_page_run()) can pick it up, since they would be
2614 		// required to lock the cache first, which would fail.
2615 
2616 	sCachedPageQueue.RemoveUnlocked(page);
2617 	return true;
2618 }
2619 
2620 
2621 static uint32
2622 free_cached_pages(uint32 pagesToFree, bool dontWait)
2623 {
2624 	vm_page marker;
2625 	init_page_marker(marker);
2626 
2627 	uint32 pagesFreed = 0;
2628 
2629 	while (pagesFreed < pagesToFree) {
2630 		vm_page *page = find_cached_page_candidate(marker);
2631 		if (page == NULL)
2632 			break;
2633 
2634 		if (free_cached_page(page, dontWait)) {
2635 			ReadLocker locker(sFreePageQueuesLock);
2636 			page->SetState(PAGE_STATE_FREE);
2637 			DEBUG_PAGE_ACCESS_END(page);
2638 			sFreePageQueue.PrependUnlocked(page);
2639 			locker.Unlock();
2640 
2641 			TA(StolenPage());
2642 
2643 			pagesFreed++;
2644 		}
2645 	}
2646 
2647 	remove_page_marker(marker);
2648 
2649 	sFreePageCondition.NotifyAll();
2650 
2651 	return pagesFreed;
2652 }
2653 
2654 
2655 static void
2656 idle_scan_active_pages(page_stats& pageStats)
2657 {
2658 	VMPageQueue& queue = sActivePageQueue;
2659 
2660 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2661 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2662 
2663 	while (maxToScan > 0) {
2664 		maxToScan--;
2665 
2666 		// Get the next page. Note that we don't bother to lock here. We go with
2667 		// the assumption that on all architectures reading/writing pointers is
2668 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2669 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2670 		vm_page* page = queue.Head();
2671 		if (page == NULL)
2672 			break;
2673 
2674 		// lock the page's cache
2675 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2676 		if (cache == NULL)
2677 			continue;
2678 
2679 		if (page->State() != PAGE_STATE_ACTIVE) {
2680 			// page is no longer in the cache or in this queue
2681 			cache->ReleaseRefAndUnlock();
2682 			continue;
2683 		}
2684 
2685 		if (page->busy) {
2686 			// page is busy -- requeue at the end
2687 			vm_page_requeue(page, true);
2688 			cache->ReleaseRefAndUnlock();
2689 			continue;
2690 		}
2691 
2692 		DEBUG_PAGE_ACCESS_START(page);
2693 
2694 		// Get the page active/modified flags and update the page's usage count.
2695 		// We completely unmap inactive temporary pages. This saves us from
2696 		// iterating through the inactive list as well, since we'll be notified
2697 		// via page fault whenever such an inactive page is used again.
2698 		// We don't remove the mappings of non-temporary pages, since we
2699 		// wouldn't notice when those would become unused and could thus be
2700 		// moved to the cached list.
2701 		int32 usageCount;
2702 		if (page->WiredCount() > 0 || page->usage_count > 0
2703 			|| !cache->temporary) {
2704 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2705 		} else
2706 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2707 
2708 		if (usageCount > 0) {
2709 			usageCount += page->usage_count + kPageUsageAdvance;
2710 			if (usageCount > kPageUsageMax)
2711 				usageCount = kPageUsageMax;
2712 // TODO: This would probably also be the place to reclaim swap space.
2713 		} else {
2714 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2715 			if (usageCount < 0) {
2716 				usageCount = 0;
2717 				set_page_state(page, PAGE_STATE_INACTIVE);
2718 			}
2719 		}
2720 
2721 		page->usage_count = usageCount;
2722 
2723 		DEBUG_PAGE_ACCESS_END(page);
2724 
2725 		cache->ReleaseRefAndUnlock();
2726 	}
2727 }
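
// Usage count arithmetic in the scan above, by way of example: a page with
// usage_count 10 whose mappings report one set accessed flag becomes
// min(1 + 10 + kPageUsageAdvance, kPageUsageMax) = 14, while an unaccessed
// page with usage_count 0 drops by kPageUsageDecline to -1, is clamped to 0
// and is moved to the inactive queue.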
2728 
2729 
2730 static void
2731 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2732 {
2733 	int32 pagesToFree = pageStats.unsatisfiedReservations
2734 		+ sFreeOrCachedPagesTarget
2735 		- (pageStats.totalFreePages + pageStats.cachedPages);
2736 	if (pagesToFree <= 0)
2737 		return;
2738 
2739 	bigtime_t time = system_time();
2740 	uint32 pagesScanned = 0;
2741 	uint32 pagesToCached = 0;
2742 	uint32 pagesToModified = 0;
2743 	uint32 pagesToActive = 0;
2744 
2745 	// Determine how many pages at maximum to send to the modified queue. Since
2746 	// it is relatively expensive to page out pages, we do that on a grander
2747 	// scale only when things get desperate.
2748 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2749 
2750 	vm_page marker;
2751 	init_page_marker(marker);
2752 
2753 	VMPageQueue& queue = sInactivePageQueue;
2754 	InterruptsSpinLocker queueLocker(queue.GetLock());
2755 	uint32 maxToScan = queue.Count();
2756 
2757 	vm_page* nextPage = queue.Head();
2758 
2759 	while (pagesToFree > 0 && maxToScan > 0) {
2760 		maxToScan--;
2761 
2762 		// get the next page
2763 		vm_page* page = nextPage;
2764 		if (page == NULL)
2765 			break;
2766 		nextPage = queue.Next(page);
2767 
2768 		if (page->busy)
2769 			continue;
2770 
2771 		// mark the position
2772 		queue.InsertAfter(page, &marker);
2773 		queueLocker.Unlock();
2774 
2775 		// lock the page's cache
2776 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2777 		if (cache == NULL || page->busy
2778 				|| page->State() != PAGE_STATE_INACTIVE) {
2779 			if (cache != NULL)
2780 				cache->ReleaseRefAndUnlock();
2781 			queueLocker.Lock();
2782 			nextPage = queue.Next(&marker);
2783 			queue.Remove(&marker);
2784 			continue;
2785 		}
2786 
2787 		pagesScanned++;
2788 
2789 		DEBUG_PAGE_ACCESS_START(page);
2790 
2791 		// Get the accessed count, clear the accessed/modified flags and
2792 		// unmap the page, if it hasn't been accessed.
2793 		int32 usageCount;
2794 		if (page->WiredCount() > 0)
2795 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2796 		else
2797 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2798 
2799 		// update usage count
2800 		if (usageCount > 0) {
2801 			usageCount += page->usage_count + kPageUsageAdvance;
2802 			if (usageCount > kPageUsageMax)
2803 				usageCount = kPageUsageMax;
2804 		} else {
2805 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2806 			if (usageCount < 0)
2807 				usageCount = 0;
2808 		}
2809 
2810 		page->usage_count = usageCount;
2811 
2812 		// Move to fitting queue or requeue:
2813 		// * Active mapped pages go to the active queue.
2814 		// * Inactive mapped (i.e. wired) pages are requeued.
2815 		// * The remaining pages are cachable. Thus, if unmodified they go to
2816 		//   the cached queue, otherwise to the modified queue (up to a limit).
2817 		//   Note that, unlike in the idle scan, we don't exempt pages of
2818 		//   temporary caches here. Apparently we really need memory, so we
2819 		//   had better page out memory as well.
2820 		bool isMapped = page->IsMapped();
2821 		if (usageCount > 0) {
2822 			if (isMapped) {
2823 				set_page_state(page, PAGE_STATE_ACTIVE);
2824 				pagesToActive++;
2825 			} else
2826 				vm_page_requeue(page, true);
2827 		} else if (isMapped) {
2828 			vm_page_requeue(page, true);
2829 		} else if (!page->modified) {
2830 			set_page_state(page, PAGE_STATE_CACHED);
2831 			pagesToFree--;
2832 			pagesToCached++;
2833 		} else if (maxToFlush > 0) {
2834 			set_page_state(page, PAGE_STATE_MODIFIED);
2835 			maxToFlush--;
2836 			pagesToModified++;
2837 		} else
2838 			vm_page_requeue(page, true);
2839 
2840 		DEBUG_PAGE_ACCESS_END(page);
2841 
2842 		cache->ReleaseRefAndUnlock();
2843 
2844 		// remove the marker
2845 		queueLocker.Lock();
2846 		nextPage = queue.Next(&marker);
2847 		queue.Remove(&marker);
2848 	}
2849 
2850 	queueLocker.Unlock();
2851 
2852 	time = system_time() - time;
2853 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2854 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2855 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2856 		pagesToModified, pagesToActive);
2857 
2858 	// wake up the page writer, if we tossed it some pages
2859 	if (pagesToModified > 0)
2860 		sPageWriterCondition.WakeUp();
2861 }
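
// To illustrate the queue decisions made in full_scan_inactive_pages(): an
// inactive page that was accessed again and is still mapped is promoted back
// to the active queue; an unaccessed but still mapped (e.g. wired) page is
// merely requeued; an unaccessed, unmapped, clean page becomes immediately
// reusable in the cached queue; and an unaccessed, unmapped, dirty page is
// handed to the page writer via the modified queue, up to maxToFlush pages
// per scan.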
2862 
2863 
2864 static void
2865 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2866 {
2867 	vm_page marker;
2868 	init_page_marker(marker);
2869 
2870 	VMPageQueue& queue = sActivePageQueue;
2871 	InterruptsSpinLocker queueLocker(queue.GetLock());
2872 	uint32 maxToScan = queue.Count();
2873 
2874 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2875 		+ sFreeOrCachedPagesTarget
2876 		- (pageStats.totalFreePages + pageStats.cachedPages)
2877 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2878 	if (pagesToDeactivate <= 0)
2879 		return;
2880 
2881 	bigtime_t time = system_time();
2882 	uint32 pagesAccessed = 0;
2883 	uint32 pagesToInactive = 0;
2884 	uint32 pagesScanned = 0;
2885 
2886 	vm_page* nextPage = queue.Head();
2887 
2888 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2889 		maxToScan--;
2890 
2891 		// get the next page
2892 		vm_page* page = nextPage;
2893 		if (page == NULL)
2894 			break;
2895 		nextPage = queue.Next(page);
2896 
2897 		if (page->busy)
2898 			continue;
2899 
2900 		// mark the position
2901 		queue.InsertAfter(page, &marker);
2902 		queueLocker.Unlock();
2903 
2904 		// lock the page's cache
2905 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2906 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2907 			if (cache != NULL)
2908 				cache->ReleaseRefAndUnlock();
2909 			queueLocker.Lock();
2910 			nextPage = queue.Next(&marker);
2911 			queue.Remove(&marker);
2912 			continue;
2913 		}
2914 
2915 		pagesScanned++;
2916 
2917 		DEBUG_PAGE_ACCESS_START(page);
2918 
2919 		// Get the page active/modified flags and update the page's usage count.
2920 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2921 
2922 		if (usageCount > 0) {
2923 			usageCount += page->usage_count + kPageUsageAdvance;
2924 			if (usageCount > kPageUsageMax)
2925 				usageCount = kPageUsageMax;
2926 			pagesAccessed++;
2927 // TODO: This would probably also be the place to reclaim swap space.
2928 		} else {
2929 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2930 			if (usageCount <= 0) {
2931 				usageCount = 0;
2932 				set_page_state(page, PAGE_STATE_INACTIVE);
2933 				pagesToInactive++;
2934 			}
2935 		}
2936 
2937 		page->usage_count = usageCount;
2938 
2939 		DEBUG_PAGE_ACCESS_END(page);
2940 
2941 		cache->ReleaseRefAndUnlock();
2942 
2943 		// remove the marker
2944 		queueLocker.Lock();
2945 		nextPage = queue.Next(&marker);
2946 		queue.Remove(&marker);
2947 	}
2948 
2949 	time = system_time() - time;
2950 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2951 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2952 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2953 }
2954 
2955 
2956 static void
2957 page_daemon_idle_scan(page_stats& pageStats)
2958 {
2959 	TRACE_DAEMON("page daemon: idle run\n");
2960 
2961 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2962 		// We want more actually free pages, so free some from the cached
2963 		// ones.
2964 		uint32 freed = free_cached_pages(
2965 			sFreePagesTarget - pageStats.totalFreePages, false);
2966 		if (freed > 0)
2967 			unreserve_pages(freed);
2968 		get_page_stats(pageStats);
2969 	}
2970 
2971 	// Walk the active list and move pages to the inactive queue.
2972 	get_page_stats(pageStats);
2973 	idle_scan_active_pages(pageStats);
2974 }
2975 
2976 
2977 static void
2978 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2979 {
2980 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
2981 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
2982 		pageStats.cachedPages, pageStats.unsatisfiedReservations
2983 			+ sFreeOrCachedPagesTarget
2984 			- (pageStats.totalFreePages + pageStats.cachedPages));
2985 
2986 	// Walk the inactive list and transfer pages to the cached and modified
2987 	// queues.
2988 	full_scan_inactive_pages(pageStats, despairLevel);
2989 
2990 	// Free cached pages. Also wake up reservation waiters.
2991 	get_page_stats(pageStats);
2992 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2993 		- (pageStats.totalFreePages);
2994 	if (pagesToFree > 0) {
2995 		uint32 freed = free_cached_pages(pagesToFree, true);
2996 		if (freed > 0)
2997 			unreserve_pages(freed);
2998 	}
2999 
3000 	// Walk the active list and move pages to the inactive queue.
3001 	get_page_stats(pageStats);
3002 	full_scan_active_pages(pageStats, despairLevel);
3003 }
3004 
3005 
3006 static status_t
3007 page_daemon(void* /*unused*/)
3008 {
3009 	int32 despairLevel = 0;
3010 
3011 	while (true) {
3012 		sPageDaemonCondition.ClearActivated();
3013 
3014 		// evaluate the free pages situation
3015 		page_stats pageStats;
3016 		get_page_stats(pageStats);
3017 
3018 		if (!do_active_paging(pageStats)) {
3019 			// Things look good -- just maintain statistics and keep the pool
3020 			// of actually free pages full enough.
3021 			despairLevel = 0;
3022 			page_daemon_idle_scan(pageStats);
3023 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
3024 		} else {
3025 			// Not enough free pages. We need to do some real work.
3026 			despairLevel = std::min(despairLevel + 1, (int32)3);
3027 			page_daemon_full_scan(pageStats, despairLevel);
3028 
3029 			// Don't wait after the first full scan, but rather immediately
3030 			// check whether we were successful in freeing enough pages and
3031 			// re-run with increased despair level. The first scan is
3032 			// conservative with respect to moving inactive modified pages to
3033 			// the modified list to avoid thrashing. The second scan, however,
3034 			// will not hold back.
3035 			if (despairLevel > 1)
3036 				snooze(kBusyScanWaitInterval);
3037 		}
3038 	}
3039 
3040 	return B_OK;
3041 }
3042 
3043 
3044 /*!	Returns how many pages could *not* be reserved.
3045 */
3046 static uint32
3047 reserve_pages(uint32 count, int priority, bool dontWait)
3048 {
3049 	int32 dontTouch = kPageReserveForPriority[priority];
3050 
3051 	while (true) {
3052 		count -= reserve_some_pages(count, dontTouch);
3053 		if (count == 0)
3054 			return 0;
3055 
3056 		if (sUnsatisfiedPageReservations == 0) {
3057 			count -= free_cached_pages(count, dontWait);
3058 			if (count == 0)
3059 				return count;
3060 		}
3061 
3062 		if (dontWait)
3063 			return count;
3064 
3065 		// we need to wait for pages to become available
3066 
3067 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3068 
3069 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3070 		sUnsatisfiedPageReservations += count;
3071 
3072 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3073 			// the situation changed
3074 			sUnsatisfiedPageReservations -= count;
3075 			continue;
3076 		}
3077 
3078 		PageReservationWaiter waiter;
3079 		waiter.dontTouch = dontTouch;
3080 		waiter.missing = count;
3081 		waiter.thread = thread_get_current_thread();
3082 		waiter.threadPriority = waiter.thread->priority;
3083 
3084 		// insert ordered (i.e. after all waiters with higher or equal priority)
3085 		PageReservationWaiter* otherWaiter = NULL;
3086 		for (PageReservationWaiterList::Iterator it
3087 				= sPageReservationWaiters.GetIterator();
3088 			(otherWaiter = it.Next()) != NULL;) {
3089 			if (waiter < *otherWaiter)
3090 				break;
3091 		}
3092 
3093 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3094 
3095 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3096 			"waiting for pages");
3097 
3098 		if (notifyDaemon)
3099 			sPageDaemonCondition.WakeUp();
3100 
3101 		pageDeficitLocker.Unlock();
3102 
3103 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3104 		thread_block();
3105 
3106 		pageDeficitLocker.Lock();
3107 
3108 		return 0;
3109 	}
3110 }
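
// The dontTouch value taken from kPageReserveForPriority above implements the
// priority tiers: a user priority reservation (VM_PRIORITY_USER) refuses to
// dip into the pages kept back for system allocations, a system priority
// reservation only leaves the (smaller) VIP reserve untouched, and a VIP
// reservation may use every last free page. Waiters are inserted after all
// waiters with higher or equal priority, so the more important reservations
// are satisfied first when pages become available.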
3111 
3112 
3113 //	#pragma mark - private kernel API
3114 
3115 
3116 /*!	Writes a range of modified pages of a cache to disk.
3117 	You need to hold the VMCache lock when calling this function.
3118 	Note that the cache lock is temporarily released within this function.
3119 	\param cache The cache.
3120 	\param firstPage Offset (in page size units) of the first page in the range.
3121 	\param endPage End offset (in page size units) of the page range. The page
3122 		at this offset is not included.
3123 */
3124 status_t
3125 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3126 	uint32 endPage)
3127 {
3128 	static const int32 kMaxPages = 256;
3129 	int32 maxPages = cache->MaxPagesPerWrite();
3130 	if (maxPages < 0 || maxPages > kMaxPages)
3131 		maxPages = kMaxPages;
3132 
3133 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3134 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3135 
3136 	PageWriteWrapper stackWrappersPool[2];
3137 	PageWriteWrapper* stackWrappers[1];
3138 	PageWriteWrapper* wrapperPool
3139 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3140 	PageWriteWrapper** wrappers
3141 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3142 	if (wrapperPool == NULL || wrappers == NULL) {
3143 		// don't fail, just limit our capabilities
3144 		delete[] wrapperPool;
3145 		delete[] wrappers;
3146 		wrapperPool = stackWrappersPool;
3147 		wrappers = stackWrappers;
3148 		maxPages = 1;
3149 	}
3150 
3151 	int32 nextWrapper = 0;
3152 	int32 usedWrappers = 0;
3153 
3154 	PageWriteTransfer transfer;
3155 	bool transferEmpty = true;
3156 
3157 	VMCachePagesTree::Iterator it
3158 		= cache->pages.GetIterator(firstPage, true, true);
3159 
3160 	while (true) {
3161 		vm_page* page = it.Next();
3162 		if (page == NULL || page->cache_offset >= endPage) {
3163 			if (transferEmpty)
3164 				break;
3165 
3166 			page = NULL;
3167 		}
3168 
3169 		if (page != NULL) {
3170 			if (page->busy
3171 				|| (page->State() != PAGE_STATE_MODIFIED
3172 					&& !vm_test_map_modification(page))) {
3173 				page = NULL;
3174 			}
3175 		}
3176 
3177 		PageWriteWrapper* wrapper = NULL;
3178 		if (page != NULL) {
3179 			wrapper = &wrapperPool[nextWrapper++];
3180 			if (nextWrapper > maxPages)
3181 				nextWrapper = 0;
3182 
3183 			DEBUG_PAGE_ACCESS_START(page);
3184 
3185 			wrapper->SetTo(page);
3186 
3187 			if (transferEmpty || transfer.AddPage(page)) {
3188 				if (transferEmpty) {
3189 					transfer.SetTo(NULL, page, maxPages);
3190 					transferEmpty = false;
3191 				}
3192 
3193 				DEBUG_PAGE_ACCESS_END(page);
3194 
3195 				wrappers[usedWrappers++] = wrapper;
3196 				continue;
3197 			}
3198 
3199 			DEBUG_PAGE_ACCESS_END(page);
3200 		}
3201 
3202 		if (transferEmpty)
3203 			continue;
3204 
3205 		cache->Unlock();
3206 		status_t status = transfer.Schedule(0);
3207 		cache->Lock();
3208 
3209 		for (int32 i = 0; i < usedWrappers; i++)
3210 			wrappers[i]->Done(status);
3211 
3212 		usedWrappers = 0;
3213 
3214 		if (page != NULL) {
3215 			transfer.SetTo(NULL, page, maxPages);
3216 			wrappers[usedWrappers++] = wrapper;
3217 		} else
3218 			transferEmpty = true;
3219 	}
3220 
3221 	if (wrapperPool != stackWrappersPool) {
3222 		delete[] wrapperPool;
3223 		delete[] wrappers;
3224 	}
3225 
3226 	return B_OK;
3227 }
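
// Roughly, the loop above works as follows: eligible modified pages are
// wrapped (which marks them busy and clears the modified flag in their
// mappings) and accumulated into a single PageWriteTransfer as long as
// PageWriteTransfer::AddPage() accepts them. As soon as a page doesn't fit,
// or the range is exhausted, the transfer is written out synchronously with
// the cache unlocked, all collected wrappers are completed with the result,
// and the page that didn't fit (if any) starts the next transfer.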
3228 
3229 
3230 /*!	You need to hold the VMCache lock when calling this function.
3231 	Note that the cache lock is temporarily released within this function.
3232 */
3233 status_t
3234 vm_page_write_modified_pages(VMCache *cache)
3235 {
3236 	return vm_page_write_modified_page_range(cache, 0,
3237 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3238 }
3239 
3240 
3241 /*!	Schedules the page writer to write back the specified \a page.
3242 	Note, however, that it might not do this immediately, and it can well
3243 	take several seconds until the page is actually written out.
3244 */
3245 void
3246 vm_page_schedule_write_page(vm_page *page)
3247 {
3248 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3249 
3250 	vm_page_requeue(page, false);
3251 
3252 	sPageWriterCondition.WakeUp();
3253 }
3254 
3255 
3256 /*!	Cache must be locked.
3257 */
3258 void
3259 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3260 	uint32 endPage)
3261 {
3262 	uint32 modified = 0;
3263 	for (VMCachePagesTree::Iterator it
3264 				= cache->pages.GetIterator(firstPage, true, true);
3265 			vm_page *page = it.Next();) {
3266 		if (page->cache_offset >= endPage)
3267 			break;
3268 
3269 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3270 			DEBUG_PAGE_ACCESS_START(page);
3271 			vm_page_requeue(page, false);
3272 			modified++;
3273 			DEBUG_PAGE_ACCESS_END(page);
3274 		}
3275 	}
3276 
3277 	if (modified > 0)
3278 		sPageWriterCondition.WakeUp();
3279 }
3280 
3281 
3282 void
3283 vm_page_init_num_pages(kernel_args *args)
3284 {
3285 	// calculate the size of memory by looking at the physical_memory_range array
3286 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3287 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3288 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3289 
3290 	sNonExistingPages = 0;
3291 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3292 
3293 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3294 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3295 		if (start > physicalPagesEnd)
3296 			sNonExistingPages += start - physicalPagesEnd;
3297 		physicalPagesEnd = start
3298 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3299 
3300 #ifdef LIMIT_AVAILABLE_MEMORY
3301 		page_num_t available
3302 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3303 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3304 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3305 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3306 			break;
3307 		}
3308 #endif
3309 	}
3310 
3311 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3312 		sPhysicalPageOffset, physicalPagesEnd));
3313 
3314 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3315 }
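
// Example of the bookkeeping above: with two physical memory ranges, say
// 0-512 MB and 768 MB-1 GB, the 256 MB hole between them is accounted as
// sNonExistingPages (65536 pages with a 4 KB page size), so that later
// statistics and the sFreePagesTarget computation are based on the memory
// that actually exists rather than on the raw span of physical addresses.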
3316 
3317 
3318 status_t
3319 vm_page_init(kernel_args *args)
3320 {
3321 	TRACE(("vm_page_init: entry\n"));
3322 
3323 	// init page queues
3324 	sModifiedPageQueue.Init("modified pages queue");
3325 	sInactivePageQueue.Init("inactive pages queue");
3326 	sActivePageQueue.Init("active pages queue");
3327 	sCachedPageQueue.Init("cached pages queue");
3328 	sFreePageQueue.Init("free pages queue");
3329 	sClearPageQueue.Init("clear pages queue");
3330 
3331 	new (&sPageReservationWaiters) PageReservationWaiterList;
3332 
3333 	// map in the new free page table
3334 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3335 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3336 
3337 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3338 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3339 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3340 
3341 	// initialize the free page table
3342 	for (uint32 i = 0; i < sNumPages; i++) {
3343 		sPages[i].Init(sPhysicalPageOffset + i);
3344 		sFreePageQueue.Append(&sPages[i]);
3345 
3346 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3347 		sPages[i].allocation_tracking_info.Clear();
3348 #endif
3349 	}
3350 
3351 	sUnreservedFreePages = sNumPages;
3352 
3353 	TRACE(("initialized table\n"));
3354 
3355 	// mark the ranges between usable physical memory unused
3356 	phys_addr_t previousEnd = 0;
3357 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3358 		phys_addr_t base = args->physical_memory_range[i].start;
3359 		phys_size_t size = args->physical_memory_range[i].size;
3360 		if (base > previousEnd) {
3361 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3362 				(base - previousEnd) / B_PAGE_SIZE, false);
3363 		}
3364 		previousEnd = base + size;
3365 	}
3366 
3367 	// mark the allocated physical page ranges wired
3368 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3369 		mark_page_range_in_use(
3370 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3371 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3372 	}
3373 
3374 	// The target of actually free pages. This must be at least the system
3375 	// reserve, but should be a few more pages, so we don't have to extract
3376 	// a cached page with each allocation.
3377 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3378 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3379 
3380 	// The target of free + cached and inactive pages. On low-memory machines
3381 	// keep things tight. free + cached is the pool of immediately allocatable
3382 	// pages. We want a few inactive pages, so when we're actually paging, we
3383 	// have a reasonably large set of pages to work with.
3384 	if (sUnreservedFreePages < 16 * 1024) {
3385 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3386 		sInactivePagesTarget = sFreePagesTarget / 3;
3387 	} else {
3388 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3389 		sInactivePagesTarget = sFreePagesTarget / 2;
3390 	}
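
	// By way of example: with 1 GB of existing RAM (262144 pages of 4 KB),
	// the dynamic part of sFreePagesTarget is 262144 / 1024 = 256 pages, and
	// since more than 16 * 1024 pages are available, sFreeOrCachedPagesTarget
	// is simply twice sFreePagesTarget and sInactivePagesTarget half of it.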
3391 
3392 	TRACE(("vm_page_init: exit\n"));
3393 
3394 	return B_OK;
3395 }
3396 
3397 
3398 status_t
3399 vm_page_init_post_area(kernel_args *args)
3400 {
3401 	void *dummy;
3402 
3403 	dummy = sPages;
3404 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3405 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3406 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3407 
3408 	add_debugger_command("list_pages", &dump_page_list,
3409 		"List physical pages");
3410 	add_debugger_command("page_stats", &dump_page_stats,
3411 		"Dump statistics about page usage");
3412 	add_debugger_command_etc("page", &dump_page_long,
3413 		"Dump page info",
3414 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3415 		"Prints information for the physical page. If neither \"-p\" nor\n"
3416 		"\"-v\" are given, the provided address is interpreted as address of\n"
3417 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3418 		"given, the address is the physical address of the page. If \"-v\" is\n"
3419 		"given, the address is interpreted as virtual address in the current\n"
3420 		"thread's address space and for the page it is mapped to (if any)\n"
3421 		"information are printed. If \"-m\" is specified, the command will\n"
3422 		"search all known address spaces for mappings to that page and print\n"
3423 		"them.\n", 0);
3424 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3425 	add_debugger_command("find_page", &find_page,
3426 		"Find out which queue a page is actually in");
3427 
3428 #ifdef TRACK_PAGE_USAGE_STATS
3429 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3430 		"Dumps statistics about page usage counts",
3431 		"\n"
3432 		"Dumps statistics about page usage counts.\n",
3433 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3434 #endif
3435 
3436 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3437 	add_debugger_command_etc("page_allocations_per_caller",
3438 		&dump_page_allocations_per_caller,
3439 		"Dump current page allocations summed up per caller",
3440 		"[ -d <caller> ] [ -r ]\n"
3441 		"The current allocations will by summed up by caller (their count)\n"
3442 		"printed in decreasing order by count.\n"
3443 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3444 		"including the respective stack trace.\n"
3445 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3446 		"the information, so the next command invocation will only show the\n"
3447 		"allocations made after the reset.\n", 0);
3448 	add_debugger_command_etc("page_allocation_infos",
3449 		&dump_page_allocation_infos,
3450 		"Dump current page allocations",
3451 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3452 		"[ --thread <thread ID> ]\n"
3453 		"The current allocations filtered by optional values will be printed.\n"
3454 		"The optional \"-p\" page number filters for a specific page,\n"
3455 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3456 		"and/or threads can be filtered (these only work if a corresponding\n"
3457 		"tracing entry is still available).\n"
3458 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3459 		"callers are printed, where available\n", 0);
3460 #endif
3461 
3462 	return B_OK;
3463 }
3464 
3465 
3466 status_t
3467 vm_page_init_post_thread(kernel_args *args)
3468 {
3469 	new (&sFreePageCondition) ConditionVariable;
3470 
3471 	// create a kernel thread to clear out pages
3472 
3473 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3474 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3475 	resume_thread(thread);
3476 
3477 	// start page writer
3478 
3479 	sPageWriterCondition.Init("page writer");
3480 
3481 	thread = spawn_kernel_thread(&page_writer, "page writer",
3482 		B_NORMAL_PRIORITY + 1, NULL);
3483 	resume_thread(thread);
3484 
3485 	// start page daemon
3486 
3487 	sPageDaemonCondition.Init("page daemon");
3488 
3489 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3490 		B_NORMAL_PRIORITY, NULL);
3491 	resume_thread(thread);
3492 
3493 	return B_OK;
3494 }
3495 
3496 
3497 status_t
3498 vm_mark_page_inuse(page_num_t page)
3499 {
3500 	return vm_mark_page_range_inuse(page, 1);
3501 }
3502 
3503 
3504 status_t
3505 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3506 {
3507 	return mark_page_range_in_use(startPage, length, false);
3508 }
3509 
3510 
3511 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3512 */
3513 void
3514 vm_page_unreserve_pages(vm_page_reservation* reservation)
3515 {
3516 	uint32 count = reservation->count;
3517 	reservation->count = 0;
3518 
3519 	if (count == 0)
3520 		return;
3521 
3522 	TA(UnreservePages(count));
3523 
3524 	unreserve_pages(count);
3525 }
3526 
3527 
3528 /*!	With this call, you can reserve a number of free pages in the system.
3529 	They will only be handed out to someone who has actually reserved them.
3530 	This call returns as soon as the number of requested pages has been
3531 	reached.
3532 	The caller must not hold any cache lock or the function might deadlock.
3533 */
3534 void
3535 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3536 	int priority)
3537 {
3538 	reservation->count = count;
3539 
3540 	if (count == 0)
3541 		return;
3542 
3543 	TA(ReservePages(count));
3544 
3545 	reserve_pages(count, priority, false);
3546 }
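
// A minimal usage sketch of the reservation API (illustrative only -- a real
// caller would add error handling around it): reserve the pages up front,
// allocate them one by one from the reservation, and return any unused
// remainder.
//
//		vm_page_reservation reservation;
//		vm_page_reserve_pages(&reservation, 4, VM_PRIORITY_SYSTEM);
//		for (int i = 0; i < 4; i++) {
//			vm_page* page = vm_page_allocate_page(&reservation,
//				PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
//			// ... map or otherwise use the page ...
//		}
//		vm_page_unreserve_pages(&reservation);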
3547 
3548 
3549 bool
3550 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3551 	int priority)
3552 {
3553 	if (count == 0) {
3554 		reservation->count = count;
3555 		return true;
3556 	}
3557 
3558 	uint32 remaining = reserve_pages(count, priority, true);
3559 	if (remaining == 0) {
3560 		TA(ReservePages(count));
3561 		reservation->count = count;
3562 		return true;
3563 	}
3564 
3565 	unreserve_pages(count - remaining);
3566 
3567 	return false;
3568 }
3569 
3570 
3571 vm_page *
3572 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3573 {
3574 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3575 	ASSERT(pageState != PAGE_STATE_FREE);
3576 	ASSERT(pageState != PAGE_STATE_CLEAR);
3577 
3578 	ASSERT(reservation->count > 0);
3579 	reservation->count--;
3580 
3581 	VMPageQueue* queue;
3582 	VMPageQueue* otherQueue;
3583 
3584 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3585 		queue = &sClearPageQueue;
3586 		otherQueue = &sFreePageQueue;
3587 	} else {
3588 		queue = &sFreePageQueue;
3589 		otherQueue = &sClearPageQueue;
3590 	}
3591 
3592 	ReadLocker locker(sFreePageQueuesLock);
3593 
3594 	vm_page* page = queue->RemoveHeadUnlocked();
3595 	if (page == NULL) {
3596 		// if the primary queue was empty, grab the page from the
3597 		// secondary queue
3598 		page = otherQueue->RemoveHeadUnlocked();
3599 
3600 		if (page == NULL) {
3601 			// Unlikely, but possible: the page we have reserved has moved
3602 			// between the queues after we checked the first queue. Grab the
3603 			// write locker to make sure this doesn't happen again.
3604 			locker.Unlock();
3605 			WriteLocker writeLocker(sFreePageQueuesLock);
3606 
3607 			page = queue->RemoveHead();
3608 			if (page == NULL)
3609 				page = otherQueue->RemoveHead();
3610 
3611 			if (page == NULL) {
3612 				panic("Had reserved page, but there is none!");
3613 				return NULL;
3614 			}
3615 
3616 			// downgrade to read lock
3617 			locker.Lock();
3618 		}
3619 	}
3620 
3621 	if (page->CacheRef() != NULL)
3622 		panic("supposed to be free page %p has cache\n", page);
3623 
3624 	DEBUG_PAGE_ACCESS_START(page);
3625 
3626 	int oldPageState = page->State();
3627 	page->SetState(pageState);
3628 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3629 	page->usage_count = 0;
3630 	page->accessed = false;
3631 	page->modified = false;
3632 
3633 	locker.Unlock();
3634 
3635 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3636 		sPageQueues[pageState].AppendUnlocked(page);
3637 
3638 	// clear the page, if we had to take it from the free queue and a clear
3639 	// page was requested
3640 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3641 		clear_page(page);
3642 
3643 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3644 	page->allocation_tracking_info.Init(
3645 		TA(AllocatePage(page->physical_page_number)));
3646 #else
3647 	TA(AllocatePage(page->physical_page_number));
3648 #endif
3649 
3650 	return page;
3651 }
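
// The flags parameter combines one of the page states (e.g. PAGE_STATE_ACTIVE
// or PAGE_STATE_WIRED) with the optional VM_PAGE_ALLOC_CLEAR and
// VM_PAGE_ALLOC_BUSY bits; PAGE_STATE_FREE and PAGE_STATE_CLEAR are explicitly
// ruled out by the assertions at the top of the function, since the caller is
// expected to take ownership of the returned page.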
3652 
3653 
3654 static void
3655 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3656 	VMPageQueue::PageList& clearPages)
3657 {
3658 	// Page lists are sorted, so remove tails before prepending to the respective queue.
3659 
3660 	while (vm_page* page = freePages.RemoveTail()) {
3661 		page->busy = false;
3662 		page->SetState(PAGE_STATE_FREE);
3663 		DEBUG_PAGE_ACCESS_END(page);
3664 		sFreePageQueue.PrependUnlocked(page);
3665 	}
3666 
3667 	while (vm_page* page = clearPages.RemoveTail()) {
3668 		page->busy = false;
3669 		page->SetState(PAGE_STATE_CLEAR);
3670 		DEBUG_PAGE_ACCESS_END(page);
3671 		sClearPageQueue.PrependUnlocked(page);
3672 	}
3673 
3674 	sFreePageCondition.NotifyAll();
3675 }
3676 
3677 
3678 /*!	Tries to allocate a contiguous run of \a length pages starting at
3679 	index \a start.
3680 
3681 	The caller must have write-locked the free/clear page queues. The function
3682 	will unlock them regardless of whether it succeeds or fails.
3683 
3684 	If the function fails, it cleans up after itself, i.e. it will free all
3685 	pages it managed to allocate.
3686 
3687 	\param start The start index (into \c sPages) of the run.
3688 	\param length The number of pages to allocate.
3689 	\param flags Page allocation flags. Encodes the state the function shall
3690 		set the allocated pages to, whether the pages shall be marked busy
3691 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3692 		(VM_PAGE_ALLOC_CLEAR).
3693 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3694 		in locked state. Will be unlocked by the function.
3695 	\return The index of the first page that could not be allocated. \a length
3696 		is returned when the function was successful.
3697 */
3698 static page_num_t
3699 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3700 	WriteLocker& freeClearQueueLocker)
3701 {
3702 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3703 	ASSERT(pageState != PAGE_STATE_FREE);
3704 	ASSERT(pageState != PAGE_STATE_CLEAR);
3705 	ASSERT(start + length <= sNumPages);
3706 
3707 	// Pull the free/clear pages out of their respective queues. Cached pages
3708 	// are allocated later.
3709 	page_num_t cachedPages = 0;
3710 	VMPageQueue::PageList freePages;
3711 	VMPageQueue::PageList clearPages;
3712 	page_num_t i = 0;
3713 	for (; i < length; i++) {
3714 		bool pageAllocated = true;
3715 		bool noPage = false;
3716 		vm_page& page = sPages[start + i];
3717 		switch (page.State()) {
3718 			case PAGE_STATE_CLEAR:
3719 				DEBUG_PAGE_ACCESS_START(&page);
3720 				sClearPageQueue.Remove(&page);
3721 				clearPages.Add(&page);
3722 				break;
3723 			case PAGE_STATE_FREE:
3724 				DEBUG_PAGE_ACCESS_START(&page);
3725 				sFreePageQueue.Remove(&page);
3726 				freePages.Add(&page);
3727 				break;
3728 			case PAGE_STATE_CACHED:
3729 				// We allocate cached pages later.
3730 				cachedPages++;
3731 				pageAllocated = false;
3732 				break;
3733 
3734 			default:
3735 				// Probably a page was cached when our caller checked. Now it's
3736 				// gone and we have to abort.
3737 				noPage = true;
3738 				break;
3739 		}
3740 
3741 		if (noPage)
3742 			break;
3743 
3744 		if (pageAllocated) {
3745 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3746 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3747 			page.usage_count = 0;
3748 			page.accessed = false;
3749 			page.modified = false;
3750 		}
3751 	}
3752 
3753 	if (i < length) {
3754 		// failed to allocate a page -- free all that we've got
3755 		allocate_page_run_cleanup(freePages, clearPages);
3756 		return i;
3757 	}
3758 
3759 	freeClearQueueLocker.Unlock();
3760 
3761 	if (cachedPages > 0) {
3762 		// allocate the pages that weren't free but cached
3763 		page_num_t freedCachedPages = 0;
3764 		page_num_t nextIndex = start;
3765 		vm_page* freePage = freePages.Head();
3766 		vm_page* clearPage = clearPages.Head();
3767 		while (cachedPages > 0) {
3768 			// skip, if we've already got the page
3769 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3770 				freePage = freePages.GetNext(freePage);
3771 				nextIndex++;
3772 				continue;
3773 			}
3774 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3775 				clearPage = clearPages.GetNext(clearPage);
3776 				nextIndex++;
3777 				continue;
3778 			}
3779 
3780 			// free the page, if it is still cached
3781 			vm_page& page = sPages[nextIndex];
3782 			if (!free_cached_page(&page, false)) {
3783 				// TODO: if the page turns out to have been freed already,
3784 				// there would be no need to fail
3785 				break;
3786 			}
3787 
3788 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3789 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3790 			page.usage_count = 0;
3791 			page.accessed = false;
3792 			page.modified = false;
3793 
3794 			freePages.InsertBefore(freePage, &page);
3795 			freedCachedPages++;
3796 			cachedPages--;
3797 			nextIndex++;
3798 		}
3799 
3800 		// If we have freed cached pages, we need to balance things.
3801 		if (freedCachedPages > 0)
3802 			unreserve_pages(freedCachedPages);
3803 
3804 		if (nextIndex - start < length) {
3805 			// failed to allocate all cached pages -- free all that we've got
3806 			freeClearQueueLocker.Lock();
3807 			allocate_page_run_cleanup(freePages, clearPages);
3808 			freeClearQueueLocker.Unlock();
3809 
3810 			return nextIndex - start;
3811 		}
3812 	}
3813 
3814 	// clear pages, if requested
3815 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3816 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3817 				vm_page* page = it.Next();) {
3818 			clear_page(page);
3819 		}
3820 	}
3821 
3822 	// add pages to target queue
3823 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3824 		freePages.MoveFrom(&clearPages);
3825 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3826 	}
3827 
3828 	// Note: We don't unreserve the pages since we pulled them out of the
3829 	// free/clear queues without adjusting sUnreservedFreePages.
3830 
3831 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3832 	AbstractTraceEntryWithStackTrace* traceEntry
3833 		= TA(AllocatePageRun(start, length));
3834 
3835 	for (page_num_t i = start; i < start + length; i++)
3836 		sPages[i].allocation_tracking_info.Init(traceEntry);
3837 #else
3838 	TA(AllocatePageRun(start, length));
3839 #endif
3840 
3841 	return length;
3842 }
3843 
3844 
3845 /*! Allocate a physically contiguous range of pages.
3846 
3847 	\param flags Page allocation flags. Encodes the state the function shall
3848 		set the allocated pages to, whether the pages shall be marked busy
3849 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3850 		(VM_PAGE_ALLOC_CLEAR).
3851 	\param length The number of contiguous pages to allocate.
3852 	\param restrictions Restrictions to the physical addresses of the page run
3853 		to allocate, including \c low_address, the first acceptable physical
3854 		address where the page run may start, \c high_address, the last
3855 		acceptable physical address where the page run may end (i.e. it must
3856 		hold \code runStartAddress + length <= high_address \endcode),
3857 		\c alignment, the alignment of the page run start address, and
3858 		\c boundary, multiples of which the page run must not cross.
3859 		Values set to \c 0 are ignored.
3860 	\param priority The page reservation priority (as passed to
3861 		vm_page_reserve_pages()).
3862 	\return The first page of the allocated page run on success; \c NULL
3863 		when the allocation failed.
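	A usage sketch (the values and flags chosen here are merely
	illustrative):
	\code
	// request 16 contiguous, cleared, busy, wired pages below 16 MB,
	// aligned to 64 KB
	physical_address_restrictions restrictions = {};
	restrictions.high_address = 16 * 1024 * 1024;
	restrictions.alignment = 64 * 1024;
	vm_page* firstPage = vm_page_allocate_page_run(
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR | VM_PAGE_ALLOC_BUSY, 16,
		&restrictions, VM_PRIORITY_SYSTEM);
	if (firstPage == NULL) {
		// the allocation failed
	}
	\endcode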
3864 */
3865 vm_page*
3866 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3867 	const physical_address_restrictions* restrictions, int priority)
3868 {
3869 	// compute start and end page index
3870 	page_num_t requestedStart
3871 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3872 			- sPhysicalPageOffset;
3873 	page_num_t start = requestedStart;
3874 	page_num_t end;
3875 	if (restrictions->high_address > 0) {
3876 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3877 				sPhysicalPageOffset)
3878 			- sPhysicalPageOffset;
3879 		end = std::min(end, sNumPages);
3880 	} else
3881 		end = sNumPages;
3882 
3883 	// compute alignment mask
3884 	page_num_t alignmentMask
3885 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3886 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3887 		// alignment must be a power of 2
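	// E.g. (illustrative, assuming 4 KB pages): a requested alignment of 64 KB
	// yields alignmentMask == 16 - 1 == 0xf, i.e. the run's first physical
	// page number must have its low four bits clear.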
3888 
3889 	// compute the boundary mask
3890 	uint32 boundaryMask = 0;
3891 	if (restrictions->boundary != 0) {
3892 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3893 		// boundary must be a power of two and not less than alignment and
3894 		// length
3895 		ASSERT(((boundary - 1) & boundary) == 0);
3896 		ASSERT(boundary >= alignmentMask + 1);
3897 		ASSERT(boundary >= length);
3898 
3899 		boundaryMask = -boundary;
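		// E.g. (illustrative, assuming 4 KB pages): a boundary of 64 KB gives
		// boundary == 16, so boundaryMask has all bits above the low four set
		// and two physical page numbers lie in the same 64 KB block iff they
		// agree in all bits selected by boundaryMask.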
3900 	}
3901 
3902 	vm_page_reservation reservation;
3903 	vm_page_reserve_pages(&reservation, length, priority);
3904 
3905 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3906 
3907 	// First we try to get a run with free pages only. If that fails, we also
3908 	// consider cached pages. If there are only a few free pages and many
3909 	// cached ones, the odds are that we won't find enough contiguous ones,
3910 	// so we skip the first iteration in this case.
3911 	int32 freePages = sUnreservedFreePages;
3912 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3913 
3914 	for (;;) {
3915 		if (alignmentMask != 0 || boundaryMask != 0) {
3916 			page_num_t offsetStart = start + sPhysicalPageOffset;
3917 
3918 			// enforce alignment
3919 			if ((offsetStart & alignmentMask) != 0)
3920 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3921 
3922 			// enforce boundary
3923 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3924 				+ length - 1)) & boundaryMask) != 0) {
3925 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3926 			}
3927 
3928 			start = offsetStart - sPhysicalPageOffset;
3929 		}
3930 
3931 		if (start + length > end) {
3932 			if (useCached == 0) {
3933 				// The first iteration with free pages only was unsuccessful.
3934 				// Try again also considering cached pages.
3935 				useCached = 1;
3936 				start = requestedStart;
3937 				continue;
3938 			}
3939 
3940 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3941 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3942 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3943 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3944 				end, restrictions->alignment, restrictions->boundary);
3945 
3946 			freeClearQueueLocker.Unlock();
3947 			vm_page_unreserve_pages(&reservation);
3948 			return NULL;
3949 		}
3950 
3951 		bool foundRun = true;
3952 		page_num_t i;
3953 		for (i = 0; i < length; i++) {
3954 			uint32 pageState = sPages[start + i].State();
3955 			if (pageState != PAGE_STATE_FREE
3956 				&& pageState != PAGE_STATE_CLEAR
3957 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3958 				foundRun = false;
3959 				break;
3960 			}
3961 		}
3962 
3963 		if (foundRun) {
3964 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3965 			if (i == length)
3966 				return &sPages[start];
3967 
3968 			// apparently a cached page couldn't be allocated -- skip it and
3969 			// continue
3970 			freeClearQueueLocker.Lock();
3971 		}
3972 
3973 		start += i + 1;
3974 	}
3975 }
3976 
3977 
3978 vm_page *
3979 vm_page_at_index(int32 index)
3980 {
3981 	return &sPages[index];
3982 }
3983 
3984 
3985 vm_page *
3986 vm_lookup_page(page_num_t pageNumber)
3987 {
3988 	if (pageNumber < sPhysicalPageOffset)
3989 		return NULL;
3990 
3991 	pageNumber -= sPhysicalPageOffset;
3992 	if (pageNumber >= sNumPages)
3993 		return NULL;
3994 
3995 	return &sPages[pageNumber];
3996 }
3997 
3998 
3999 bool
4000 vm_page_is_dummy(struct vm_page *page)
4001 {
4002 	return page < sPages || page >= sPages + sNumPages;
4003 }
4004 
4005 
4006 /*!	Free the page that belonged to a certain cache.
4007 	You can use vm_page_set_state() manually if you prefer, but only
4008 	if the page's state is not PAGE_STATE_MODIFIED.
4009 
4010 	\param cache The cache the page was previously owned by or NULL. The page
4011 		must have been removed from its cache before calling this method in
4012 		either case.
4013 	\param page The page to free.
4014 	\param reservation If not NULL, the page count of the reservation will be
4015 		incremented, thus allowing another page to be allocated in place of the
4016 		freed one at a later time.
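	A minimal usage sketch (a hypothetical caller that frees a page without
	a reservation):
	\code
	cache->Lock();
	cache->RemovePage(page);
	vm_page_free_etc(cache, page, NULL);
	cache->Unlock();
	\endcode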
4017 */
4018 void
4019 vm_page_free_etc(VMCache* cache, vm_page* page,
4020 	vm_page_reservation* reservation)
4021 {
4022 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4023 		&& page->State() != PAGE_STATE_CLEAR);
4024 
4025 	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
4026 		atomic_add(&sModifiedTemporaryPages, -1);
4027 
4028 	free_page(page, false);
4029 	if (reservation == NULL)
4030 		unreserve_pages(1);
4031 }
4032 
4033 
4034 void
4035 vm_page_set_state(vm_page *page, int pageState)
4036 {
4037 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4038 		&& page->State() != PAGE_STATE_CLEAR);
4039 
4040 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
4041 		free_page(page, pageState == PAGE_STATE_CLEAR);
4042 		unreserve_pages(1);
4043 	} else
4044 		set_page_state(page, pageState);
4045 }
4046 
4047 
4048 /*!	Moves a page to either the tail or the head of its current queue,
4049 	depending on \a tail.
4050 	The page must have a cache and the cache must be locked!
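	A minimal usage sketch (assuming the caller already holds the page's
	cache lock):
	\code
	vm_page_requeue(page, true);
		// move the page to the tail of its current queue
	vm_page_requeue(page, false);
		// or move it to the head instead
	\endcode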
4051 */
4052 void
4053 vm_page_requeue(struct vm_page *page, bool tail)
4054 {
4055 	PAGE_ASSERT(page, page->Cache() != NULL);
4056 	page->Cache()->AssertLocked();
4057 	// DEBUG_PAGE_ACCESS_CHECK(page);
4058 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
4059 		// when it requeues busy pages. The reason is that vm_soft_fault()
4060 		// (respectively fault_get_page()) and the file cache keep newly
4061 		// allocated pages accessed while they are reading them from disk. It
4062 		// would probably be better to change that code and reenable this
4063 		// check.
4064 
4065 	VMPageQueue *queue = NULL;
4066 
4067 	switch (page->State()) {
4068 		case PAGE_STATE_ACTIVE:
4069 			queue = &sActivePageQueue;
4070 			break;
4071 		case PAGE_STATE_INACTIVE:
4072 			queue = &sInactivePageQueue;
4073 			break;
4074 		case PAGE_STATE_MODIFIED:
4075 			queue = &sModifiedPageQueue;
4076 			break;
4077 		case PAGE_STATE_CACHED:
4078 			queue = &sCachedPageQueue;
4079 			break;
4080 		case PAGE_STATE_FREE:
4081 		case PAGE_STATE_CLEAR:
4082 			panic("vm_page_requeue() called for free/clear page %p", page);
4083 			return;
4084 		case PAGE_STATE_WIRED:
4085 		case PAGE_STATE_UNUSED:
4086 			return;
4087 		default:
4088 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
4089 				page, page->State());
4090 			break;
4091 	}
4092 
4093 	queue->RequeueUnlocked(page, tail);
4094 }
4095 
4096 
4097 page_num_t
4098 vm_page_num_pages(void)
4099 {
4100 	return sNumPages - sNonExistingPages;
4101 }
4102 
4103 
4104 /*! There is a subtle distinction between the page counts returned by
4105 	this function and vm_page_num_free_pages():
4106 	The latter returns the number of pages that are completely uncommitted,
4107 	whereas this one also counts pages that can be made available by
4108 	reclaiming them (IOW it factors in things like cached pages as
4109 	available).
4110 */
4111 page_num_t
4112 vm_page_num_available_pages(void)
4113 {
4114 	return vm_available_memory() / B_PAGE_SIZE;
4115 }
4116 
4117 
4118 page_num_t
4119 vm_page_num_free_pages(void)
4120 {
4121 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4122 	return count > 0 ? count : 0;
4123 }
4124 
4125 
4126 page_num_t
4127 vm_page_num_unused_pages(void)
4128 {
4129 	int32 count = sUnreservedFreePages;
4130 	return count > 0 ? count : 0;
4131 }
4132 
4133 
4134 void
4135 vm_page_get_stats(system_info *info)
4136 {
4137 	// Note: there's no locking protecting any of the queues or counters here,
4138 	// so we run the risk of getting bogus values when evaluating them
4139 	// throughout this function. As these stats are for informational purposes
4140 	// only, it is not really worth introducing such locking. Therefore we just
4141 	// ensure that we don't under- or overflow any of the values.
4142 
4143 	// The pages used for the block cache buffers. Those should not be counted
4144 	// as used but as cached pages.
4145 	// TODO: We should subtract the blocks that are in use ATM, since those
4146 	// can't really be freed in a low memory situation.
4147 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4148 	info->block_cache_pages = blockCachePages;
4149 
4150 	// Non-temporary modified pages are special, as they represent pages that
4151 	// can be written back and thus freed if necessary, which for our purposes
4152 	// basically makes them cached pages with a higher reclaim overhead. The
4153 	// modified queue count is therefore split into temporary and non-temporary
4154 	// counts that are then added to the corresponding numbers.
4155 	page_num_t modifiedNonTemporaryPages
4156 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4157 
4158 	info->max_pages = vm_page_num_pages();
4159 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4160 		+ blockCachePages;
4161 
4162 	// max_pages is composed of:
4163 	//	active + inactive + unused + wired + modified + cached + free + clear
4164 	// So taking out the cached (including modified non-temporary), free and
4165 	// clear ones leaves us with all used pages.
4166 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4167 		+ sClearPageQueue.Count();
4168 	info->used_pages = subtractPages > info->max_pages
4169 		? 0 : info->max_pages - subtractPages;
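	// E.g. (illustrative numbers): with max_pages == 1000, cached_pages == 300
	// and 200 free + clear pages, subtractPages == 500 and used_pages == 500.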
4170 
4171 	if (info->used_pages + info->cached_pages > info->max_pages) {
4172 		// Something was shuffled around while we were summing up the counts.
4173 		// Make the values sane, preferring the worse case of more used pages.
4174 		info->cached_pages = info->max_pages - info->used_pages;
4175 	}
4176 
4177 	info->page_faults = vm_num_page_faults();
4178 	info->ignored_pages = sIgnoredPages;
4179 
4180 	// TODO: We don't consider pages used for page directories/tables yet.
4181 }
4182 
4183 
4184 /*!	Returns the greatest address within the last page of accessible physical
4185 	memory.
4186 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4187 	means that the last page ends at exactly 4 GB.
4188 */
4189 phys_addr_t
4190 vm_page_max_address()
4191 {
4192 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4193 }
4194 
4195 
4196 RANGE_MARKER_FUNCTION_END(vm_page)
4197