xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 1026b0a1a76dc88927bb8175c470f638dc5464ee)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 16
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserve that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum value the vm_page::usage_count may reach.
97 static const int32 kPageUsageMax = 64;
98 // Amount by which the usage_count of an accessed page is raised in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // Amount by which the usage_count of an unaccessed page is lowered in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static vint32 sUnreservedFreePages;
123 static vint32 sUnsatisfiedPageReservations;
124 static vint32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a
131 // write lock; otherwise a read lock suffices (each queue still has a spinlock
132 // to guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
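/*!	Condition wrapper used to wake up a daemon thread (cf. sPageWriterCondition
	and sPageDaemonCondition below). WakeUp() sets an "activated" flag, so a
	notification that arrives while the daemon is still busy is not lost: the
	next Wait() returns immediately unless the caller clears the flag first.
*/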
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %lu", fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %lu", fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %lu", fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %lu", fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
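/*!	Sums up the tracked page allocations per caller: for each initialized
	tracking info the caller is determined from the captured stack trace
	(skipping this file's own code) and its entry in sCallerInfoTable is
	incremented. Optionally the tracking info is cleared afterwards.
*/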
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
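/*!	Prints one line per tracked page allocation (page number, team, thread,
	and time, optionally with stack trace), restricted by the given page
	number, team, and thread filters.
*/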
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// without a trace entry we cannot match the team/thread filters
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
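/*!	Debugger command ("find_page"): checks all page queues for the given
	vm_page pointer and reports which queue, if any, currently contains it.
*/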
763 static int
764 find_page(int argc, char **argv)
765 {
766 	struct vm_page *page;
767 	addr_t address;
768 	int32 index = 1;
769 	int i;
770 
771 	struct {
772 		const char*	name;
773 		VMPageQueue*	queue;
774 	} pageQueueInfos[] = {
775 		{ "free",		&sFreePageQueue },
776 		{ "clear",		&sClearPageQueue },
777 		{ "modified",	&sModifiedPageQueue },
778 		{ "active",		&sActivePageQueue },
779 		{ "inactive",	&sInactivePageQueue },
780 		{ "cached",		&sCachedPageQueue },
781 		{ NULL, NULL }
782 	};
783 
784 	if (argc < 2
785 		|| strlen(argv[index]) <= 2
786 		|| argv[index][0] != '0'
787 		|| argv[index][1] != 'x') {
788 		kprintf("usage: find_page <address>\n");
789 		return 0;
790 	}
791 
792 	address = strtoul(argv[index], NULL, 0);
793 	page = (vm_page*)address;
794 
795 	for (i = 0; pageQueueInfos[i].name; i++) {
796 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
797 		while (vm_page* p = it.Next()) {
798 			if (p == page) {
799 				kprintf("found page %p in queue %p (%s)\n", page,
800 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
801 				return 0;
802 			}
803 		}
804 	}
805 
806 	kprintf("page %p isn't in any queue\n", page);
807 
808 	return 0;
809 }
810 
811 
812 const char *
813 page_state_to_string(int state)
814 {
815 	switch(state) {
816 		case PAGE_STATE_ACTIVE:
817 			return "active";
818 		case PAGE_STATE_INACTIVE:
819 			return "inactive";
820 		case PAGE_STATE_MODIFIED:
821 			return "modified";
822 		case PAGE_STATE_CACHED:
823 			return "cached";
824 		case PAGE_STATE_FREE:
825 			return "free";
826 		case PAGE_STATE_CLEAR:
827 			return "clear";
828 		case PAGE_STATE_WIRED:
829 			return "wired";
830 		case PAGE_STATE_UNUSED:
831 			return "unused";
832 		default:
833 			return "unknown";
834 	}
835 }
836 
837 
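/*!	Debugger command ("page"): prints the fields and mappings of a vm_page.
	The argument is interpreted as a vm_page pointer by default, as a virtual
	address with "-v", or as a physical address with "-p"; "-m" additionally
	searches all address spaces for mappings of the page.
*/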
838 static int
839 dump_page(int argc, char **argv)
840 {
841 	bool addressIsPointer = true;
842 	bool physical = false;
843 	bool searchMappings = false;
844 	int32 index = 1;
845 
846 	while (index < argc) {
847 		if (argv[index][0] != '-')
848 			break;
849 
850 		if (!strcmp(argv[index], "-p")) {
851 			addressIsPointer = false;
852 			physical = true;
853 		} else if (!strcmp(argv[index], "-v")) {
854 			addressIsPointer = false;
855 		} else if (!strcmp(argv[index], "-m")) {
856 			searchMappings = true;
857 		} else {
858 			print_debugger_command_usage(argv[0]);
859 			return 0;
860 		}
861 
862 		index++;
863 	}
864 
865 	if (index + 1 != argc) {
866 		print_debugger_command_usage(argv[0]);
867 		return 0;
868 	}
869 
870 	uint64 value;
871 	if (!evaluate_debug_expression(argv[index], &value, false))
872 		return 0;
873 
874 	uint64 pageAddress = value;
875 	struct vm_page* page;
876 
877 	if (addressIsPointer) {
878 		page = (struct vm_page *)(addr_t)pageAddress;
879 	} else {
880 		if (!physical) {
881 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
882 
883 			if (debug_get_debugged_thread()->team->address_space != NULL)
884 				addressSpace = debug_get_debugged_thread()->team->address_space;
885 
886 			uint32 flags = 0;
887 			phys_addr_t physicalAddress;
888 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
889 					&physicalAddress, &flags) != B_OK
890 				|| (flags & PAGE_PRESENT) == 0) {
891 				kprintf("Virtual address not mapped to a physical page in this "
892 					"address space.\n");
893 				return 0;
894 			}
895 			pageAddress = physicalAddress;
896 		}
897 
898 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
899 	}
900 
901 	kprintf("PAGE: %p\n", page);
902 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
903 		page->queue_link.previous);
904 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
905 		page->physical_page_number);
906 	kprintf("cache:           %p\n", page->Cache());
907 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
908 	kprintf("cache_next:      %p\n", page->cache_next);
909 	kprintf("state:           %s\n", page_state_to_string(page->State()));
910 	kprintf("wired_count:     %d\n", page->WiredCount());
911 	kprintf("usage_count:     %d\n", page->usage_count);
912 	kprintf("busy:            %d\n", page->busy);
913 	kprintf("busy_writing:    %d\n", page->busy_writing);
914 	kprintf("accessed:        %d\n", page->accessed);
915 	kprintf("modified:        %d\n", page->modified);
916 	#if DEBUG_PAGE_QUEUE
917 		kprintf("queue:           %p\n", page->queue);
918 	#endif
919 	#if DEBUG_PAGE_ACCESS
920 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
921 	#endif
922 	kprintf("area mappings:\n");
923 
924 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
925 	vm_page_mapping *mapping;
926 	while ((mapping = iterator.Next()) != NULL) {
927 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
928 		mapping = mapping->page_link.next;
929 	}
930 
931 	if (searchMappings) {
932 		kprintf("all mappings:\n");
933 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
934 		while (addressSpace != NULL) {
935 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
936 			for (addr_t address = addressSpace->Base(); pageCount != 0;
937 					address += B_PAGE_SIZE, pageCount--) {
938 				phys_addr_t physicalAddress;
939 				uint32 flags = 0;
940 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
941 						&physicalAddress, &flags) == B_OK
942 					&& (flags & PAGE_PRESENT) != 0
943 					&& physicalAddress / B_PAGE_SIZE
944 						== page->physical_page_number) {
945 					VMArea* area = addressSpace->LookupArea(address);
946 					kprintf("  aspace %ld, area %ld: %#" B_PRIxADDR
947 						" (%c%c%s%s)\n", addressSpace->ID(),
948 						area != NULL ? area->id : -1, address,
949 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
950 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
951 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
952 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
953 				}
954 			}
955 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
956 		}
957 	}
958 
959 	set_debug_variable("_cache", (addr_t)page->Cache());
960 	#if DEBUG_PAGE_ACCESS
961 		set_debug_variable("_accessor", page->accessing_thread);
962 	#endif
963 
964 	return 0;
965 }
966 
967 
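/*!	Debugger command ("page_queue"): prints head, tail, and count of the page
	queue given by address or name, and with an extra argument also lists the
	pages it contains.
*/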
968 static int
969 dump_page_queue(int argc, char **argv)
970 {
971 	struct VMPageQueue *queue;
972 
973 	if (argc < 2) {
974 		kprintf("usage: page_queue <address/name> [list]\n");
975 		return 0;
976 	}
977 
978 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
979 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
980 	else if (!strcmp(argv[1], "free"))
981 		queue = &sFreePageQueue;
982 	else if (!strcmp(argv[1], "clear"))
983 		queue = &sClearPageQueue;
984 	else if (!strcmp(argv[1], "modified"))
985 		queue = &sModifiedPageQueue;
986 	else if (!strcmp(argv[1], "active"))
987 		queue = &sActivePageQueue;
988 	else if (!strcmp(argv[1], "inactive"))
989 		queue = &sInactivePageQueue;
990 	else if (!strcmp(argv[1], "cached"))
991 		queue = &sCachedPageQueue;
992 	else {
993 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
994 		return 0;
995 	}
996 
997 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
998 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
999 		queue->Count());
1000 
1001 	if (argc == 3) {
1002 		struct vm_page *page = queue->Head();
1003 		int i;
1004 
1005 		kprintf("page        cache       type       state  wired  usage\n");
1006 		for (i = 0; page != NULL; i++, page = queue->Next(page)) {
1007 			const char *type = "none";
1008 			if (page->Cache() != NULL) {
1009 				switch (page->Cache()->type) {
1010 					case CACHE_TYPE_RAM:
1011 						type = "RAM";
1012 						break;
1013 					case CACHE_TYPE_DEVICE:
1014 						type = "device";
1015 						break;
1016 					case CACHE_TYPE_VNODE:
1017 						type = "vnode";
1018 						break;
1019 					case CACHE_TYPE_NULL:
1020 						type = "null";
1021 						break;
1022 					default:
1023 						type = "???";
1024 						break;
1025 				}
1026 			}
1027 
1028 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1029 				type, page_state_to_string(page->State()),
1030 				page->WiredCount(), page->usage_count);
1031 		}
1032 	}
1033 	return 0;
1034 }
1035 
1036 
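/*!	Debugger command ("page_stats"): prints per-state page counts, the longest
	free and free/cached page runs, the threads waiting for page reservations,
	and the individual page queues.
*/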
1037 static int
1038 dump_page_stats(int argc, char **argv)
1039 {
1040 	page_num_t swappableModified = 0;
1041 	page_num_t swappableModifiedInactive = 0;
1042 
1043 	size_t counter[8];
1044 	size_t busyCounter[8];
1045 	memset(counter, 0, sizeof(counter));
1046 	memset(busyCounter, 0, sizeof(busyCounter));
1047 
1048 	struct page_run {
1049 		page_num_t	start;
1050 		page_num_t	end;
1051 
1052 		page_num_t Length() const	{ return end - start; }
1053 	};
1054 
1055 	page_run currentFreeRun = { 0, 0 };
1056 	page_run currentCachedRun = { 0, 0 };
1057 	page_run longestFreeRun = { 0, 0 };
1058 	page_run longestCachedRun = { 0, 0 };
1059 
1060 	for (page_num_t i = 0; i < sNumPages; i++) {
1061 		if (sPages[i].State() > 7) {
1062 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1063 				&sPages[i]);
1064 		}
1065 
1066 		uint32 pageState = sPages[i].State();
1067 
1068 		counter[pageState]++;
1069 		if (sPages[i].busy)
1070 			busyCounter[pageState]++;
1071 
1072 		if (pageState == PAGE_STATE_MODIFIED
1073 			&& sPages[i].Cache() != NULL
1074 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1075 			swappableModified++;
1076 			if (sPages[i].usage_count == 0)
1077 				swappableModifiedInactive++;
1078 		}
1079 
1080 		// track free and cached pages runs
1081 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1082 			currentFreeRun.end = i + 1;
1083 			currentCachedRun.end = i + 1;
1084 		} else {
1085 			if (currentFreeRun.Length() > longestFreeRun.Length())
1086 				longestFreeRun = currentFreeRun;
1087 			currentFreeRun.start = currentFreeRun.end = i + 1;
1088 
1089 			if (pageState == PAGE_STATE_CACHED) {
1090 				currentCachedRun.end = i + 1;
1091 			} else {
1092 				if (currentCachedRun.Length() > longestCachedRun.Length())
1093 					longestCachedRun = currentCachedRun;
1094 				currentCachedRun.start = currentCachedRun.end = i + 1;
1095 			}
1096 		}
1097 	}
1098 
1099 	kprintf("page stats:\n");
1100 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1101 
1102 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1103 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1104 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1105 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1106 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1107 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1108 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1109 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1110 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1111 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1112 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1113 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1114 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1115 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1116 
1117 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1118 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1119 		sUnsatisfiedPageReservations);
1120 	kprintf("mapped pages: %lu\n", gMappedPagesCount);
1121 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1122 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1123 		sPages[longestFreeRun.start].physical_page_number);
1124 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1125 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1126 		sPages[longestCachedRun.start].physical_page_number);
1127 
1128 	kprintf("waiting threads:\n");
1129 	for (PageReservationWaiterList::Iterator it
1130 			= sPageReservationWaiters.GetIterator();
1131 		PageReservationWaiter* waiter = it.Next();) {
1132 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1133 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1134 			waiter->missing, waiter->dontTouch);
1135 	}
1136 
1137 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1138 		sFreePageQueue.Count());
1139 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1140 		sClearPageQueue.Count());
1141 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1142 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1143 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1144 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1145 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1146 		&sActivePageQueue, sActivePageQueue.Count());
1147 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1148 		&sInactivePageQueue, sInactivePageQueue.Count());
1149 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1150 		&sCachedPageQueue, sCachedPageQueue.Count());
1151 	return 0;
1152 }
1153 
1154 
1155 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1156 
1157 static caller_info*
1158 get_caller_info(addr_t caller)
1159 {
1160 	// find the caller info
1161 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1162 		if (caller == sCallerInfoTable[i].caller)
1163 			return &sCallerInfoTable[i];
1164 	}
1165 
1166 	// not found, add a new entry, if there are free slots
1167 	if (sCallerInfoCount >= kCallerInfoTableSize)
1168 		return NULL;
1169 
1170 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1171 	info->caller = caller;
1172 	info->count = 0;
1173 
1174 	return info;
1175 }
1176 
1177 
1178 static int
1179 caller_info_compare_count(const void* _a, const void* _b)
1180 {
1181 	const caller_info* a = (const caller_info*)_a;
1182 	const caller_info* b = (const caller_info*)_b;
1183 	return (int)(b->count - a->count);
1184 }
1185 
1186 
1187 static int
1188 dump_page_allocations_per_caller(int argc, char** argv)
1189 {
1190 	bool resetAllocationInfos = false;
1191 	bool printDetails = false;
1192 	addr_t caller = 0;
1193 
1194 	for (int32 i = 1; i < argc; i++) {
1195 		if (strcmp(argv[i], "-d") == 0) {
1196 			uint64 callerAddress;
1197 			if (++i >= argc
1198 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1199 				print_debugger_command_usage(argv[0]);
1200 				return 0;
1201 			}
1202 
1203 			caller = callerAddress;
1204 			printDetails = true;
1205 		} else if (strcmp(argv[i], "-r") == 0) {
1206 			resetAllocationInfos = true;
1207 		} else {
1208 			print_debugger_command_usage(argv[0]);
1209 			return 0;
1210 		}
1211 	}
1212 
1213 	sCallerInfoCount = 0;
1214 
1215 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1216 	AllocationDetailPrinterCallback detailsCallback(caller);
1217 	AllocationTrackingCallback& callback = printDetails
1218 		? (AllocationTrackingCallback&)detailsCallback
1219 		: (AllocationTrackingCallback&)collectorCallback;
1220 
1221 	for (page_num_t i = 0; i < sNumPages; i++)
1222 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1223 
1224 	if (printDetails)
1225 		return 0;
1226 
1227 	// sort the array
1228 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1229 		&caller_info_compare_count);
1230 
1231 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1232 
1233 	size_t totalAllocationCount = 0;
1234 
1235 	kprintf("     count      caller\n");
1236 	kprintf("----------------------------------\n");
1237 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1238 		caller_info& info = sCallerInfoTable[i];
1239 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1240 
1241 		const char* symbol;
1242 		const char* imageName;
1243 		bool exactMatch;
1244 		addr_t baseAddress;
1245 
1246 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1247 				&imageName, &exactMatch) == B_OK) {
1248 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1249 				info.caller - baseAddress, imageName,
1250 				exactMatch ? "" : " (nearest)");
1251 		} else
1252 			kprintf("\n");
1253 
1254 		totalAllocationCount += info.count;
1255 	}
1256 
1257 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1258 		totalAllocationCount);
1259 
1260 	return 0;
1261 }
1262 
1263 
1264 static int
1265 dump_page_allocation_infos(int argc, char** argv)
1266 {
1267 	page_num_t pageFilter = 0;
1268 	team_id teamFilter = -1;
1269 	thread_id threadFilter = -1;
1270 	bool printStackTraces = false;
1271 
1272 	for (int32 i = 1; i < argc; i++) {
1273 		if (strcmp(argv[i], "--stacktrace") == 0)
1274 			printStackTraces = true;
1275 		else if (strcmp(argv[i], "-p") == 0) {
1276 			uint64 pageNumber;
1277 			if (++i >= argc
1278 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1279 				print_debugger_command_usage(argv[0]);
1280 				return 0;
1281 			}
1282 
1283 			pageFilter = pageNumber;
1284 		} else if (strcmp(argv[i], "--team") == 0) {
1285 			uint64 team;
1286 			if (++i >= argc
1287 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1288 				print_debugger_command_usage(argv[0]);
1289 				return 0;
1290 			}
1291 
1292 			teamFilter = team;
1293 		} else if (strcmp(argv[i], "--thread") == 0) {
1294 			uint64 thread;
1295 			if (++i >= argc
1296 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1297 				print_debugger_command_usage(argv[0]);
1298 				return 0;
1299 			}
1300 
1301 			threadFilter = thread;
1302 		} else {
1303 			print_debugger_command_usage(argv[0]);
1304 			return 0;
1305 		}
1306 	}
1307 
1308 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1309 		teamFilter, threadFilter);
1310 
1311 	for (page_num_t i = 0; i < sNumPages; i++)
1312 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1313 
1314 	return 0;
1315 }
1316 
1317 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1318 
1319 
1320 #ifdef TRACK_PAGE_USAGE_STATS
1321 
1322 static void
1323 track_page_usage(vm_page* page)
1324 {
1325 	if (page->WiredCount() == 0) {
1326 		sNextPageUsage[(int32)page->usage_count + 128]++;
1327 		sNextPageUsagePageCount++;
1328 	}
1329 }
1330 
1331 
1332 static void
1333 update_page_usage_stats()
1334 {
1335 	std::swap(sPageUsage, sNextPageUsage);
1336 	sPageUsagePageCount = sNextPageUsagePageCount;
1337 
1338 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1339 	sNextPageUsagePageCount = 0;
1340 
1341 	// compute average
1342 	if (sPageUsagePageCount > 0) {
1343 		int64 sum = 0;
1344 		for (int32 i = 0; i < 256; i++)
1345 			sum += (int64)sPageUsage[i] * (i - 128);
1346 
1347 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1348 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1349 	}
1350 }
1351 
1352 
1353 static int
1354 dump_page_usage_stats(int argc, char** argv)
1355 {
1356 	kprintf("distribution of page usage counts (%lu pages):",
1357 		sPageUsagePageCount);
1358 
1359 	int64 sum = 0;
1360 	for (int32 i = 0; i < 256; i++) {
1361 		if (i % 8 == 0)
1362 			kprintf("\n%4ld:", i - 128);
1363 
1364 		int64 count = sPageUsage[i];
1365 		sum += count * (i - 128);
1366 
1367 		kprintf("  %9llu", count);
1368 	}
1369 
1370 	kprintf("\n\n");
1371 
1372 	kprintf("average usage count: %f\n",
1373 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1374 
1375 	return 0;
1376 }
1377 
1378 #endif	// TRACK_PAGE_USAGE_STATS
1379 
1380 
1381 // #pragma mark - vm_page
1382 
1383 
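/*!	Sets the initial state of the page without generating a page state tracing
	entry; SetState() below does the same but also records the transition when
	page state tracing is enabled.
*/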
1384 inline void
1385 vm_page::InitState(uint8 newState)
1386 {
1387 	state = newState;
1388 }
1389 
1390 
1391 inline void
1392 vm_page::SetState(uint8 newState)
1393 {
1394 	TPS(SetPageState(this, newState));
1395 
1396 	state = newState;
1397 }
1398 
1399 
1400 // #pragma mark -
1401 
1402 
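/*!	Fills in the given page_stats structure from the global counters. The
	values are read without synchronization, so the result is not an exact
	snapshot.
*/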
1403 static void
1404 get_page_stats(page_stats& _pageStats)
1405 {
1406 	_pageStats.totalFreePages = sUnreservedFreePages;
1407 	_pageStats.cachedPages = sCachedPageQueue.Count();
1408 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1409 	// TODO: We don't get an actual snapshot here!
1410 }
1411 
1412 
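/*!	Returns whether the page daemon needs to actively free pages, i.e. whether
	the free plus cached pages fall short of the unsatisfied reservations plus
	the free/cached pages target.
*/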
1413 static bool
1414 do_active_paging(const page_stats& pageStats)
1415 {
1416 	return pageStats.totalFreePages + pageStats.cachedPages
1417 		< pageStats.unsatisfiedReservations
1418 			+ (int32)sFreeOrCachedPagesTarget;
1419 }
1420 
1421 
1422 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1423 	\a count. Doesn't touch the last \a dontTouch pages of
1424 	\c sUnreservedFreePages, though.
1425 	\return The number of actually reserved pages.
1426 */
1427 static uint32
1428 reserve_some_pages(uint32 count, uint32 dontTouch)
1429 {
1430 	while (true) {
1431 		int32 freePages = sUnreservedFreePages;
1432 		if (freePages <= (int32)dontTouch)
1433 			return 0;
1434 
1435 		int32 toReserve = std::min(count, freePages - dontTouch);
1436 		if (atomic_test_and_set(&sUnreservedFreePages,
1437 					freePages - toReserve, freePages)
1438 				== freePages) {
1439 			return toReserve;
1440 		}
1441 
1442 		// the count changed in the meantime -- retry
1443 	}
1444 }
1445 
1446 
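/*!	Tries to satisfy the pending page reservations from the front of the
	waiter list: each waiter gets as many of its missing pages as can be
	reserved without touching its dontTouch limit. Fully satisfied waiters are
	removed from the list and unblocked; the function stops as soon as a
	waiter cannot be satisfied completely.
*/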
1447 static void
1448 wake_up_page_reservation_waiters()
1449 {
1450 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1451 
1452 	// TODO: If this is a low priority thread, we might want to disable
1453 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1454 	// high priority threads will be kept waiting while a medium priority thread
1455 	// prevents us from running.
1456 
1457 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1458 		int32 reserved = reserve_some_pages(waiter->missing,
1459 			waiter->dontTouch);
1460 		if (reserved == 0)
1461 			return;
1462 
1463 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1464 		waiter->missing -= reserved;
1465 
1466 		if (waiter->missing > 0)
1467 			return;
1468 
1469 		sPageReservationWaiters.Remove(waiter);
1470 
1471 		InterruptsSpinLocker schedulerLocker(gSchedulerLock);
1472 		thread_unblock_locked(waiter->thread, B_OK);
1473 	}
1474 }
1475 
1476 
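/*!	Returns \a count pages to the free page reserve and, if there are
	unsatisfied reservations, tries to wake up waiting threads.
*/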
1477 static inline void
1478 unreserve_pages(uint32 count)
1479 {
1480 	atomic_add(&sUnreservedFreePages, count);
1481 	if (sUnsatisfiedPageReservations != 0)
1482 		wake_up_page_reservation_waiters();
1483 }
1484 
1485 
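/*!	Moves the given page, which must be unmapped and must no longer belong to
	a cache, from its current queue to the free or clear queue (depending on
	\a clear) and returns it to the free page reserve.
*/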
1486 static void
1487 free_page(vm_page* page, bool clear)
1488 {
1489 	DEBUG_PAGE_ACCESS_CHECK(page);
1490 
1491 	PAGE_ASSERT(page, !page->IsMapped());
1492 
1493 	VMPageQueue* fromQueue;
1494 
1495 	switch (page->State()) {
1496 		case PAGE_STATE_ACTIVE:
1497 			fromQueue = &sActivePageQueue;
1498 			break;
1499 		case PAGE_STATE_INACTIVE:
1500 			fromQueue = &sInactivePageQueue;
1501 			break;
1502 		case PAGE_STATE_MODIFIED:
1503 			fromQueue = &sModifiedPageQueue;
1504 			break;
1505 		case PAGE_STATE_CACHED:
1506 			fromQueue = &sCachedPageQueue;
1507 			break;
1508 		case PAGE_STATE_FREE:
1509 		case PAGE_STATE_CLEAR:
1510 			panic("free_page(): page %p already free", page);
1511 			return;
1512 		case PAGE_STATE_WIRED:
1513 		case PAGE_STATE_UNUSED:
1514 			fromQueue = NULL;
1515 			break;
1516 		default:
1517 			panic("free_page(): page %p in invalid state %d",
1518 				page, page->State());
1519 			return;
1520 	}
1521 
1522 	if (page->CacheRef() != NULL)
1523 		panic("to be freed page %p has cache", page);
1524 	if (page->IsMapped())
1525 		panic("to be freed page %p has mappings", page);
1526 
1527 	if (fromQueue != NULL)
1528 		fromQueue->RemoveUnlocked(page);
1529 
1530 	TA(FreePage(page->physical_page_number));
1531 
1532 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1533 	page->allocation_tracking_info.Clear();
1534 #endif
1535 
1536 	ReadLocker locker(sFreePageQueuesLock);
1537 
1538 	DEBUG_PAGE_ACCESS_END(page);
1539 
1540 	if (clear) {
1541 		page->SetState(PAGE_STATE_CLEAR);
1542 		sClearPageQueue.PrependUnlocked(page);
1543 	} else {
1544 		page->SetState(PAGE_STATE_FREE);
1545 		sFreePageQueue.PrependUnlocked(page);
1546 	}
1547 
1548 	locker.Unlock();
1549 
1550 	unreserve_pages(1);
1551 }
1552 
1553 
1554 /*!	The caller must make sure that no-one else tries to change the page's state
1555 	while the function is called. If the page has a cache, this can be done by
1556 	locking the cache.
1557 */
1558 static void
1559 set_page_state(vm_page *page, int pageState)
1560 {
1561 	DEBUG_PAGE_ACCESS_CHECK(page);
1562 
1563 	if (pageState == page->State())
1564 		return;
1565 
1566 	VMPageQueue* fromQueue;
1567 
1568 	switch (page->State()) {
1569 		case PAGE_STATE_ACTIVE:
1570 			fromQueue = &sActivePageQueue;
1571 			break;
1572 		case PAGE_STATE_INACTIVE:
1573 			fromQueue = &sInactivePageQueue;
1574 			break;
1575 		case PAGE_STATE_MODIFIED:
1576 			fromQueue = &sModifiedPageQueue;
1577 			break;
1578 		case PAGE_STATE_CACHED:
1579 			fromQueue = &sCachedPageQueue;
1580 			break;
1581 		case PAGE_STATE_FREE:
1582 		case PAGE_STATE_CLEAR:
1583 			panic("set_page_state(): page %p is free/clear", page);
1584 			return;
1585 		case PAGE_STATE_WIRED:
1586 		case PAGE_STATE_UNUSED:
1587 			fromQueue = NULL;
1588 			break;
1589 		default:
1590 			panic("set_page_state(): page %p in invalid state %d",
1591 				page, page->State());
1592 			return;
1593 	}
1594 
1595 	VMPageQueue* toQueue;
1596 
1597 	switch (pageState) {
1598 		case PAGE_STATE_ACTIVE:
1599 			toQueue = &sActivePageQueue;
1600 			break;
1601 		case PAGE_STATE_INACTIVE:
1602 			toQueue = &sInactivePageQueue;
1603 			break;
1604 		case PAGE_STATE_MODIFIED:
1605 			toQueue = &sModifiedPageQueue;
1606 			break;
1607 		case PAGE_STATE_CACHED:
1608 			PAGE_ASSERT(page, !page->IsMapped());
1609 			PAGE_ASSERT(page, !page->modified);
1610 			toQueue = &sCachedPageQueue;
1611 			break;
1612 		case PAGE_STATE_FREE:
1613 		case PAGE_STATE_CLEAR:
1614 			panic("set_page_state(): target state is free/clear");
1615 			return;
1616 		case PAGE_STATE_WIRED:
1617 		case PAGE_STATE_UNUSED:
1618 			toQueue = NULL;
1619 			break;
1620 		default:
1621 			panic("set_page_state(): invalid target state %d", pageState);
1622 			return;
1623 	}
1624 
1625 	VMCache* cache = page->Cache();
1626 	if (cache != NULL && cache->temporary) {
1627 		if (pageState == PAGE_STATE_MODIFIED)
1628 			atomic_add(&sModifiedTemporaryPages, 1);
1629 		else if (page->State() == PAGE_STATE_MODIFIED)
1630 			atomic_add(&sModifiedTemporaryPages, -1);
1631 	}
1632 
1633 	// move the page
1634 	if (toQueue == fromQueue) {
1635 		// Note: Theoretically we are required to lock when changing the page
1636 		// state, even if we don't change the queue. We actually don't have to
1637 		// do this, though, since only for the active queue there are different
1638 		// page states and active pages have a cache that must be locked at
1639 		// this point. So we rely on the fact that everyone must lock the cache
1640 		// before trying to change/interpret the page state.
1641 		PAGE_ASSERT(page, cache != NULL);
1642 		cache->AssertLocked();
1643 		page->SetState(pageState);
1644 	} else {
1645 		if (fromQueue != NULL)
1646 			fromQueue->RemoveUnlocked(page);
1647 
1648 		page->SetState(pageState);
1649 
1650 		if (toQueue != NULL)
1651 			toQueue->AppendUnlocked(page);
1652 	}
1653 }
1654 
1655 
1656 /*! Moves a previously modified page into a now appropriate queue.
1657 	The page queues must not be locked.
1658 */
1659 static void
1660 move_page_to_appropriate_queue(vm_page *page)
1661 {
1662 	DEBUG_PAGE_ACCESS_CHECK(page);
1663 
1664 	// Note, this logic must be in sync with what the page daemon does.
1665 	int32 state;
1666 	if (page->IsMapped())
1667 		state = PAGE_STATE_ACTIVE;
1668 	else if (page->modified)
1669 		state = PAGE_STATE_MODIFIED;
1670 	else
1671 		state = PAGE_STATE_CACHED;
1672 
1673 // TODO: If free + cached pages are low, we might directly want to free the
1674 // page.
1675 	set_page_state(page, state);
1676 }
1677 
1678 
1679 static void
1680 clear_page(struct vm_page *page)
1681 {
1682 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1683 		B_PAGE_SIZE);
1684 }
1685 
1686 
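/*!	Marks the given range of physical pages as in use: pages still in the free
	or clear queue are removed from it and set to the wired or unused state
	(according to \a wired). The range is clipped to the part covered by the
	sPages array.
*/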
1687 static status_t
1688 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1689 {
1690 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1691 		B_PRIxPHYSADDR "\n", startPage, length));
1692 
1693 	if (sPhysicalPageOffset > startPage) {
1694 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1695 			"): start page is before free list\n", startPage, length);
1696 		if (sPhysicalPageOffset - startPage >= length)
1697 			return B_OK;
1698 		length -= sPhysicalPageOffset - startPage;
1699 		startPage = sPhysicalPageOffset;
1700 	}
1701 
1702 	startPage -= sPhysicalPageOffset;
1703 
1704 	if (startPage + length > sNumPages) {
1705 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1706 			"): range would extend past free list\n", startPage, length);
1707 		if (startPage >= sNumPages)
1708 			return B_OK;
1709 		length = sNumPages - startPage;
1710 	}
1711 
1712 	WriteLocker locker(sFreePageQueuesLock);
1713 
1714 	for (page_num_t i = 0; i < length; i++) {
1715 		vm_page *page = &sPages[startPage + i];
1716 		switch (page->State()) {
1717 			case PAGE_STATE_FREE:
1718 			case PAGE_STATE_CLEAR:
1719 			{
1720 // TODO: This violates the page reservation policy, since we remove pages from
1721 // the free/clear queues without having reserved them before. This should happen
1722 // in the early boot process only, though.
1723 				DEBUG_PAGE_ACCESS_START(page);
1724 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1725 					? sFreePageQueue : sClearPageQueue;
1726 				queue.Remove(page);
1727 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1728 				page->busy = false;
1729 				atomic_add(&sUnreservedFreePages, -1);
1730 				DEBUG_PAGE_ACCESS_END(page);
1731 				break;
1732 			}
1733 			case PAGE_STATE_WIRED:
1734 			case PAGE_STATE_UNUSED:
1735 				break;
1736 			case PAGE_STATE_ACTIVE:
1737 			case PAGE_STATE_INACTIVE:
1738 			case PAGE_STATE_MODIFIED:
1739 			case PAGE_STATE_CACHED:
1740 			default:
1741 				// uh
1742 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1743 					" in non-free state %d!\n", startPage + i, page->State());
1744 				break;
1745 		}
1746 	}
1747 
1748 	return B_OK;
1749 }
1750 
1751 
1752 /*!
1753 	This is a background thread that wakes up every now and then (every 100ms)
1754 	and moves some pages from the free queue over to the clear queue.
1755 	Given enough time, it will clear out all pages from the free queue - we
1756 	could probably slow it down after having reached a certain threshold.
1757 */
1758 static int32
1759 page_scrubber(void *unused)
1760 {
1761 	(void)(unused);
1762 
1763 	TRACE(("page_scrubber starting...\n"));
1764 
1765 	for (;;) {
1766 		snooze(100000); // 100ms
1767 
1768 		if (sFreePageQueue.Count() == 0
1769 				|| sUnreservedFreePages < (int32)sFreePagesTarget) {
1770 			continue;
1771 		}
1772 
1773 		// Since we temporarily remove pages from the free pages reserve,
1774 		// we must make sure we don't cause a violation of the page
1775 		// reservation guarantee. The following is usually stricter than
1776 		// necessary, because we don't have information on how many of the
1777 		// reserved pages have already been allocated.
1778 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1779 			kPageReserveForPriority[VM_PRIORITY_USER]);
1780 		if (reserved == 0)
1781 			continue;
1782 
1783 		// get some pages from the free queue
1784 		ReadLocker locker(sFreePageQueuesLock);
1785 
1786 		vm_page *page[SCRUB_SIZE];
1787 		int32 scrubCount = 0;
1788 		for (int32 i = 0; i < reserved; i++) {
1789 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1790 			if (page[i] == NULL)
1791 				break;
1792 
1793 			DEBUG_PAGE_ACCESS_START(page[i]);
1794 
1795 			page[i]->SetState(PAGE_STATE_ACTIVE);
1796 			page[i]->busy = true;
1797 			scrubCount++;
1798 		}
1799 
1800 		locker.Unlock();
1801 
1802 		if (scrubCount == 0) {
1803 			unreserve_pages(reserved);
1804 			continue;
1805 		}
1806 
1807 		TA(ScrubbingPages(scrubCount));
1808 
1809 		// clear them
1810 		for (int32 i = 0; i < scrubCount; i++)
1811 			clear_page(page[i]);
1812 
1813 		locker.Lock();
1814 
1815 		// and put them into the clear queue
1816 		for (int32 i = 0; i < scrubCount; i++) {
1817 			page[i]->SetState(PAGE_STATE_CLEAR);
1818 			page[i]->busy = false;
1819 			DEBUG_PAGE_ACCESS_END(page[i]);
1820 			sClearPageQueue.PrependUnlocked(page[i]);
1821 		}
1822 
1823 		locker.Unlock();
1824 
1825 		unreserve_pages(reserved);
1826 
1827 		TA(ScrubbedPages(scrubCount));
1828 	}
1829 
1830 	return 0;
1831 }
1832 
1833 
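/*!	Initializes a dummy page used as a queue marker: it is busy, has no cache,
	and starts out in the unused state.
*/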
1834 static void
1835 init_page_marker(vm_page &marker)
1836 {
1837 	marker.SetCacheRef(NULL);
1838 	marker.InitState(PAGE_STATE_UNUSED);
1839 	marker.busy = true;
1840 #if DEBUG_PAGE_QUEUE
1841 	marker.queue = NULL;
1842 #endif
1843 #if DEBUG_PAGE_ACCESS
1844 	marker.accessing_thread = thread_get_current_thread_id();
1845 #endif
1846 }
1847 
1848 
1849 static void
1850 remove_page_marker(struct vm_page &marker)
1851 {
1852 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1853 
1854 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1855 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1856 
1857 	marker.SetState(PAGE_STATE_UNUSED);
1858 }
1859 
1860 
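/*!	Returns the next non-busy page from the modified queue, requeueing each
	inspected page at the queue's tail. Gives up after looking at
	\a maxPagesToSee pages; the counter is decremented accordingly.
*/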
1861 static vm_page*
1862 next_modified_page(page_num_t& maxPagesToSee)
1863 {
1864 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1865 
1866 	while (maxPagesToSee > 0) {
1867 		vm_page* page = sModifiedPageQueue.Head();
1868 		if (page == NULL)
1869 			return NULL;
1870 
1871 		sModifiedPageQueue.Requeue(page, true);
1872 
1873 		maxPagesToSee--;
1874 
1875 		if (!page->busy)
1876 			return page;
1877 	}
1878 
1879 	return NULL;
1880 }
1881 
1882 
1883 // #pragma mark -
1884 
1885 
1886 class PageWriteTransfer;
1887 class PageWriteWrapper;
1888 
1889 
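/*!	Helper for the page writer: holds the page write wrappers and transfers of
	one write run, counts the pending transfers, and uses fAllFinishedCondition
	to wait until all of them have completed.
*/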
1890 class PageWriterRun {
1891 public:
1892 	status_t Init(uint32 maxPages);
1893 
1894 	void PrepareNextRun();
1895 	void AddPage(vm_page* page);
1896 	uint32 Go();
1897 
1898 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1899 		bool partialTransfer, size_t bytesTransferred);
1900 
1901 private:
1902 	uint32				fMaxPages;
1903 	uint32				fWrapperCount;
1904 	uint32				fTransferCount;
1905 	vint32				fPendingTransfers;
1906 	PageWriteWrapper*	fWrappers;
1907 	PageWriteTransfer*	fTransfers;
1908 	ConditionVariable	fAllFinishedCondition;
1909 };
1910 
1911 
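/*!	Describes one asynchronous write-back of pages that belong to the same
	cache and have consecutive cache offsets; physically contiguous pages are
	merged into a single I/O vector. IOFinished() reports the result back to
	the owning PageWriterRun.
*/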
1912 class PageWriteTransfer : public AsyncIOCallback {
1913 public:
1914 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1915 	bool AddPage(vm_page* page);
1916 
1917 	status_t Schedule(uint32 flags);
1918 
1919 	void SetStatus(status_t status, size_t transferred);
1920 
1921 	status_t Status() const	{ return fStatus; }
1922 	struct VMCache* Cache() const { return fCache; }
1923 	uint32 PageCount() const { return fPageCount; }
1924 
1925 	virtual void IOFinished(status_t status, bool partialTransfer,
1926 		generic_size_t bytesTransferred);
1927 private:
1928 	PageWriterRun*		fRun;
1929 	struct VMCache*		fCache;
1930 	off_t				fOffset;
1931 	uint32				fPageCount;
1932 	int32				fMaxPages;
1933 	status_t			fStatus;
1934 	uint32				fVecCount;
1935 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1936 };
1937 
1938 
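/*!	Tracks a single page during write-back: SetTo() marks the page busy and
	clears the modified flags of its mappings, Done() re-sorts the page into
	an appropriate queue (or frees it if its cache was shrunk in the meantime)
	and notifies threads waiting for the page to become unbusy.
*/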
1939 class PageWriteWrapper {
1940 public:
1941 	PageWriteWrapper();
1942 	~PageWriteWrapper();
1943 	void SetTo(vm_page* page);
1944 	bool Done(status_t result);
1945 
1946 private:
1947 	vm_page*			fPage;
1948 	struct VMCache*		fCache;
1949 	bool				fIsActive;
1950 };
1951 
1952 
1953 PageWriteWrapper::PageWriteWrapper()
1954 	:
1955 	fIsActive(false)
1956 {
1957 }
1958 
1959 
1960 PageWriteWrapper::~PageWriteWrapper()
1961 {
1962 	if (fIsActive)
1963 		panic("page write wrapper going out of scope but isn't completed");
1964 }
1965 
1966 
1967 /*!	The page's cache must be locked.
1968 */
1969 void
1970 PageWriteWrapper::SetTo(vm_page* page)
1971 {
1972 	DEBUG_PAGE_ACCESS_CHECK(page);
1973 
1974 	if (page->busy)
1975 		panic("setting page write wrapper to busy page");
1976 
1977 	if (fIsActive)
1978 		panic("re-setting page write wrapper that isn't completed");
1979 
1980 	fPage = page;
1981 	fCache = page->Cache();
1982 	fIsActive = true;
1983 
1984 	fPage->busy = true;
1985 	fPage->busy_writing = true;
1986 
1987 	// We have a modified page -- however, while we're writing it back,
1988 	// the page might still be mapped. In order not to lose any changes to the
1989 	// page, we mark it clean before actually writing it back; if
1990 	// writing the page fails for some reason, we'll just keep it in the
1991 	// modified page list, but that should happen only rarely.
1992 
1993 	// If the page is changed after we cleared the dirty flag, but before we
1994 	// had the chance to write it back, then we'll write it again later -- that
1995 	// will probably not happen that often, though.
1996 
1997 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
1998 }
1999 
2000 
2001 /*!	The page's cache must be locked.
2002 	The page queues must not be locked.
2003 	\return \c true if the page was written successfully or could otherwise be
2004 		handled (e.g. freed after its cache was shrunk), \c false otherwise.
2005 */
2006 bool
2007 PageWriteWrapper::Done(status_t result)
2008 {
2009 	if (!fIsActive)
2010 		panic("completing page write wrapper that is not active");
2011 
2012 	DEBUG_PAGE_ACCESS_START(fPage);
2013 
2014 	fPage->busy = false;
2015 		// Set unbusy and notify later by hand, since we might free the page.
2016 
2017 	bool success = true;
2018 
2019 	if (result == B_OK) {
2020 		// put it into the active/inactive queue
2021 		move_page_to_appropriate_queue(fPage);
2022 		fPage->busy_writing = false;
2023 		DEBUG_PAGE_ACCESS_END(fPage);
2024 	} else {
2025 		// Writing the page failed. One reason would be that the cache has been
2026 		// shrunk and the page does no longer belong to the file. Otherwise the
2027 		// actual I/O failed, in which case we'll simply keep the page modified.
2028 
2029 		if (!fPage->busy_writing) {
2030 			// The busy_writing flag was cleared. That means the cache has been
2031 			// shrunk while we were trying to write the page and we have to free
2032 			// it now.
2033 			vm_remove_all_page_mappings(fPage);
2034 // TODO: Unmapping should already happen when resizing the cache!
2035 			fCache->RemovePage(fPage);
2036 			free_page(fPage, false);
2037 		} else {
2038 			// Writing the page failed -- mark the page modified and move it to
2039 			// an appropriate queue other than the modified queue, so we don't
2040 			// keep trying to write it over and over again. We keep
2041 			// non-temporary pages in the modified queue, though, so they don't
2042 			// get lost in the inactive queue.
2043 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2044 				strerror(result));
2045 
2046 			fPage->modified = true;
2047 			if (!fCache->temporary)
2048 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2049 			else if (fPage->IsMapped())
2050 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2051 			else
2052 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2053 
2054 			fPage->busy_writing = false;
2055 			DEBUG_PAGE_ACCESS_END(fPage);
2056 
2057 			success = false;
2058 		}
2059 	}
2060 
2061 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2062 	fIsActive = false;
2063 
2064 	return success;
2065 }
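
// A minimal usage sketch (hypothetical caller, for illustration only): a
// wrapper brackets a single page write while the page's cache is locked.
//
//	PageWriteWrapper wrapper;
//	cache->Lock();
//	wrapper.SetTo(page);
//		// marks the page busy/busy_writing and clean in its mappings
//	cache->Unlock();
//	status_t status = write_the_page_somehow(page);
//		// hypothetical placeholder -- normally done via a PageWriteTransfer
//	cache->Lock();
//	bool handled = wrapper.Done(status);
//	cache->Unlock();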
2066 
2067 
2068 /*!	The page's cache must be locked.
2069 */
2070 void
2071 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2072 {
2073 	fRun = run;
2074 	fCache = page->Cache();
2075 	fOffset = page->cache_offset;
2076 	fPageCount = 1;
2077 	fMaxPages = maxPages;
2078 	fStatus = B_OK;
2079 
2080 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2081 	fVecs[0].length = B_PAGE_SIZE;
2082 	fVecCount = 1;
2083 }
2084 
2085 
2086 /*!	The page's cache must be locked.
2087 */
2088 bool
2089 PageWriteTransfer::AddPage(vm_page* page)
2090 {
2091 	if (page->Cache() != fCache
2092 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2093 		return false;
2094 
2095 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2096 		+ fVecs[fVecCount - 1].length;
2097 
2098 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2099 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2100 		// append to last iovec
2101 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2102 		fPageCount++;
2103 		return true;
2104 	}
2105 
2106 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2107 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2108 		&& (off_t)page->cache_offset == fOffset - 1) {
2109 		// prepend to first iovec and adjust offset
2110 		fVecs[0].base = nextBase;
2111 		fVecs[0].length += B_PAGE_SIZE;
2112 		fOffset = page->cache_offset;
2113 		fPageCount++;
2114 		return true;
2115 	}
2116 
2117 	if (((off_t)page->cache_offset == fOffset + fPageCount
2118 			|| (off_t)page->cache_offset == fOffset - 1)
2119 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2120 		// not physically contiguous or not in the right order
2121 		uint32 vectorIndex;
2122 		if ((off_t)page->cache_offset < fOffset) {
2123 			// we are pre-pending another vector, move the other vecs
2124 			for (uint32 i = fVecCount; i > 0; i--)
2125 				fVecs[i] = fVecs[i - 1];
2126 
2127 			fOffset = page->cache_offset;
2128 			vectorIndex = 0;
2129 		} else
2130 			vectorIndex = fVecCount;
2131 
2132 		fVecs[vectorIndex].base
2133 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2134 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2135 
2136 		fVecCount++;
2137 		fPageCount++;
2138 		return true;
2139 	}
2140 
2141 	return false;
2142 }
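
// Coalescing example (illustrative numbers only, assuming 4 KiB pages):
// starting a transfer with a page at cache_offset 10 / physical page 100 and
// then adding cache_offset 11 / physical page 101 simply extends fVecs[0] to
// two pages. Adding cache_offset 12 / physical page 200 is contiguous in the
// file but not physically, so it gets its own vec (fVecCount becomes 2).
// Adding cache_offset 14 is rejected, since it is adjacent to neither end of
// the transfer's file range.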
2143 
2144 
2145 status_t
2146 PageWriteTransfer::Schedule(uint32 flags)
2147 {
2148 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2149 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2150 
2151 	if (fRun != NULL) {
2152 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2153 			flags | B_PHYSICAL_IO_REQUEST, this);
2154 	}
2155 
2156 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2157 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2158 
2159 	SetStatus(status, writeLength);
2160 	return fStatus;
2161 }
2162 
2163 
2164 void
2165 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2166 {
2167 	// only succeed if all pages up to the last one have been written fully
2168 	// and the last page has at least been written partially
2169 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2170 		status = B_ERROR;
2171 
2172 	fStatus = status;
2173 }
2174 
2175 
2176 void
2177 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2178 	generic_size_t bytesTransferred)
2179 {
2180 	SetStatus(status, bytesTransferred);
2181 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2182 }
2183 
2184 
2185 status_t
2186 PageWriterRun::Init(uint32 maxPages)
2187 {
2188 	fMaxPages = maxPages;
2189 	fWrapperCount = 0;
2190 	fTransferCount = 0;
2191 	fPendingTransfers = 0;
2192 
2193 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2194 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2195 	if (fWrappers == NULL || fTransfers == NULL)
2196 		return B_NO_MEMORY;
2197 
2198 	return B_OK;
2199 }
2200 
2201 
2202 void
2203 PageWriterRun::PrepareNextRun()
2204 {
2205 	fWrapperCount = 0;
2206 	fTransferCount = 0;
2207 	fPendingTransfers = 0;
2208 }
2209 
2210 
2211 /*!	The page's cache must be locked.
2212 */
2213 void
2214 PageWriterRun::AddPage(vm_page* page)
2215 {
2216 	fWrappers[fWrapperCount++].SetTo(page);
2217 
2218 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2219 		fTransfers[fTransferCount++].SetTo(this, page,
2220 			page->Cache()->MaxPagesPerAsyncWrite());
2221 	}
2222 }
2223 
2224 
2225 /*!	Writes all pages previously added.
2226 	\return The number of pages that could not be written or otherwise handled.
2227 */
2228 uint32
2229 PageWriterRun::Go()
2230 {
2231 	fPendingTransfers = fTransferCount;
2232 
2233 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2234 	ConditionVariableEntry waitEntry;
2235 	fAllFinishedCondition.Add(&waitEntry);
2236 
2237 	// schedule writes
2238 	for (uint32 i = 0; i < fTransferCount; i++)
2239 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2240 
2241 	// wait until all pages have been written
2242 	waitEntry.Wait();
2243 
2244 	// mark pages depending on whether they could be written or not
2245 
2246 	uint32 failedPages = 0;
2247 	uint32 wrapperIndex = 0;
2248 	for (uint32 i = 0; i < fTransferCount; i++) {
2249 		PageWriteTransfer& transfer = fTransfers[i];
2250 		transfer.Cache()->Lock();
2251 
2252 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2253 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2254 				failedPages++;
2255 		}
2256 
2257 		transfer.Cache()->Unlock();
2258 	}
2259 
2260 	ASSERT(wrapperIndex == fWrapperCount);
2261 
2262 	for (uint32 i = 0; i < fTransferCount; i++) {
2263 		PageWriteTransfer& transfer = fTransfers[i];
2264 		struct VMCache* cache = transfer.Cache();
2265 
		// We've acquired a reference for each page
2267 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2268 			// We release the cache references after all pages were made
2269 			// unbusy again - otherwise releasing a vnode could deadlock.
2270 			cache->ReleaseStoreRef();
2271 			cache->ReleaseRef();
2272 		}
2273 	}
2274 
2275 	return failedPages;
2276 }
2277 
2278 
2279 void
2280 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2281 	bool partialTransfer, size_t bytesTransferred)
2282 {
2283 	if (atomic_add(&fPendingTransfers, -1) == 1)
2284 		fAllFinishedCondition.NotifyAll();
2285 }
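
// Sketch of how a run is driven (this is the pattern page_writer() below
// follows):
//
//	PageWriterRun run;
//	run.Init(maxPages);			// once; allocates the wrapper/transfer arrays
//	...
//	run.PrepareNextRun();		// at the start of each iteration
//	run.AddPage(page);			// repeatedly, with the page's cache locked
//	uint32 failedPages = run.Go();
//		// schedules all transfers and blocks until the I/O has finished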
2286 
2287 
2288 /*!	The page writer continuously takes some pages from the modified
2289 	queue, writes them back, and moves them back to the active queue.
2290 	It runs in its own thread, and is only there to keep the number
2291 	of modified pages low, so that more pages can be reused with
2292 	fewer costs.
2293 */
2294 status_t
2295 page_writer(void* /*unused*/)
2296 {
2297 	const uint32 kNumPages = 256;
2298 #ifdef TRACE_VM_PAGE
2299 	uint32 writtenPages = 0;
2300 	bigtime_t lastWrittenTime = 0;
2301 	bigtime_t pageCollectionTime = 0;
2302 	bigtime_t pageWritingTime = 0;
2303 #endif
2304 
2305 	PageWriterRun run;
2306 	if (run.Init(kNumPages) != B_OK) {
2307 		panic("page writer: Failed to init PageWriterRun!");
2308 		return B_ERROR;
2309 	}
2310 
2311 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2312 
2313 	while (true) {
2314 // TODO: Maybe wait shorter when memory is low!
2315 		if (sModifiedPageQueue.Count() < kNumPages) {
2316 			sPageWriterCondition.Wait(3000000, true);
				// every 3 seconds when no one triggers us
2318 		}
2319 
2320 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2321 		if (modifiedPages == 0)
2322 			continue;
2323 
2324 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2325 			// We ran through the whole queue without being able to write a
2326 			// single page. Take a break.
2327 			snooze(500000);
2328 			pagesSinceLastSuccessfulWrite = 0;
2329 		}
2330 
2331 #if ENABLE_SWAP_SUPPORT
2332 		page_stats pageStats;
2333 		get_page_stats(pageStats);
2334 		bool activePaging = do_active_paging(pageStats);
2335 #endif
2336 
2337 		// depending on how urgent it becomes to get pages to disk, we adjust
2338 		// our I/O priority
2339 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2340 		int32 ioPriority = B_IDLE_PRIORITY;
2341 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2342 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2343 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2344 		} else {
2345 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2346 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2347 		}
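		// For example, when a quarter of the threshold's worth of pages is
		// modified, the writer gets a quarter of MAX_PAGE_WRITER_IO_PRIORITY;
		// at or above the threshold, or under a critical low-pages state, it
		// gets the full maximum.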
2348 
2349 		thread_set_io_priority(ioPriority);
2350 
2351 		uint32 numPages = 0;
2352 		run.PrepareNextRun();
2353 
2354 		// TODO: make this laptop friendly, too (ie. only start doing
2355 		// something if someone else did something or there is really
2356 		// enough to do).
2357 
2358 		// collect pages to be written
2359 #ifdef TRACE_VM_PAGE
2360 		pageCollectionTime -= system_time();
2361 #endif
2362 
2363 		page_num_t maxPagesToSee = modifiedPages;
2364 
2365 		while (numPages < kNumPages && maxPagesToSee > 0) {
2366 			vm_page *page = next_modified_page(maxPagesToSee);
2367 			if (page == NULL)
2368 				break;
2369 
2370 			PageCacheLocker cacheLocker(page, false);
2371 			if (!cacheLocker.IsLocked())
2372 				continue;
2373 
2374 			VMCache *cache = page->Cache();
2375 
2376 			// If the page is busy or its state has changed while we were
2377 			// locking the cache, just ignore it.
2378 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2379 				continue;
2380 
2381 			DEBUG_PAGE_ACCESS_START(page);
2382 
2383 			// Don't write back wired (locked) pages.
2384 			if (page->WiredCount() > 0) {
2385 				set_page_state(page, PAGE_STATE_ACTIVE);
2386 				DEBUG_PAGE_ACCESS_END(page);
2387 				continue;
2388 			}
2389 
2390 			// Write back temporary pages only when we're actively paging.
2391 			if (cache->temporary
2392 #if ENABLE_SWAP_SUPPORT
2393 				&& (!activePaging
2394 					|| !cache->CanWritePage(
2395 							(off_t)page->cache_offset << PAGE_SHIFT))
2396 #endif
2397 				) {
2398 				// We can't/don't want to do anything with this page, so move it
2399 				// to one of the other queues.
2400 				if (page->mappings.IsEmpty())
2401 					set_page_state(page, PAGE_STATE_INACTIVE);
2402 				else
2403 					set_page_state(page, PAGE_STATE_ACTIVE);
2404 
2405 				DEBUG_PAGE_ACCESS_END(page);
2406 				continue;
2407 			}
2408 
			// We need our own reference to the store, as it might currently be
			// in the process of being destroyed.
2411 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2412 				DEBUG_PAGE_ACCESS_END(page);
2413 				cacheLocker.Unlock();
2414 				thread_yield(true);
2415 				continue;
2416 			}
2417 
2418 			run.AddPage(page);
2419 				// TODO: We're possibly adding pages of different caches and
2420 				// thus maybe of different underlying file systems here. This
2421 				// is a potential problem for loop file systems/devices, since
2422 				// we could mark a page busy that would need to be accessed
2423 				// when writing back another page, thus causing a deadlock.
2424 
2425 			DEBUG_PAGE_ACCESS_END(page);
2426 
2427 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2428 			TPW(WritePage(page));
2429 
2430 			cache->AcquireRefLocked();
2431 			numPages++;
2432 		}
2433 
2434 #ifdef TRACE_VM_PAGE
2435 		pageCollectionTime += system_time();
2436 #endif
2437 		if (numPages == 0)
2438 			continue;
2439 
2440 		// write pages to disk and do all the cleanup
2441 #ifdef TRACE_VM_PAGE
2442 		pageWritingTime -= system_time();
2443 #endif
2444 		uint32 failedPages = run.Go();
2445 #ifdef TRACE_VM_PAGE
2446 		pageWritingTime += system_time();
2447 
2448 		// debug output only...
2449 		writtenPages += numPages;
2450 		if (writtenPages >= 1024) {
2451 			bigtime_t now = system_time();
2452 			TRACE(("page writer: wrote 1024 pages (total: %llu ms, "
2453 				"collect: %llu ms, write: %llu ms)\n",
2454 				(now - lastWrittenTime) / 1000,
2455 				pageCollectionTime / 1000, pageWritingTime / 1000));
2456 			lastWrittenTime = now;
2457 
2458 			writtenPages -= 1024;
2459 			pageCollectionTime = 0;
2460 			pageWritingTime = 0;
2461 		}
2462 #endif
2463 
2464 		if (failedPages == numPages)
2465 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2466 		else
2467 			pagesSinceLastSuccessfulWrite = 0;
2468 	}
2469 
2470 	return B_OK;
2471 }
2472 
2473 
2474 // #pragma mark -
2475 
2476 
2477 // TODO: This should be done in the page daemon!
2478 #if 0
2479 #if ENABLE_SWAP_SUPPORT
2480 static bool
2481 free_page_swap_space(int32 index)
2482 {
2483 	vm_page *page = vm_page_at_index(index);
2484 	PageCacheLocker locker(page);
2485 	if (!locker.IsLocked())
2486 		return false;
2487 
2488 	DEBUG_PAGE_ACCESS_START(page);
2489 
2490 	VMCache* cache = page->Cache();
2491 	if (cache->temporary && page->WiredCount() == 0
2492 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2493 			&& page->usage_count > 0) {
2494 		// TODO: how to judge a page is highly active?
2495 		if (swap_free_page_swap_space(page)) {
2496 			// We need to mark the page modified, since otherwise it could be
2497 			// stolen and we'd lose its data.
2498 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2499 			TD(FreedPageSwap(page));
2500 			DEBUG_PAGE_ACCESS_END(page);
2501 			return true;
2502 		}
2503 	}
2504 	DEBUG_PAGE_ACCESS_END(page);
2505 	return false;
2506 }
2507 #endif
2508 #endif	// 0
2509 
2510 
2511 static vm_page *
2512 find_cached_page_candidate(struct vm_page &marker)
2513 {
2514 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2515 
2516 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2517 	vm_page *page;
2518 
2519 	if (marker.State() == PAGE_STATE_UNUSED) {
		// Get the first page of the cached queue
2521 		page = sCachedPageQueue.Head();
2522 	} else {
2523 		// Get the next page of the current queue
2524 		if (marker.State() != PAGE_STATE_CACHED) {
2525 			panic("invalid marker %p state", &marker);
2526 			return NULL;
2527 		}
2528 
2529 		page = sCachedPageQueue.Next(&marker);
2530 		sCachedPageQueue.Remove(&marker);
2531 		marker.SetState(PAGE_STATE_UNUSED);
2532 	}
2533 
2534 	while (page != NULL) {
2535 		if (!page->busy) {
2536 			// we found a candidate, insert marker
2537 			marker.SetState(PAGE_STATE_CACHED);
2538 			sCachedPageQueue.InsertAfter(page, &marker);
2539 			return page;
2540 		}
2541 
2542 		page = sCachedPageQueue.Next(page);
2543 	}
2544 
2545 	return NULL;
2546 }
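
// The marker page is how the scan survives dropping the queue lock: after a
// candidate has been found, the marker is inserted right after it, the queue
// lock is released while the candidate's cache is locked and the page is
// processed, and the next call resumes at the marker. The same pattern is
// used by the full_scan_*_pages() functions below.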
2547 
2548 
2549 static bool
2550 free_cached_page(vm_page *page, bool dontWait)
2551 {
2552 	// try to lock the page's cache
2553 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2554 		return false;
2555 	VMCache* cache = page->Cache();
2556 
2557 	AutoLocker<VMCache> cacheLocker(cache, true);
2558 	MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked);
2559 
2560 	// check again if that page is still a candidate
2561 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2562 		return false;
2563 
2564 	DEBUG_PAGE_ACCESS_START(page);
2565 
2566 	PAGE_ASSERT(page, !page->IsMapped());
2567 	PAGE_ASSERT(page, !page->modified);
2568 
2569 	// we can now steal this page
2570 
2571 	cache->RemovePage(page);
		// Now the page doesn't have a cache anymore, so no one else (e.g.
		// vm_page_allocate_page_run()) can pick it up, since they would be
		// required to lock the cache first, which would fail.
2575 
2576 	sCachedPageQueue.RemoveUnlocked(page);
2577 	return true;
2578 }
2579 
2580 
2581 static uint32
2582 free_cached_pages(uint32 pagesToFree, bool dontWait)
2583 {
2584 	vm_page marker;
2585 	init_page_marker(marker);
2586 
2587 	uint32 pagesFreed = 0;
2588 
2589 	while (pagesFreed < pagesToFree) {
2590 		vm_page *page = find_cached_page_candidate(marker);
2591 		if (page == NULL)
2592 			break;
2593 
2594 		if (free_cached_page(page, dontWait)) {
2595 			ReadLocker locker(sFreePageQueuesLock);
2596 			page->SetState(PAGE_STATE_FREE);
2597 			DEBUG_PAGE_ACCESS_END(page);
2598 			sFreePageQueue.PrependUnlocked(page);
2599 			locker.Unlock();
2600 
2601 			TA(StolenPage());
2602 
2603 			pagesFreed++;
2604 		}
2605 	}
2606 
2607 	remove_page_marker(marker);
2608 
2609 	return pagesFreed;
2610 }
2611 
2612 
2613 static void
2614 idle_scan_active_pages(page_stats& pageStats)
2615 {
2616 	VMPageQueue& queue = sActivePageQueue;
2617 
2618 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2619 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2620 
2621 	while (maxToScan > 0) {
2622 		maxToScan--;
2623 
2624 		// Get the next page. Note that we don't bother to lock here. We go with
2625 		// the assumption that on all architectures reading/writing pointers is
2626 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2627 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2628 		vm_page* page = queue.Head();
2629 		if (page == NULL)
2630 			break;
2631 
2632 		// lock the page's cache
2633 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2634 		if (cache == NULL)
2635 			continue;
2636 
2637 		if (page->State() != PAGE_STATE_ACTIVE) {
2638 			// page is no longer in the cache or in this queue
2639 			cache->ReleaseRefAndUnlock();
2640 			continue;
2641 		}
2642 
2643 		if (page->busy) {
2644 			// page is busy -- requeue at the end
2645 			vm_page_requeue(page, true);
2646 			cache->ReleaseRefAndUnlock();
2647 			continue;
2648 		}
2649 
2650 		DEBUG_PAGE_ACCESS_START(page);
2651 
2652 		// Get the page active/modified flags and update the page's usage count.
		// We completely unmap inactive temporary pages. This saves us from
		// iterating through the inactive list as well, since we'll be notified
		// via page fault whenever such an inactive page is used again.
		// We don't remove the mappings of non-temporary pages, since we
		// wouldn't notice when those become unused and could thus be
		// moved to the cached list.
2659 		int32 usageCount;
2660 		if (page->WiredCount() > 0 || page->usage_count > 0
2661 			|| !cache->temporary) {
2662 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2663 		} else
2664 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2665 
2666 		if (usageCount > 0) {
2667 			usageCount += page->usage_count + kPageUsageAdvance;
2668 			if (usageCount > kPageUsageMax)
2669 				usageCount = kPageUsageMax;
2670 // TODO: This would probably also be the place to reclaim swap space.
2671 		} else {
2672 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2673 			if (usageCount < 0) {
2674 				usageCount = 0;
2675 				set_page_state(page, PAGE_STATE_INACTIVE);
2676 			}
2677 		}
2678 
2679 		page->usage_count = usageCount;
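		// For example, a page with usage_count 5 and one accessed mapping ends
		// up at 1 + 5 + kPageUsageAdvance (clamped to kPageUsageMax), while an
		// unaccessed page drops by kPageUsageDecline per scan and only moves
		// to the inactive queue once the count would fall below zero.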
2680 
2681 		DEBUG_PAGE_ACCESS_END(page);
2682 
2683 		cache->ReleaseRefAndUnlock();
2684 	}
2685 }
2686 
2687 
2688 static void
2689 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2690 {
2691 	int32 pagesToFree = pageStats.unsatisfiedReservations
2692 		+ sFreeOrCachedPagesTarget
2693 		- (pageStats.totalFreePages + pageStats.cachedPages);
2694 	if (pagesToFree <= 0)
2695 		return;
2696 
2697 	bigtime_t time = system_time();
2698 	uint32 pagesScanned = 0;
2699 	uint32 pagesToCached = 0;
2700 	uint32 pagesToModified = 0;
2701 	uint32 pagesToActive = 0;
2702 
2703 	// Determine how many pages at maximum to send to the modified queue. Since
2704 	// it is relatively expensive to page out pages, we do that on a grander
2705 	// scale only when things get desperate.
2706 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2707 
2708 	vm_page marker;
2709 	init_page_marker(marker);
2710 
2711 	VMPageQueue& queue = sInactivePageQueue;
2712 	InterruptsSpinLocker queueLocker(queue.GetLock());
2713 	uint32 maxToScan = queue.Count();
2714 
2715 	vm_page* nextPage = queue.Head();
2716 
2717 	while (pagesToFree > 0 && maxToScan > 0) {
2718 		maxToScan--;
2719 
2720 		// get the next page
2721 		vm_page* page = nextPage;
2722 		if (page == NULL)
2723 			break;
2724 		nextPage = queue.Next(page);
2725 
2726 		if (page->busy)
2727 			continue;
2728 
2729 		// mark the position
2730 		queue.InsertAfter(page, &marker);
2731 		queueLocker.Unlock();
2732 
2733 		// lock the page's cache
2734 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2735 		if (cache == NULL || page->busy
2736 				|| page->State() != PAGE_STATE_INACTIVE) {
2737 			if (cache != NULL)
2738 				cache->ReleaseRefAndUnlock();
2739 			queueLocker.Lock();
2740 			nextPage = queue.Next(&marker);
2741 			queue.Remove(&marker);
2742 			continue;
2743 		}
2744 
2745 		pagesScanned++;
2746 
2747 		DEBUG_PAGE_ACCESS_START(page);
2748 
2749 		// Get the accessed count, clear the accessed/modified flags and
2750 		// unmap the page, if it hasn't been accessed.
2751 		int32 usageCount;
2752 		if (page->WiredCount() > 0)
2753 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2754 		else
2755 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2756 
2757 		// update usage count
2758 		if (usageCount > 0) {
2759 			usageCount += page->usage_count + kPageUsageAdvance;
2760 			if (usageCount > kPageUsageMax)
2761 				usageCount = kPageUsageMax;
2762 		} else {
2763 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2764 			if (usageCount < 0)
2765 				usageCount = 0;
2766 		}
2767 
2768 		page->usage_count = usageCount;
2769 
2770 		// Move to fitting queue or requeue:
2771 		// * Active mapped pages go to the active queue.
2772 		// * Inactive mapped (i.e. wired) pages are requeued.
2773 		// * The remaining pages are cachable. Thus, if unmodified they go to
2774 		//   the cached queue, otherwise to the modified queue (up to a limit).
		//   Note that unlike in the idle scanning we don't exempt pages of
		//   temporary caches. Apparently we really need memory, so we'd better
		//   page out memory as well.
2778 		bool isMapped = page->IsMapped();
2779 		if (usageCount > 0) {
2780 			if (isMapped) {
2781 				set_page_state(page, PAGE_STATE_ACTIVE);
2782 				pagesToActive++;
2783 			} else
2784 				vm_page_requeue(page, true);
2785 		} else if (isMapped) {
2786 			vm_page_requeue(page, true);
2787 		} else if (!page->modified) {
2788 			set_page_state(page, PAGE_STATE_CACHED);
2789 			pagesToFree--;
2790 			pagesToCached++;
2791 		} else if (maxToFlush > 0) {
2792 			set_page_state(page, PAGE_STATE_MODIFIED);
2793 			maxToFlush--;
2794 			pagesToModified++;
2795 		} else
2796 			vm_page_requeue(page, true);
2797 
2798 		DEBUG_PAGE_ACCESS_END(page);
2799 
2800 		cache->ReleaseRefAndUnlock();
2801 
2802 		// remove the marker
2803 		queueLocker.Lock();
2804 		nextPage = queue.Next(&marker);
2805 		queue.Remove(&marker);
2806 	}
2807 
2808 	queueLocker.Unlock();
2809 
2810 	time = system_time() - time;
2811 	TRACE_DAEMON("  -> inactive scan (%7lld us): scanned: %7lu, "
2812 		"moved: %lu -> cached, %lu -> modified, %lu -> active\n", time,
2813 		pagesScanned, pagesToCached, pagesToModified, pagesToActive);
2814 
2815 	// wake up the page writer, if we tossed it some pages
2816 	if (pagesToModified > 0)
2817 		sPageWriterCondition.WakeUp();
2818 }
2819 
2820 
2821 static void
2822 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2823 {
2824 	vm_page marker;
2825 	init_page_marker(marker);
2826 
2827 	VMPageQueue& queue = sActivePageQueue;
2828 	InterruptsSpinLocker queueLocker(queue.GetLock());
2829 	uint32 maxToScan = queue.Count();
2830 
2831 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2832 		+ sFreeOrCachedPagesTarget
2833 		- (pageStats.totalFreePages + pageStats.cachedPages)
2834 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2835 	if (pagesToDeactivate <= 0)
2836 		return;
2837 
2838 	bigtime_t time = system_time();
2839 	uint32 pagesAccessed = 0;
2840 	uint32 pagesToInactive = 0;
2841 	uint32 pagesScanned = 0;
2842 
2843 	vm_page* nextPage = queue.Head();
2844 
2845 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2846 		maxToScan--;
2847 
2848 		// get the next page
2849 		vm_page* page = nextPage;
2850 		if (page == NULL)
2851 			break;
2852 		nextPage = queue.Next(page);
2853 
2854 		if (page->busy)
2855 			continue;
2856 
2857 		// mark the position
2858 		queue.InsertAfter(page, &marker);
2859 		queueLocker.Unlock();
2860 
2861 		// lock the page's cache
2862 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2863 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2864 			if (cache != NULL)
2865 				cache->ReleaseRefAndUnlock();
2866 			queueLocker.Lock();
2867 			nextPage = queue.Next(&marker);
2868 			queue.Remove(&marker);
2869 			continue;
2870 		}
2871 
2872 		pagesScanned++;
2873 
2874 		DEBUG_PAGE_ACCESS_START(page);
2875 
2876 		// Get the page active/modified flags and update the page's usage count.
2877 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2878 
2879 		if (usageCount > 0) {
2880 			usageCount += page->usage_count + kPageUsageAdvance;
2881 			if (usageCount > kPageUsageMax)
2882 				usageCount = kPageUsageMax;
2883 			pagesAccessed++;
2884 // TODO: This would probably also be the place to reclaim swap space.
2885 		} else {
2886 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2887 			if (usageCount <= 0) {
2888 				usageCount = 0;
2889 				set_page_state(page, PAGE_STATE_INACTIVE);
2890 				pagesToInactive++;
2891 			}
2892 		}
2893 
2894 		page->usage_count = usageCount;
2895 
2896 		DEBUG_PAGE_ACCESS_END(page);
2897 
2898 		cache->ReleaseRefAndUnlock();
2899 
2900 		// remove the marker
2901 		queueLocker.Lock();
2902 		nextPage = queue.Next(&marker);
2903 		queue.Remove(&marker);
2904 	}
2905 
2906 	time = system_time() - time;
2907 	TRACE_DAEMON("  ->   active scan (%7lld us): scanned: %7lu, "
2908 		"moved: %lu -> inactive, encountered %lu accessed ones\n", time,
2909 		pagesScanned, pagesToInactive, pagesAccessed);
2910 }
2911 
2912 
2913 static void
2914 page_daemon_idle_scan(page_stats& pageStats)
2915 {
2916 	TRACE_DAEMON("page daemon: idle run\n");
2917 
2918 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2919 		// We want more actually free pages, so free some from the cached
2920 		// ones.
2921 		uint32 freed = free_cached_pages(
2922 			sFreePagesTarget - pageStats.totalFreePages, false);
2923 		if (freed > 0)
2924 			unreserve_pages(freed);
2925 		get_page_stats(pageStats);
2926 	}
2927 
2928 	// Walk the active list and move pages to the inactive queue.
2929 	get_page_stats(pageStats);
2930 	idle_scan_active_pages(pageStats);
2931 }
2932 
2933 
2934 static void
2935 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2936 {
2937 	TRACE_DAEMON("page daemon: full run: free: %lu, cached: %lu, "
2938 		"to free: %lu\n", pageStats.totalFreePages, pageStats.cachedPages,
2939 		pageStats.unsatisfiedReservations + sFreeOrCachedPagesTarget
2940 			- (pageStats.totalFreePages + pageStats.cachedPages));
2941 
2942 	// Walk the inactive list and transfer pages to the cached and modified
2943 	// queues.
2944 	full_scan_inactive_pages(pageStats, despairLevel);
2945 
2946 	// Free cached pages. Also wake up reservation waiters.
2947 	get_page_stats(pageStats);
2948 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2949 		- (pageStats.totalFreePages);
2950 	if (pagesToFree > 0) {
2951 		uint32 freed = free_cached_pages(pagesToFree, true);
2952 		if (freed > 0)
2953 			unreserve_pages(freed);
2954 	}
2955 
2956 	// Walk the active list and move pages to the inactive queue.
2957 	get_page_stats(pageStats);
2958 	full_scan_active_pages(pageStats, despairLevel);
2959 }
2960 
2961 
2962 static status_t
2963 page_daemon(void* /*unused*/)
2964 {
2965 	int32 despairLevel = 0;
2966 
2967 	while (true) {
2968 		sPageDaemonCondition.ClearActivated();
2969 
2970 		// evaluate the free pages situation
2971 		page_stats pageStats;
2972 		get_page_stats(pageStats);
2973 
2974 		if (!do_active_paging(pageStats)) {
2975 			// Things look good -- just maintain statistics and keep the pool
2976 			// of actually free pages full enough.
2977 			despairLevel = 0;
2978 			page_daemon_idle_scan(pageStats);
2979 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
2980 		} else {
2981 			// Not enough free pages. We need to do some real work.
			despairLevel = std::min(despairLevel + 1, (int32)3);
2983 			page_daemon_full_scan(pageStats, despairLevel);
2984 
2985 			// Don't wait after the first full scan, but rather immediately
2986 			// check whether we were successful in freeing enough pages and
2987 			// re-run with increased despair level. The first scan is
2988 			// conservative with respect to moving inactive modified pages to
2989 			// the modified list to avoid thrashing. The second scan, however,
2990 			// will not hold back.
2991 			if (despairLevel > 1)
2992 				snooze(kBusyScanWaitInterval);
2993 		}
2994 	}
2995 
2996 	return B_OK;
2997 }
2998 
2999 
3000 /*!	Returns how many pages could *not* be reserved.
3001 */
3002 static uint32
3003 reserve_pages(uint32 count, int priority, bool dontWait)
3004 {
3005 	int32 dontTouch = kPageReserveForPriority[priority];
3006 
3007 	while (true) {
3008 		count -= reserve_some_pages(count, dontTouch);
3009 		if (count == 0)
3010 			return 0;
3011 
3012 		if (sUnsatisfiedPageReservations == 0) {
3013 			count -= free_cached_pages(count, dontWait);
3014 			if (count == 0)
3015 				return count;
3016 		}
3017 
3018 		if (dontWait)
3019 			return count;
3020 
3021 		// we need to wait for pages to become available
3022 
3023 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3024 
3025 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3026 		sUnsatisfiedPageReservations += count;
3027 
3028 		if (sUnreservedFreePages > dontTouch) {
3029 			// the situation changed
3030 			sUnsatisfiedPageReservations -= count;
3031 			continue;
3032 		}
3033 
3034 		PageReservationWaiter waiter;
3035 		waiter.dontTouch = dontTouch;
3036 		waiter.missing = count;
3037 		waiter.thread = thread_get_current_thread();
3038 		waiter.threadPriority = waiter.thread->priority;
3039 
3040 		// insert ordered (i.e. after all waiters with higher or equal priority)
3041 		PageReservationWaiter* otherWaiter = NULL;
3042 		for (PageReservationWaiterList::Iterator it
3043 				= sPageReservationWaiters.GetIterator();
3044 			(otherWaiter = it.Next()) != NULL;) {
3045 			if (waiter < *otherWaiter)
3046 				break;
3047 		}
3048 
3049 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3050 
3051 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3052 			"waiting for pages");
3053 
3054 		if (notifyDaemon)
3055 			sPageDaemonCondition.WakeUp();
3056 
3057 		pageDeficitLocker.Unlock();
3058 
3059 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3060 		thread_block();
3061 
3062 		pageDeficitLocker.Lock();
3063 
3064 		return 0;
3065 	}
3066 }
3067 
3068 
3069 //	#pragma mark - private kernel API
3070 
3071 
3072 /*!	Writes a range of modified pages of a cache to disk.
3073 	You need to hold the VMCache lock when calling this function.
3074 	Note that the cache lock is released in this function.
3075 	\param cache The cache.
3076 	\param firstPage Offset (in page size units) of the first page in the range.
3077 	\param endPage End offset (in page size units) of the page range. The page
3078 		at this offset is not included.
3079 */
3080 status_t
3081 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3082 	uint32 endPage)
3083 {
3084 	static const int32 kMaxPages = 256;
3085 	int32 maxPages = cache->MaxPagesPerWrite();
3086 	if (maxPages < 0 || maxPages > kMaxPages)
3087 		maxPages = kMaxPages;
3088 
3089 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3090 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3091 
3092 	PageWriteWrapper stackWrappers[2];
3093 	PageWriteWrapper* wrapperPool
3094 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3095 	if (wrapperPool == NULL) {
3096 		// don't fail, just limit our capabilities
3097 		wrapperPool = stackWrappers;
3098 		maxPages = 1;
3099 	}
3100 
3101 	int32 nextWrapper = 0;
3102 
3103 	PageWriteWrapper* wrappers[maxPages];
3104 	int32 usedWrappers = 0;
3105 
3106 	PageWriteTransfer transfer;
3107 	bool transferEmpty = true;
3108 
3109 	VMCachePagesTree::Iterator it
3110 		= cache->pages.GetIterator(firstPage, true, true);
3111 
3112 	while (true) {
3113 		vm_page* page = it.Next();
3114 		if (page == NULL || page->cache_offset >= endPage) {
3115 			if (transferEmpty)
3116 				break;
3117 
3118 			page = NULL;
3119 		}
3120 
3121 		if (page != NULL) {
3122 			if (page->busy
3123 				|| (page->State() != PAGE_STATE_MODIFIED
3124 					&& !vm_test_map_modification(page))) {
3125 				page = NULL;
3126 			}
3127 		}
3128 
3129 		PageWriteWrapper* wrapper = NULL;
3130 		if (page != NULL) {
3131 			wrapper = &wrapperPool[nextWrapper++];
3132 			if (nextWrapper > maxPages)
3133 				nextWrapper = 0;
3134 
3135 			DEBUG_PAGE_ACCESS_START(page);
3136 
3137 			wrapper->SetTo(page);
3138 
3139 			if (transferEmpty || transfer.AddPage(page)) {
3140 				if (transferEmpty) {
3141 					transfer.SetTo(NULL, page, maxPages);
3142 					transferEmpty = false;
3143 				}
3144 
3145 				DEBUG_PAGE_ACCESS_END(page);
3146 
3147 				wrappers[usedWrappers++] = wrapper;
3148 				continue;
3149 			}
3150 
3151 			DEBUG_PAGE_ACCESS_END(page);
3152 		}
3153 
3154 		if (transferEmpty)
3155 			continue;
3156 
3157 		cache->Unlock();
3158 		status_t status = transfer.Schedule(0);
3159 		cache->Lock();
3160 
3161 		for (int32 i = 0; i < usedWrappers; i++)
3162 			wrappers[i]->Done(status);
3163 
3164 		usedWrappers = 0;
3165 
3166 		if (page != NULL) {
3167 			transfer.SetTo(NULL, page, maxPages);
3168 			wrappers[usedWrappers++] = wrapper;
3169 		} else
3170 			transferEmpty = true;
3171 	}
3172 
3173 	if (wrapperPool != stackWrappers)
3174 		delete[] wrapperPool;
3175 
3176 	return B_OK;
3177 }
3178 
3179 
3180 /*!	You need to hold the VMCache lock when calling this function.
3181 	Note that the cache lock is released in this function.
3182 */
3183 status_t
3184 vm_page_write_modified_pages(VMCache *cache)
3185 {
3186 	return vm_page_write_modified_page_range(cache, 0,
3187 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3188 }
3189 
3190 
3191 /*!	Schedules the page writer to write back the specified \a page.
3192 	Note, however, that it might not do this immediately, and it can well
3193 	take several seconds until the page is actually written out.
3194 */
3195 void
3196 vm_page_schedule_write_page(vm_page *page)
3197 {
3198 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3199 
3200 	vm_page_requeue(page, false);
3201 
3202 	sPageWriterCondition.WakeUp();
3203 }
3204 
3205 
3206 /*!	Cache must be locked.
3207 */
3208 void
3209 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3210 	uint32 endPage)
3211 {
3212 	uint32 modified = 0;
3213 	for (VMCachePagesTree::Iterator it
3214 				= cache->pages.GetIterator(firstPage, true, true);
3215 			vm_page *page = it.Next();) {
3216 		if (page->cache_offset >= endPage)
3217 			break;
3218 
3219 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3220 			DEBUG_PAGE_ACCESS_START(page);
3221 			vm_page_requeue(page, false);
3222 			modified++;
3223 			DEBUG_PAGE_ACCESS_END(page);
3224 		}
3225 	}
3226 
3227 	if (modified > 0)
3228 		sPageWriterCondition.WakeUp();
3229 }
3230 
3231 
3232 void
3233 vm_page_init_num_pages(kernel_args *args)
3234 {
3235 	// calculate the size of memory by looking at the physical_memory_range array
3236 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3237 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3238 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3239 
3240 	sNonExistingPages = 0;
3241 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3242 
3243 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3244 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3245 		if (start > physicalPagesEnd)
3246 			sNonExistingPages += start - physicalPagesEnd;
3247 		physicalPagesEnd = start
3248 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3249 
3250 #ifdef LIMIT_AVAILABLE_MEMORY
3251 		page_num_t available
3252 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3253 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3254 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3255 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3256 			break;
3257 		}
3258 #endif
3259 	}
3260 
3261 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3262 		sPhysicalPageOffset, physicalPagesEnd));
3263 
3264 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3265 }
3266 
3267 
3268 status_t
3269 vm_page_init(kernel_args *args)
3270 {
3271 	TRACE(("vm_page_init: entry\n"));
3272 
3273 	// init page queues
3274 	sModifiedPageQueue.Init("modified pages queue");
3275 	sInactivePageQueue.Init("inactive pages queue");
3276 	sActivePageQueue.Init("active pages queue");
3277 	sCachedPageQueue.Init("cached pages queue");
3278 	sFreePageQueue.Init("free pages queue");
3279 	sClearPageQueue.Init("clear pages queue");
3280 
3281 	new (&sPageReservationWaiters) PageReservationWaiterList;
3282 
3283 	// map in the new free page table
3284 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3285 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3286 
3287 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3288 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3289 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3290 
3291 	// initialize the free page table
3292 	for (uint32 i = 0; i < sNumPages; i++) {
3293 		sPages[i].Init(sPhysicalPageOffset + i);
3294 		sFreePageQueue.Append(&sPages[i]);
3295 
3296 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3297 		sPages[i].allocation_tracking_info.Clear();
3298 #endif
3299 	}
3300 
3301 	sUnreservedFreePages = sNumPages;
3302 
3303 	TRACE(("initialized table\n"));
3304 
3305 	// mark the ranges between usable physical memory unused
3306 	phys_addr_t previousEnd = 0;
3307 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3308 		phys_addr_t base = args->physical_memory_range[i].start;
3309 		phys_size_t size = args->physical_memory_range[i].size;
3310 		if (base > previousEnd) {
3311 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3312 				(base - previousEnd) / B_PAGE_SIZE, false);
3313 		}
3314 		previousEnd = base + size;
3315 	}
3316 
3317 	// mark the allocated physical page ranges wired
3318 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3319 		mark_page_range_in_use(
3320 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3321 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3322 	}
3323 
3324 	// The target of actually free pages. This must be at least the system
3325 	// reserve, but should be a few more pages, so we don't have to extract
3326 	// a cached page with each allocation.
3327 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3328 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3329 
3330 	// The target of free + cached and inactive pages. On low-memory machines
3331 	// keep things tight. free + cached is the pool of immediately allocatable
3332 	// pages. We want a few inactive pages, so when we're actually paging, we
3333 	// have a reasonably large set of pages to work with.
3334 	if (sUnreservedFreePages < 16 * 1024) {
3335 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3336 		sInactivePagesTarget = sFreePagesTarget / 3;
3337 	} else {
3338 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3339 		sInactivePagesTarget = sFreePagesTarget / 2;
3340 	}
3341 
3342 	TRACE(("vm_page_init: exit\n"));
3343 
3344 	return B_OK;
3345 }
3346 
3347 
3348 status_t
3349 vm_page_init_post_area(kernel_args *args)
3350 {
3351 	void *dummy;
3352 
3353 	dummy = sPages;
3354 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3355 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3356 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3357 
3358 	add_debugger_command("page_stats", &dump_page_stats,
3359 		"Dump statistics about page usage");
3360 	add_debugger_command_etc("page", &dump_page,
3361 		"Dump page info",
3362 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3363 		"Prints information for the physical page. If neither \"-p\" nor\n"
		"\"-v\" is given, the provided address is interpreted as the address of\n"
3365 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3366 		"given, the address is the physical address of the page. If \"-v\" is\n"
		"given, the address is interpreted as a virtual address in the\n"
		"current thread's address space, and information for the page it is\n"
		"mapped to (if any) is printed. If \"-m\" is specified, the command will\n"
3370 		"search all known address spaces for mappings to that page and print\n"
3371 		"them.\n", 0);
3372 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3373 	add_debugger_command("find_page", &find_page,
3374 		"Find out which queue a page is actually in");
3375 
3376 #ifdef TRACK_PAGE_USAGE_STATS
3377 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3378 		"Dumps statistics about page usage counts",
3379 		"\n"
3380 		"Dumps statistics about page usage counts.\n",
3381 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3382 #endif
3383 
3384 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3385 	add_debugger_command_etc("page_allocations_per_caller",
3386 		&dump_page_allocations_per_caller,
3387 		"Dump current page allocations summed up per caller",
3388 		"[ -d <caller> ] [ -r ]\n"
		"The current allocations will be summed up by caller (their count)\n"
		"and printed in decreasing order by count.\n"
3391 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3392 		"including the respective stack trace.\n"
3393 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3394 		"the information, so the next command invocation will only show the\n"
3395 		"allocations made after the reset.\n", 0);
3396 	add_debugger_command_etc("page_allocation_infos",
3397 		&dump_page_allocation_infos,
3398 		"Dump current page allocations",
3399 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3400 		"[ --thread <thread ID> ]\n"
3401 		"The current allocations filtered by optional values will be printed.\n"
3402 		"The optional \"-p\" page number filters for a specific page,\n"
		"with \"--team\" and \"--thread\", allocations by specific teams\n"
3404 		"and/or threads can be filtered (these only work if a corresponding\n"
3405 		"tracing entry is still available).\n"
3406 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
		"callers are printed, where available.\n", 0);
3408 #endif
3409 
3410 	return B_OK;
3411 }
3412 
3413 
3414 status_t
3415 vm_page_init_post_thread(kernel_args *args)
3416 {
3417 	new (&sFreePageCondition) ConditionVariable;
3418 	sFreePageCondition.Publish(&sFreePageQueue, "free page");
3419 
3420 	// create a kernel thread to clear out pages
3421 
3422 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3423 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3424 	resume_thread(thread);
3425 
3426 	// start page writer
3427 
3428 	sPageWriterCondition.Init("page writer");
3429 
3430 	thread = spawn_kernel_thread(&page_writer, "page writer",
3431 		B_NORMAL_PRIORITY + 1, NULL);
3432 	resume_thread(thread);
3433 
3434 	// start page daemon
3435 
3436 	sPageDaemonCondition.Init("page daemon");
3437 
3438 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3439 		B_NORMAL_PRIORITY, NULL);
3440 	resume_thread(thread);
3441 
3442 	return B_OK;
3443 }
3444 
3445 
3446 status_t
3447 vm_mark_page_inuse(page_num_t page)
3448 {
3449 	return vm_mark_page_range_inuse(page, 1);
3450 }
3451 
3452 
3453 status_t
3454 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3455 {
3456 	return mark_page_range_in_use(startPage, length, false);
3457 }
3458 
3459 
3460 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3461 */
3462 void
3463 vm_page_unreserve_pages(vm_page_reservation* reservation)
3464 {
3465 	uint32 count = reservation->count;
3466 	reservation->count = 0;
3467 
3468 	if (count == 0)
3469 		return;
3470 
3471 	TA(UnreservePages(count));
3472 
3473 	unreserve_pages(count);
3474 }
3475 
3476 
3477 /*!	With this call, you can reserve a number of free pages in the system.
3478 	They will only be handed out to someone who has actually reserved them.
3479 	This call returns as soon as the number of requested pages has been
3480 	reached.
3481 	The caller must not hold any cache lock or the function might deadlock.
3482 */
3483 void
3484 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3485 	int priority)
3486 {
3487 	reservation->count = count;
3488 
3489 	if (count == 0)
3490 		return;
3491 
3492 	TA(ReservePages(count));
3493 
3494 	reserve_pages(count, priority, false);
3495 }
3496 
3497 
3498 bool
3499 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3500 	int priority)
3501 {
3502 	if (count == 0) {
3503 		reservation->count = count;
3504 		return true;
3505 	}
3506 
3507 	uint32 remaining = reserve_pages(count, priority, true);
3508 	if (remaining == 0) {
3509 		TA(ReservePages(count));
3510 		reservation->count = count;
3511 		return true;
3512 	}
3513 
3514 	unreserve_pages(count - remaining);
3515 
3516 	return false;
3517 }
3518 
3519 
3520 vm_page *
3521 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3522 {
3523 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3524 	ASSERT(pageState != PAGE_STATE_FREE);
3525 	ASSERT(pageState != PAGE_STATE_CLEAR);
3526 
3527 	ASSERT(reservation->count > 0);
3528 	reservation->count--;
3529 
3530 	VMPageQueue* queue;
3531 	VMPageQueue* otherQueue;
3532 
3533 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3534 		queue = &sClearPageQueue;
3535 		otherQueue = &sFreePageQueue;
3536 	} else {
3537 		queue = &sFreePageQueue;
3538 		otherQueue = &sClearPageQueue;
3539 	}
3540 
3541 	ReadLocker locker(sFreePageQueuesLock);
3542 
3543 	vm_page* page = queue->RemoveHeadUnlocked();
3544 	if (page == NULL) {
3545 		// if the primary queue was empty, grab the page from the
3546 		// secondary queue
3547 		page = otherQueue->RemoveHeadUnlocked();
3548 
3549 		if (page == NULL) {
3550 			// Unlikely, but possible: the page we have reserved has moved
3551 			// between the queues after we checked the first queue. Grab the
3552 			// write locker to make sure this doesn't happen again.
3553 			locker.Unlock();
3554 			WriteLocker writeLocker(sFreePageQueuesLock);
3555 
3556 			page = queue->RemoveHead();
3557 			if (page == NULL)
				page = otherQueue->RemoveHead();
3559 
3560 			if (page == NULL) {
3561 				panic("Had reserved page, but there is none!");
3562 				return NULL;
3563 			}
3564 
3565 			// downgrade to read lock
3566 			locker.Lock();
3567 		}
3568 	}
3569 
3570 	if (page->CacheRef() != NULL)
3571 		panic("supposed to be free page %p has cache\n", page);
3572 
3573 	DEBUG_PAGE_ACCESS_START(page);
3574 
3575 	int oldPageState = page->State();
3576 	page->SetState(pageState);
3577 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3578 	page->usage_count = 0;
3579 	page->accessed = false;
3580 	page->modified = false;
3581 
3582 	locker.Unlock();
3583 
3584 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3585 		sPageQueues[pageState].AppendUnlocked(page);
3586 
3587 	// clear the page, if we had to take it from the free queue and a clear
3588 	// page was requested
3589 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3590 		clear_page(page);
3591 
3592 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3593 	page->allocation_tracking_info.Init(
3594 		TA(AllocatePage(page->physical_page_number)));
3595 #else
3596 	TA(AllocatePage(page->physical_page_number));
3597 #endif
3598 
3599 	return page;
3600 }
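
// A minimal sketch of the reservation/allocation pattern (illustrative only;
// VM_PRIORITY_USER is assumed as the reservation priority, and real callers
// also map the page and handle cleanup):
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, 4, VM_PRIORITY_USER);
//	vm_page* page = vm_page_allocate_page(&reservation,
//		PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
//		// consumes one of the reserved pages
//	...
//	vm_page_unreserve_pages(&reservation);
//		// hands back the pages that are still reserved (here: three)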
3601 
3602 
3603 static void
3604 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3605 	VMPageQueue::PageList& clearPages)
3606 {
3607 	while (vm_page* page = freePages.RemoveHead()) {
3608 		page->busy = false;
3609 		page->SetState(PAGE_STATE_FREE);
3610 		DEBUG_PAGE_ACCESS_END(page);
3611 		sFreePageQueue.PrependUnlocked(page);
3612 	}
3613 
3614 	while (vm_page* page = clearPages.RemoveHead()) {
3615 		page->busy = false;
3616 		page->SetState(PAGE_STATE_CLEAR);
3617 		DEBUG_PAGE_ACCESS_END(page);
3618 		sClearPageQueue.PrependUnlocked(page);
3619 	}
3620 }
3621 
3622 
/*!	Tries to allocate a contiguous run of \a length pages starting at
3624 	index \a start.
3625 
3626 	The caller must have write-locked the free/clear page queues. The function
3627 	will unlock regardless of whether it succeeds or fails.
3628 
3629 	If the function fails, it cleans up after itself, i.e. it will free all
3630 	pages it managed to allocate.
3631 
3632 	\param start The start index (into \c sPages) of the run.
3633 	\param length The number of pages to allocate.
3634 	\param flags Page allocation flags. Encodes the state the function shall
3635 		set the allocated pages to, whether the pages shall be marked busy
3636 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3637 		(VM_PAGE_ALLOC_CLEAR).
	\param freeClearQueueLocker WriteLocker for the free/clear page queues, in
		locked state. It will be unlocked by the function.
3640 	\return The index of the first page that could not be allocated. \a length
3641 		is returned when the function was successful.
3642 */
3643 static page_num_t
3644 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3645 	WriteLocker& freeClearQueueLocker)
3646 {
3647 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3648 	ASSERT(pageState != PAGE_STATE_FREE);
3649 	ASSERT(pageState != PAGE_STATE_CLEAR);
3650 	ASSERT(start + length <= sNumPages);
3651 
3652 	// Pull the free/clear pages out of their respective queues. Cached pages
3653 	// are allocated later.
3654 	page_num_t cachedPages = 0;
3655 	VMPageQueue::PageList freePages;
3656 	VMPageQueue::PageList clearPages;
3657 	page_num_t i = 0;
3658 	for (; i < length; i++) {
3659 		bool pageAllocated = true;
3660 		bool noPage = false;
3661 		vm_page& page = sPages[start + i];
3662 		switch (page.State()) {
3663 			case PAGE_STATE_CLEAR:
3664 				DEBUG_PAGE_ACCESS_START(&page);
3665 				sClearPageQueue.Remove(&page);
3666 				clearPages.Add(&page);
3667 				break;
3668 			case PAGE_STATE_FREE:
3669 				DEBUG_PAGE_ACCESS_START(&page);
3670 				sFreePageQueue.Remove(&page);
3671 				freePages.Add(&page);
3672 				break;
3673 			case PAGE_STATE_CACHED:
3674 				// We allocate cached pages later.
3675 				cachedPages++;
3676 				pageAllocated = false;
3677 				break;
3678 
3679 			default:
3680 				// Probably a page was cached when our caller checked. Now it's
3681 				// gone and we have to abort.
3682 				noPage = true;
3683 				break;
3684 		}
3685 
3686 		if (noPage)
3687 			break;
3688 
3689 		if (pageAllocated) {
3690 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3691 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3692 			page.usage_count = 0;
3693 			page.accessed = false;
3694 			page.modified = false;
3695 		}
3696 	}
3697 
3698 	if (i < length) {
3699 		// failed to allocate a page -- free all that we've got
3700 		allocate_page_run_cleanup(freePages, clearPages);
3701 		return i;
3702 	}
3703 
3704 	freeClearQueueLocker.Unlock();
3705 
3706 	if (cachedPages > 0) {
3707 		// allocate the pages that weren't free but cached
3708 		page_num_t freedCachedPages = 0;
3709 		page_num_t nextIndex = start;
3710 		vm_page* freePage = freePages.Head();
3711 		vm_page* clearPage = clearPages.Head();
3712 		while (cachedPages > 0) {
3713 			// skip, if we've already got the page
3714 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3715 				freePage = freePages.GetNext(freePage);
3716 				nextIndex++;
3717 				continue;
3718 			}
3719 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3720 				clearPage = clearPages.GetNext(clearPage);
3721 				nextIndex++;
3722 				continue;
3723 			}
3724 
3725 			// free the page, if it is still cached
3726 			vm_page& page = sPages[nextIndex];
3727 			if (!free_cached_page(&page, false)) {
3728 				// TODO: if the page turns out to have been freed already,
3729 				// there would be no need to fail
3730 				break;
3731 			}
3732 
3733 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3734 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3735 			page.usage_count = 0;
3736 			page.accessed = false;
3737 			page.modified = false;
3738 
3739 			freePages.InsertBefore(freePage, &page);
3740 			freedCachedPages++;
3741 			cachedPages--;
3742 			nextIndex++;
3743 		}
3744 
3745 		// If we have freed cached pages, we need to balance things.
3746 		if (freedCachedPages > 0)
3747 			unreserve_pages(freedCachedPages);
3748 
3749 		if (nextIndex - start < length) {
3750 			// failed to allocate all cached pages -- free all that we've got
3751 			freeClearQueueLocker.Lock();
3752 			allocate_page_run_cleanup(freePages, clearPages);
3753 			freeClearQueueLocker.Unlock();
3754 
3755 			return nextIndex - start;
3756 		}
3757 	}
3758 
3759 	// clear pages, if requested
3760 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3761 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3762 				vm_page* page = it.Next();) {
3763  			clear_page(page);
3764 		}
3765 	}
3766 
3767 	// add pages to target queue
3768 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3769 		freePages.MoveFrom(&clearPages);
3770 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3771 	}
3772 
3773 	// Note: We don't unreserve the pages since we pulled them out of the
3774 	// free/clear queues without adjusting sUnreservedFreePages.
3775 
3776 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3777 	AbstractTraceEntryWithStackTrace* traceEntry
3778 		= TA(AllocatePageRun(start, length));
3779 
3780 	for (page_num_t i = start; i < start + length; i++)
3781 		sPages[i].allocation_tracking_info.Init(traceEntry);
3782 #else
3783 	TA(AllocatePageRun(start, length));
3784 #endif
3785 
3786 	return length;
3787 }
3788 
3789 
3790 /*! Allocate a physically contiguous range of pages.
3791 
3792 	\param flags Page allocation flags. Encodes the state the function shall
3793 		set the allocated pages to, whether the pages shall be marked busy
3794 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3795 		(VM_PAGE_ALLOC_CLEAR).
3796 	\param length The number of contiguous pages to allocate.
3797 	\param restrictions Restrictions to the physical addresses of the page run
3798 		to allocate, including \c low_address, the first acceptable physical
3799 		address where the page run may start, \c high_address, the last
3800 		acceptable physical address where the page run may end (i.e. it must
3801 		hold \code runStartAddress + length * B_PAGE_SIZE <= high_address \endcode),
3802 		\c alignment, the alignment of the page run start address, and
3803 		\c boundary, multiples of which the page run must not cross.
3804 		Values set to \c 0 are ignored.
3805 	\param priority The page reservation priority (as passed to
3806 		vm_page_reserve_pages()).
3807 	\return The first page of the allocated page run on success; \c NULL
3808 		when the allocation failed.
3809 */
3810 vm_page*
3811 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3812 	const physical_address_restrictions* restrictions, int priority)
3813 {
3814 	// compute start and end page index
3815 	page_num_t requestedStart
3816 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3817 			- sPhysicalPageOffset;
3818 	page_num_t start = requestedStart;
3819 	page_num_t end;
3820 	if (restrictions->high_address > 0) {
3821 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3822 				sPhysicalPageOffset)
3823 			- sPhysicalPageOffset;
3824 		end = std::min(end, sNumPages);
3825 	} else
3826 		end = sNumPages;
3827 
3828 	// compute alignment mask
3829 	page_num_t alignmentMask
3830 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3831 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3832 		// alignment must be a power of 2
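		// Illustrative example (values are assumptions): with 4 KiB pages and
		// restrictions->alignment == 64 KiB, the division yields 16, so
		// alignmentMask == 0xf and run start indices get rounded up to
		// multiples of 16 below.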
3833 
3834 	// compute the boundary shift
3835 	uint32 boundaryShift = 0;
3836 	if (restrictions->boundary != 0) {
3837 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3838 		// boundary must be a power of two and not less than alignment and
3839 		// length
3840 		ASSERT(((boundary - 1) & boundary) == 0);
3841 		ASSERT(boundary >= alignmentMask + 1);
3842 		ASSERT(boundary >= length);
3843 
3844 		while ((boundary >>= 1) > 0)
3845 			boundaryShift++;
3846 	}
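	// Illustrative example (values are assumptions): with 4 KiB pages and
	// restrictions->boundary == 64 KiB, boundary == 16 pages and
	// boundaryShift == 4, i.e. the run may not cross a 16 page (64 KiB)
	// boundary.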
3847 
3848 	vm_page_reservation reservation;
3849 	vm_page_reserve_pages(&reservation, length, priority);
3850 
3851 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3852 
3853 	// First we try to get a run with free pages only. If that fails, we also
3854 	// consider cached pages. If there are only a few free pages and many cached
3855 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3856 	// the first iteration in this case.
3857 	int32 freePages = sUnreservedFreePages;
3858 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
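	// Illustrative numbers: with 100 unreserved free pages and length == 64,
	// 100 <= 2 * 64, so the free-only iteration is skipped and cached pages
	// are considered right away (useCached == 1).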
3859 
3860 	for (;;) {
3861 		if (alignmentMask != 0 || boundaryShift != 0) {
3862 			page_num_t offsetStart = start + sPhysicalPageOffset;
3863 
3864 			// enforce alignment
3865 			if ((offsetStart & alignmentMask) != 0) {
3866 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3867 					// sPhysicalPageOffset is subtracted again below
3868 			}
3869 
3870 			// enforce boundary
3871 			if (offsetStart >> boundaryShift
3872 					!= (offsetStart + length - 1) >> boundaryShift) {
3873 				offsetStart = (offsetStart + length - 1) >> boundaryShift
3874 					<< boundaryShift;
3875 			}
3876 
3877 			start = offsetStart - sPhysicalPageOffset;
3878 		}
3879 
3880 		if (start + length > end) {
3881 			if (useCached == 0) {
3882 				// The first iteration with free pages only was unsuccessful.
3883 				// Try again also considering cached pages.
3884 				useCached = 1;
3885 				start = requestedStart;
3886 				continue;
3887 			}
3888 
3889 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3890 				"length %" B_PRIuPHYSADDR " in second iteration!\n", length);
3891 
3892 			freeClearQueueLocker.Unlock();
3893 			vm_page_unreserve_pages(&reservation);
3894 			return NULL;
3895 		}
3896 
3897 		bool foundRun = true;
3898 		page_num_t i;
3899 		for (i = 0; i < length; i++) {
3900 			uint32 pageState = sPages[start + i].State();
3901 			if (pageState != PAGE_STATE_FREE
3902 				&& pageState != PAGE_STATE_CLEAR
3903 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3904 				foundRun = false;
3905 				break;
3906 			}
3907 		}
3908 
3909 		if (foundRun) {
3910 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3911 			if (i == length)
3912 				return &sPages[start];
3913 
3914 			// apparently a cached page couldn't be allocated -- skip it and
3915 			// continue
3916 			freeClearQueueLocker.Lock();
3917 		}
3918 
3919 		start += i + 1;
3920 	}
3921 }
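

/*	Usage sketch (illustrative; the flag combination and restriction values
	below are assumptions, not taken from a real caller): allocate a run of 16
	physically contiguous, cleared, wired pages below 16 MB, aligned to 64 KiB.

	\code
	physical_address_restrictions restrictions = {};
	restrictions.high_address = 16 * 1024 * 1024;
		// low_address and boundary stay 0 and are thus ignored
	restrictions.alignment = 64 * 1024;

	vm_page* firstPage = vm_page_allocate_page_run(
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR, 16, &restrictions,
		VM_PRIORITY_SYSTEM);
	if (firstPage == NULL) {
		// the contiguous allocation failed -- fall back or propagate an error
	}
	\endcode
*/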
3922 
3923 
3924 vm_page *
3925 vm_page_at_index(int32 index)
3926 {
3927 	return &sPages[index];
3928 }
3929 
3930 
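/*!	Returns the vm_page structure for the given physical page number, or
	\c NULL if the page lies outside the range managed by the page allocator
	(i.e. below sPhysicalPageOffset or past the last managed page).
*/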
3931 vm_page *
3932 vm_lookup_page(page_num_t pageNumber)
3933 {
3934 	if (pageNumber < sPhysicalPageOffset)
3935 		return NULL;
3936 
3937 	pageNumber -= sPhysicalPageOffset;
3938 	if (pageNumber >= sNumPages)
3939 		return NULL;
3940 
3941 	return &sPages[pageNumber];
3942 }
3943 
3944 
3945 bool
3946 vm_page_is_dummy(struct vm_page *page)
3947 {
3948 	return page < sPages || page >= sPages + sNumPages;
3949 }
3950 
3951 
3952 /*!	Free the page that belonged to a certain cache.
3953 	You can use vm_page_set_state() manually if you prefer, but only
3954 	if the page's state does not equal PAGE_STATE_MODIFIED.
3955 */
3956 void
3957 vm_page_free(VMCache *cache, vm_page *page)
3958 {
3959 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3960 		&& page->State() != PAGE_STATE_CLEAR);
3961 
3962 	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
3963 		atomic_add(&sModifiedTemporaryPages, -1);
3964 
3965 	free_page(page, false);
3966 }
3967 
3968 
3969 void
3970 vm_page_set_state(vm_page *page, int pageState)
3971 {
3972 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3973 		&& page->State() != PAGE_STATE_CLEAR);
3974 
3975 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR)
3976 		free_page(page, pageState == PAGE_STATE_CLEAR);
3977 	else
3978 		set_page_state(page, pageState);
3979 }
3980 
3981 
3982 /*!	Moves a page to either the tail or the head of its current queue,
3983 	depending on \a tail.
3984 	The page must have a cache and the cache must be locked!
3985 */
3986 void
3987 vm_page_requeue(struct vm_page *page, bool tail)
3988 {
3989 	PAGE_ASSERT(page, page->Cache() != NULL);
3990 	page->Cache()->AssertLocked();
3991 	// DEBUG_PAGE_ACCESS_CHECK(page);
3992 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
3993 		// when it requeues busy pages. The reason is that vm_soft_fault()
3994 		// (respectively fault_get_page()) and the file cache keep newly
3995 		// allocated pages accessed while they are reading them from disk. It
3996 		// would probably be better to change that code and reenable this
3997 		// check.
3998 
3999 	VMPageQueue *queue = NULL;
4000 
4001 	switch (page->State()) {
4002 		case PAGE_STATE_ACTIVE:
4003 			queue = &sActivePageQueue;
4004 			break;
4005 		case PAGE_STATE_INACTIVE:
4006 			queue = &sInactivePageQueue;
4007 			break;
4008 		case PAGE_STATE_MODIFIED:
4009 			queue = &sModifiedPageQueue;
4010 			break;
4011 		case PAGE_STATE_CACHED:
4012 			queue = &sCachedPageQueue;
4013 			break;
4014 		case PAGE_STATE_FREE:
4015 		case PAGE_STATE_CLEAR:
4016 			panic("vm_page_requeue() called for free/clear page %p", page);
4017 			return;
4018 		case PAGE_STATE_WIRED:
4019 		case PAGE_STATE_UNUSED:
4020 			return;
4021 		default:
4022 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
4023 				page, page->State());
4024 			break;
4025 	}
4026 
4027 	queue->RequeueUnlocked(page, tail);
4028 }
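

/*	Usage sketch (illustrative): with the page's cache already locked, as the
	function requires, move the page to the tail of its current queue, e.g.
	because it has just been touched.

	\code
	page->Cache()->AssertLocked();
	vm_page_requeue(page, true);
	\endcode
*/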
4029 
4030 
4031 page_num_t
4032 vm_page_num_pages(void)
4033 {
4034 	return sNumPages - sNonExistingPages;
4035 }
4036 
4037 
4038 /*! There is a subtle distinction between the page counts returned by
4039 	this function and vm_page_num_free_pages():
4040 	The latter returns the number of pages that are completely uncommitted,
4041 	whereas this one also counts pages that can be made available by
4042 	reclaiming them (IOW it factors in things like cached pages as
4043 	available).
4044 */
4045 page_num_t
4046 vm_page_num_available_pages(void)
4047 {
4048 	return vm_available_memory() / B_PAGE_SIZE;
4049 }
4050 
4051 
4052 page_num_t
4053 vm_page_num_free_pages(void)
4054 {
4055 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4056 	return count > 0 ? count : 0;
4057 }
4058 
4059 
4060 page_num_t
4061 vm_page_num_unused_pages(void)
4062 {
4063 	int32 count = sUnreservedFreePages;
4064 	return count > 0 ? count : 0;
4065 }
4066 
4067 
4068 void
4069 vm_page_get_stats(system_info *info)
4070 {
4071 	// Note: there's no locking protecting any of the queues or counters here,
4072 	// so we run the risk of getting bogus values when evaluating them
4073 	// throughout this function. As these stats are for informational purposes
4074 	// only, it is not really worth introducing such locking. Therefore we just
4075 	// ensure that we don't under- or overflow any of the values.
4076 
4077 	// The pages used for the block cache buffers. Those should not be counted
4078 	// as used but as cached pages.
4079 	// TODO: We should subtract the blocks that are in use ATM, since those
4080 	// can't really be freed in a low memory situation.
4081 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4082 
4083 	// Non-temporary modified pages are special as they represent pages that
4084 	// can be written back, so they could be freed if necessary, which
4085 	// basically makes them cached pages with a higher freeing overhead. The
4086 	// modified queue count is therefore split into temporary and non-temporary
4087 	// counts that are then added to the corresponding number.
4088 	page_num_t modifiedNonTemporaryPages
4089 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4090 
4091 	info->max_pages = vm_page_num_pages();
4092 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4093 		+ blockCachePages;
4094 
4095 	// max_pages is composed of:
4096 	//	active + inactive + unused + wired + modified + cached + free + clear
4097 	// So taking out the cached (including modified non-temporary), free and
4098 	// clear ones leaves us with all used pages.
4099 	int32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4100 		+ sClearPageQueue.Count();
4101 	info->used_pages = subtractPages > info->max_pages
4102 		? 0 : info->max_pages - subtractPages;
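	// Illustrative numbers only: with max_pages == 1000, cached_pages == 300,
	// 100 free and 50 clear pages, subtractPages == 450 and
	// used_pages == 550.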
4103 
4104 	if (info->used_pages + info->cached_pages > info->max_pages) {
4105 		// Something was shuffled around while we were summing up the counts.
4106 		// Make the values sane, preferring the worse case of more used pages.
4107 		info->cached_pages = info->max_pages - info->used_pages;
4108 	}
4109 
4110 	info->page_faults = vm_num_page_faults();
4111 	info->ignored_pages = sIgnoredPages;
4112 
4113 	// TODO: We don't consider pages used for page directories/tables yet.
4114 }
4115 
4116 
4117 /*!	Returns the greatest address within the last page of accessible physical
4118 	memory.
4119 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4120 	means that the last page ends at exactly 4 GB.
4121 */
4122 phys_addr_t
4123 vm_page_max_address()
4124 {
4125 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4126 }
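

/*	Illustrative example (values are assumptions): with a sPhysicalPageOffset
	of 0 and sNumPages == 0x100000 pages of 4 KiB each, this returns
	0xffffffff, i.e. accessible physical memory ends at exactly 4 GB.
*/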
4127 
4128 
4129 RANGE_MARKER_FUNCTION_END(vm_page)
4130