xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 3d4afef9cba2f328e238089d4609d00d4b1524f3)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 32
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserve that an allocation of the given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Upper limit for vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The vm_page::usage_count bonus an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The vm_page::usage_count penalty an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be held whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
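// A DaemonCondition couples a condition variable with an "activated" latch:
// WakeUp() sets the latch, and a later Wait() with clearActivated == false
// returns immediately instead of blocking, so activations that happen while
// the daemon is still busy are not lost.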
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRIu32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRId32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRId32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static void
764 list_page(vm_page* page)
765 {
766 	kprintf("0x%08" B_PRIxADDR " ",
767 		(addr_t)(page->physical_page_number * B_PAGE_SIZE));
768 	switch (page->State()) {
769 		case PAGE_STATE_ACTIVE:   kprintf("A"); break;
770 		case PAGE_STATE_INACTIVE: kprintf("I"); break;
771 		case PAGE_STATE_MODIFIED: kprintf("M"); break;
772 		case PAGE_STATE_CACHED:   kprintf("C"); break;
773 		case PAGE_STATE_FREE:     kprintf("F"); break;
774 		case PAGE_STATE_CLEAR:    kprintf("L"); break;
775 		case PAGE_STATE_WIRED:    kprintf("W"); break;
776 		case PAGE_STATE_UNUSED:   kprintf("-"); break;
777 	}
778 	kprintf(" ");
779 	if (page->busy)         kprintf("B"); else kprintf("-");
780 	if (page->busy_writing) kprintf("W"); else kprintf("-");
781 	if (page->accessed)     kprintf("A"); else kprintf("-");
782 	if (page->modified)     kprintf("M"); else kprintf("-");
783 	if (page->unused)       kprintf("U"); else kprintf("-");
784 
785 	kprintf(" usage:%3u", page->usage_count);
786 	kprintf(" wired:%5u", page->WiredCount());
787 
788 	bool first = true;
789 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
790 	vm_page_mapping* mapping;
791 	while ((mapping = iterator.Next()) != NULL) {
792 		if (first) {
793 			kprintf(": ");
794 			first = false;
795 		} else
796 			kprintf(", ");
797 
798 		kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
799 		mapping = mapping->page_link.next;
800 	}
801 }
802 
803 
804 static int
805 dump_page_list(int argc, char **argv)
806 {
807 	kprintf("page table:\n");
808 	for (page_num_t i = 0; i < sNumPages; i++) {
809 		if (sPages[i].State() != PAGE_STATE_UNUSED) {
810 			list_page(&sPages[i]);
811 			kprintf("\n");
812 		}
813 	}
814 	kprintf("end of page table\n");
815 
816 	return 0;
817 }
818 
819 
820 static int
821 find_page(int argc, char **argv)
822 {
823 	struct vm_page *page;
824 	addr_t address;
825 	int32 index = 1;
826 	int i;
827 
828 	struct {
829 		const char*	name;
830 		VMPageQueue*	queue;
831 	} pageQueueInfos[] = {
832 		{ "free",		&sFreePageQueue },
833 		{ "clear",		&sClearPageQueue },
834 		{ "modified",	&sModifiedPageQueue },
835 		{ "active",		&sActivePageQueue },
836 		{ "inactive",	&sInactivePageQueue },
837 		{ "cached",		&sCachedPageQueue },
838 		{ NULL, NULL }
839 	};
840 
841 	if (argc < 2
842 		|| strlen(argv[index]) <= 2
843 		|| argv[index][0] != '0'
844 		|| argv[index][1] != 'x') {
845 		kprintf("usage: find_page <address>\n");
846 		return 0;
847 	}
848 
849 	address = strtoul(argv[index], NULL, 0);
850 	page = (vm_page*)address;
851 
852 	for (i = 0; pageQueueInfos[i].name; i++) {
853 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
854 		while (vm_page* p = it.Next()) {
855 			if (p == page) {
856 				kprintf("found page %p in queue %p (%s)\n", page,
857 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
858 				return 0;
859 			}
860 		}
861 	}
862 
863 	kprintf("page %p isn't in any queue\n", page);
864 
865 	return 0;
866 }
867 
868 
869 const char *
870 page_state_to_string(int state)
871 {
872 	switch(state) {
873 		case PAGE_STATE_ACTIVE:
874 			return "active";
875 		case PAGE_STATE_INACTIVE:
876 			return "inactive";
877 		case PAGE_STATE_MODIFIED:
878 			return "modified";
879 		case PAGE_STATE_CACHED:
880 			return "cached";
881 		case PAGE_STATE_FREE:
882 			return "free";
883 		case PAGE_STATE_CLEAR:
884 			return "clear";
885 		case PAGE_STATE_WIRED:
886 			return "wired";
887 		case PAGE_STATE_UNUSED:
888 			return "unused";
889 		default:
890 			return "unknown";
891 	}
892 }
893 
894 
895 static int
896 dump_page_long(int argc, char **argv)
897 {
898 	bool addressIsPointer = true;
899 	bool physical = false;
900 	bool searchMappings = false;
901 	int32 index = 1;
902 
903 	while (index < argc) {
904 		if (argv[index][0] != '-')
905 			break;
906 
907 		if (!strcmp(argv[index], "-p")) {
908 			addressIsPointer = false;
909 			physical = true;
910 		} else if (!strcmp(argv[index], "-v")) {
911 			addressIsPointer = false;
912 		} else if (!strcmp(argv[index], "-m")) {
913 			searchMappings = true;
914 		} else {
915 			print_debugger_command_usage(argv[0]);
916 			return 0;
917 		}
918 
919 		index++;
920 	}
921 
922 	if (index + 1 != argc) {
923 		print_debugger_command_usage(argv[0]);
924 		return 0;
925 	}
926 
927 	uint64 value;
928 	if (!evaluate_debug_expression(argv[index], &value, false))
929 		return 0;
930 
931 	uint64 pageAddress = value;
932 	struct vm_page* page;
933 
934 	if (addressIsPointer) {
935 		page = (struct vm_page *)(addr_t)pageAddress;
936 	} else {
937 		if (!physical) {
938 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
939 
940 			if (debug_get_debugged_thread()->team->address_space != NULL)
941 				addressSpace = debug_get_debugged_thread()->team->address_space;
942 
943 			uint32 flags = 0;
944 			phys_addr_t physicalAddress;
945 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
946 					&physicalAddress, &flags) != B_OK
947 				|| (flags & PAGE_PRESENT) == 0) {
948 				kprintf("Virtual address not mapped to a physical page in this "
949 					"address space.\n");
950 				return 0;
951 			}
952 			pageAddress = physicalAddress;
953 		}
954 
955 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
956 	}
957 
958 	kprintf("PAGE: %p\n", page);
959 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
960 		page->queue_link.previous);
961 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
962 		page->physical_page_number);
963 	kprintf("cache:           %p\n", page->Cache());
964 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
965 	kprintf("cache_next:      %p\n", page->cache_next);
966 	kprintf("state:           %s\n", page_state_to_string(page->State()));
967 	kprintf("wired_count:     %d\n", page->WiredCount());
968 	kprintf("usage_count:     %d\n", page->usage_count);
969 	kprintf("busy:            %d\n", page->busy);
970 	kprintf("busy_writing:    %d\n", page->busy_writing);
971 	kprintf("accessed:        %d\n", page->accessed);
972 	kprintf("modified:        %d\n", page->modified);
973 	#if DEBUG_PAGE_QUEUE
974 		kprintf("queue:           %p\n", page->queue);
975 	#endif
976 	#if DEBUG_PAGE_ACCESS
977 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
978 	#endif
979 	kprintf("area mappings:\n");
980 
981 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
982 	vm_page_mapping *mapping;
983 	while ((mapping = iterator.Next()) != NULL) {
984 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
985 		mapping = mapping->page_link.next;
986 	}
987 
988 	if (searchMappings) {
989 		kprintf("all mappings:\n");
990 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
991 		while (addressSpace != NULL) {
992 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
993 			for (addr_t address = addressSpace->Base(); pageCount != 0;
994 					address += B_PAGE_SIZE, pageCount--) {
995 				phys_addr_t physicalAddress;
996 				uint32 flags = 0;
997 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
998 						&physicalAddress, &flags) == B_OK
999 					&& (flags & PAGE_PRESENT) != 0
1000 					&& physicalAddress / B_PAGE_SIZE
1001 						== page->physical_page_number) {
1002 					VMArea* area = addressSpace->LookupArea(address);
1003 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
1004 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
1005 						area != NULL ? area->id : -1, address,
1006 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
1007 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
1008 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
1009 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
1010 				}
1011 			}
1012 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
1013 		}
1014 	}
1015 
1016 	set_debug_variable("_cache", (addr_t)page->Cache());
1017 	#if DEBUG_PAGE_ACCESS
1018 		set_debug_variable("_accessor", page->accessing_thread);
1019 	#endif
1020 
1021 	return 0;
1022 }
1023 
1024 
1025 static int
1026 dump_page_queue(int argc, char **argv)
1027 {
1028 	struct VMPageQueue *queue;
1029 
1030 	if (argc < 2) {
1031 		kprintf("usage: page_queue <address/name> [list]\n");
1032 		return 0;
1033 	}
1034 
1035 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
1036 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
1037 	else if (!strcmp(argv[1], "free"))
1038 		queue = &sFreePageQueue;
1039 	else if (!strcmp(argv[1], "clear"))
1040 		queue = &sClearPageQueue;
1041 	else if (!strcmp(argv[1], "modified"))
1042 		queue = &sModifiedPageQueue;
1043 	else if (!strcmp(argv[1], "active"))
1044 		queue = &sActivePageQueue;
1045 	else if (!strcmp(argv[1], "inactive"))
1046 		queue = &sInactivePageQueue;
1047 	else if (!strcmp(argv[1], "cached"))
1048 		queue = &sCachedPageQueue;
1049 	else {
1050 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
1051 		return 0;
1052 	}
1053 
1054 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
1055 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
1056 		queue->Count());
1057 
1058 	if (argc == 3) {
1059 		struct vm_page *page = queue->Head();
1060 
1061 		kprintf("page        cache       type       state  wired  usage\n");
1062 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1063 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1064 				vm_cache_type_to_string(page->Cache()->type),
1065 				page_state_to_string(page->State()),
1066 				page->WiredCount(), page->usage_count);
1067 		}
1068 	}
1069 	return 0;
1070 }
1071 
1072 
1073 static int
1074 dump_page_stats(int argc, char **argv)
1075 {
1076 	page_num_t swappableModified = 0;
1077 	page_num_t swappableModifiedInactive = 0;
1078 
1079 	size_t counter[8];
1080 	size_t busyCounter[8];
1081 	memset(counter, 0, sizeof(counter));
1082 	memset(busyCounter, 0, sizeof(busyCounter));
1083 
1084 	struct page_run {
1085 		page_num_t	start;
1086 		page_num_t	end;
1087 
1088 		page_num_t Length() const	{ return end - start; }
1089 	};
1090 
1091 	page_run currentFreeRun = { 0, 0 };
1092 	page_run currentCachedRun = { 0, 0 };
1093 	page_run longestFreeRun = { 0, 0 };
1094 	page_run longestCachedRun = { 0, 0 };
1095 
1096 	for (page_num_t i = 0; i < sNumPages; i++) {
1097 		if (sPages[i].State() > 7) {
1098 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1099 				&sPages[i]);
1100 		}
1101 
1102 		uint32 pageState = sPages[i].State();
1103 
1104 		counter[pageState]++;
1105 		if (sPages[i].busy)
1106 			busyCounter[pageState]++;
1107 
1108 		if (pageState == PAGE_STATE_MODIFIED
1109 			&& sPages[i].Cache() != NULL
1110 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1111 			swappableModified++;
1112 			if (sPages[i].usage_count == 0)
1113 				swappableModifiedInactive++;
1114 		}
1115 
1116 		// track free and cached pages runs
1117 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1118 			currentFreeRun.end = i + 1;
1119 			currentCachedRun.end = i + 1;
1120 		} else {
1121 			if (currentFreeRun.Length() > longestFreeRun.Length())
1122 				longestFreeRun = currentFreeRun;
1123 			currentFreeRun.start = currentFreeRun.end = i + 1;
1124 
1125 			if (pageState == PAGE_STATE_CACHED) {
1126 				currentCachedRun.end = i + 1;
1127 			} else {
1128 				if (currentCachedRun.Length() > longestCachedRun.Length())
1129 					longestCachedRun = currentCachedRun;
1130 				currentCachedRun.start = currentCachedRun.end = i + 1;
1131 			}
1132 		}
1133 	}
1134 
1135 	kprintf("page stats:\n");
1136 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1137 
1138 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1139 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1140 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1141 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1142 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1143 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1144 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1145 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1146 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1147 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1148 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1149 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1150 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1151 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1152 
1153 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1154 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1155 		sUnsatisfiedPageReservations);
1156 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1157 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1158 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1159 		sPages[longestFreeRun.start].physical_page_number);
1160 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1161 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1162 		sPages[longestCachedRun.start].physical_page_number);
1163 
1164 	kprintf("waiting threads:\n");
1165 	for (PageReservationWaiterList::Iterator it
1166 			= sPageReservationWaiters.GetIterator();
1167 		PageReservationWaiter* waiter = it.Next();) {
1168 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1169 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1170 			waiter->missing, waiter->dontTouch);
1171 	}
1172 
1173 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1174 		sFreePageQueue.Count());
1175 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1176 		sClearPageQueue.Count());
1177 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1178 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1179 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1180 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1181 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1182 		&sActivePageQueue, sActivePageQueue.Count());
1183 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1184 		&sInactivePageQueue, sInactivePageQueue.Count());
1185 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1186 		&sCachedPageQueue, sCachedPageQueue.Count());
1187 	return 0;
1188 }
1189 
1190 
1191 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1192 
1193 static caller_info*
1194 get_caller_info(addr_t caller)
1195 {
1196 	// find the caller info
1197 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1198 		if (caller == sCallerInfoTable[i].caller)
1199 			return &sCallerInfoTable[i];
1200 	}
1201 
1202 	// not found, add a new entry, if there are free slots
1203 	if (sCallerInfoCount >= kCallerInfoTableSize)
1204 		return NULL;
1205 
1206 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1207 	info->caller = caller;
1208 	info->count = 0;
1209 
1210 	return info;
1211 }
1212 
1213 
1214 static int
1215 caller_info_compare_count(const void* _a, const void* _b)
1216 {
1217 	const caller_info* a = (const caller_info*)_a;
1218 	const caller_info* b = (const caller_info*)_b;
1219 	return (int)(b->count - a->count);
1220 }
1221 
1222 
1223 static int
1224 dump_page_allocations_per_caller(int argc, char** argv)
1225 {
1226 	bool resetAllocationInfos = false;
1227 	bool printDetails = false;
1228 	addr_t caller = 0;
1229 
1230 	for (int32 i = 1; i < argc; i++) {
1231 		if (strcmp(argv[i], "-d") == 0) {
1232 			uint64 callerAddress;
1233 			if (++i >= argc
1234 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1235 				print_debugger_command_usage(argv[0]);
1236 				return 0;
1237 			}
1238 
1239 			caller = callerAddress;
1240 			printDetails = true;
1241 		} else if (strcmp(argv[i], "-r") == 0) {
1242 			resetAllocationInfos = true;
1243 		} else {
1244 			print_debugger_command_usage(argv[0]);
1245 			return 0;
1246 		}
1247 	}
1248 
1249 	sCallerInfoCount = 0;
1250 
1251 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1252 	AllocationDetailPrinterCallback detailsCallback(caller);
1253 	AllocationTrackingCallback& callback = printDetails
1254 		? (AllocationTrackingCallback&)detailsCallback
1255 		: (AllocationTrackingCallback&)collectorCallback;
1256 
1257 	for (page_num_t i = 0; i < sNumPages; i++)
1258 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1259 
1260 	if (printDetails)
1261 		return 0;
1262 
1263 	// sort the array
1264 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1265 		&caller_info_compare_count);
1266 
1267 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1268 
1269 	size_t totalAllocationCount = 0;
1270 
1271 	kprintf("     count      caller\n");
1272 	kprintf("----------------------------------\n");
1273 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1274 		caller_info& info = sCallerInfoTable[i];
1275 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1276 
1277 		const char* symbol;
1278 		const char* imageName;
1279 		bool exactMatch;
1280 		addr_t baseAddress;
1281 
1282 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1283 				&imageName, &exactMatch) == B_OK) {
1284 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1285 				info.caller - baseAddress, imageName,
1286 				exactMatch ? "" : " (nearest)");
1287 		} else
1288 			kprintf("\n");
1289 
1290 		totalAllocationCount += info.count;
1291 	}
1292 
1293 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1294 		totalAllocationCount);
1295 
1296 	return 0;
1297 }
1298 
1299 
1300 static int
1301 dump_page_allocation_infos(int argc, char** argv)
1302 {
1303 	page_num_t pageFilter = 0;
1304 	team_id teamFilter = -1;
1305 	thread_id threadFilter = -1;
1306 	bool printStackTraces = false;
1307 
1308 	for (int32 i = 1; i < argc; i++) {
1309 		if (strcmp(argv[i], "--stacktrace") == 0)
1310 			printStackTraces = true;
1311 		else if (strcmp(argv[i], "-p") == 0) {
1312 			uint64 pageNumber;
1313 			if (++i >= argc
1314 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1315 				print_debugger_command_usage(argv[0]);
1316 				return 0;
1317 			}
1318 
1319 			pageFilter = pageNumber;
1320 		} else if (strcmp(argv[i], "--team") == 0) {
1321 			uint64 team;
1322 			if (++i >= argc
1323 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1324 				print_debugger_command_usage(argv[0]);
1325 				return 0;
1326 			}
1327 
1328 			teamFilter = team;
1329 		} else if (strcmp(argv[i], "--thread") == 0) {
1330 			uint64 thread;
1331 			if (++i >= argc
1332 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1333 				print_debugger_command_usage(argv[0]);
1334 				return 0;
1335 			}
1336 
1337 			threadFilter = thread;
1338 		} else {
1339 			print_debugger_command_usage(argv[0]);
1340 			return 0;
1341 		}
1342 	}
1343 
1344 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1345 		teamFilter, threadFilter);
1346 
1347 	for (page_num_t i = 0; i < sNumPages; i++)
1348 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1349 
1350 	return 0;
1351 }
1352 
1353 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1354 
1355 
1356 #ifdef TRACK_PAGE_USAGE_STATS
1357 
1358 static void
1359 track_page_usage(vm_page* page)
1360 {
1361 	if (page->WiredCount() == 0) {
1362 		sNextPageUsage[(int32)page->usage_count + 128]++;
1363 		sNextPageUsagePageCount++;
1364 	}
1365 }
1366 
1367 
1368 static void
1369 update_page_usage_stats()
1370 {
1371 	std::swap(sPageUsage, sNextPageUsage);
1372 	sPageUsagePageCount = sNextPageUsagePageCount;
1373 
1374 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1375 	sNextPageUsagePageCount = 0;
1376 
1377 	// compute average
1378 	if (sPageUsagePageCount > 0) {
1379 		int64 sum = 0;
1380 		for (int32 i = 0; i < 256; i++)
1381 			sum += (int64)sPageUsage[i] * (i - 128);
1382 
1383 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1384 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1385 	}
1386 }
1387 
1388 
1389 static int
1390 dump_page_usage_stats(int argc, char** argv)
1391 {
1392 	kprintf("distribution of page usage counts (%lu pages):",
1393 		sPageUsagePageCount);
1394 
1395 	int64 sum = 0;
1396 	for (int32 i = 0; i < 256; i++) {
1397 		if (i % 8 == 0)
1398 			kprintf("\n%4ld:", i - 128);
1399 
1400 		int64 count = sPageUsage[i];
1401 		sum += count * (i - 128);
1402 
1403 		kprintf("  %9llu", count);
1404 	}
1405 
1406 	kprintf("\n\n");
1407 
1408 	kprintf("average usage count: %f\n",
1409 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1410 
1411 	return 0;
1412 }
1413 
1414 #endif	// TRACK_PAGE_USAGE_STATS
1415 
1416 
1417 // #pragma mark - vm_page
1418 
1419 
1420 inline void
1421 vm_page::InitState(uint8 newState)
1422 {
1423 	state = newState;
1424 }
1425 
1426 
1427 inline void
1428 vm_page::SetState(uint8 newState)
1429 {
1430 	TPS(SetPageState(this, newState));
1431 
1432 	state = newState;
1433 }
1434 
1435 
1436 // #pragma mark -
1437 
1438 
1439 static void
1440 get_page_stats(page_stats& _pageStats)
1441 {
1442 	_pageStats.totalFreePages = sUnreservedFreePages;
1443 	_pageStats.cachedPages = sCachedPageQueue.Count();
1444 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1445 	// TODO: We don't get an actual snapshot here!
1446 }
1447 
1448 
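// Returns whether paging activity is needed, i.e. whether the free plus cached
// pages no longer cover the unsatisfied reservations plus the free-or-cached
// target.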
1449 static bool
1450 do_active_paging(const page_stats& pageStats)
1451 {
1452 	return pageStats.totalFreePages + pageStats.cachedPages
1453 		< pageStats.unsatisfiedReservations
1454 			+ (int32)sFreeOrCachedPagesTarget;
1455 }
1456 
1457 
1458 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1459 	\a count. Doesn't touch the last \a dontTouch pages of
1460 	\c sUnreservedFreePages, though.
1461 	\return The number of actually reserved pages.
1462 */
1463 static uint32
1464 reserve_some_pages(uint32 count, uint32 dontTouch)
1465 {
1466 	while (true) {
1467 		int32 freePages = atomic_get(&sUnreservedFreePages);
1468 		if (freePages <= (int32)dontTouch)
1469 			return 0;
1470 
1471 		int32 toReserve = std::min(count, freePages - dontTouch);
1472 		if (atomic_test_and_set(&sUnreservedFreePages,
1473 					freePages - toReserve, freePages)
1474 				== freePages) {
1475 			return toReserve;
1476 		}
1477 
1478 		// the count changed in the meantime -- retry
1479 	}
1480 }
1481 
1482 
1483 static void
1484 wake_up_page_reservation_waiters()
1485 {
1486 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1487 
1488 	// TODO: If this is a low priority thread, we might want to disable
1489 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1490 	// high priority threads will be kept waiting while a medium priority thread
1491 	// prevents us from running.
1492 
1493 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1494 		int32 reserved = reserve_some_pages(waiter->missing,
1495 			waiter->dontTouch);
1496 		if (reserved == 0)
1497 			return;
1498 
1499 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1500 		waiter->missing -= reserved;
1501 
1502 		if (waiter->missing > 0)
1503 			return;
1504 
1505 		sPageReservationWaiters.Remove(waiter);
1506 
1507 		thread_unblock(waiter->thread, B_OK);
1508 	}
1509 }
1510 
1511 
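/*!	Returns \a count previously reserved pages to \c sUnreservedFreePages and,
	if there are unsatisfied reservations, wakes up waiting threads whose
	reservations can now be fulfilled.
*/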
1512 static inline void
1513 unreserve_pages(uint32 count)
1514 {
1515 	atomic_add(&sUnreservedFreePages, count);
1516 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1517 		wake_up_page_reservation_waiters();
1518 }
1519 
1520 
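/*!	Moves \a page out of whatever queue it currently is in and puts it into the
	free queue, or into the clear queue if \a clear is \c true (in which case
	its contents are expected to be zeroed already). The page must no longer be
	mapped or belong to a cache; reservation bookkeeping is left to the caller
	(see unreserve_pages()).
*/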
1521 static void
1522 free_page(vm_page* page, bool clear)
1523 {
1524 	DEBUG_PAGE_ACCESS_CHECK(page);
1525 
1526 	PAGE_ASSERT(page, !page->IsMapped());
1527 
1528 	VMPageQueue* fromQueue;
1529 
1530 	switch (page->State()) {
1531 		case PAGE_STATE_ACTIVE:
1532 			fromQueue = &sActivePageQueue;
1533 			break;
1534 		case PAGE_STATE_INACTIVE:
1535 			fromQueue = &sInactivePageQueue;
1536 			break;
1537 		case PAGE_STATE_MODIFIED:
1538 			fromQueue = &sModifiedPageQueue;
1539 			break;
1540 		case PAGE_STATE_CACHED:
1541 			fromQueue = &sCachedPageQueue;
1542 			break;
1543 		case PAGE_STATE_FREE:
1544 		case PAGE_STATE_CLEAR:
1545 			panic("free_page(): page %p already free", page);
1546 			return;
1547 		case PAGE_STATE_WIRED:
1548 		case PAGE_STATE_UNUSED:
1549 			fromQueue = NULL;
1550 			break;
1551 		default:
1552 			panic("free_page(): page %p in invalid state %d",
1553 				page, page->State());
1554 			return;
1555 	}
1556 
1557 	if (page->CacheRef() != NULL)
1558 		panic("to be freed page %p has cache", page);
1559 	if (page->IsMapped())
1560 		panic("to be freed page %p has mappings", page);
1561 
1562 	if (fromQueue != NULL)
1563 		fromQueue->RemoveUnlocked(page);
1564 
1565 	TA(FreePage(page->physical_page_number));
1566 
1567 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1568 	page->allocation_tracking_info.Clear();
1569 #endif
1570 
1571 	ReadLocker locker(sFreePageQueuesLock);
1572 
1573 	DEBUG_PAGE_ACCESS_END(page);
1574 
1575 	if (clear) {
1576 		page->SetState(PAGE_STATE_CLEAR);
1577 		sClearPageQueue.PrependUnlocked(page);
1578 	} else {
1579 		page->SetState(PAGE_STATE_FREE);
1580 		sFreePageQueue.PrependUnlocked(page);
1581 		sFreePageCondition.NotifyAll();
1582 	}
1583 
1584 	locker.Unlock();
1585 }
1586 
1587 
1588 /*!	The caller must make sure that no-one else tries to change the page's state
1589 	while the function is called. If the page has a cache, this can be done by
1590 	locking the cache.
1591 */
1592 static void
1593 set_page_state(vm_page *page, int pageState)
1594 {
1595 	DEBUG_PAGE_ACCESS_CHECK(page);
1596 
1597 	if (pageState == page->State())
1598 		return;
1599 
1600 	VMPageQueue* fromQueue;
1601 
1602 	switch (page->State()) {
1603 		case PAGE_STATE_ACTIVE:
1604 			fromQueue = &sActivePageQueue;
1605 			break;
1606 		case PAGE_STATE_INACTIVE:
1607 			fromQueue = &sInactivePageQueue;
1608 			break;
1609 		case PAGE_STATE_MODIFIED:
1610 			fromQueue = &sModifiedPageQueue;
1611 			break;
1612 		case PAGE_STATE_CACHED:
1613 			fromQueue = &sCachedPageQueue;
1614 			break;
1615 		case PAGE_STATE_FREE:
1616 		case PAGE_STATE_CLEAR:
1617 			panic("set_page_state(): page %p is free/clear", page);
1618 			return;
1619 		case PAGE_STATE_WIRED:
1620 		case PAGE_STATE_UNUSED:
1621 			fromQueue = NULL;
1622 			break;
1623 		default:
1624 			panic("set_page_state(): page %p in invalid state %d",
1625 				page, page->State());
1626 			return;
1627 	}
1628 
1629 	VMPageQueue* toQueue;
1630 
1631 	switch (pageState) {
1632 		case PAGE_STATE_ACTIVE:
1633 			toQueue = &sActivePageQueue;
1634 			break;
1635 		case PAGE_STATE_INACTIVE:
1636 			toQueue = &sInactivePageQueue;
1637 			break;
1638 		case PAGE_STATE_MODIFIED:
1639 			toQueue = &sModifiedPageQueue;
1640 			break;
1641 		case PAGE_STATE_CACHED:
1642 			PAGE_ASSERT(page, !page->IsMapped());
1643 			PAGE_ASSERT(page, !page->modified);
1644 			toQueue = &sCachedPageQueue;
1645 			break;
1646 		case PAGE_STATE_FREE:
1647 		case PAGE_STATE_CLEAR:
1648 			panic("set_page_state(): target state is free/clear");
1649 			return;
1650 		case PAGE_STATE_WIRED:
1651 		case PAGE_STATE_UNUSED:
1652 			toQueue = NULL;
1653 			break;
1654 		default:
1655 			panic("set_page_state(): invalid target state %d", pageState);
1656 			return;
1657 	}
1658 
1659 	VMCache* cache = page->Cache();
1660 	if (cache != NULL && cache->temporary) {
1661 		if (pageState == PAGE_STATE_MODIFIED)
1662 			atomic_add(&sModifiedTemporaryPages, 1);
1663 		else if (page->State() == PAGE_STATE_MODIFIED)
1664 			atomic_add(&sModifiedTemporaryPages, -1);
1665 	}
1666 
1667 	// move the page
1668 	if (toQueue == fromQueue) {
1669 		// Note: Theoretically we are required to lock when changing the page
1670 		// state, even if we don't change the queue. We don't actually have to
1671 		// do this, though, since only the active queue contains pages with
1672 		// differing page states, and active pages have a cache that must be
1673 		// locked at this point. So we rely on the fact that everyone must lock
1674 		// the cache before trying to change/interpret the page state.
1675 		PAGE_ASSERT(page, cache != NULL);
1676 		cache->AssertLocked();
1677 		page->SetState(pageState);
1678 	} else {
1679 		if (fromQueue != NULL)
1680 			fromQueue->RemoveUnlocked(page);
1681 
1682 		page->SetState(pageState);
1683 
1684 		if (toQueue != NULL)
1685 			toQueue->AppendUnlocked(page);
1686 	}
1687 }
1688 
1689 
1690 /*! Moves a previously modified page into the queue now appropriate for it.
1691 	The page queues must not be locked.
1692 */
1693 static void
1694 move_page_to_appropriate_queue(vm_page *page)
1695 {
1696 	DEBUG_PAGE_ACCESS_CHECK(page);
1697 
1698 	// Note, this logic must be in sync with what the page daemon does.
1699 	int32 state;
1700 	if (page->IsMapped())
1701 		state = PAGE_STATE_ACTIVE;
1702 	else if (page->modified)
1703 		state = PAGE_STATE_MODIFIED;
1704 	else
1705 		state = PAGE_STATE_CACHED;
1706 
1707 // TODO: If free + cached pages are low, we might directly want to free the
1708 // page.
1709 	set_page_state(page, state);
1710 }
1711 
1712 
1713 static void
1714 clear_page(struct vm_page *page)
1715 {
1716 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1717 		B_PAGE_SIZE);
1718 }
1719 
1720 
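/*!	Marks the physical page range [\a startPage, \a startPage + \a length) as
	in use: pages of the range that still sit in the free or clear queue are
	removed and set to wired (or unused) state. Parts of the range outside the
	area covered by \c sPages are clipped with a warning. Meant to be used
	during (early) boot, before the page reservation mechanism is fully active.
*/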
1721 static status_t
1722 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1723 {
1724 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1725 		B_PRIxPHYSADDR "\n", startPage, length));
1726 
1727 	if (sPhysicalPageOffset > startPage) {
1728 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1729 			"): start page is before free list\n", startPage, length);
1730 		if (sPhysicalPageOffset - startPage >= length)
1731 			return B_OK;
1732 		length -= sPhysicalPageOffset - startPage;
1733 		startPage = sPhysicalPageOffset;
1734 	}
1735 
1736 	startPage -= sPhysicalPageOffset;
1737 
1738 	if (startPage + length > sNumPages) {
1739 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1740 			"): range would extend past free list\n", startPage, length);
1741 		if (startPage >= sNumPages)
1742 			return B_OK;
1743 		length = sNumPages - startPage;
1744 	}
1745 
1746 	WriteLocker locker(sFreePageQueuesLock);
1747 
1748 	for (page_num_t i = 0; i < length; i++) {
1749 		vm_page *page = &sPages[startPage + i];
1750 		switch (page->State()) {
1751 			case PAGE_STATE_FREE:
1752 			case PAGE_STATE_CLEAR:
1753 			{
1754 // TODO: This violates the page reservation policy, since we remove pages from
1755 // the free/clear queues without having reserved them before. This should happen
1756 // in the early boot process only, though.
1757 				DEBUG_PAGE_ACCESS_START(page);
1758 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1759 					? sFreePageQueue : sClearPageQueue;
1760 				queue.Remove(page);
1761 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1762 				page->busy = false;
1763 				atomic_add(&sUnreservedFreePages, -1);
1764 				DEBUG_PAGE_ACCESS_END(page);
1765 				break;
1766 			}
1767 			case PAGE_STATE_WIRED:
1768 			case PAGE_STATE_UNUSED:
1769 				break;
1770 			case PAGE_STATE_ACTIVE:
1771 			case PAGE_STATE_INACTIVE:
1772 			case PAGE_STATE_MODIFIED:
1773 			case PAGE_STATE_CACHED:
1774 			default:
1775 				// uh
1776 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1777 					" in non-free state %d!\n", startPage + i, page->State());
1778 				break;
1779 		}
1780 	}
1781 
1782 	return B_OK;
1783 }
1784 
1785 
1786 /*!
1787 	This is a background thread that wakes up when its condition is notified
1788 	and moves some pages from the free queue over to the clear queue.
1789 	Given enough time, it will clear out all pages from the free queue - we
1790 	could probably slow it down after having reached a certain threshold.
1791 */
1792 static int32
1793 page_scrubber(void *unused)
1794 {
1795 	(void)(unused);
1796 
1797 	TRACE(("page_scrubber starting...\n"));
1798 
1799 	ConditionVariableEntry entry;
1800 	for (;;) {
1801 		while (sFreePageQueue.Count() == 0
1802 				|| atomic_get(&sUnreservedFreePages)
1803 					< (int32)sFreePagesTarget) {
1804 			sFreePageCondition.Add(&entry);
1805 			entry.Wait();
1806 		}
1807 
1808 		// Since we temporarily remove pages from the free pages reserve,
1809 		// we must make sure we don't cause a violation of the page
1810 		// reservation guarantee. The following is usually stricter than
1811 		// necessary, because we don't have information on how many of the
1812 		// reserved pages have already been allocated.
1813 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1814 			kPageReserveForPriority[VM_PRIORITY_USER]);
1815 		if (reserved == 0)
1816 			continue;
1817 
1818 		// get some pages from the free queue
1819 		ReadLocker locker(sFreePageQueuesLock);
1820 
1821 		vm_page *page[SCRUB_SIZE];
1822 		int32 scrubCount = 0;
1823 		for (int32 i = 0; i < reserved; i++) {
1824 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1825 			if (page[i] == NULL)
1826 				break;
1827 
1828 			DEBUG_PAGE_ACCESS_START(page[i]);
1829 
1830 			page[i]->SetState(PAGE_STATE_ACTIVE);
1831 			page[i]->busy = true;
1832 			scrubCount++;
1833 		}
1834 
1835 		locker.Unlock();
1836 
1837 		if (scrubCount == 0) {
1838 			unreserve_pages(reserved);
1839 			continue;
1840 		}
1841 
1842 		TA(ScrubbingPages(scrubCount));
1843 
1844 		// clear them
1845 		for (int32 i = 0; i < scrubCount; i++)
1846 			clear_page(page[i]);
1847 
1848 		locker.Lock();
1849 
1850 		// and put them into the clear queue
1851 		for (int32 i = 0; i < scrubCount; i++) {
1852 			page[i]->SetState(PAGE_STATE_CLEAR);
1853 			page[i]->busy = false;
1854 			DEBUG_PAGE_ACCESS_END(page[i]);
1855 			sClearPageQueue.PrependUnlocked(page[i]);
1856 		}
1857 
1858 		locker.Unlock();
1859 
1860 		unreserve_pages(reserved);
1861 
1862 		TA(ScrubbedPages(scrubCount));
1863 
1864 		// wait at least 100ms between runs
1865 		snooze(100 * 1000);
1866 	}
1867 
1868 	return 0;
1869 }
1870 
1871 
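// Helpers for "marker" pages: dummy, busy pages without a cache that iterating
// code (such as the page daemon or page writer) can insert into a page queue
// to remember its position while the queue is temporarily unlocked.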
1872 static void
1873 init_page_marker(vm_page &marker)
1874 {
1875 	marker.SetCacheRef(NULL);
1876 	marker.InitState(PAGE_STATE_UNUSED);
1877 	marker.busy = true;
1878 #if DEBUG_PAGE_QUEUE
1879 	marker.queue = NULL;
1880 #endif
1881 #if DEBUG_PAGE_ACCESS
1882 	marker.accessing_thread = thread_get_current_thread_id();
1883 #endif
1884 }
1885 
1886 
1887 static void
1888 remove_page_marker(struct vm_page &marker)
1889 {
1890 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1891 
1892 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1893 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1894 
1895 	marker.SetState(PAGE_STATE_UNUSED);
1896 }
1897 
1898 
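/*!	Returns the next non-busy page at the head of the modified queue, skipping
	(and requeueing) busy pages. Every page looked at is requeued, so repeated
	calls make progress through the queue. At most \a maxPagesToSee pages are
	examined; the counter is decremented for each page seen and \c NULL is
	returned once it reaches zero or the queue is empty.
*/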
1899 static vm_page*
1900 next_modified_page(page_num_t& maxPagesToSee)
1901 {
1902 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1903 
1904 	while (maxPagesToSee > 0) {
1905 		vm_page* page = sModifiedPageQueue.Head();
1906 		if (page == NULL)
1907 			return NULL;
1908 
1909 		sModifiedPageQueue.Requeue(page, true);
1910 
1911 		maxPagesToSee--;
1912 
1913 		if (!page->busy)
1914 			return page;
1915 	}
1916 
1917 	return NULL;
1918 }
1919 
1920 
1921 // #pragma mark -
1922 
1923 
1924 class PageWriteTransfer;
1925 class PageWriteWrapper;
1926 
1927 
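// Page writer machinery: a PageWriterRun collects up to fMaxPages modified
// pages per run, wrapping each page in a PageWriteWrapper (which tracks the
// busy/busy_writing state of a single page) and grouping pages of the same
// cache into PageWriteTransfers, which are submitted as asynchronous I/O and
// report completion through IOFinished().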
1928 class PageWriterRun {
1929 public:
1930 	status_t Init(uint32 maxPages);
1931 
1932 	void PrepareNextRun();
1933 	void AddPage(vm_page* page);
1934 	uint32 Go();
1935 
1936 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1937 		bool partialTransfer, size_t bytesTransferred);
1938 
1939 private:
1940 	uint32				fMaxPages;
1941 	uint32				fWrapperCount;
1942 	uint32				fTransferCount;
1943 	int32				fPendingTransfers;
1944 	PageWriteWrapper*	fWrappers;
1945 	PageWriteTransfer*	fTransfers;
1946 	ConditionVariable	fAllFinishedCondition;
1947 };
1948 
1949 
1950 class PageWriteTransfer : public AsyncIOCallback {
1951 public:
1952 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1953 	bool AddPage(vm_page* page);
1954 
1955 	status_t Schedule(uint32 flags);
1956 
1957 	void SetStatus(status_t status, size_t transferred);
1958 
1959 	status_t Status() const	{ return fStatus; }
1960 	struct VMCache* Cache() const { return fCache; }
1961 	uint32 PageCount() const { return fPageCount; }
1962 
1963 	virtual void IOFinished(status_t status, bool partialTransfer,
1964 		generic_size_t bytesTransferred);
1965 private:
1966 	PageWriterRun*		fRun;
1967 	struct VMCache*		fCache;
1968 	off_t				fOffset;
1969 	uint32				fPageCount;
1970 	int32				fMaxPages;
1971 	status_t			fStatus;
1972 	uint32				fVecCount;
1973 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1974 };
1975 
1976 
1977 class PageWriteWrapper {
1978 public:
1979 	PageWriteWrapper();
1980 	~PageWriteWrapper();
1981 	void SetTo(vm_page* page);
1982 	bool Done(status_t result);
1983 
1984 private:
1985 	vm_page*			fPage;
1986 	struct VMCache*		fCache;
1987 	bool				fIsActive;
1988 };
1989 
1990 
1991 PageWriteWrapper::PageWriteWrapper()
1992 	:
1993 	fIsActive(false)
1994 {
1995 }
1996 
1997 
1998 PageWriteWrapper::~PageWriteWrapper()
1999 {
2000 	if (fIsActive)
2001 		panic("page write wrapper going out of scope but isn't completed");
2002 }
2003 
2004 
2005 /*!	The page's cache must be locked.
2006 */
2007 void
2008 PageWriteWrapper::SetTo(vm_page* page)
2009 {
2010 	DEBUG_PAGE_ACCESS_CHECK(page);
2011 
2012 	if (page->busy)
2013 		panic("setting page write wrapper to busy page");
2014 
2015 	if (fIsActive)
2016 		panic("re-setting page write wrapper that isn't completed");
2017 
2018 	fPage = page;
2019 	fCache = page->Cache();
2020 	fIsActive = true;
2021 
2022 	fPage->busy = true;
2023 	fPage->busy_writing = true;
2024 
2025 	// We have a modified page -- however, while we're writing it back,
2026 	// the page might still be mapped. In order not to lose any changes to the
2027 	// page, we mark it clean before actually writing it back; if
2028 	// writing the page fails for some reason, we'll just keep it in the
2029 	// modified page list, but that should happen only rarely.
2030 
2031 	// If the page is changed after we cleared the dirty flag, but before we
2032 	// had the chance to write it back, then we'll write it again later -- that
2033 	// will probably not happen that often, though.
2034 
2035 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
2036 }
2037 
2038 
2039 /*!	The page's cache must be locked.
2040 	The page queues must not be locked.
2041 	\return \c true if the page was written successfully or could otherwise
2042 		be handled, \c false otherwise.
2043 */
2044 bool
2045 PageWriteWrapper::Done(status_t result)
2046 {
2047 	if (!fIsActive)
2048 		panic("completing page write wrapper that is not active");
2049 
2050 	DEBUG_PAGE_ACCESS_START(fPage);
2051 
2052 	fPage->busy = false;
2053 		// Set unbusy and notify later by hand, since we might free the page.
2054 
2055 	bool success = true;
2056 
2057 	if (result == B_OK) {
2058 		// put it into the active/inactive queue
2059 		move_page_to_appropriate_queue(fPage);
2060 		fPage->busy_writing = false;
2061 		DEBUG_PAGE_ACCESS_END(fPage);
2062 	} else {
2063 		// Writing the page failed. One reason would be that the cache has been
2064 		// shrunk and the page does no longer belong to the file. Otherwise the
2065 		// actual I/O failed, in which case we'll simply keep the page modified.
2066 
2067 		if (!fPage->busy_writing) {
2068 			// The busy_writing flag was cleared. That means the cache has been
2069 			// shrunk while we were trying to write the page and we have to free
2070 			// it now.
2071 			vm_remove_all_page_mappings(fPage);
2072 // TODO: Unmapping should already happen when resizing the cache!
2073 			fCache->RemovePage(fPage);
2074 			free_page(fPage, false);
2075 			unreserve_pages(1);
2076 		} else {
2077 			// The actual I/O failed -- mark the page modified again. Temporary
2078 			// pages are moved to the active or inactive queue, so we don't keep
2079 			// trying to write them over and over again; non-temporary pages stay
2080 			// in the modified queue, though, so they don't get lost in the
2081 			// inactive queue.
2082 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2083 				strerror(result));
2084 
2085 			fPage->modified = true;
2086 			if (!fCache->temporary)
2087 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2088 			else if (fPage->IsMapped())
2089 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2090 			else
2091 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2092 
2093 			fPage->busy_writing = false;
2094 			DEBUG_PAGE_ACCESS_END(fPage);
2095 
2096 			success = false;
2097 		}
2098 	}
2099 
2100 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2101 	fIsActive = false;
2102 
2103 	return success;
2104 }
2105 
2106 
2107 /*!	The page's cache must be locked.
2108 */
2109 void
2110 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2111 {
2112 	fRun = run;
2113 	fCache = page->Cache();
2114 	fOffset = page->cache_offset;
2115 	fPageCount = 1;
2116 	fMaxPages = maxPages;
2117 	fStatus = B_OK;
2118 
2119 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2120 	fVecs[0].length = B_PAGE_SIZE;
2121 	fVecCount = 1;
2122 }
2123 
2124 
2125 /*!	The page's cache must be locked.
2126 */
2127 bool
2128 PageWriteTransfer::AddPage(vm_page* page)
2129 {
2130 	if (page->Cache() != fCache
2131 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2132 		return false;
2133 
2134 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2135 		+ fVecs[fVecCount - 1].length;
2136 
2137 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2138 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2139 		// append to last iovec
2140 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2141 		fPageCount++;
2142 		return true;
2143 	}
2144 
2145 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2146 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2147 		&& (off_t)page->cache_offset == fOffset - 1) {
2148 		// prepend to first iovec and adjust offset
2149 		fVecs[0].base = nextBase;
2150 		fVecs[0].length += B_PAGE_SIZE;
2151 		fOffset = page->cache_offset;
2152 		fPageCount++;
2153 		return true;
2154 	}
2155 
2156 	if (((off_t)page->cache_offset == fOffset + fPageCount
2157 			|| (off_t)page->cache_offset == fOffset - 1)
2158 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2159 		// not physically contiguous or not in the right order
2160 		uint32 vectorIndex;
2161 		if ((off_t)page->cache_offset < fOffset) {
2162 			// we are pre-pending another vector, move the other vecs
2163 			for (uint32 i = fVecCount; i > 0; i--)
2164 				fVecs[i] = fVecs[i - 1];
2165 
2166 			fOffset = page->cache_offset;
2167 			vectorIndex = 0;
2168 		} else
2169 			vectorIndex = fVecCount;
2170 
2171 		fVecs[vectorIndex].base
2172 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2173 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2174 
2175 		fVecCount++;
2176 		fPageCount++;
2177 		return true;
2178 	}
2179 
2180 	return false;
2181 }
2182 
2183 
2184 status_t
2185 PageWriteTransfer::Schedule(uint32 flags)
2186 {
2187 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2188 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2189 
2190 	if (fRun != NULL) {
2191 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2192 			flags | B_PHYSICAL_IO_REQUEST, this);
2193 	}
2194 
2195 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2196 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2197 
2198 	SetStatus(status, writeLength);
2199 	return fStatus;
2200 }
2201 
2202 
2203 void
2204 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2205 {
2206 	// only succeed if all pages up to the last one have been written fully
2207 	// and the last page has at least been written partially
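	// E.g. with fPageCount == 4, a transfer of at most 3 * B_PAGE_SIZE bytes
	// is turned into B_ERROR; only writes reaching into the fourth page count
	// as success.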
2208 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2209 		status = B_ERROR;
2210 
2211 	fStatus = status;
2212 }
2213 
2214 
2215 void
2216 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2217 	generic_size_t bytesTransferred)
2218 {
2219 	SetStatus(status, bytesTransferred);
2220 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2221 }
2222 
2223 
2224 status_t
2225 PageWriterRun::Init(uint32 maxPages)
2226 {
2227 	fMaxPages = maxPages;
2228 	fWrapperCount = 0;
2229 	fTransferCount = 0;
2230 	fPendingTransfers = 0;
2231 
2232 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2233 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2234 	if (fWrappers == NULL || fTransfers == NULL)
2235 		return B_NO_MEMORY;
2236 
2237 	return B_OK;
2238 }
2239 
2240 
2241 void
2242 PageWriterRun::PrepareNextRun()
2243 {
2244 	fWrapperCount = 0;
2245 	fTransferCount = 0;
2246 	fPendingTransfers = 0;
2247 }
2248 
2249 
2250 /*!	The page's cache must be locked.
2251 */
2252 void
2253 PageWriterRun::AddPage(vm_page* page)
2254 {
2255 	fWrappers[fWrapperCount++].SetTo(page);
2256 
2257 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2258 		fTransfers[fTransferCount++].SetTo(this, page,
2259 			page->Cache()->MaxPagesPerAsyncWrite());
2260 	}
2261 }
2262 
2263 
2264 /*!	Writes all pages previously added.
2265 	\return The number of pages that could not be written or otherwise handled.
2266 */
2267 uint32
2268 PageWriterRun::Go()
2269 {
2270 	atomic_set(&fPendingTransfers, fTransferCount);
2271 
2272 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2273 	ConditionVariableEntry waitEntry;
2274 	fAllFinishedCondition.Add(&waitEntry);
2275 
2276 	// schedule writes
2277 	for (uint32 i = 0; i < fTransferCount; i++)
2278 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2279 
2280 	// wait until all pages have been written
2281 	waitEntry.Wait();
2282 
2283 	// mark pages depending on whether they could be written or not
2284 
2285 	uint32 failedPages = 0;
2286 	uint32 wrapperIndex = 0;
2287 	for (uint32 i = 0; i < fTransferCount; i++) {
2288 		PageWriteTransfer& transfer = fTransfers[i];
2289 		transfer.Cache()->Lock();
2290 
2291 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2292 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2293 				failedPages++;
2294 		}
2295 
2296 		transfer.Cache()->Unlock();
2297 	}
2298 
2299 	ASSERT(wrapperIndex == fWrapperCount);
2300 
2301 	for (uint32 i = 0; i < fTransferCount; i++) {
2302 		PageWriteTransfer& transfer = fTransfers[i];
2303 		struct VMCache* cache = transfer.Cache();
2304 
2305 		// We've acquired a reference for each page
2306 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2307 			// We release the cache references after all pages were made
2308 			// unbusy again - otherwise releasing a vnode could deadlock.
2309 			cache->ReleaseStoreRef();
2310 			cache->ReleaseRef();
2311 		}
2312 	}
2313 
2314 	return failedPages;
2315 }
2316 
2317 
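/*!	Called by PageWriteTransfer::IOFinished() when an asynchronous write has
	completed. Once the last pending transfer has finished, Go() is woken up
	via the all-finished condition variable.
*/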
2318 void
2319 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2320 	bool partialTransfer, size_t bytesTransferred)
2321 {
2322 	if (atomic_add(&fPendingTransfers, -1) == 1)
2323 		fAllFinishedCondition.NotifyAll();
2324 }
2325 
2326 
2327 /*!	The page writer continuously takes some pages from the modified
2328 	queue, writes them back, and moves them back to the active queue.
2329 	It runs in its own thread, and is only there to keep the number
2330 	of modified pages low, so that more pages can be reused with
2331 	fewer costs.
2332 */
2333 status_t
2334 page_writer(void* /*unused*/)
2335 {
2336 	const uint32 kNumPages = 256;
2337 #ifdef TRACE_VM_PAGE
2338 	uint32 writtenPages = 0;
2339 	bigtime_t lastWrittenTime = 0;
2340 	bigtime_t pageCollectionTime = 0;
2341 	bigtime_t pageWritingTime = 0;
2342 #endif
2343 
2344 	PageWriterRun run;
2345 	if (run.Init(kNumPages) != B_OK) {
2346 		panic("page writer: Failed to init PageWriterRun!");
2347 		return B_ERROR;
2348 	}
2349 
2350 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2351 
2352 	while (true) {
2353 // TODO: Maybe wait shorter when memory is low!
2354 		if (sModifiedPageQueue.Count() < kNumPages) {
2355 			sPageWriterCondition.Wait(3000000, true);
2356 				// every 3 seconds when no one triggers us
2357 		}
2358 
2359 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2360 		if (modifiedPages == 0)
2361 			continue;
2362 
2363 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2364 			// We ran through the whole queue without being able to write a
2365 			// single page. Take a break.
2366 			snooze(500000);
2367 			pagesSinceLastSuccessfulWrite = 0;
2368 		}
2369 
2370 #if ENABLE_SWAP_SUPPORT
2371 		page_stats pageStats;
2372 		get_page_stats(pageStats);
2373 		bool activePaging = do_active_paging(pageStats);
2374 #endif
2375 
2376 		// depending on how urgent it becomes to get pages to disk, we adjust
2377 		// our I/O priority
2378 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2379 		int32 ioPriority = B_IDLE_PRIORITY;
2380 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2381 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2382 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2383 		} else {
2384 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2385 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2386 		}
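		// Example: with the default MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD of
		// 10000, 5000 modified pages yield about half of
		// MAX_PAGE_WRITER_IO_PRIORITY.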
2387 
2388 		thread_set_io_priority(ioPriority);
2389 
2390 		uint32 numPages = 0;
2391 		run.PrepareNextRun();
2392 
2393 		// TODO: make this laptop friendly, too (ie. only start doing
2394 		// something if someone else did something or there is really
2395 		// enough to do).
2396 
2397 		// collect pages to be written
2398 #ifdef TRACE_VM_PAGE
2399 		pageCollectionTime -= system_time();
2400 #endif
2401 
2402 		page_num_t maxPagesToSee = modifiedPages;
2403 
2404 		while (numPages < kNumPages && maxPagesToSee > 0) {
2405 			vm_page *page = next_modified_page(maxPagesToSee);
2406 			if (page == NULL)
2407 				break;
2408 
2409 			PageCacheLocker cacheLocker(page, false);
2410 			if (!cacheLocker.IsLocked())
2411 				continue;
2412 
2413 			VMCache *cache = page->Cache();
2414 
2415 			// If the page is busy or its state has changed while we were
2416 			// locking the cache, just ignore it.
2417 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2418 				continue;
2419 
2420 			DEBUG_PAGE_ACCESS_START(page);
2421 
2422 			// Don't write back wired (locked) pages.
2423 			if (page->WiredCount() > 0) {
2424 				set_page_state(page, PAGE_STATE_ACTIVE);
2425 				DEBUG_PAGE_ACCESS_END(page);
2426 				continue;
2427 			}
2428 
2429 			// Write back temporary pages only when we're actively paging.
2430 			if (cache->temporary
2431 #if ENABLE_SWAP_SUPPORT
2432 				&& (!activePaging
2433 					|| !cache->CanWritePage(
2434 							(off_t)page->cache_offset << PAGE_SHIFT))
2435 #endif
2436 				) {
2437 				// We can't/don't want to do anything with this page, so move it
2438 				// to one of the other queues.
2439 				if (page->mappings.IsEmpty())
2440 					set_page_state(page, PAGE_STATE_INACTIVE);
2441 				else
2442 					set_page_state(page, PAGE_STATE_ACTIVE);
2443 
2444 				DEBUG_PAGE_ACCESS_END(page);
2445 				continue;
2446 			}
2447 
2448 			// We need our own reference to the store, as it might currently be
2449 			// destroyed.
2450 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2451 				DEBUG_PAGE_ACCESS_END(page);
2452 				cacheLocker.Unlock();
2453 				thread_yield();
2454 				continue;
2455 			}
2456 
2457 			run.AddPage(page);
2458 				// TODO: We're possibly adding pages of different caches and
2459 				// thus maybe of different underlying file systems here. This
2460 				// is a potential problem for loop file systems/devices, since
2461 				// we could mark a page busy that would need to be accessed
2462 				// when writing back another page, thus causing a deadlock.
2463 
2464 			DEBUG_PAGE_ACCESS_END(page);
2465 
2466 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2467 			TPW(WritePage(page));
2468 
2469 			cache->AcquireRefLocked();
2470 			numPages++;
2471 		}
2472 
2473 #ifdef TRACE_VM_PAGE
2474 		pageCollectionTime += system_time();
2475 #endif
2476 		if (numPages == 0)
2477 			continue;
2478 
2479 		// write pages to disk and do all the cleanup
2480 #ifdef TRACE_VM_PAGE
2481 		pageWritingTime -= system_time();
2482 #endif
2483 		uint32 failedPages = run.Go();
2484 #ifdef TRACE_VM_PAGE
2485 		pageWritingTime += system_time();
2486 
2487 		// debug output only...
2488 		writtenPages += numPages;
2489 		if (writtenPages >= 1024) {
2490 			bigtime_t now = system_time();
2491 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2492 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2493 				(now - lastWrittenTime) / 1000,
2494 				pageCollectionTime / 1000, pageWritingTime / 1000));
2495 			lastWrittenTime = now;
2496 
2497 			writtenPages -= 1024;
2498 			pageCollectionTime = 0;
2499 			pageWritingTime = 0;
2500 		}
2501 #endif
2502 
2503 		if (failedPages == numPages)
2504 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2505 		else
2506 			pagesSinceLastSuccessfulWrite = 0;
2507 	}
2508 
2509 	return B_OK;
2510 }
2511 
2512 
2513 // #pragma mark -
2514 
2515 
2516 // TODO: This should be done in the page daemon!
2517 #if 0
2518 #if ENABLE_SWAP_SUPPORT
2519 static bool
2520 free_page_swap_space(int32 index)
2521 {
2522 	vm_page *page = vm_page_at_index(index);
2523 	PageCacheLocker locker(page);
2524 	if (!locker.IsLocked())
2525 		return false;
2526 
2527 	DEBUG_PAGE_ACCESS_START(page);
2528 
2529 	VMCache* cache = page->Cache();
2530 	if (cache->temporary && page->WiredCount() == 0
2531 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2532 			&& page->usage_count > 0) {
2533 		// TODO: how to judge a page is highly active?
2534 		if (swap_free_page_swap_space(page)) {
2535 			// We need to mark the page modified, since otherwise it could be
2536 			// stolen and we'd lose its data.
2537 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2538 			TD(FreedPageSwap(page));
2539 			DEBUG_PAGE_ACCESS_END(page);
2540 			return true;
2541 		}
2542 	}
2543 	DEBUG_PAGE_ACCESS_END(page);
2544 	return false;
2545 }
2546 #endif
2547 #endif	// 0
2548 
2549 
2550 static vm_page *
2551 find_cached_page_candidate(struct vm_page &marker)
2552 {
2553 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2554 
2555 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2556 	vm_page *page;
2557 
2558 	if (marker.State() == PAGE_STATE_UNUSED) {
2559 		// Get the first page of the cached queue
2560 		page = sCachedPageQueue.Head();
2561 	} else {
2562 		// Get the next page of the current queue
2563 		if (marker.State() != PAGE_STATE_CACHED) {
2564 			panic("invalid marker %p state", &marker);
2565 			return NULL;
2566 		}
2567 
2568 		page = sCachedPageQueue.Next(&marker);
2569 		sCachedPageQueue.Remove(&marker);
2570 		marker.SetState(PAGE_STATE_UNUSED);
2571 	}
2572 
2573 	while (page != NULL) {
2574 		if (!page->busy) {
2575 			// we found a candidate, insert marker
2576 			marker.SetState(PAGE_STATE_CACHED);
2577 			sCachedPageQueue.InsertAfter(page, &marker);
2578 			return page;
2579 		}
2580 
2581 		page = sCachedPageQueue.Next(page);
2582 	}
2583 
2584 	return NULL;
2585 }
2586 
2587 
2588 static bool
2589 free_cached_page(vm_page *page, bool dontWait)
2590 {
2591 	// try to lock the page's cache
2592 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2593 		return false;
2594 	VMCache* cache = page->Cache();
2595 
2596 	AutoLocker<VMCache> cacheLocker(cache, true);
2597 	MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2598 
2599 	// check again if that page is still a candidate
2600 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2601 		return false;
2602 
2603 	DEBUG_PAGE_ACCESS_START(page);
2604 
2605 	PAGE_ASSERT(page, !page->IsMapped());
2606 	PAGE_ASSERT(page, !page->modified);
2607 
2608 	// we can now steal this page
2609 
2610 	cache->RemovePage(page);
2611 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2612 		// vm_page_allocate_page_run()) can pick it up, since they would be
2613 		// required to lock the cache first, which would fail.
2614 
2615 	sCachedPageQueue.RemoveUnlocked(page);
2616 	return true;
2617 }
2618 
2619 
2620 static uint32
2621 free_cached_pages(uint32 pagesToFree, bool dontWait)
2622 {
2623 	vm_page marker;
2624 	init_page_marker(marker);
2625 
2626 	uint32 pagesFreed = 0;
2627 
2628 	while (pagesFreed < pagesToFree) {
2629 		vm_page *page = find_cached_page_candidate(marker);
2630 		if (page == NULL)
2631 			break;
2632 
2633 		if (free_cached_page(page, dontWait)) {
2634 			ReadLocker locker(sFreePageQueuesLock);
2635 			page->SetState(PAGE_STATE_FREE);
2636 			DEBUG_PAGE_ACCESS_END(page);
2637 			sFreePageQueue.PrependUnlocked(page);
2638 			locker.Unlock();
2639 
2640 			TA(StolenPage());
2641 
2642 			pagesFreed++;
2643 		}
2644 	}
2645 
2646 	remove_page_marker(marker);
2647 
2648 	sFreePageCondition.NotifyAll();
2649 
2650 	return pagesFreed;
2651 }
2652 
2653 
2654 static void
2655 idle_scan_active_pages(page_stats& pageStats)
2656 {
2657 	VMPageQueue& queue = sActivePageQueue;
2658 
2659 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2660 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2661 
2662 	while (maxToScan > 0) {
2663 		maxToScan--;
2664 
2665 		// Get the next page. Note that we don't bother to lock here. We go with
2666 		// the assumption that on all architectures reading/writing pointers is
2667 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2668 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2669 		vm_page* page = queue.Head();
2670 		if (page == NULL)
2671 			break;
2672 
2673 		// lock the page's cache
2674 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2675 		if (cache == NULL)
2676 			continue;
2677 
2678 		if (page->State() != PAGE_STATE_ACTIVE) {
2679 			// page is no longer in the cache or in this queue
2680 			cache->ReleaseRefAndUnlock();
2681 			continue;
2682 		}
2683 
2684 		if (page->busy) {
2685 			// page is busy -- requeue at the end
2686 			vm_page_requeue(page, true);
2687 			cache->ReleaseRefAndUnlock();
2688 			continue;
2689 		}
2690 
2691 		DEBUG_PAGE_ACCESS_START(page);
2692 
2693 		// Get the page active/modified flags and update the page's usage count.
2694 		// We completely unmap inactive temporary pages. This saves us from
2695 		// having to iterate through the inactive list as well, since we'll be
2696 		// notified via page fault whenever such an inactive page is used again.
2697 		// We don't remove the mappings of non-temporary pages, since we
2698 		// wouldn't notice when those would become unused and could thus be
2699 		// moved to the cached list.
2700 		int32 usageCount;
2701 		if (page->WiredCount() > 0 || page->usage_count > 0
2702 			|| !cache->temporary) {
2703 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2704 		} else
2705 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2706 
2707 		if (usageCount > 0) {
2708 			usageCount += page->usage_count + kPageUsageAdvance;
2709 			if (usageCount > kPageUsageMax)
2710 				usageCount = kPageUsageMax;
2711 // TODO: This would probably also be the place to reclaim swap space.
2712 		} else {
2713 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2714 			if (usageCount < 0) {
2715 				usageCount = 0;
2716 				set_page_state(page, PAGE_STATE_INACTIVE);
2717 			}
2718 		}
2719 
2720 		page->usage_count = usageCount;
2721 
2722 		DEBUG_PAGE_ACCESS_END(page);
2723 
2724 		cache->ReleaseRefAndUnlock();
2725 	}
2726 }
2727 
2728 
2729 static void
2730 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2731 {
2732 	int32 pagesToFree = pageStats.unsatisfiedReservations
2733 		+ sFreeOrCachedPagesTarget
2734 		- (pageStats.totalFreePages + pageStats.cachedPages);
2735 	if (pagesToFree <= 0)
2736 		return;
2737 
2738 	bigtime_t time = system_time();
2739 	uint32 pagesScanned = 0;
2740 	uint32 pagesToCached = 0;
2741 	uint32 pagesToModified = 0;
2742 	uint32 pagesToActive = 0;
2743 
2744 	// Determine how many pages at maximum to send to the modified queue. Since
2745 	// it is relatively expensive to page out pages, we do that on a grander
2746 	// scale only when things get desperate.
2747 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2748 
2749 	vm_page marker;
2750 	init_page_marker(marker);
2751 
2752 	VMPageQueue& queue = sInactivePageQueue;
2753 	InterruptsSpinLocker queueLocker(queue.GetLock());
2754 	uint32 maxToScan = queue.Count();
2755 
2756 	vm_page* nextPage = queue.Head();
2757 
2758 	while (pagesToFree > 0 && maxToScan > 0) {
2759 		maxToScan--;
2760 
2761 		// get the next page
2762 		vm_page* page = nextPage;
2763 		if (page == NULL)
2764 			break;
2765 		nextPage = queue.Next(page);
2766 
2767 		if (page->busy)
2768 			continue;
2769 
2770 		// mark the position
2771 		queue.InsertAfter(page, &marker);
2772 		queueLocker.Unlock();
2773 
2774 		// lock the page's cache
2775 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2776 		if (cache == NULL || page->busy
2777 				|| page->State() != PAGE_STATE_INACTIVE) {
2778 			if (cache != NULL)
2779 				cache->ReleaseRefAndUnlock();
2780 			queueLocker.Lock();
2781 			nextPage = queue.Next(&marker);
2782 			queue.Remove(&marker);
2783 			continue;
2784 		}
2785 
2786 		pagesScanned++;
2787 
2788 		DEBUG_PAGE_ACCESS_START(page);
2789 
2790 		// Get the accessed count, clear the accessed/modified flags and
2791 		// unmap the page, if it hasn't been accessed.
2792 		int32 usageCount;
2793 		if (page->WiredCount() > 0)
2794 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2795 		else
2796 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2797 
2798 		// update usage count
2799 		if (usageCount > 0) {
2800 			usageCount += page->usage_count + kPageUsageAdvance;
2801 			if (usageCount > kPageUsageMax)
2802 				usageCount = kPageUsageMax;
2803 		} else {
2804 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2805 			if (usageCount < 0)
2806 				usageCount = 0;
2807 		}
2808 
2809 		page->usage_count = usageCount;
2810 
2811 		// Move to fitting queue or requeue:
2812 		// * Active mapped pages go to the active queue.
2813 		// * Inactive mapped (i.e. wired) pages are requeued.
2814 		// * The remaining pages are cachable. Thus, if unmodified they go to
2815 		//   the cached queue, otherwise to the modified queue (up to a limit).
2816 		//   Note that, unlike in the idle scanning, we don't exempt pages of
2817 		//   temporary caches. Apparently we really need memory, so we better
2818 		//   page out memory as well.
2819 		bool isMapped = page->IsMapped();
2820 		if (usageCount > 0) {
2821 			if (isMapped) {
2822 				set_page_state(page, PAGE_STATE_ACTIVE);
2823 				pagesToActive++;
2824 			} else
2825 				vm_page_requeue(page, true);
2826 		} else if (isMapped) {
2827 			vm_page_requeue(page, true);
2828 		} else if (!page->modified) {
2829 			set_page_state(page, PAGE_STATE_CACHED);
2830 			pagesToFree--;
2831 			pagesToCached++;
2832 		} else if (maxToFlush > 0) {
2833 			set_page_state(page, PAGE_STATE_MODIFIED);
2834 			maxToFlush--;
2835 			pagesToModified++;
2836 		} else
2837 			vm_page_requeue(page, true);
2838 
2839 		DEBUG_PAGE_ACCESS_END(page);
2840 
2841 		cache->ReleaseRefAndUnlock();
2842 
2843 		// remove the marker
2844 		queueLocker.Lock();
2845 		nextPage = queue.Next(&marker);
2846 		queue.Remove(&marker);
2847 	}
2848 
2849 	queueLocker.Unlock();
2850 
2851 	time = system_time() - time;
2852 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2853 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2854 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2855 		pagesToModified, pagesToActive);
2856 
2857 	// wake up the page writer, if we tossed it some pages
2858 	if (pagesToModified > 0)
2859 		sPageWriterCondition.WakeUp();
2860 }
2861 
2862 
2863 static void
2864 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2865 {
2866 	vm_page marker;
2867 	init_page_marker(marker);
2868 
2869 	VMPageQueue& queue = sActivePageQueue;
2870 	InterruptsSpinLocker queueLocker(queue.GetLock());
2871 	uint32 maxToScan = queue.Count();
2872 
2873 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2874 		+ sFreeOrCachedPagesTarget
2875 		- (pageStats.totalFreePages + pageStats.cachedPages)
2876 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2877 	if (pagesToDeactivate <= 0)
2878 		return;
2879 
2880 	bigtime_t time = system_time();
2881 	uint32 pagesAccessed = 0;
2882 	uint32 pagesToInactive = 0;
2883 	uint32 pagesScanned = 0;
2884 
2885 	vm_page* nextPage = queue.Head();
2886 
2887 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2888 		maxToScan--;
2889 
2890 		// get the next page
2891 		vm_page* page = nextPage;
2892 		if (page == NULL)
2893 			break;
2894 		nextPage = queue.Next(page);
2895 
2896 		if (page->busy)
2897 			continue;
2898 
2899 		// mark the position
2900 		queue.InsertAfter(page, &marker);
2901 		queueLocker.Unlock();
2902 
2903 		// lock the page's cache
2904 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2905 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2906 			if (cache != NULL)
2907 				cache->ReleaseRefAndUnlock();
2908 			queueLocker.Lock();
2909 			nextPage = queue.Next(&marker);
2910 			queue.Remove(&marker);
2911 			continue;
2912 		}
2913 
2914 		pagesScanned++;
2915 
2916 		DEBUG_PAGE_ACCESS_START(page);
2917 
2918 		// Get the page active/modified flags and update the page's usage count.
2919 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2920 
2921 		if (usageCount > 0) {
2922 			usageCount += page->usage_count + kPageUsageAdvance;
2923 			if (usageCount > kPageUsageMax)
2924 				usageCount = kPageUsageMax;
2925 			pagesAccessed++;
2926 // TODO: This would probably also be the place to reclaim swap space.
2927 		} else {
2928 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2929 			if (usageCount <= 0) {
2930 				usageCount = 0;
2931 				set_page_state(page, PAGE_STATE_INACTIVE);
2932 				pagesToInactive++;
2933 			}
2934 		}
2935 
2936 		page->usage_count = usageCount;
2937 
2938 		DEBUG_PAGE_ACCESS_END(page);
2939 
2940 		cache->ReleaseRefAndUnlock();
2941 
2942 		// remove the marker
2943 		queueLocker.Lock();
2944 		nextPage = queue.Next(&marker);
2945 		queue.Remove(&marker);
2946 	}
2947 
2948 	time = system_time() - time;
2949 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2950 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2951 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2952 }
2953 
2954 
2955 static void
2956 page_daemon_idle_scan(page_stats& pageStats)
2957 {
2958 	TRACE_DAEMON("page daemon: idle run\n");
2959 
2960 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2961 		// We want more actually free pages, so free some from the cached
2962 		// ones.
2963 		uint32 freed = free_cached_pages(
2964 			sFreePagesTarget - pageStats.totalFreePages, false);
2965 		if (freed > 0)
2966 			unreserve_pages(freed);
2967 		get_page_stats(pageStats);
2968 	}
2969 
2970 	// Walk the active list and move pages to the inactive queue.
2971 	get_page_stats(pageStats);
2972 	idle_scan_active_pages(pageStats);
2973 }
2974 
2975 
2976 static void
2977 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2978 {
2979 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
2980 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
2981 		pageStats.cachedPages, pageStats.unsatisfiedReservations
2982 			+ sFreeOrCachedPagesTarget
2983 			- (pageStats.totalFreePages + pageStats.cachedPages));
2984 
2985 	// Walk the inactive list and transfer pages to the cached and modified
2986 	// queues.
2987 	full_scan_inactive_pages(pageStats, despairLevel);
2988 
2989 	// Free cached pages. Also wake up reservation waiters.
2990 	get_page_stats(pageStats);
2991 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2992 		- (pageStats.totalFreePages);
2993 	if (pagesToFree > 0) {
2994 		uint32 freed = free_cached_pages(pagesToFree, true);
2995 		if (freed > 0)
2996 			unreserve_pages(freed);
2997 	}
2998 
2999 	// Walk the active list and move pages to the inactive queue.
3000 	get_page_stats(pageStats);
3001 	full_scan_active_pages(pageStats, despairLevel);
3002 }
3003 
3004 
3005 static status_t
3006 page_daemon(void* /*unused*/)
3007 {
3008 	int32 despairLevel = 0;
3009 
3010 	while (true) {
3011 		sPageDaemonCondition.ClearActivated();
3012 
3013 		// evaluate the free pages situation
3014 		page_stats pageStats;
3015 		get_page_stats(pageStats);
3016 
3017 		if (!do_active_paging(pageStats)) {
3018 			// Things look good -- just maintain statistics and keep the pool
3019 			// of actually free pages full enough.
3020 			despairLevel = 0;
3021 			page_daemon_idle_scan(pageStats);
3022 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
3023 		} else {
3024 			// Not enough free pages. We need to do some real work.
3025 			despairLevel = std::min(despairLevel + 1, (int32)3);
3026 			page_daemon_full_scan(pageStats, despairLevel);
3027 
3028 			// Don't wait after the first full scan, but rather immediately
3029 			// check whether we were successful in freeing enough pages and
3030 			// re-run with increased despair level. The first scan is
3031 			// conservative with respect to moving inactive modified pages to
3032 			// the modified list to avoid thrashing. The second scan, however,
3033 			// will not hold back.
3034 			if (despairLevel > 1)
3035 				snooze(kBusyScanWaitInterval);
3036 		}
3037 	}
3038 
3039 	return B_OK;
3040 }
3041 
3042 
3043 /*!	Returns how many pages could *not* be reserved.
3044 */
3045 static uint32
3046 reserve_pages(uint32 count, int priority, bool dontWait)
3047 {
3048 	int32 dontTouch = kPageReserveForPriority[priority];
3049 
3050 	while (true) {
3051 		count -= reserve_some_pages(count, dontTouch);
3052 		if (count == 0)
3053 			return 0;
3054 
3055 		if (sUnsatisfiedPageReservations == 0) {
3056 			count -= free_cached_pages(count, dontWait);
3057 			if (count == 0)
3058 				return count;
3059 		}
3060 
3061 		if (dontWait)
3062 			return count;
3063 
3064 		// we need to wait for pages to become available
3065 
3066 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3067 
3068 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3069 		sUnsatisfiedPageReservations += count;
3070 
3071 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3072 			// the situation changed
3073 			sUnsatisfiedPageReservations -= count;
3074 			continue;
3075 		}
3076 
3077 		PageReservationWaiter waiter;
3078 		waiter.dontTouch = dontTouch;
3079 		waiter.missing = count;
3080 		waiter.thread = thread_get_current_thread();
3081 		waiter.threadPriority = waiter.thread->priority;
3082 
3083 		// insert ordered (i.e. after all waiters with higher or equal priority)
3084 		PageReservationWaiter* otherWaiter = NULL;
3085 		for (PageReservationWaiterList::Iterator it
3086 				= sPageReservationWaiters.GetIterator();
3087 			(otherWaiter = it.Next()) != NULL;) {
3088 			if (waiter < *otherWaiter)
3089 				break;
3090 		}
3091 
3092 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3093 
3094 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3095 			"waiting for pages");
3096 
3097 		if (notifyDaemon)
3098 			sPageDaemonCondition.WakeUp();
3099 
3100 		pageDeficitLocker.Unlock();
3101 
3102 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3103 		thread_block();
3104 
3105 		pageDeficitLocker.Lock();
3106 
3107 		return 0;
3108 	}
3109 }
3110 
3111 
3112 //	#pragma mark - private kernel API
3113 
3114 
3115 /*!	Writes a range of modified pages of a cache to disk.
3116 	You need to hold the VMCache lock when calling this function.
3117 	Note that the cache lock is released in this function.
3118 	\param cache The cache.
3119 	\param firstPage Offset (in page size units) of the first page in the range.
3120 	\param endPage End offset (in page size units) of the page range. The page
3121 		at this offset is not included.
3122 */
3123 status_t
3124 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3125 	uint32 endPage)
3126 {
3127 	static const int32 kMaxPages = 256;
3128 	int32 maxPages = cache->MaxPagesPerWrite();
3129 	if (maxPages < 0 || maxPages > kMaxPages)
3130 		maxPages = kMaxPages;
3131 
3132 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3133 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3134 
3135 	PageWriteWrapper stackWrappersPool[2];
3136 	PageWriteWrapper* stackWrappers[1];
3137 	PageWriteWrapper* wrapperPool
3138 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3139 	PageWriteWrapper** wrappers
3140 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3141 	if (wrapperPool == NULL || wrappers == NULL) {
3142 		// don't fail, just limit our capabilities
3143 		delete[] wrapperPool;
3144 		delete[] wrappers;
3145 		wrapperPool = stackWrappersPool;
3146 		wrappers = stackWrappers;
3147 		maxPages = 1;
3148 	}
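	// Note: the pool holds maxPages + 1 wrappers, since a page that doesn't
	// fit into the current transfer already needs a wrapper before the
	// transfer's own wrappers are completed via Done().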
3149 
3150 	int32 nextWrapper = 0;
3151 	int32 usedWrappers = 0;
3152 
3153 	PageWriteTransfer transfer;
3154 	bool transferEmpty = true;
3155 
3156 	VMCachePagesTree::Iterator it
3157 		= cache->pages.GetIterator(firstPage, true, true);
3158 
3159 	while (true) {
3160 		vm_page* page = it.Next();
3161 		if (page == NULL || page->cache_offset >= endPage) {
3162 			if (transferEmpty)
3163 				break;
3164 
3165 			page = NULL;
3166 		}
3167 
3168 		if (page != NULL) {
3169 			if (page->busy
3170 				|| (page->State() != PAGE_STATE_MODIFIED
3171 					&& !vm_test_map_modification(page))) {
3172 				page = NULL;
3173 			}
3174 		}
3175 
3176 		PageWriteWrapper* wrapper = NULL;
3177 		if (page != NULL) {
3178 			wrapper = &wrapperPool[nextWrapper++];
3179 			if (nextWrapper > maxPages)
3180 				nextWrapper = 0;
3181 
3182 			DEBUG_PAGE_ACCESS_START(page);
3183 
3184 			wrapper->SetTo(page);
3185 
3186 			if (transferEmpty || transfer.AddPage(page)) {
3187 				if (transferEmpty) {
3188 					transfer.SetTo(NULL, page, maxPages);
3189 					transferEmpty = false;
3190 				}
3191 
3192 				DEBUG_PAGE_ACCESS_END(page);
3193 
3194 				wrappers[usedWrappers++] = wrapper;
3195 				continue;
3196 			}
3197 
3198 			DEBUG_PAGE_ACCESS_END(page);
3199 		}
3200 
3201 		if (transferEmpty)
3202 			continue;
3203 
3204 		cache->Unlock();
3205 		status_t status = transfer.Schedule(0);
3206 		cache->Lock();
3207 
3208 		for (int32 i = 0; i < usedWrappers; i++)
3209 			wrappers[i]->Done(status);
3210 
3211 		usedWrappers = 0;
3212 
3213 		if (page != NULL) {
3214 			transfer.SetTo(NULL, page, maxPages);
3215 			wrappers[usedWrappers++] = wrapper;
3216 		} else
3217 			transferEmpty = true;
3218 	}
3219 
3220 	if (wrapperPool != stackWrappersPool) {
3221 		delete[] wrapperPool;
3222 		delete[] wrappers;
3223 	}
3224 
3225 	return B_OK;
3226 }
3227 
3228 
3229 /*!	You need to hold the VMCache lock when calling this function.
3230 	Note that the cache lock is released in this function.
3231 */
3232 status_t
3233 vm_page_write_modified_pages(VMCache *cache)
3234 {
3235 	return vm_page_write_modified_page_range(cache, 0,
3236 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3237 }
3238 
3239 
3240 /*!	Schedules the page writer to write back the specified \a page.
3241 	Note, however, that it might not do this immediately, and it can well
3242 	take several seconds until the page is actually written out.
3243 */
3244 void
3245 vm_page_schedule_write_page(vm_page *page)
3246 {
3247 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3248 
3249 	vm_page_requeue(page, false);
3250 
3251 	sPageWriterCondition.WakeUp();
3252 }
3253 
3254 
3255 /*!	Cache must be locked.
3256 */
3257 void
3258 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3259 	uint32 endPage)
3260 {
3261 	uint32 modified = 0;
3262 	for (VMCachePagesTree::Iterator it
3263 				= cache->pages.GetIterator(firstPage, true, true);
3264 			vm_page *page = it.Next();) {
3265 		if (page->cache_offset >= endPage)
3266 			break;
3267 
3268 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3269 			DEBUG_PAGE_ACCESS_START(page);
3270 			vm_page_requeue(page, false);
3271 			modified++;
3272 			DEBUG_PAGE_ACCESS_END(page);
3273 		}
3274 	}
3275 
3276 	if (modified > 0)
3277 		sPageWriterCondition.WakeUp();
3278 }
3279 
3280 
3281 void
3282 vm_page_init_num_pages(kernel_args *args)
3283 {
3284 	// calculate the size of memory by looking at the physical_memory_range array
3285 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3286 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3287 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3288 
3289 	sNonExistingPages = 0;
3290 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3291 
3292 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3293 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3294 		if (start > physicalPagesEnd)
3295 			sNonExistingPages += start - physicalPagesEnd;
3296 		physicalPagesEnd = start
3297 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3298 
3299 #ifdef LIMIT_AVAILABLE_MEMORY
3300 		page_num_t available
3301 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3302 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3303 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3304 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3305 			break;
3306 		}
3307 #endif
3308 	}
3309 
3310 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3311 		sPhysicalPageOffset, physicalPagesEnd));
3312 
3313 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3314 }
3315 
3316 
3317 status_t
3318 vm_page_init(kernel_args *args)
3319 {
3320 	TRACE(("vm_page_init: entry\n"));
3321 
3322 	// init page queues
3323 	sModifiedPageQueue.Init("modified pages queue");
3324 	sInactivePageQueue.Init("inactive pages queue");
3325 	sActivePageQueue.Init("active pages queue");
3326 	sCachedPageQueue.Init("cached pages queue");
3327 	sFreePageQueue.Init("free pages queue");
3328 	sClearPageQueue.Init("clear pages queue");
3329 
3330 	new (&sPageReservationWaiters) PageReservationWaiterList;
3331 
3332 	// map in the new free page table
3333 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3334 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3335 
3336 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3337 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3338 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3339 
3340 	// initialize the free page table
3341 	for (uint32 i = 0; i < sNumPages; i++) {
3342 		sPages[i].Init(sPhysicalPageOffset + i);
3343 		sFreePageQueue.Append(&sPages[i]);
3344 
3345 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3346 		sPages[i].allocation_tracking_info.Clear();
3347 #endif
3348 	}
3349 
3350 	sUnreservedFreePages = sNumPages;
3351 
3352 	TRACE(("initialized table\n"));
3353 
3354 	// mark the ranges between usable physical memory unused
3355 	phys_addr_t previousEnd = 0;
3356 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3357 		phys_addr_t base = args->physical_memory_range[i].start;
3358 		phys_size_t size = args->physical_memory_range[i].size;
3359 		if (base > previousEnd) {
3360 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3361 				(base - previousEnd) / B_PAGE_SIZE, false);
3362 		}
3363 		previousEnd = base + size;
3364 	}
3365 
3366 	// mark the allocated physical page ranges wired
3367 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3368 		mark_page_range_in_use(
3369 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3370 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3371 	}
3372 
3373 	// The target of actually free pages. This must be at least the system
3374 	// reserve, but should be a few more pages, so we don't have to extract
3375 	// a cached page with each allocation.
3376 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3377 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3378 
3379 	// The target of free + cached and inactive pages. On low-memory machines
3380 	// keep things tight. free + cached is the pool of immediately allocatable
3381 	// pages. We want a few inactive pages, so when we're actually paging, we
3382 	// have a reasonably large set of pages to work with.
3383 	if (sUnreservedFreePages < 16 * 1024) {
3384 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3385 		sInactivePagesTarget = sFreePagesTarget / 3;
3386 	} else {
3387 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3388 		sInactivePagesTarget = sFreePagesTarget / 2;
3389 	}
3390 
3391 	TRACE(("vm_page_init: exit\n"));
3392 
3393 	return B_OK;
3394 }
3395 
3396 
3397 status_t
3398 vm_page_init_post_area(kernel_args *args)
3399 {
3400 	void *dummy;
3401 
3402 	dummy = sPages;
3403 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3404 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3405 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3406 
3407 	add_debugger_command("list_pages", &dump_page_list,
3408 		"List physical pages");
3409 	add_debugger_command("page_stats", &dump_page_stats,
3410 		"Dump statistics about page usage");
3411 	add_debugger_command_etc("page", &dump_page_long,
3412 		"Dump page info",
3413 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3414 		"Prints information for the physical page. If neither \"-p\" nor\n"
3415 		"\"-v\" are given, the provided address is interpreted as address of\n"
3416 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3417 		"given, the address is the physical address of the page. If \"-v\" is\n"
3418 		"given, the address is interpreted as virtual address in the current\n"
3419 		"thread's address space, and information for the page it is mapped\n"
3420 		"to (if any) is printed. If \"-m\" is specified, the command will\n"
3421 		"search all known address spaces for mappings to that page and print\n"
3422 		"them.\n", 0);
3423 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3424 	add_debugger_command("find_page", &find_page,
3425 		"Find out which queue a page is actually in");
3426 
3427 #ifdef TRACK_PAGE_USAGE_STATS
3428 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3429 		"Dumps statistics about page usage counts",
3430 		"\n"
3431 		"Dumps statistics about page usage counts.\n",
3432 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3433 #endif
3434 
3435 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3436 	add_debugger_command_etc("page_allocations_per_caller",
3437 		&dump_page_allocations_per_caller,
3438 		"Dump current page allocations summed up per caller",
3439 		"[ -d <caller> ] [ -r ]\n"
3440 		"The current allocations will be summed up by caller (their count)\n"
3441 		"printed in decreasing order by count.\n"
3442 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3443 		"including the respective stack trace.\n"
3444 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3445 		"the information, so the next command invocation will only show the\n"
3446 		"allocations made after the reset.\n", 0);
3447 	add_debugger_command_etc("page_allocation_infos",
3448 		&dump_page_allocation_infos,
3449 		"Dump current page allocations",
3450 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3451 		"[ --thread <thread ID> ]\n"
3452 		"The current allocations filtered by optional values will be printed.\n"
3453 		"The optional \"-p\" page number filters for a specific page,\n"
3454 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3455 		"and/or threads can be filtered (these only work if a corresponding\n"
3456 		"tracing entry is still available).\n"
3457 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3458 		"callers are printed, where available.\n", 0);
3459 #endif
3460 
3461 	return B_OK;
3462 }
3463 
3464 
3465 status_t
3466 vm_page_init_post_thread(kernel_args *args)
3467 {
3468 	new (&sFreePageCondition) ConditionVariable;
3469 
3470 	// create a kernel thread to clear out pages
3471 
3472 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3473 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3474 	resume_thread(thread);
3475 
3476 	// start page writer
3477 
3478 	sPageWriterCondition.Init("page writer");
3479 
3480 	thread = spawn_kernel_thread(&page_writer, "page writer",
3481 		B_NORMAL_PRIORITY + 1, NULL);
3482 	resume_thread(thread);
3483 
3484 	// start page daemon
3485 
3486 	sPageDaemonCondition.Init("page daemon");
3487 
3488 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3489 		B_NORMAL_PRIORITY, NULL);
3490 	resume_thread(thread);
3491 
3492 	return B_OK;
3493 }
3494 
3495 
3496 status_t
3497 vm_mark_page_inuse(page_num_t page)
3498 {
3499 	return vm_mark_page_range_inuse(page, 1);
3500 }
3501 
3502 
3503 status_t
3504 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3505 {
3506 	return mark_page_range_in_use(startPage, length, false);
3507 }
3508 
3509 
3510 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3511 */
3512 void
3513 vm_page_unreserve_pages(vm_page_reservation* reservation)
3514 {
3515 	uint32 count = reservation->count;
3516 	reservation->count = 0;
3517 
3518 	if (count == 0)
3519 		return;
3520 
3521 	TA(UnreservePages(count));
3522 
3523 	unreserve_pages(count);
3524 }
3525 
3526 
3527 /*!	With this call, you can reserve a number of free pages in the system.
3528 	They will only be handed out to someone who has actually reserved them.
3529 	This call returns as soon as the number of requested pages has been
3530 	reached.
3531 	The caller must not hold any cache lock or the function might deadlock.
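	A minimal usage sketch (assuming the VM_PRIORITY_USER priority constant):
	\code
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_USER);
	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_ACTIVE);
	// ... use the page ...
	vm_page_unreserve_pages(&reservation);
		// releases whatever is left of the reservation
	\endcode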
3532 */
3533 void
3534 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3535 	int priority)
3536 {
3537 	reservation->count = count;
3538 
3539 	if (count == 0)
3540 		return;
3541 
3542 	TA(ReservePages(count));
3543 
3544 	reserve_pages(count, priority, false);
3545 }
3546 
3547 
3548 bool
3549 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3550 	int priority)
3551 {
3552 	if (count == 0) {
3553 		reservation->count = count;
3554 		return true;
3555 	}
3556 
3557 	uint32 remaining = reserve_pages(count, priority, true);
3558 	if (remaining == 0) {
3559 		TA(ReservePages(count));
3560 		reservation->count = count;
3561 		return true;
3562 	}
3563 
3564 	unreserve_pages(count - remaining);
3565 
3566 	return false;
3567 }
3568 
3569 
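/*!	Allocates a single page from the given \a reservation (whose count is
	decremented) and sets it to the state encoded in \a flags, optionally
	marking it busy (VM_PAGE_ALLOC_BUSY) and/or returning a cleared page
	(VM_PAGE_ALLOC_CLEAR).
*/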
3570 vm_page *
3571 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3572 {
3573 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3574 	ASSERT(pageState != PAGE_STATE_FREE);
3575 	ASSERT(pageState != PAGE_STATE_CLEAR);
3576 
3577 	ASSERT(reservation->count > 0);
3578 	reservation->count--;
3579 
3580 	VMPageQueue* queue;
3581 	VMPageQueue* otherQueue;
3582 
3583 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3584 		queue = &sClearPageQueue;
3585 		otherQueue = &sFreePageQueue;
3586 	} else {
3587 		queue = &sFreePageQueue;
3588 		otherQueue = &sClearPageQueue;
3589 	}
3590 
3591 	ReadLocker locker(sFreePageQueuesLock);
3592 
3593 	vm_page* page = queue->RemoveHeadUnlocked();
3594 	if (page == NULL) {
3595 		// if the primary queue was empty, grab the page from the
3596 		// secondary queue
3597 		page = otherQueue->RemoveHeadUnlocked();
3598 
3599 		if (page == NULL) {
3600 			// Unlikely, but possible: the page we have reserved has moved
3601 			// between the queues after we checked the first queue. Grab the
3602 			// write locker to make sure this doesn't happen again.
3603 			locker.Unlock();
3604 			WriteLocker writeLocker(sFreePageQueuesLock);
3605 
3606 			page = queue->RemoveHead();
3607 			if (page == NULL)
3608 				page = otherQueue->RemoveHead();
3609 
3610 			if (page == NULL) {
3611 				panic("Had reserved page, but there is none!");
3612 				return NULL;
3613 			}
3614 
3615 			// downgrade to read lock
3616 			locker.Lock();
3617 		}
3618 	}
3619 
3620 	if (page->CacheRef() != NULL)
3621 		panic("supposed to be free page %p has cache\n", page);
3622 
3623 	DEBUG_PAGE_ACCESS_START(page);
3624 
3625 	int oldPageState = page->State();
3626 	page->SetState(pageState);
3627 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3628 	page->usage_count = 0;
3629 	page->accessed = false;
3630 	page->modified = false;
3631 
3632 	locker.Unlock();
3633 
3634 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3635 		sPageQueues[pageState].AppendUnlocked(page);
3636 
3637 	// clear the page, if we had to take it from the free queue and a clear
3638 	// page was requested
3639 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3640 		clear_page(page);
3641 
3642 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3643 	page->allocation_tracking_info.Init(
3644 		TA(AllocatePage(page->physical_page_number)));
3645 #else
3646 	TA(AllocatePage(page->physical_page_number));
3647 #endif
3648 
3649 	return page;
3650 }
3651 
3652 
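/*!	Returns the given pages, pulled out of the queues by allocate_page_run(),
	to their respective free and clear queues and wakes up threads waiting for
	free pages.
*/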
3653 static void
3654 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3655 	VMPageQueue::PageList& clearPages)
3656 {
3657 	while (vm_page* page = freePages.RemoveHead()) {
3658 		page->busy = false;
3659 		page->SetState(PAGE_STATE_FREE);
3660 		DEBUG_PAGE_ACCESS_END(page);
3661 		sFreePageQueue.PrependUnlocked(page);
3662 	}
3663 
3664 	while (vm_page* page = clearPages.RemoveHead()) {
3665 		page->busy = false;
3666 		page->SetState(PAGE_STATE_CLEAR);
3667 		DEBUG_PAGE_ACCESS_END(page);
3668 		sClearPageQueue.PrependUnlocked(page);
3669 	}
3670 
3671 	sFreePageCondition.NotifyAll();
3672 }
3673 
3674 
3675 /*!	Tries to allocate a contiguous run of \a length pages starting at
3676 	index \a start.
3677 
3678 	The caller must have write-locked the free/clear page queues. The function
3679 	will unlock regardless of whether it succeeds or fails.
3680 
3681 	If the function fails, it cleans up after itself, i.e. it will free all
3682 	pages it managed to allocate.
3683 
3684 	\param start The start index (into \c sPages) of the run.
3685 	\param length The number of pages to allocate.
3686 	\param flags Page allocation flags. Encodes the state the function shall
3687 		set the allocated pages to, whether the pages shall be marked busy
3688 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3689 		(VM_PAGE_ALLOC_CLEAR).
3690 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3691 		passed in locked state. Will be unlocked by the function.
3692 	\return The index of the first page that could not be allocated. \a length
3693 		is returned when the function was successful.
3694 */
3695 static page_num_t
3696 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3697 	WriteLocker& freeClearQueueLocker)
3698 {
3699 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3700 	ASSERT(pageState != PAGE_STATE_FREE);
3701 	ASSERT(pageState != PAGE_STATE_CLEAR);
3702 	ASSERT(start + length <= sNumPages);
3703 
3704 	// Pull the free/clear pages out of their respective queues. Cached pages
3705 	// are allocated later.
3706 	page_num_t cachedPages = 0;
3707 	VMPageQueue::PageList freePages;
3708 	VMPageQueue::PageList clearPages;
3709 	page_num_t i = 0;
3710 	for (; i < length; i++) {
3711 		bool pageAllocated = true;
3712 		bool noPage = false;
3713 		vm_page& page = sPages[start + i];
3714 		switch (page.State()) {
3715 			case PAGE_STATE_CLEAR:
3716 				DEBUG_PAGE_ACCESS_START(&page);
3717 				sClearPageQueue.Remove(&page);
3718 				clearPages.Add(&page);
3719 				break;
3720 			case PAGE_STATE_FREE:
3721 				DEBUG_PAGE_ACCESS_START(&page);
3722 				sFreePageQueue.Remove(&page);
3723 				freePages.Add(&page);
3724 				break;
3725 			case PAGE_STATE_CACHED:
3726 				// We allocate cached pages later.
3727 				cachedPages++;
3728 				pageAllocated = false;
3729 				break;
3730 
3731 			default:
3732 				// Probably a page was cached when our caller checked. Now it's
3733 				// gone and we have to abort.
3734 				noPage = true;
3735 				break;
3736 		}
3737 
3738 		if (noPage)
3739 			break;
3740 
3741 		if (pageAllocated) {
3742 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3743 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3744 			page.usage_count = 0;
3745 			page.accessed = false;
3746 			page.modified = false;
3747 		}
3748 	}
3749 
3750 	if (i < length) {
3751 		// failed to allocate a page -- free all that we've got
3752 		allocate_page_run_cleanup(freePages, clearPages);
3753 		return i;
3754 	}
3755 
3756 	freeClearQueueLocker.Unlock();
3757 
3758 	if (cachedPages > 0) {
3759 		// allocate the pages that weren't free but cached
3760 		page_num_t freedCachedPages = 0;
3761 		page_num_t nextIndex = start;
3762 		vm_page* freePage = freePages.Head();
3763 		vm_page* clearPage = clearPages.Head();
3764 		while (cachedPages > 0) {
3765 			// skip, if we've already got the page
3766 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3767 				freePage = freePages.GetNext(freePage);
3768 				nextIndex++;
3769 				continue;
3770 			}
3771 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3772 				clearPage = clearPages.GetNext(clearPage);
3773 				nextIndex++;
3774 				continue;
3775 			}
3776 
3777 			// free the page, if it is still cached
3778 			vm_page& page = sPages[nextIndex];
3779 			if (!free_cached_page(&page, false)) {
3780 				// TODO: if the page turns out to have been freed already,
3781 				// there would be no need to fail
3782 				break;
3783 			}
3784 
3785 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3786 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3787 			page.usage_count = 0;
3788 			page.accessed = false;
3789 			page.modified = false;
3790 
3791 			freePages.InsertBefore(freePage, &page);
3792 			freedCachedPages++;
3793 			cachedPages--;
3794 			nextIndex++;
3795 		}
3796 
3797 		// If we have freed cached pages, we need to balance things: these
		// pages were not part of the free pool our reservation was taken from,
		// so return the same number of pages to the unreserved counter.
3798 		if (freedCachedPages > 0)
3799 			unreserve_pages(freedCachedPages);
3800 
3801 		if (nextIndex - start < length) {
3802 			// failed to allocate all cached pages -- free all that we've got
3803 			freeClearQueueLocker.Lock();
3804 			allocate_page_run_cleanup(freePages, clearPages);
3805 			freeClearQueueLocker.Unlock();
3806 
3807 			return nextIndex - start;
3808 		}
3809 	}
3810 
3811 	// clear pages, if requested
3812 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3813 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3814 				vm_page* page = it.Next();) {
3815 			clear_page(page);
3816 		}
3817 	}
3818 
3819 	// add pages to target queue
3820 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3821 		freePages.MoveFrom(&clearPages);
3822 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3823 	}
3824 
3825 	// Note: We don't unreserve the pages since we pulled them out of the
3826 	// free/clear queues without adjusting sUnreservedFreePages.
3827 
3828 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3829 	AbstractTraceEntryWithStackTrace* traceEntry
3830 		= TA(AllocatePageRun(start, length));
3831 
3832 	for (page_num_t i = start; i < start + length; i++)
3833 		sPages[i].allocation_tracking_info.Init(traceEntry);
3834 #else
3835 	TA(AllocatePageRun(start, length));
3836 #endif
3837 
3838 	return length;
3839 }
3840 
3841 
3842 /*! Allocate a physically contiguous range of pages.
3843 
3844 	\param flags Page allocation flags. Encodes the state the function shall
3845 		set the allocated pages to, whether the pages shall be marked busy
3846 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3847 		(VM_PAGE_ALLOC_CLEAR).
3848 	\param length The number of contiguous pages to allocate.
3849 	\param restrictions Restrictions to the physical addresses of the page run
3850 		to allocate, including \c low_address, the first acceptable physical
3851 		address where the page run may start, \c high_address, the last
3852 		acceptable physical address where the page run may end (i.e. it must
3853 		hold \code runStartAddress + length <= high_address \endcode),
3854 		\c alignment, the alignment of the page run start address, and
3855 		\c boundary, multiples of which the page run must not cross.
3856 		Values set to \c 0 are ignored.
3857 	\param priority The page reservation priority (as passed to
3858 		vm_page_reserve_pages()).
3859 	\return The first page of the allocated page run on success; \c NULL
3860 		when the allocation failed.
3861 */
3862 vm_page*
3863 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3864 	const physical_address_restrictions* restrictions, int priority)
3865 {
3866 	// compute start and end page index
3867 	page_num_t requestedStart
3868 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3869 			- sPhysicalPageOffset;
3870 	page_num_t start = requestedStart;
3871 	page_num_t end;
3872 	if (restrictions->high_address > 0) {
3873 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3874 				sPhysicalPageOffset)
3875 			- sPhysicalPageOffset;
3876 		end = std::min(end, sNumPages);
3877 	} else
3878 		end = sNumPages;
3879 
3880 	// compute alignment mask
3881 	page_num_t alignmentMask
3882 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3883 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3884 		// alignment must be a power of 2
3885 
3886 	// compute the boundary mask
3887 	uint32 boundaryMask = 0;
3888 	if (restrictions->boundary != 0) {
3889 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3890 		// boundary must be a power of two and not less than alignment and
3891 		// length
3892 		ASSERT(((boundary - 1) & boundary) == 0);
3893 		ASSERT(boundary >= alignmentMask + 1);
3894 		ASSERT(boundary >= length);
3895 
3896 		boundaryMask = -boundary;
3897 	}
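
	// Example (assuming 4 KB pages): an alignment of 64 KB yields
	// alignmentMask = 16 - 1 = 0xf, and a boundary of 64 KB yields
	// boundaryMask = -16 (i.e. ~0xf), masking off the page offset within a
	// 16-page block.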
3898 
3899 	vm_page_reservation reservation;
3900 	vm_page_reserve_pages(&reservation, length, priority);
3901 
3902 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3903 
3904 	// First we try to get a run with free pages only. If that fails, we also
3905 	// consider cached pages. If there are only a few free pages and many
3906 	// cached ones, the odds are that we won't find enough contiguous ones, so
3907 	// we skip the first iteration in this case.
3908 	int32 freePages = sUnreservedFreePages;
3909 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3910 
3911 	for (;;) {
3912 		if (alignmentMask != 0 || boundaryMask != 0) {
3913 			page_num_t offsetStart = start + sPhysicalPageOffset;
3914 
3915 			// enforce alignment
3916 			if ((offsetStart & alignmentMask) != 0)
3917 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3918 
3919 			// enforce boundary
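			// (the run crosses a boundary iff its first and last pages differ
			// in the bits selected by boundaryMask; if so, restart the run at
			// the next boundary, i.e. the last page's index rounded down to a
			// boundary multiple)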
3920 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3921 				+ length - 1)) & boundaryMask) != 0) {
3922 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3923 			}
3924 
3925 			start = offsetStart - sPhysicalPageOffset;
3926 		}
3927 
3928 		if (start + length > end) {
3929 			if (useCached == 0) {
3930 				// The first iteration with free pages only was unsuccessful.
3931 				// Try again also considering cached pages.
3932 				useCached = 1;
3933 				start = requestedStart;
3934 				continue;
3935 			}
3936 
3937 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3938 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3939 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3940 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3941 				end, restrictions->alignment, restrictions->boundary);
3942 
3943 			freeClearQueueLocker.Unlock();
3944 			vm_page_unreserve_pages(&reservation);
3945 			return NULL;
3946 		}
3947 
3948 		bool foundRun = true;
3949 		page_num_t i;
3950 		for (i = 0; i < length; i++) {
3951 			uint32 pageState = sPages[start + i].State();
3952 			if (pageState != PAGE_STATE_FREE
3953 				&& pageState != PAGE_STATE_CLEAR
3954 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3955 				foundRun = false;
3956 				break;
3957 			}
3958 		}
3959 
3960 		if (foundRun) {
3961 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3962 			if (i == length)
3963 				return &sPages[start];
3964 
3965 			// apparently a cached page couldn't be allocated -- skip it and
3966 			// continue
3967 			freeClearQueueLocker.Lock();
3968 		}
3969 
3970 		start += i + 1;
3971 	}
3972 }
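
/*	Illustrative usage sketch (a hypothetical caller, not an actual call site):
	allocate 16 physically contiguous, wired and pre-cleared pages below 1 GB,
	with the run starting at a 64 KB aligned physical address:

	\code
	physical_address_restrictions restrictions = {};
	restrictions.high_address = 1024 * 1024 * 1024;
		// the run must lie below 1 GB
	restrictions.alignment = 64 * 1024;
		// the run must start at a 64 KB aligned physical address

	vm_page* firstPage = vm_page_allocate_page_run(
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR, 16, &restrictions,
		VM_PRIORITY_SYSTEM);
	if (firstPage == NULL) {
		// no suitable contiguous run was available
	}
	\endcode

	On success the run consists of firstPage[0] .. firstPage[15], whose
	physical page numbers are consecutive.
*/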
3973 
3974 
3975 vm_page *
3976 vm_page_at_index(int32 index)
3977 {
3978 	return &sPages[index];
3979 }
3980 
3981 
3982 vm_page *
3983 vm_lookup_page(page_num_t pageNumber)
3984 {
3985 	if (pageNumber < sPhysicalPageOffset)
3986 		return NULL;
3987 
3988 	pageNumber -= sPhysicalPageOffset;
3989 	if (pageNumber >= sNumPages)
3990 		return NULL;
3991 
3992 	return &sPages[pageNumber];
3993 }
3994 
3995 
3996 bool
3997 vm_page_is_dummy(struct vm_page *page)
3998 {
3999 	return page < sPages || page >= sPages + sNumPages;
4000 }
4001 
4002 
4003 /*!	Free the page that belonged to a certain cache.
4004 	You can use vm_page_set_state() manually if you prefer, but only
4005 	if the page's state is not PAGE_STATE_MODIFIED.
4006 
4007 	\param cache The cache the page was previously owned by or NULL. The page
4008 		must have been removed from its cache before calling this method in
4009 		either case.
4010 	\param page The page to free.
4011 	\param reservation If not NULL, the page count of the reservation will be
4012 		incremented, so that another page can be allocated in place of the
4013 		freed one at a later time.
4014 */
4015 void
4016 vm_page_free_etc(VMCache* cache, vm_page* page,
4017 	vm_page_reservation* reservation)
4018 {
4019 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4020 		&& page->State() != PAGE_STATE_CLEAR);
4021 
4022 	if (page->State() == PAGE_STATE_MODIFIED && cache != NULL
			&& cache->temporary) {
4023 		atomic_add(&sModifiedTemporaryPages, -1);
	}
4024 
4025 	free_page(page, false);
4026 	if (reservation == NULL)
4027 		unreserve_pages(1);
4028 }
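
/*	Illustrative usage sketch (hypothetical caller): free a page that has just
	been removed from its (locked) cache while a page reservation is held, so
	the freed page is not returned to the unreserved pool:

	\code
	cache->RemovePage(page);
	vm_page_free_etc(cache, page, &reservation);
	\endcode
*/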
4029 
4030 
4031 void
4032 vm_page_set_state(vm_page *page, int pageState)
4033 {
4034 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4035 		&& page->State() != PAGE_STATE_CLEAR);
4036 
4037 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
4038 		free_page(page, pageState == PAGE_STATE_CLEAR);
4039 		unreserve_pages(1);
4040 	} else
4041 		set_page_state(page, pageState);
4042 }
4043 
4044 
4045 /*!	Moves a page to either the tail or the head of its current queue,
4046 	depending on \a tail.
4047 	The page must have a cache and the cache must be locked!
4048 */
4049 void
4050 vm_page_requeue(struct vm_page *page, bool tail)
4051 {
4052 	PAGE_ASSERT(page, page->Cache() != NULL);
4053 	page->Cache()->AssertLocked();
4054 	// DEBUG_PAGE_ACCESS_CHECK(page);
4055 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
4056 		// when it requeues busy pages. The reason is that vm_soft_fault()
4057 		// (more precisely fault_get_page()) and the file cache keep newly
4058 		// allocated pages accessed while they are reading them from disk. It
4059 		// would probably be better to change that code and reenable this
4060 		// check.
4061 
4062 	VMPageQueue *queue = NULL;
4063 
4064 	switch (page->State()) {
4065 		case PAGE_STATE_ACTIVE:
4066 			queue = &sActivePageQueue;
4067 			break;
4068 		case PAGE_STATE_INACTIVE:
4069 			queue = &sInactivePageQueue;
4070 			break;
4071 		case PAGE_STATE_MODIFIED:
4072 			queue = &sModifiedPageQueue;
4073 			break;
4074 		case PAGE_STATE_CACHED:
4075 			queue = &sCachedPageQueue;
4076 			break;
4077 		case PAGE_STATE_FREE:
4078 		case PAGE_STATE_CLEAR:
4079 			panic("vm_page_requeue() called for free/clear page %p", page);
4080 			return;
4081 		case PAGE_STATE_WIRED:
4082 		case PAGE_STATE_UNUSED:
4083 			return;
4084 		default:
4085 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
4086 				page, page->State());
4087 			break;
4088 	}
4089 
4090 	queue->RequeueUnlocked(page, tail);
4091 }
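
/*	Illustrative usage sketch (hypothetical caller; a real caller usually
	already holds the cache lock as part of a larger operation): move a page
	to the tail of its current queue:

	\code
	VMCache* cache = page->Cache();
	cache->Lock();
	vm_page_requeue(page, true);
	cache->Unlock();
	\endcode
*/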
4092 
4093 
4094 page_num_t
4095 vm_page_num_pages(void)
4096 {
4097 	return sNumPages - sNonExistingPages;
4098 }
4099 
4100 
4101 /*! There is a subtle distinction between the page counts returned by
4102 	this function and vm_page_num_free_pages():
4103 	The latter returns the number of pages that are completely uncommitted,
4104 	whereas this one returns the number of pages that are available for
4105 	use by being reclaimed as well (IOW it factors in things like cache pages
4106 	as available).
4107 */
4108 page_num_t
4109 vm_page_num_available_pages(void)
4110 {
4111 	return vm_available_memory() / B_PAGE_SIZE;
4112 }
4113 
4114 
4115 page_num_t
4116 vm_page_num_free_pages(void)
4117 {
4118 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4119 	return count > 0 ? count : 0;
4120 }
4121 
4122 
4123 page_num_t
4124 vm_page_num_unused_pages(void)
4125 {
4126 	int32 count = sUnreservedFreePages;
4127 	return count > 0 ? count : 0;
4128 }
4129 
4130 
4131 void
4132 vm_page_get_stats(system_info *info)
4133 {
4134 	// Note: there's no locking protecting any of the queues or counters here,
4135 	// so we run the risk of getting bogus values when evaluating them
4136 	// throughout this function. As these stats are for informational purposes
4137 	// only, it is not really worth introducing such locking. Therefore we just
4138 	// ensure that we don't under- or overflow any of the values.
4139 
4140 	// The pages used for the block cache buffers. Those should not be counted
4141 	// as used but as cached pages.
4142 	// TODO: We should subtract the blocks that are in use ATM, since those
4143 	// can't really be freed in a low memory situation.
4144 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4145 	info->block_cache_pages = blockCachePages;
4146 
4147 	// Non-temporary modified pages are special as they represent pages that
4148 	// can be written back, so they could be freed if necessary; for our
4149 	// purposes they are basically cached pages with a higher overhead. The
4150 	// modified queue count is therefore split into temporary and non-temporary
4151 	// counts that are then added to the corresponding number.
4152 	page_num_t modifiedNonTemporaryPages
4153 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4154 
4155 	info->max_pages = vm_page_num_pages();
4156 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4157 		+ blockCachePages;
4158 
4159 	// max_pages is composed of:
4160 	//	active + inactive + unused + wired + modified + cached + free + clear
4161 	// So taking out the cached (including modified non-temporary), free and
4162 	// clear ones leaves us with all used pages.
4163 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4164 		+ sClearPageQueue.Count();
4165 	info->used_pages = subtractPages > info->max_pages
4166 		? 0 : info->max_pages - subtractPages;
4167 
4168 	if (info->used_pages + info->cached_pages > info->max_pages) {
4169 		// Something was shuffled around while we were summing up the counts.
4170 		// Make the values sane, preferring the worse case of more used pages.
4171 		info->cached_pages = info->max_pages - info->used_pages;
4172 	}
4173 
4174 	info->page_faults = vm_num_page_faults();
4175 	info->ignored_pages = sIgnoredPages;
4176 
4177 	// TODO: We don't consider pages used for page directories/tables yet.
4178 }
4179 
4180 
4181 /*!	Returns the greatest address within the last page of accessible physical
4182 	memory.
4183 	The value is inclusive, i.e. in the case of a 32 bit phys_addr_t a value
4184 	of 0xffffffff means that the last page ends at exactly 4 GB.
4185 */
4186 phys_addr_t
4187 vm_page_max_address()
4188 {
4189 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4190 }
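
// For example, with sPhysicalPageOffset == 0 and sNumPages == 0x100000
// (4 GB worth of 4 KB pages), this returns 0x100000000 - 1 == 0xffffffff.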
4191 
4192 
4193 RANGE_MARKER_FUNCTION_END(vm_page)
4194