xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 04a0e9c7b68cbe3a43d38e2bca8e860fd80936fb)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 16
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserve that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Upper limit for vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // vm_page::usage_count bonus an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // vm_page::usage_count penalty an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static vint32 sUnreservedFreePages;
123 static vint32 sUnsatisfiedPageReservations;
124 static vint32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you must hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
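/*!	Simple wake-up latch used by the page daemon and page writer threads.
	WakeUp() sets the "activated" flag and notifies the condition variable;
	Wait() blocks with a relative timeout unless the condition was already
	activated (optionally clearing the flag first). Typical usage, as a
	sketch only (the real callers appear further down in this file):
		sPageDaemonCondition.WakeUp();             // producer: work arrived
		sPageDaemonCondition.Wait(interval, true); // daemon loop: sleep
*/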
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRIu32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRIu32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRIu32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
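/*!	Debugger command "find_page": interprets its argument as a vm_page pointer
	(given in hex) and reports which of the global page queues (free, clear,
	modified, active, inactive, cached) currently contains that page, if any.
*/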
763 static int
764 find_page(int argc, char **argv)
765 {
766 	struct vm_page *page;
767 	addr_t address;
768 	int32 index = 1;
769 	int i;
770 
771 	struct {
772 		const char*	name;
773 		VMPageQueue*	queue;
774 	} pageQueueInfos[] = {
775 		{ "free",		&sFreePageQueue },
776 		{ "clear",		&sClearPageQueue },
777 		{ "modified",	&sModifiedPageQueue },
778 		{ "active",		&sActivePageQueue },
779 		{ "inactive",	&sInactivePageQueue },
780 		{ "cached",		&sCachedPageQueue },
781 		{ NULL, NULL }
782 	};
783 
784 	if (argc < 2
785 		|| strlen(argv[index]) <= 2
786 		|| argv[index][0] != '0'
787 		|| argv[index][1] != 'x') {
788 		kprintf("usage: find_page <address>\n");
789 		return 0;
790 	}
791 
792 	address = strtoul(argv[index], NULL, 0);
793 	page = (vm_page*)address;
794 
795 	for (i = 0; pageQueueInfos[i].name; i++) {
796 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
797 		while (vm_page* p = it.Next()) {
798 			if (p == page) {
799 				kprintf("found page %p in queue %p (%s)\n", page,
800 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
801 				return 0;
802 			}
803 		}
804 	}
805 
806 	kprintf("page %p isn't in any queue\n", page);
807 
808 	return 0;
809 }
810 
811 
812 const char *
813 page_state_to_string(int state)
814 {
815 	switch(state) {
816 		case PAGE_STATE_ACTIVE:
817 			return "active";
818 		case PAGE_STATE_INACTIVE:
819 			return "inactive";
820 		case PAGE_STATE_MODIFIED:
821 			return "modified";
822 		case PAGE_STATE_CACHED:
823 			return "cached";
824 		case PAGE_STATE_FREE:
825 			return "free";
826 		case PAGE_STATE_CLEAR:
827 			return "clear";
828 		case PAGE_STATE_WIRED:
829 			return "wired";
830 		case PAGE_STATE_UNUSED:
831 			return "unused";
832 		default:
833 			return "unknown";
834 	}
835 }
836 
837 
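/*!	Debugger command dumping the state of a single vm_page. By default the
	argument is a vm_page pointer; with "-v" it is treated as a virtual
	address in the debugged team's address space, with "-p" as a physical
	address. "-m" additionally searches all address spaces for mappings of
	the page's physical frame.
*/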
838 static int
839 dump_page(int argc, char **argv)
840 {
841 	bool addressIsPointer = true;
842 	bool physical = false;
843 	bool searchMappings = false;
844 	int32 index = 1;
845 
846 	while (index < argc) {
847 		if (argv[index][0] != '-')
848 			break;
849 
850 		if (!strcmp(argv[index], "-p")) {
851 			addressIsPointer = false;
852 			physical = true;
853 		} else if (!strcmp(argv[index], "-v")) {
854 			addressIsPointer = false;
855 		} else if (!strcmp(argv[index], "-m")) {
856 			searchMappings = true;
857 		} else {
858 			print_debugger_command_usage(argv[0]);
859 			return 0;
860 		}
861 
862 		index++;
863 	}
864 
865 	if (index + 1 != argc) {
866 		print_debugger_command_usage(argv[0]);
867 		return 0;
868 	}
869 
870 	uint64 value;
871 	if (!evaluate_debug_expression(argv[index], &value, false))
872 		return 0;
873 
874 	uint64 pageAddress = value;
875 	struct vm_page* page;
876 
877 	if (addressIsPointer) {
878 		page = (struct vm_page *)(addr_t)pageAddress;
879 	} else {
880 		if (!physical) {
881 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
882 
883 			if (debug_get_debugged_thread()->team->address_space != NULL)
884 				addressSpace = debug_get_debugged_thread()->team->address_space;
885 
886 			uint32 flags = 0;
887 			phys_addr_t physicalAddress;
888 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
889 					&physicalAddress, &flags) != B_OK
890 				|| (flags & PAGE_PRESENT) == 0) {
891 				kprintf("Virtual address not mapped to a physical page in this "
892 					"address space.\n");
893 				return 0;
894 			}
895 			pageAddress = physicalAddress;
896 		}
897 
898 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
899 	}
900 
901 	kprintf("PAGE: %p\n", page);
902 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
903 		page->queue_link.previous);
904 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
905 		page->physical_page_number);
906 	kprintf("cache:           %p\n", page->Cache());
907 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
908 	kprintf("cache_next:      %p\n", page->cache_next);
909 	kprintf("state:           %s\n", page_state_to_string(page->State()));
910 	kprintf("wired_count:     %d\n", page->WiredCount());
911 	kprintf("usage_count:     %d\n", page->usage_count);
912 	kprintf("busy:            %d\n", page->busy);
913 	kprintf("busy_writing:    %d\n", page->busy_writing);
914 	kprintf("accessed:        %d\n", page->accessed);
915 	kprintf("modified:        %d\n", page->modified);
916 	#if DEBUG_PAGE_QUEUE
917 		kprintf("queue:           %p\n", page->queue);
918 	#endif
919 	#if DEBUG_PAGE_ACCESS
920 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
921 	#endif
922 	kprintf("area mappings:\n");
923 
924 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
925 	vm_page_mapping *mapping;
926 	while ((mapping = iterator.Next()) != NULL) {
927 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
929 	}
930 
931 	if (searchMappings) {
932 		kprintf("all mappings:\n");
933 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
934 		while (addressSpace != NULL) {
935 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
936 			for (addr_t address = addressSpace->Base(); pageCount != 0;
937 					address += B_PAGE_SIZE, pageCount--) {
938 				phys_addr_t physicalAddress;
939 				uint32 flags = 0;
940 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
941 						&physicalAddress, &flags) == B_OK
942 					&& (flags & PAGE_PRESENT) != 0
943 					&& physicalAddress / B_PAGE_SIZE
944 						== page->physical_page_number) {
945 					VMArea* area = addressSpace->LookupArea(address);
946 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
947 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
948 						area != NULL ? area->id : -1, address,
949 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
950 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
951 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
952 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
953 				}
954 			}
955 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
956 		}
957 	}
958 
959 	set_debug_variable("_cache", (addr_t)page->Cache());
960 	#if DEBUG_PAGE_ACCESS
961 		set_debug_variable("_accessor", page->accessing_thread);
962 	#endif
963 
964 	return 0;
965 }
966 
967 
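/*!	Debugger command "page_queue": prints head, tail, and page count of one of
	the global page queues (selected by name or by address); with an extra
	argument it also lists every page in that queue.
*/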
968 static int
969 dump_page_queue(int argc, char **argv)
970 {
971 	struct VMPageQueue *queue;
972 
973 	if (argc < 2) {
974 		kprintf("usage: page_queue <address/name> [list]\n");
975 		return 0;
976 	}
977 
978 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
979 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
980 	else if (!strcmp(argv[1], "free"))
981 		queue = &sFreePageQueue;
982 	else if (!strcmp(argv[1], "clear"))
983 		queue = &sClearPageQueue;
984 	else if (!strcmp(argv[1], "modified"))
985 		queue = &sModifiedPageQueue;
986 	else if (!strcmp(argv[1], "active"))
987 		queue = &sActivePageQueue;
988 	else if (!strcmp(argv[1], "inactive"))
989 		queue = &sInactivePageQueue;
990 	else if (!strcmp(argv[1], "cached"))
991 		queue = &sCachedPageQueue;
992 	else {
993 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
994 		return 0;
995 	}
996 
997 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
998 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
999 		queue->Count());
1000 
1001 	if (argc == 3) {
1002 		struct vm_page *page = queue->Head();
1003 
1004 		kprintf("page        cache       type       state  wired  usage\n");
1005 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1006 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1007 				vm_cache_type_to_string(page->Cache()->type),
1008 				page_state_to_string(page->State()),
1009 				page->WiredCount(), page->usage_count);
1010 		}
1011 	}
1012 	return 0;
1013 }
1014 
1015 
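/*!	Debugger command printing global page statistics: it walks the whole
	sPages array to count pages per state, tracks the longest free and
	free/cached runs, and dumps reservation info as well as the sizes of the
	global page queues.
*/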
1016 static int
1017 dump_page_stats(int argc, char **argv)
1018 {
1019 	page_num_t swappableModified = 0;
1020 	page_num_t swappableModifiedInactive = 0;
1021 
1022 	size_t counter[8];
1023 	size_t busyCounter[8];
1024 	memset(counter, 0, sizeof(counter));
1025 	memset(busyCounter, 0, sizeof(busyCounter));
1026 
1027 	struct page_run {
1028 		page_num_t	start;
1029 		page_num_t	end;
1030 
1031 		page_num_t Length() const	{ return end - start; }
1032 	};
1033 
1034 	page_run currentFreeRun = { 0, 0 };
1035 	page_run currentCachedRun = { 0, 0 };
1036 	page_run longestFreeRun = { 0, 0 };
1037 	page_run longestCachedRun = { 0, 0 };
1038 
1039 	for (page_num_t i = 0; i < sNumPages; i++) {
1040 		if (sPages[i].State() > 7) {
1041 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1042 				&sPages[i]);
1043 		}
1044 
1045 		uint32 pageState = sPages[i].State();
1046 
1047 		counter[pageState]++;
1048 		if (sPages[i].busy)
1049 			busyCounter[pageState]++;
1050 
1051 		if (pageState == PAGE_STATE_MODIFIED
1052 			&& sPages[i].Cache() != NULL
1053 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1054 			swappableModified++;
1055 			if (sPages[i].usage_count == 0)
1056 				swappableModifiedInactive++;
1057 		}
1058 
1059 		// track free and cached pages runs
1060 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1061 			currentFreeRun.end = i + 1;
1062 			currentCachedRun.end = i + 1;
1063 		} else {
1064 			if (currentFreeRun.Length() > longestFreeRun.Length())
1065 				longestFreeRun = currentFreeRun;
1066 			currentFreeRun.start = currentFreeRun.end = i + 1;
1067 
1068 			if (pageState == PAGE_STATE_CACHED) {
1069 				currentCachedRun.end = i + 1;
1070 			} else {
1071 				if (currentCachedRun.Length() > longestCachedRun.Length())
1072 					longestCachedRun = currentCachedRun;
1073 				currentCachedRun.start = currentCachedRun.end = i + 1;
1074 			}
1075 		}
1076 	}
1077 
1078 	kprintf("page stats:\n");
1079 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1080 
1081 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1082 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1083 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1084 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1085 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1086 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1087 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1088 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1089 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1090 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1091 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1092 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1093 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1094 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1095 
1096 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1097 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1098 		sUnsatisfiedPageReservations);
1099 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1100 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1101 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1102 		sPages[longestFreeRun.start].physical_page_number);
1103 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1104 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1105 		sPages[longestCachedRun.start].physical_page_number);
1106 
1107 	kprintf("waiting threads:\n");
1108 	for (PageReservationWaiterList::Iterator it
1109 			= sPageReservationWaiters.GetIterator();
1110 		PageReservationWaiter* waiter = it.Next();) {
1111 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1112 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1113 			waiter->missing, waiter->dontTouch);
1114 	}
1115 
1116 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1117 		sFreePageQueue.Count());
1118 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1119 		sClearPageQueue.Count());
1120 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1121 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1122 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1123 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1124 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1125 		&sActivePageQueue, sActivePageQueue.Count());
1126 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1127 		&sInactivePageQueue, sInactivePageQueue.Count());
1128 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1129 		&sCachedPageQueue, sCachedPageQueue.Count());
1130 	return 0;
1131 }
1132 
1133 
1134 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1135 
1136 static caller_info*
1137 get_caller_info(addr_t caller)
1138 {
1139 	// find the caller info
1140 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1141 		if (caller == sCallerInfoTable[i].caller)
1142 			return &sCallerInfoTable[i];
1143 	}
1144 
1145 	// not found, add a new entry, if there are free slots
1146 	if (sCallerInfoCount >= kCallerInfoTableSize)
1147 		return NULL;
1148 
1149 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1150 	info->caller = caller;
1151 	info->count = 0;
1152 
1153 	return info;
1154 }
1155 
1156 
1157 static int
1158 caller_info_compare_count(const void* _a, const void* _b)
1159 {
1160 	const caller_info* a = (const caller_info*)_a;
1161 	const caller_info* b = (const caller_info*)_b;
1162 	return (int)(b->count - a->count);
1163 }
1164 
1165 
1166 static int
1167 dump_page_allocations_per_caller(int argc, char** argv)
1168 {
1169 	bool resetAllocationInfos = false;
1170 	bool printDetails = false;
1171 	addr_t caller = 0;
1172 
1173 	for (int32 i = 1; i < argc; i++) {
1174 		if (strcmp(argv[i], "-d") == 0) {
1175 			uint64 callerAddress;
1176 			if (++i >= argc
1177 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1178 				print_debugger_command_usage(argv[0]);
1179 				return 0;
1180 			}
1181 
1182 			caller = callerAddress;
1183 			printDetails = true;
1184 		} else if (strcmp(argv[i], "-r") == 0) {
1185 			resetAllocationInfos = true;
1186 		} else {
1187 			print_debugger_command_usage(argv[0]);
1188 			return 0;
1189 		}
1190 	}
1191 
1192 	sCallerInfoCount = 0;
1193 
1194 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1195 	AllocationDetailPrinterCallback detailsCallback(caller);
1196 	AllocationTrackingCallback& callback = printDetails
1197 		? (AllocationTrackingCallback&)detailsCallback
1198 		: (AllocationTrackingCallback&)collectorCallback;
1199 
1200 	for (page_num_t i = 0; i < sNumPages; i++)
1201 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1202 
1203 	if (printDetails)
1204 		return 0;
1205 
1206 	// sort the array
1207 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1208 		&caller_info_compare_count);
1209 
1210 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1211 
1212 	size_t totalAllocationCount = 0;
1213 
1214 	kprintf("     count      caller\n");
1215 	kprintf("----------------------------------\n");
1216 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1217 		caller_info& info = sCallerInfoTable[i];
1218 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1219 
1220 		const char* symbol;
1221 		const char* imageName;
1222 		bool exactMatch;
1223 		addr_t baseAddress;
1224 
1225 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1226 				&imageName, &exactMatch) == B_OK) {
1227 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1228 				info.caller - baseAddress, imageName,
1229 				exactMatch ? "" : " (nearest)");
1230 		} else
1231 			kprintf("\n");
1232 
1233 		totalAllocationCount += info.count;
1234 	}
1235 
1236 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1237 		totalAllocationCount);
1238 
1239 	return 0;
1240 }
1241 
1242 
1243 static int
1244 dump_page_allocation_infos(int argc, char** argv)
1245 {
1246 	page_num_t pageFilter = 0;
1247 	team_id teamFilter = -1;
1248 	thread_id threadFilter = -1;
1249 	bool printStackTraces = false;
1250 
1251 	for (int32 i = 1; i < argc; i++) {
1252 		if (strcmp(argv[i], "--stacktrace") == 0)
1253 			printStackTraces = true;
1254 		else if (strcmp(argv[i], "-p") == 0) {
1255 			uint64 pageNumber;
1256 			if (++i >= argc
1257 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1258 				print_debugger_command_usage(argv[0]);
1259 				return 0;
1260 			}
1261 
1262 			pageFilter = pageNumber;
1263 		} else if (strcmp(argv[i], "--team") == 0) {
1264 			uint64 team;
1265 			if (++i >= argc
1266 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1267 				print_debugger_command_usage(argv[0]);
1268 				return 0;
1269 			}
1270 
1271 			teamFilter = team;
1272 		} else if (strcmp(argv[i], "--thread") == 0) {
1273 			uint64 thread;
1274 			if (++i >= argc
1275 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1276 				print_debugger_command_usage(argv[0]);
1277 				return 0;
1278 			}
1279 
1280 			threadFilter = thread;
1281 		} else {
1282 			print_debugger_command_usage(argv[0]);
1283 			return 0;
1284 		}
1285 	}
1286 
1287 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1288 		teamFilter, threadFilter);
1289 
1290 	for (page_num_t i = 0; i < sNumPages; i++)
1291 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1292 
1293 	return 0;
1294 }
1295 
1296 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1297 
1298 
1299 #ifdef TRACK_PAGE_USAGE_STATS
1300 
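// Records the usage_count of a non-wired page in the histogram being built
// for the next daemon pass. The index is offset by 128 so that negative
// usage counts can be tracked as well.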
1301 static void
1302 track_page_usage(vm_page* page)
1303 {
1304 	if (page->WiredCount() == 0) {
1305 		sNextPageUsage[(int32)page->usage_count + 128]++;
1306 		sNextPageUsagePageCount++;
1307 	}
1308 }
1309 
1310 
1311 static void
1312 update_page_usage_stats()
1313 {
1314 	std::swap(sPageUsage, sNextPageUsage);
1315 	sPageUsagePageCount = sNextPageUsagePageCount;
1316 
1317 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1318 	sNextPageUsagePageCount = 0;
1319 
1320 	// compute average
1321 	if (sPageUsagePageCount > 0) {
1322 		int64 sum = 0;
1323 		for (int32 i = 0; i < 256; i++)
1324 			sum += (int64)sPageUsage[i] * (i - 128);
1325 
1326 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1327 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1328 	}
1329 }
1330 
1331 
1332 static int
1333 dump_page_usage_stats(int argc, char** argv)
1334 {
1335 	kprintf("distribution of page usage counts (%lu pages):",
1336 		sPageUsagePageCount);
1337 
1338 	int64 sum = 0;
1339 	for (int32 i = 0; i < 256; i++) {
1340 		if (i % 8 == 0)
1341 			kprintf("\n%4ld:", i - 128);
1342 
1343 		int64 count = sPageUsage[i];
1344 		sum += count * (i - 128);
1345 
1346 		kprintf("  %9llu", count);
1347 	}
1348 
1349 	kprintf("\n\n");
1350 
1351 	kprintf("average usage count: %f\n",
1352 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1353 
1354 	return 0;
1355 }
1356 
1357 #endif	// TRACK_PAGE_USAGE_STATS
1358 
1359 
1360 // #pragma mark - vm_page
1361 
1362 
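// InitState() sets a page's state without any tracing or queue handling and
// is meant for initialization only; SetState() (below) additionally records a
// PageStateTracing entry when that tracing is enabled.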
1363 inline void
1364 vm_page::InitState(uint8 newState)
1365 {
1366 	state = newState;
1367 }
1368 
1369 
1370 inline void
1371 vm_page::SetState(uint8 newState)
1372 {
1373 	TPS(SetPageState(this, newState));
1374 
1375 	state = newState;
1376 }
1377 
1378 
1379 // #pragma mark -
1380 
1381 
1382 static void
1383 get_page_stats(page_stats& _pageStats)
1384 {
1385 	_pageStats.totalFreePages = sUnreservedFreePages;
1386 	_pageStats.cachedPages = sCachedPageQueue.Count();
1387 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1388 	// TODO: We don't get an actual snapshot here!
1389 }
1390 
1391 
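// Returns whether the page daemon needs to actively produce free pages, i.e.
// whether free plus cached pages no longer cover the unsatisfied reservations
// plus the free/cached pages target.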
1392 static bool
1393 do_active_paging(const page_stats& pageStats)
1394 {
1395 	return pageStats.totalFreePages + pageStats.cachedPages
1396 		< pageStats.unsatisfiedReservations
1397 			+ (int32)sFreeOrCachedPagesTarget;
1398 }
1399 
1400 
1401 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1402 	\a count. Doesn't touch the last \a dontTouch pages of
1403 	\c sUnreservedFreePages, though.
1404 	\return The number of actually reserved pages.
1405 */
1406 static uint32
1407 reserve_some_pages(uint32 count, uint32 dontTouch)
1408 {
1409 	while (true) {
1410 		int32 freePages = sUnreservedFreePages;
1411 		if (freePages <= (int32)dontTouch)
1412 			return 0;
1413 
1414 		int32 toReserve = std::min(count, freePages - dontTouch);
1415 		if (atomic_test_and_set(&sUnreservedFreePages,
1416 					freePages - toReserve, freePages)
1417 				== freePages) {
1418 			return toReserve;
1419 		}
1420 
1421 		// the count changed in the meantime -- retry
1422 	}
1423 }
1424 
1425 
1426 static void
1427 wake_up_page_reservation_waiters()
1428 {
1429 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1430 
1431 	// TODO: If this is a low priority thread, we might want to disable
1432 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1433 	// high priority threads might be kept waiting while a medium priority thread
1434 	// prevents us from running.
1435 
1436 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1437 		int32 reserved = reserve_some_pages(waiter->missing,
1438 			waiter->dontTouch);
1439 		if (reserved == 0)
1440 			return;
1441 
1442 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1443 		waiter->missing -= reserved;
1444 
1445 		if (waiter->missing > 0)
1446 			return;
1447 
1448 		sPageReservationWaiters.Remove(waiter);
1449 
1450 		InterruptsSpinLocker schedulerLocker(gSchedulerLock);
1451 		thread_unblock_locked(waiter->thread, B_OK);
1452 	}
1453 }
1454 
1455 
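/*!	Returns \a count pages to the pool of unreserved free pages and, if any
	page reservations are still unsatisfied, tries to wake up the waiting
	threads.
*/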
1456 static inline void
1457 unreserve_pages(uint32 count)
1458 {
1459 	atomic_add(&sUnreservedFreePages, count);
1460 	if (sUnsatisfiedPageReservations != 0)
1461 		wake_up_page_reservation_waiters();
1462 }
1463 
1464 
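/*!	Moves the given page -- which must be unmapped and no longer belong to a
	cache -- from its current queue to the free or clear queue and unreserves
	it. The caller must have started the "page access" for the page
	(DEBUG_PAGE_ACCESS_START); it is ended by this function.
*/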
1465 static void
1466 free_page(vm_page* page, bool clear)
1467 {
1468 	DEBUG_PAGE_ACCESS_CHECK(page);
1469 
1470 	PAGE_ASSERT(page, !page->IsMapped());
1471 
1472 	VMPageQueue* fromQueue;
1473 
1474 	switch (page->State()) {
1475 		case PAGE_STATE_ACTIVE:
1476 			fromQueue = &sActivePageQueue;
1477 			break;
1478 		case PAGE_STATE_INACTIVE:
1479 			fromQueue = &sInactivePageQueue;
1480 			break;
1481 		case PAGE_STATE_MODIFIED:
1482 			fromQueue = &sModifiedPageQueue;
1483 			break;
1484 		case PAGE_STATE_CACHED:
1485 			fromQueue = &sCachedPageQueue;
1486 			break;
1487 		case PAGE_STATE_FREE:
1488 		case PAGE_STATE_CLEAR:
1489 			panic("free_page(): page %p already free", page);
1490 			return;
1491 		case PAGE_STATE_WIRED:
1492 		case PAGE_STATE_UNUSED:
1493 			fromQueue = NULL;
1494 			break;
1495 		default:
1496 			panic("free_page(): page %p in invalid state %d",
1497 				page, page->State());
1498 			return;
1499 	}
1500 
1501 	if (page->CacheRef() != NULL)
1502 		panic("to be freed page %p has cache", page);
1503 	if (page->IsMapped())
1504 		panic("to be freed page %p has mappings", page);
1505 
1506 	if (fromQueue != NULL)
1507 		fromQueue->RemoveUnlocked(page);
1508 
1509 	TA(FreePage(page->physical_page_number));
1510 
1511 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1512 	page->allocation_tracking_info.Clear();
1513 #endif
1514 
1515 	ReadLocker locker(sFreePageQueuesLock);
1516 
1517 	DEBUG_PAGE_ACCESS_END(page);
1518 
1519 	if (clear) {
1520 		page->SetState(PAGE_STATE_CLEAR);
1521 		sClearPageQueue.PrependUnlocked(page);
1522 	} else {
1523 		page->SetState(PAGE_STATE_FREE);
1524 		sFreePageQueue.PrependUnlocked(page);
1525 	}
1526 
1527 	locker.Unlock();
1528 
1529 	unreserve_pages(1);
1530 }
1531 
1532 
1533 /*!	The caller must make sure that no-one else tries to change the page's state
1534 	while the function is called. If the page has a cache, this can be done by
1535 	locking the cache.
1536 */
1537 static void
1538 set_page_state(vm_page *page, int pageState)
1539 {
1540 	DEBUG_PAGE_ACCESS_CHECK(page);
1541 
1542 	if (pageState == page->State())
1543 		return;
1544 
1545 	VMPageQueue* fromQueue;
1546 
1547 	switch (page->State()) {
1548 		case PAGE_STATE_ACTIVE:
1549 			fromQueue = &sActivePageQueue;
1550 			break;
1551 		case PAGE_STATE_INACTIVE:
1552 			fromQueue = &sInactivePageQueue;
1553 			break;
1554 		case PAGE_STATE_MODIFIED:
1555 			fromQueue = &sModifiedPageQueue;
1556 			break;
1557 		case PAGE_STATE_CACHED:
1558 			fromQueue = &sCachedPageQueue;
1559 			break;
1560 		case PAGE_STATE_FREE:
1561 		case PAGE_STATE_CLEAR:
1562 			panic("set_page_state(): page %p is free/clear", page);
1563 			return;
1564 		case PAGE_STATE_WIRED:
1565 		case PAGE_STATE_UNUSED:
1566 			fromQueue = NULL;
1567 			break;
1568 		default:
1569 			panic("set_page_state(): page %p in invalid state %d",
1570 				page, page->State());
1571 			return;
1572 	}
1573 
1574 	VMPageQueue* toQueue;
1575 
1576 	switch (pageState) {
1577 		case PAGE_STATE_ACTIVE:
1578 			toQueue = &sActivePageQueue;
1579 			break;
1580 		case PAGE_STATE_INACTIVE:
1581 			toQueue = &sInactivePageQueue;
1582 			break;
1583 		case PAGE_STATE_MODIFIED:
1584 			toQueue = &sModifiedPageQueue;
1585 			break;
1586 		case PAGE_STATE_CACHED:
1587 			PAGE_ASSERT(page, !page->IsMapped());
1588 			PAGE_ASSERT(page, !page->modified);
1589 			toQueue = &sCachedPageQueue;
1590 			break;
1591 		case PAGE_STATE_FREE:
1592 		case PAGE_STATE_CLEAR:
1593 			panic("set_page_state(): target state is free/clear");
1594 			return;
1595 		case PAGE_STATE_WIRED:
1596 		case PAGE_STATE_UNUSED:
1597 			toQueue = NULL;
1598 			break;
1599 		default:
1600 			panic("set_page_state(): invalid target state %d", pageState);
1601 			return;
1602 	}
1603 
1604 	VMCache* cache = page->Cache();
1605 	if (cache != NULL && cache->temporary) {
1606 		if (pageState == PAGE_STATE_MODIFIED)
1607 			atomic_add(&sModifiedTemporaryPages, 1);
1608 		else if (page->State() == PAGE_STATE_MODIFIED)
1609 			atomic_add(&sModifiedTemporaryPages, -1);
1610 	}
1611 
1612 	// move the page
1613 	if (toQueue == fromQueue) {
1614 		// Note: Theoretically we are required to lock when changing the page
1615 		// state, even if we don't change the queue. We actually don't have to
1616 		// do this, though, since only for the active queue there are different
1617 		// page states and active pages have a cache that must be locked at
1618 		// this point. So we rely on the fact that everyone must lock the cache
1619 		// before trying to change/interpret the page state.
1620 		PAGE_ASSERT(page, cache != NULL);
1621 		cache->AssertLocked();
1622 		page->SetState(pageState);
1623 	} else {
1624 		if (fromQueue != NULL)
1625 			fromQueue->RemoveUnlocked(page);
1626 
1627 		page->SetState(pageState);
1628 
1629 		if (toQueue != NULL)
1630 			toQueue->AppendUnlocked(page);
1631 	}
1632 }
1633 
1634 
1635 /*! Moves a previously modified page into a now appropriate queue.
1636 	The page queues must not be locked.
1637 */
1638 static void
1639 move_page_to_appropriate_queue(vm_page *page)
1640 {
1641 	DEBUG_PAGE_ACCESS_CHECK(page);
1642 
1643 	// Note, this logic must be in sync with what the page daemon does.
1644 	int32 state;
1645 	if (page->IsMapped())
1646 		state = PAGE_STATE_ACTIVE;
1647 	else if (page->modified)
1648 		state = PAGE_STATE_MODIFIED;
1649 	else
1650 		state = PAGE_STATE_CACHED;
1651 
1652 // TODO: If free + cached pages are low, we might directly want to free the
1653 // page.
1654 	set_page_state(page, state);
1655 }
1656 
1657 
1658 static void
1659 clear_page(struct vm_page *page)
1660 {
1661 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1662 		B_PAGE_SIZE);
1663 }
1664 
1665 
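/*!	Removes the given range of physical pages from the free/clear queues and
	marks the pages wired or unused. This is expected to happen only during
	early boot, when ranges already used by the boot loader or kernel are
	taken out of circulation (cf. the TODO below).
*/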
1666 static status_t
1667 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1668 {
1669 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1670 		B_PRIxPHYSADDR "\n", startPage, length));
1671 
1672 	if (sPhysicalPageOffset > startPage) {
1673 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1674 			"): start page is before free list\n", startPage, length);
1675 		if (sPhysicalPageOffset - startPage >= length)
1676 			return B_OK;
1677 		length -= sPhysicalPageOffset - startPage;
1678 		startPage = sPhysicalPageOffset;
1679 	}
1680 
1681 	startPage -= sPhysicalPageOffset;
1682 
1683 	if (startPage + length > sNumPages) {
1684 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1685 			"): range would extend past free list\n", startPage, length);
1686 		if (startPage >= sNumPages)
1687 			return B_OK;
1688 		length = sNumPages - startPage;
1689 	}
1690 
1691 	WriteLocker locker(sFreePageQueuesLock);
1692 
1693 	for (page_num_t i = 0; i < length; i++) {
1694 		vm_page *page = &sPages[startPage + i];
1695 		switch (page->State()) {
1696 			case PAGE_STATE_FREE:
1697 			case PAGE_STATE_CLEAR:
1698 			{
1699 // TODO: This violates the page reservation policy, since we remove pages from
1700 // the free/clear queues without having reserved them before. This should happen
1701 // in the early boot process only, though.
1702 				DEBUG_PAGE_ACCESS_START(page);
1703 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1704 					? sFreePageQueue : sClearPageQueue;
1705 				queue.Remove(page);
1706 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1707 				page->busy = false;
1708 				atomic_add(&sUnreservedFreePages, -1);
1709 				DEBUG_PAGE_ACCESS_END(page);
1710 				break;
1711 			}
1712 			case PAGE_STATE_WIRED:
1713 			case PAGE_STATE_UNUSED:
1714 				break;
1715 			case PAGE_STATE_ACTIVE:
1716 			case PAGE_STATE_INACTIVE:
1717 			case PAGE_STATE_MODIFIED:
1718 			case PAGE_STATE_CACHED:
1719 			default:
1720 				// uh
1721 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1722 					" in non-free state %d!\n", startPage + i, page->State());
1723 				break;
1724 		}
1725 	}
1726 
1727 	return B_OK;
1728 }
1729 
1730 
1731 /*!
1732 	This is a background thread that wakes up periodically (every 100ms)
1733 	and moves some pages from the free queue over to the clear queue.
1734 	Given enough time, it will clear out all pages from the free queue - we
1735 	could probably slow it down after having reached a certain threshold.
1736 */
1737 static int32
1738 page_scrubber(void *unused)
1739 {
1740 	(void)(unused);
1741 
1742 	TRACE(("page_scrubber starting...\n"));
1743 
1744 	for (;;) {
1745 		snooze(100000); // 100ms
1746 
1747 		if (sFreePageQueue.Count() == 0
1748 				|| sUnreservedFreePages < (int32)sFreePagesTarget) {
1749 			continue;
1750 		}
1751 
1752 		// Since we temporarily remove pages from the free pages reserve,
1753 		// we must make sure we don't cause a violation of the page
1754 		// reservation guarantee. The following is usually stricter than
1755 		// necessary, because we don't have information on how many of the
1756 		// reserved pages have already been allocated.
1757 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1758 			kPageReserveForPriority[VM_PRIORITY_USER]);
1759 		if (reserved == 0)
1760 			continue;
1761 
1762 		// get some pages from the free queue
1763 		ReadLocker locker(sFreePageQueuesLock);
1764 
1765 		vm_page *page[SCRUB_SIZE];
1766 		int32 scrubCount = 0;
1767 		for (int32 i = 0; i < reserved; i++) {
1768 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1769 			if (page[i] == NULL)
1770 				break;
1771 
1772 			DEBUG_PAGE_ACCESS_START(page[i]);
1773 
1774 			page[i]->SetState(PAGE_STATE_ACTIVE);
1775 			page[i]->busy = true;
1776 			scrubCount++;
1777 		}
1778 
1779 		locker.Unlock();
1780 
1781 		if (scrubCount == 0) {
1782 			unreserve_pages(reserved);
1783 			continue;
1784 		}
1785 
1786 		TA(ScrubbingPages(scrubCount));
1787 
1788 		// clear them
1789 		for (int32 i = 0; i < scrubCount; i++)
1790 			clear_page(page[i]);
1791 
1792 		locker.Lock();
1793 
1794 		// and put them into the clear queue
1795 		for (int32 i = 0; i < scrubCount; i++) {
1796 			page[i]->SetState(PAGE_STATE_CLEAR);
1797 			page[i]->busy = false;
1798 			DEBUG_PAGE_ACCESS_END(page[i]);
1799 			sClearPageQueue.PrependUnlocked(page[i]);
1800 		}
1801 
1802 		locker.Unlock();
1803 
1804 		unreserve_pages(reserved);
1805 
1806 		TA(ScrubbedPages(scrubCount));
1807 	}
1808 
1809 	return 0;
1810 }
1811 
1812 
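// Initializes a dummy "marker" page that the page daemon and page writer
// insert into the page queues to remember their scan position. Markers are
// busy, belong to no cache, and start out in the "unused" state.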
1813 static void
1814 init_page_marker(vm_page &marker)
1815 {
1816 	marker.SetCacheRef(NULL);
1817 	marker.InitState(PAGE_STATE_UNUSED);
1818 	marker.busy = true;
1819 #if DEBUG_PAGE_QUEUE
1820 	marker.queue = NULL;
1821 #endif
1822 #if DEBUG_PAGE_ACCESS
1823 	marker.accessing_thread = thread_get_current_thread_id();
1824 #endif
1825 }
1826 
1827 
1828 static void
1829 remove_page_marker(struct vm_page &marker)
1830 {
1831 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1832 
1833 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1834 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1835 
1836 	marker.SetState(PAGE_STATE_UNUSED);
1837 }
1838 
1839 
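/*!	Returns the next non-busy page from the head of the modified queue,
	requeueing every page it inspects so that subsequent calls continue
	behind it. \a maxPagesToSee bounds the number of pages looked at, so a
	page writer run terminates even if all remaining pages are busy.
*/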
1840 static vm_page*
1841 next_modified_page(page_num_t& maxPagesToSee)
1842 {
1843 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1844 
1845 	while (maxPagesToSee > 0) {
1846 		vm_page* page = sModifiedPageQueue.Head();
1847 		if (page == NULL)
1848 			return NULL;
1849 
1850 		sModifiedPageQueue.Requeue(page, true);
1851 
1852 		maxPagesToSee--;
1853 
1854 		if (!page->busy)
1855 			return page;
1856 	}
1857 
1858 	return NULL;
1859 }
1860 
1861 
1862 // #pragma mark -
1863 
1864 
1865 class PageWriteTransfer;
1866 class PageWriteWrapper;
1867 
1868 
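// The page writer gathers modified pages into a PageWriterRun: each page is
// wrapped in a PageWriteWrapper for bookkeeping and grouped into
// PageWriteTransfer objects, each of which describes one (possibly vectored)
// asynchronous write to a single cache.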
1869 class PageWriterRun {
1870 public:
1871 	status_t Init(uint32 maxPages);
1872 
1873 	void PrepareNextRun();
1874 	void AddPage(vm_page* page);
1875 	uint32 Go();
1876 
1877 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1878 		bool partialTransfer, size_t bytesTransferred);
1879 
1880 private:
1881 	uint32				fMaxPages;
1882 	uint32				fWrapperCount;
1883 	uint32				fTransferCount;
1884 	vint32				fPendingTransfers;
1885 	PageWriteWrapper*	fWrappers;
1886 	PageWriteTransfer*	fTransfers;
1887 	ConditionVariable	fAllFinishedCondition;
1888 };
1889 
1890 
1891 class PageWriteTransfer : public AsyncIOCallback {
1892 public:
1893 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1894 	bool AddPage(vm_page* page);
1895 
1896 	status_t Schedule(uint32 flags);
1897 
1898 	void SetStatus(status_t status, size_t transferred);
1899 
1900 	status_t Status() const	{ return fStatus; }
1901 	struct VMCache* Cache() const { return fCache; }
1902 	uint32 PageCount() const { return fPageCount; }
1903 
1904 	virtual void IOFinished(status_t status, bool partialTransfer,
1905 		generic_size_t bytesTransferred);
1906 private:
1907 	PageWriterRun*		fRun;
1908 	struct VMCache*		fCache;
1909 	off_t				fOffset;
1910 	uint32				fPageCount;
1911 	int32				fMaxPages;
1912 	status_t			fStatus;
1913 	uint32				fVecCount;
1914 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1915 };
1916 
1917 
1918 class PageWriteWrapper {
1919 public:
1920 	PageWriteWrapper();
1921 	~PageWriteWrapper();
1922 	void SetTo(vm_page* page);
1923 	bool Done(status_t result);
1924 
1925 private:
1926 	vm_page*			fPage;
1927 	struct VMCache*		fCache;
1928 	bool				fIsActive;
1929 };
1930 
1931 
1932 PageWriteWrapper::PageWriteWrapper()
1933 	:
1934 	fIsActive(false)
1935 {
1936 }
1937 
1938 
1939 PageWriteWrapper::~PageWriteWrapper()
1940 {
1941 	if (fIsActive)
1942 		panic("page write wrapper going out of scope but isn't completed");
1943 }
1944 
1945 
1946 /*!	The page's cache must be locked.
1947 */
1948 void
1949 PageWriteWrapper::SetTo(vm_page* page)
1950 {
1951 	DEBUG_PAGE_ACCESS_CHECK(page);
1952 
1953 	if (page->busy)
1954 		panic("setting page write wrapper to busy page");
1955 
1956 	if (fIsActive)
1957 		panic("re-setting page write wrapper that isn't completed");
1958 
1959 	fPage = page;
1960 	fCache = page->Cache();
1961 	fIsActive = true;
1962 
1963 	fPage->busy = true;
1964 	fPage->busy_writing = true;
1965 
1966 	// We have a modified page -- however, while we're writing it back,
1967 	// the page might still be mapped. In order not to lose any changes to the
1968 	// page, we mark it clean before actually writing it back; if
1969 	// writing the page fails for some reason, we'll just keep it in the
1970 	// modified page list, but that should happen only rarely.
1971 
1972 	// If the page is changed after we cleared the dirty flag, but before we
1973 	// had the chance to write it back, then we'll write it again later -- that
1974 	// will probably not happen that often, though.
1975 
1976 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
1977 }
1978 
1979 
1980 /*!	The page's cache must be locked.
1981 	The page queues must not be locked.
1982 	\return \c true if the page was written successfully or could otherwise be
1983 		handled, \c false otherwise.
1984 */
1985 bool
1986 PageWriteWrapper::Done(status_t result)
1987 {
1988 	if (!fIsActive)
1989 		panic("completing page write wrapper that is not active");
1990 
1991 	DEBUG_PAGE_ACCESS_START(fPage);
1992 
1993 	fPage->busy = false;
1994 		// Set unbusy and notify later by hand, since we might free the page.
1995 
1996 	bool success = true;
1997 
1998 	if (result == B_OK) {
1999 		// put it into the active/inactive queue
2000 		move_page_to_appropriate_queue(fPage);
2001 		fPage->busy_writing = false;
2002 		DEBUG_PAGE_ACCESS_END(fPage);
2003 	} else {
2004 		// Writing the page failed. One reason would be that the cache has been
2005 		// shrunk and the page no longer belongs to the file. Otherwise the
2006 		// actual I/O failed, in which case we'll simply keep the page modified.
2007 
2008 		if (!fPage->busy_writing) {
2009 			// The busy_writing flag was cleared. That means the cache has been
2010 			// shrunk while we were trying to write the page and we have to free
2011 			// it now.
2012 			vm_remove_all_page_mappings(fPage);
2013 // TODO: Unmapping should already happen when resizing the cache!
2014 			fCache->RemovePage(fPage);
2015 			free_page(fPage, false);
2016 		} else {
2017 			// The actual I/O failed -- mark the page modified again and move
2018 			// it to an appropriate queue. Temporary pages go to the active or
2019 			// inactive queue, so we don't keep trying to write them over and
2020 			// over again; non-temporary pages stay in the modified queue,
2021 			// though, so they don't get lost in the inactive queue.
2022 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2023 				strerror(result));
2024 
2025 			fPage->modified = true;
2026 			if (!fCache->temporary)
2027 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2028 			else if (fPage->IsMapped())
2029 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2030 			else
2031 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2032 
2033 			fPage->busy_writing = false;
2034 			DEBUG_PAGE_ACCESS_END(fPage);
2035 
2036 			success = false;
2037 		}
2038 	}
2039 
2040 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2041 	fIsActive = false;
2042 
2043 	return success;
2044 }
2045 
2046 
2047 /*!	The page's cache must be locked.
2048 */
2049 void
2050 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2051 {
2052 	fRun = run;
2053 	fCache = page->Cache();
2054 	fOffset = page->cache_offset;
2055 	fPageCount = 1;
2056 	fMaxPages = maxPages;
2057 	fStatus = B_OK;
2058 
2059 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2060 	fVecs[0].length = B_PAGE_SIZE;
2061 	fVecCount = 1;
2062 }
2063 
2064 
2065 /*!	The page's cache must be locked.
2066 */
2067 bool
2068 PageWriteTransfer::AddPage(vm_page* page)
2069 {
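	// A page can only be added if it belongs to the same cache and the
	// transfer hasn't reached its page limit (if any) yet. Physically
	// contiguous pages extend an existing iovec; otherwise a new iovec is
	// started (as long as there is room), provided the page is directly
	// adjacent to the run in terms of cache offset.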
2070 	if (page->Cache() != fCache
2071 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2072 		return false;
2073 
2074 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2075 		+ fVecs[fVecCount - 1].length;
2076 
2077 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2078 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2079 		// append to last iovec
2080 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2081 		fPageCount++;
2082 		return true;
2083 	}
2084 
2085 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2086 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2087 		&& (off_t)page->cache_offset == fOffset - 1) {
2088 		// prepend to first iovec and adjust offset
2089 		fVecs[0].base = nextBase;
2090 		fVecs[0].length += B_PAGE_SIZE;
2091 		fOffset = page->cache_offset;
2092 		fPageCount++;
2093 		return true;
2094 	}
2095 
2096 	if (((off_t)page->cache_offset == fOffset + fPageCount
2097 			|| (off_t)page->cache_offset == fOffset - 1)
2098 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2099 		// not physically contiguous or not in the right order
2100 		uint32 vectorIndex;
2101 		if ((off_t)page->cache_offset < fOffset) {
2102 			// we are prepending another vector; move the other vecs
2103 			for (uint32 i = fVecCount; i > 0; i--)
2104 				fVecs[i] = fVecs[i - 1];
2105 
2106 			fOffset = page->cache_offset;
2107 			vectorIndex = 0;
2108 		} else
2109 			vectorIndex = fVecCount;
2110 
2111 		fVecs[vectorIndex].base
2112 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2113 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2114 
2115 		fVecCount++;
2116 		fPageCount++;
2117 		return true;
2118 	}
2119 
2120 	return false;
2121 }
2122 
2123 
2124 status_t
2125 PageWriteTransfer::Schedule(uint32 flags)
2126 {
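	// If we are part of a PageWriterRun, schedule the write asynchronously;
	// IOFinished() will then report the result back to the run. Otherwise
	// write synchronously and evaluate the result right away.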
2127 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2128 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2129 
2130 	if (fRun != NULL) {
2131 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2132 			flags | B_PHYSICAL_IO_REQUEST, this);
2133 	}
2134 
2135 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2136 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2137 
2138 	SetStatus(status, writeLength);
2139 	return fStatus;
2140 }
2141 
2142 
2143 void
2144 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2145 {
2146 	// only succeed if all pages up to the last one have been written fully
2147 	// and the last page has at least been written partially
2148 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2149 		status = B_ERROR;
2150 
2151 	fStatus = status;
2152 }
2153 
2154 
2155 void
2156 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2157 	generic_size_t bytesTransferred)
2158 {
2159 	SetStatus(status, bytesTransferred);
2160 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2161 }
2162 
2163 
2164 status_t
2165 PageWriterRun::Init(uint32 maxPages)
2166 {
2167 	fMaxPages = maxPages;
2168 	fWrapperCount = 0;
2169 	fTransferCount = 0;
2170 	fPendingTransfers = 0;
2171 
2172 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2173 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2174 	if (fWrappers == NULL || fTransfers == NULL)
2175 		return B_NO_MEMORY;
2176 
2177 	return B_OK;
2178 }
2179 
2180 
2181 void
2182 PageWriterRun::PrepareNextRun()
2183 {
2184 	fWrapperCount = 0;
2185 	fTransferCount = 0;
2186 	fPendingTransfers = 0;
2187 }
2188 
2189 
2190 /*!	The page's cache must be locked.
2191 */
2192 void
2193 PageWriterRun::AddPage(vm_page* page)
2194 {
2195 	fWrappers[fWrapperCount++].SetTo(page);
2196 
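	// Try to add the page to the current transfer. If there is no transfer
	// yet, or the page doesn't fit (different cache, not adjacent, or the
	// transfer is full), start a new one.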
2197 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2198 		fTransfers[fTransferCount++].SetTo(this, page,
2199 			page->Cache()->MaxPagesPerAsyncWrite());
2200 	}
2201 }
2202 
2203 
2204 /*!	Writes all pages previously added.
2205 	\return The number of pages that could not be written or otherwise handled.
2206 */
2207 uint32
2208 PageWriterRun::Go()
2209 {
2210 	fPendingTransfers = fTransferCount;
2211 
2212 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2213 	ConditionVariableEntry waitEntry;
2214 	fAllFinishedCondition.Add(&waitEntry);
2215 
2216 	// schedule writes
2217 	for (uint32 i = 0; i < fTransferCount; i++)
2218 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2219 
2220 	// wait until all pages have been written
2221 	waitEntry.Wait();
2222 
2223 	// mark pages depending on whether they could be written or not
2224 
2225 	uint32 failedPages = 0;
2226 	uint32 wrapperIndex = 0;
2227 	for (uint32 i = 0; i < fTransferCount; i++) {
2228 		PageWriteTransfer& transfer = fTransfers[i];
2229 		transfer.Cache()->Lock();
2230 
2231 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2232 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2233 				failedPages++;
2234 		}
2235 
2236 		transfer.Cache()->Unlock();
2237 	}
2238 
2239 	ASSERT(wrapperIndex == fWrapperCount);
2240 
2241 	for (uint32 i = 0; i < fTransferCount; i++) {
2242 		PageWriteTransfer& transfer = fTransfers[i];
2243 		struct VMCache* cache = transfer.Cache();
2244 
2245 		// We've acquired a reference for each page
2246 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2247 			// We release the cache references after all pages were made
2248 			// unbusy again - otherwise releasing a vnode could deadlock.
2249 			cache->ReleaseStoreRef();
2250 			cache->ReleaseRef();
2251 		}
2252 	}
2253 
2254 	return failedPages;
2255 }
2256 
2257 
2258 void
2259 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2260 	bool partialTransfer, size_t bytesTransferred)
2261 {
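	// atomic_add() returns the previous value, so if it was 1, this was the
	// last pending transfer and Go() can be woken up.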
2262 	if (atomic_add(&fPendingTransfers, -1) == 1)
2263 		fAllFinishedCondition.NotifyAll();
2264 }
2265 
2266 
2267 /*!	The page writer continuously takes some pages from the modified
2268 	queue, writes them back, and moves them back to the active queue.
2269 	It runs in its own thread, and is only there to keep the number
2270 	of modified pages low, so that more pages can be reused with
2271 	fewer costs.
2272 */
2273 status_t
2274 page_writer(void* /*unused*/)
2275 {
2276 	const uint32 kNumPages = 256;
2277 #ifdef TRACE_VM_PAGE
2278 	uint32 writtenPages = 0;
2279 	bigtime_t lastWrittenTime = 0;
2280 	bigtime_t pageCollectionTime = 0;
2281 	bigtime_t pageWritingTime = 0;
2282 #endif
2283 
2284 	PageWriterRun run;
2285 	if (run.Init(kNumPages) != B_OK) {
2286 		panic("page writer: Failed to init PageWriterRun!");
2287 		return B_ERROR;
2288 	}
2289 
2290 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2291 
2292 	while (true) {
2293 // TODO: Maybe wait shorter when memory is low!
2294 		if (sModifiedPageQueue.Count() < kNumPages) {
2295 			sPageWriterCondition.Wait(3000000, true);
2296 				// every 3 seconds when no one triggers us
2297 		}
2298 
2299 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2300 		if (modifiedPages == 0)
2301 			continue;
2302 
2303 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2304 			// We ran through the whole queue without being able to write a
2305 			// single page. Take a break.
2306 			snooze(500000);
2307 			pagesSinceLastSuccessfulWrite = 0;
2308 		}
2309 
2310 #if ENABLE_SWAP_SUPPORT
2311 		page_stats pageStats;
2312 		get_page_stats(pageStats);
2313 		bool activePaging = do_active_paging(pageStats);
2314 #endif
2315 
2316 		// depending on how urgent it becomes to get pages to disk, we adjust
2317 		// our I/O priority
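		// The priority scales linearly with the number of modified pages and
		// is capped at the maximum once the threshold is reached or free
		// pages become critically low.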
2318 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2319 		int32 ioPriority = B_IDLE_PRIORITY;
2320 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2321 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2322 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2323 		} else {
2324 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2325 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2326 		}
2327 
2328 		thread_set_io_priority(ioPriority);
2329 
2330 		uint32 numPages = 0;
2331 		run.PrepareNextRun();
2332 
2333 		// TODO: make this laptop friendly, too (i.e. only start doing
2334 		// something if someone else did something or there is really
2335 		// enough to do).
2336 
2337 		// collect pages to be written
2338 #ifdef TRACE_VM_PAGE
2339 		pageCollectionTime -= system_time();
2340 #endif
2341 
2342 		page_num_t maxPagesToSee = modifiedPages;
2343 
2344 		while (numPages < kNumPages && maxPagesToSee > 0) {
2345 			vm_page *page = next_modified_page(maxPagesToSee);
2346 			if (page == NULL)
2347 				break;
2348 
2349 			PageCacheLocker cacheLocker(page, false);
2350 			if (!cacheLocker.IsLocked())
2351 				continue;
2352 
2353 			VMCache *cache = page->Cache();
2354 
2355 			// If the page is busy or its state has changed while we were
2356 			// locking the cache, just ignore it.
2357 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2358 				continue;
2359 
2360 			DEBUG_PAGE_ACCESS_START(page);
2361 
2362 			// Don't write back wired (locked) pages.
2363 			if (page->WiredCount() > 0) {
2364 				set_page_state(page, PAGE_STATE_ACTIVE);
2365 				DEBUG_PAGE_ACCESS_END(page);
2366 				continue;
2367 			}
2368 
2369 			// Write back temporary pages only when we're actively paging.
2370 			if (cache->temporary
2371 #if ENABLE_SWAP_SUPPORT
2372 				&& (!activePaging
2373 					|| !cache->CanWritePage(
2374 							(off_t)page->cache_offset << PAGE_SHIFT))
2375 #endif
2376 				) {
2377 				// We can't/don't want to do anything with this page, so move it
2378 				// to one of the other queues.
2379 				if (page->mappings.IsEmpty())
2380 					set_page_state(page, PAGE_STATE_INACTIVE);
2381 				else
2382 					set_page_state(page, PAGE_STATE_ACTIVE);
2383 
2384 				DEBUG_PAGE_ACCESS_END(page);
2385 				continue;
2386 			}
2387 
2388 			// We need our own reference to the store, as it might currently
2389 			// be in the process of being destroyed.
2390 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2391 				DEBUG_PAGE_ACCESS_END(page);
2392 				cacheLocker.Unlock();
2393 				thread_yield(true);
2394 				continue;
2395 			}
2396 
2397 			run.AddPage(page);
2398 				// TODO: We're possibly adding pages of different caches and
2399 				// thus maybe of different underlying file systems here. This
2400 				// is a potential problem for loop file systems/devices, since
2401 				// we could mark a page busy that would need to be accessed
2402 				// when writing back another page, thus causing a deadlock.
2403 
2404 			DEBUG_PAGE_ACCESS_END(page);
2405 
2406 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2407 			TPW(WritePage(page));
2408 
2409 			cache->AcquireRefLocked();
2410 			numPages++;
2411 		}
2412 
2413 #ifdef TRACE_VM_PAGE
2414 		pageCollectionTime += system_time();
2415 #endif
2416 		if (numPages == 0)
2417 			continue;
2418 
2419 		// write pages to disk and do all the cleanup
2420 #ifdef TRACE_VM_PAGE
2421 		pageWritingTime -= system_time();
2422 #endif
2423 		uint32 failedPages = run.Go();
2424 #ifdef TRACE_VM_PAGE
2425 		pageWritingTime += system_time();
2426 
2427 		// debug output only...
2428 		writtenPages += numPages;
2429 		if (writtenPages >= 1024) {
2430 			bigtime_t now = system_time();
2431 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2432 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2433 				(now - lastWrittenTime) / 1000,
2434 				pageCollectionTime / 1000, pageWritingTime / 1000));
2435 			lastWrittenTime = now;
2436 
2437 			writtenPages -= 1024;
2438 			pageCollectionTime = 0;
2439 			pageWritingTime = 0;
2440 		}
2441 #endif
2442 
2443 		if (failedPages == numPages)
2444 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2445 		else
2446 			pagesSinceLastSuccessfulWrite = 0;
2447 	}
2448 
2449 	return B_OK;
2450 }
2451 
2452 
2453 // #pragma mark -
2454 
2455 
2456 // TODO: This should be done in the page daemon!
2457 #if 0
2458 #if ENABLE_SWAP_SUPPORT
2459 static bool
2460 free_page_swap_space(int32 index)
2461 {
2462 	vm_page *page = vm_page_at_index(index);
2463 	PageCacheLocker locker(page);
2464 	if (!locker.IsLocked())
2465 		return false;
2466 
2467 	DEBUG_PAGE_ACCESS_START(page);
2468 
2469 	VMCache* cache = page->Cache();
2470 	if (cache->temporary && page->WiredCount() == 0
2471 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2472 			&& page->usage_count > 0) {
2473 		// TODO: how to judge a page is highly active?
2474 		if (swap_free_page_swap_space(page)) {
2475 			// We need to mark the page modified, since otherwise it could be
2476 			// stolen and we'd lose its data.
2477 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2478 			TD(FreedPageSwap(page));
2479 			DEBUG_PAGE_ACCESS_END(page);
2480 			return true;
2481 		}
2482 	}
2483 	DEBUG_PAGE_ACCESS_END(page);
2484 	return false;
2485 }
2486 #endif
2487 #endif	// 0
2488 
2489 
2490 static vm_page *
2491 find_cached_page_candidate(struct vm_page &marker)
2492 {
2493 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2494 
2495 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2496 	vm_page *page;
2497 
2498 	if (marker.State() == PAGE_STATE_UNUSED) {
2499 		// Get the first page of the cached queue
2500 		page = sCachedPageQueue.Head();
2501 	} else {
2502 		// Get the next page of the current queue
2503 		if (marker.State() != PAGE_STATE_CACHED) {
2504 			panic("invalid marker %p state", &marker);
2505 			return NULL;
2506 		}
2507 
2508 		page = sCachedPageQueue.Next(&marker);
2509 		sCachedPageQueue.Remove(&marker);
2510 		marker.SetState(PAGE_STATE_UNUSED);
2511 	}
2512 
2513 	while (page != NULL) {
2514 		if (!page->busy) {
2515 			// we found a candidate, insert marker
2516 			marker.SetState(PAGE_STATE_CACHED);
2517 			sCachedPageQueue.InsertAfter(page, &marker);
2518 			return page;
2519 		}
2520 
2521 		page = sCachedPageQueue.Next(page);
2522 	}
2523 
2524 	return NULL;
2525 }
2526 
2527 
2528 static bool
2529 free_cached_page(vm_page *page, bool dontWait)
2530 {
2531 	// try to lock the page's cache
2532 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2533 		return false;
2534 	VMCache* cache = page->Cache();
2535 
2536 	AutoLocker<VMCache> cacheLocker(cache, true);
2537 	MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked);
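		// Make sure the reference vm_cache_acquire_locked_page_cache()
		// acquired is released again when we return; the cache is still
		// locked at that point, hence ReleaseRefLocked().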
2538 
2539 	// check again if that page is still a candidate
2540 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2541 		return false;
2542 
2543 	DEBUG_PAGE_ACCESS_START(page);
2544 
2545 	PAGE_ASSERT(page, !page->IsMapped());
2546 	PAGE_ASSERT(page, !page->modified);
2547 
2548 	// we can now steal this page
2549 
2550 	cache->RemovePage(page);
2551 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2552 		// vm_page_allocate_page_run()) can pick it up, since they would be
2553 		// required to lock the cache first, which would fail.
2554 
2555 	sCachedPageQueue.RemoveUnlocked(page);
2556 	return true;
2557 }
2558 
2559 
2560 static uint32
2561 free_cached_pages(uint32 pagesToFree, bool dontWait)
2562 {
2563 	vm_page marker;
2564 	init_page_marker(marker);
2565 
2566 	uint32 pagesFreed = 0;
2567 
2568 	while (pagesFreed < pagesToFree) {
2569 		vm_page *page = find_cached_page_candidate(marker);
2570 		if (page == NULL)
2571 			break;
2572 
2573 		if (free_cached_page(page, dontWait)) {
2574 			ReadLocker locker(sFreePageQueuesLock);
2575 			page->SetState(PAGE_STATE_FREE);
2576 			DEBUG_PAGE_ACCESS_END(page);
2577 			sFreePageQueue.PrependUnlocked(page);
2578 			locker.Unlock();
2579 
2580 			TA(StolenPage());
2581 
2582 			pagesFreed++;
2583 		}
2584 	}
2585 
2586 	remove_page_marker(marker);
2587 
2588 	return pagesFreed;
2589 }
2590 
2591 
2592 static void
2593 idle_scan_active_pages(page_stats& pageStats)
2594 {
2595 	VMPageQueue& queue = sActivePageQueue;
2596 
2597 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2598 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2599 
2600 	while (maxToScan > 0) {
2601 		maxToScan--;
2602 
2603 		// Get the next page. Note that we don't bother to lock here. We go with
2604 		// the assumption that on all architectures reading/writing pointers is
2605 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2606 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2607 		vm_page* page = queue.Head();
2608 		if (page == NULL)
2609 			break;
2610 
2611 		// lock the page's cache
2612 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2613 		if (cache == NULL)
2614 			continue;
2615 
2616 		if (page->State() != PAGE_STATE_ACTIVE) {
2617 			// page is no longer in the cache or in this queue
2618 			cache->ReleaseRefAndUnlock();
2619 			continue;
2620 		}
2621 
2622 		if (page->busy) {
2623 			// page is busy -- requeue at the end
2624 			vm_page_requeue(page, true);
2625 			cache->ReleaseRefAndUnlock();
2626 			continue;
2627 		}
2628 
2629 		DEBUG_PAGE_ACCESS_START(page);
2630 
2631 		// Get the page active/modified flags and update the page's usage count.
2632 		// We completely unmap inactive temporary pages. This saves us from
2633 		// having to iterate through the inactive list as well, since we'll be
2634 		// notified via page fault whenever such an inactive page is used again.
2635 		// We don't remove the mappings of non-temporary pages, since we
2636 		// wouldn't notice when those would become unused and could thus be
2637 		// moved to the cached list.
2638 		int32 usageCount;
2639 		if (page->WiredCount() > 0 || page->usage_count > 0
2640 			|| !cache->temporary) {
2641 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2642 		} else
2643 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2644 
2645 		if (usageCount > 0) {
2646 			usageCount += page->usage_count + kPageUsageAdvance;
2647 			if (usageCount > kPageUsageMax)
2648 				usageCount = kPageUsageMax;
2649 // TODO: This would probably also be the place to reclaim swap space.
2650 		} else {
2651 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2652 			if (usageCount < 0) {
2653 				usageCount = 0;
2654 				set_page_state(page, PAGE_STATE_INACTIVE);
2655 			}
2656 		}
2657 
2658 		page->usage_count = usageCount;
2659 
2660 		DEBUG_PAGE_ACCESS_END(page);
2661 
2662 		cache->ReleaseRefAndUnlock();
2663 	}
2664 }
2665 
2666 
2667 static void
2668 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2669 {
2670 	int32 pagesToFree = pageStats.unsatisfiedReservations
2671 		+ sFreeOrCachedPagesTarget
2672 		- (pageStats.totalFreePages + pageStats.cachedPages);
2673 	if (pagesToFree <= 0)
2674 		return;
2675 
2676 	bigtime_t time = system_time();
2677 	uint32 pagesScanned = 0;
2678 	uint32 pagesToCached = 0;
2679 	uint32 pagesToModified = 0;
2680 	uint32 pagesToActive = 0;
2681 
2682 	// Determine the maximum number of pages to send to the modified queue.
2683 	// Since it is relatively expensive to page out pages, we do that on a
2684 	// grander scale only when things get desperate.
2685 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2686 
2687 	vm_page marker;
2688 	init_page_marker(marker);
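		// The marker is inserted after each page we examine, so that we can
		// find our position in the queue again after dropping the queue lock
		// in order to lock the page's cache.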
2689 
2690 	VMPageQueue& queue = sInactivePageQueue;
2691 	InterruptsSpinLocker queueLocker(queue.GetLock());
2692 	uint32 maxToScan = queue.Count();
2693 
2694 	vm_page* nextPage = queue.Head();
2695 
2696 	while (pagesToFree > 0 && maxToScan > 0) {
2697 		maxToScan--;
2698 
2699 		// get the next page
2700 		vm_page* page = nextPage;
2701 		if (page == NULL)
2702 			break;
2703 		nextPage = queue.Next(page);
2704 
2705 		if (page->busy)
2706 			continue;
2707 
2708 		// mark the position
2709 		queue.InsertAfter(page, &marker);
2710 		queueLocker.Unlock();
2711 
2712 		// lock the page's cache
2713 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2714 		if (cache == NULL || page->busy
2715 				|| page->State() != PAGE_STATE_INACTIVE) {
2716 			if (cache != NULL)
2717 				cache->ReleaseRefAndUnlock();
2718 			queueLocker.Lock();
2719 			nextPage = queue.Next(&marker);
2720 			queue.Remove(&marker);
2721 			continue;
2722 		}
2723 
2724 		pagesScanned++;
2725 
2726 		DEBUG_PAGE_ACCESS_START(page);
2727 
2728 		// Get the accessed count, clear the accessed/modified flags and
2729 		// unmap the page, if it hasn't been accessed.
2730 		int32 usageCount;
2731 		if (page->WiredCount() > 0)
2732 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2733 		else
2734 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2735 
2736 		// update usage count
2737 		if (usageCount > 0) {
2738 			usageCount += page->usage_count + kPageUsageAdvance;
2739 			if (usageCount > kPageUsageMax)
2740 				usageCount = kPageUsageMax;
2741 		} else {
2742 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2743 			if (usageCount < 0)
2744 				usageCount = 0;
2745 		}
2746 
2747 		page->usage_count = usageCount;
2748 
2749 		// Move to fitting queue or requeue:
2750 		// * Active mapped pages go to the active queue.
2751 		// * Inactive mapped (i.e. wired) pages are requeued.
2752 		// * The remaining pages are cacheable. Thus, if unmodified they go to
2753 		//   the cached queue, otherwise to the modified queue (up to a limit).
2754 		//   Note that unlike in the idle scan we don't exempt pages of
2755 		//   temporary caches here. Apparently we really need memory, so we
2756 		//   better page out memory as well.
2757 		bool isMapped = page->IsMapped();
2758 		if (usageCount > 0) {
2759 			if (isMapped) {
2760 				set_page_state(page, PAGE_STATE_ACTIVE);
2761 				pagesToActive++;
2762 			} else
2763 				vm_page_requeue(page, true);
2764 		} else if (isMapped) {
2765 			vm_page_requeue(page, true);
2766 		} else if (!page->modified) {
2767 			set_page_state(page, PAGE_STATE_CACHED);
2768 			pagesToFree--;
2769 			pagesToCached++;
2770 		} else if (maxToFlush > 0) {
2771 			set_page_state(page, PAGE_STATE_MODIFIED);
2772 			maxToFlush--;
2773 			pagesToModified++;
2774 		} else
2775 			vm_page_requeue(page, true);
2776 
2777 		DEBUG_PAGE_ACCESS_END(page);
2778 
2779 		cache->ReleaseRefAndUnlock();
2780 
2781 		// remove the marker
2782 		queueLocker.Lock();
2783 		nextPage = queue.Next(&marker);
2784 		queue.Remove(&marker);
2785 	}
2786 
2787 	queueLocker.Unlock();
2788 
2789 	time = system_time() - time;
2790 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2791 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2792 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2793 		pagesToModified, pagesToActive);
2794 
2795 	// wake up the page writer, if we tossed it some pages
2796 	if (pagesToModified > 0)
2797 		sPageWriterCondition.WakeUp();
2798 }
2799 
2800 
2801 static void
2802 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2803 {
2804 	vm_page marker;
2805 	init_page_marker(marker);
2806 
2807 	VMPageQueue& queue = sActivePageQueue;
2808 	InterruptsSpinLocker queueLocker(queue.GetLock());
2809 	uint32 maxToScan = queue.Count();
2810 
2811 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2812 		+ sFreeOrCachedPagesTarget
2813 		- (pageStats.totalFreePages + pageStats.cachedPages)
2814 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2815 	if (pagesToDeactivate <= 0)
2816 		return;
2817 
2818 	bigtime_t time = system_time();
2819 	uint32 pagesAccessed = 0;
2820 	uint32 pagesToInactive = 0;
2821 	uint32 pagesScanned = 0;
2822 
2823 	vm_page* nextPage = queue.Head();
2824 
2825 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2826 		maxToScan--;
2827 
2828 		// get the next page
2829 		vm_page* page = nextPage;
2830 		if (page == NULL)
2831 			break;
2832 		nextPage = queue.Next(page);
2833 
2834 		if (page->busy)
2835 			continue;
2836 
2837 		// mark the position
2838 		queue.InsertAfter(page, &marker);
2839 		queueLocker.Unlock();
2840 
2841 		// lock the page's cache
2842 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2843 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2844 			if (cache != NULL)
2845 				cache->ReleaseRefAndUnlock();
2846 			queueLocker.Lock();
2847 			nextPage = queue.Next(&marker);
2848 			queue.Remove(&marker);
2849 			continue;
2850 		}
2851 
2852 		pagesScanned++;
2853 
2854 		DEBUG_PAGE_ACCESS_START(page);
2855 
2856 		// Get the page active/modified flags and update the page's usage count.
2857 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2858 
2859 		if (usageCount > 0) {
2860 			usageCount += page->usage_count + kPageUsageAdvance;
2861 			if (usageCount > kPageUsageMax)
2862 				usageCount = kPageUsageMax;
2863 			pagesAccessed++;
2864 // TODO: This would probably also be the place to reclaim swap space.
2865 		} else {
2866 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2867 			if (usageCount <= 0) {
2868 				usageCount = 0;
2869 				set_page_state(page, PAGE_STATE_INACTIVE);
2870 				pagesToInactive++;
2871 			}
2872 		}
2873 
2874 		page->usage_count = usageCount;
2875 
2876 		DEBUG_PAGE_ACCESS_END(page);
2877 
2878 		cache->ReleaseRefAndUnlock();
2879 
2880 		// remove the marker
2881 		queueLocker.Lock();
2882 		nextPage = queue.Next(&marker);
2883 		queue.Remove(&marker);
2884 	}
2885 
2886 	time = system_time() - time;
2887 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2888 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2889 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2890 }
2891 
2892 
2893 static void
2894 page_daemon_idle_scan(page_stats& pageStats)
2895 {
2896 	TRACE_DAEMON("page daemon: idle run\n");
2897 
2898 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2899 		// We want more actually free pages, so free some from the cached
2900 		// ones.
2901 		uint32 freed = free_cached_pages(
2902 			sFreePagesTarget - pageStats.totalFreePages, false);
2903 		if (freed > 0)
2904 			unreserve_pages(freed);
2905 		get_page_stats(pageStats);
2906 	}
2907 
2908 	// Walk the active list and move pages to the inactive queue.
2909 	get_page_stats(pageStats);
2910 	idle_scan_active_pages(pageStats);
2911 }
2912 
2913 
2914 static void
2915 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2916 {
2917 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
2918 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
2919 		pageStats.cachedPages, pageStats.unsatisfiedReservations
2920 			+ sFreeOrCachedPagesTarget
2921 			- (pageStats.totalFreePages + pageStats.cachedPages));
2922 
2923 	// Walk the inactive list and transfer pages to the cached and modified
2924 	// queues.
2925 	full_scan_inactive_pages(pageStats, despairLevel);
2926 
2927 	// Free cached pages. Also wake up reservation waiters.
2928 	get_page_stats(pageStats);
2929 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2930 		- (pageStats.totalFreePages);
2931 	if (pagesToFree > 0) {
2932 		uint32 freed = free_cached_pages(pagesToFree, true);
2933 		if (freed > 0)
2934 			unreserve_pages(freed);
2935 	}
2936 
2937 	// Walk the active list and move pages to the inactive queue.
2938 	get_page_stats(pageStats);
2939 	full_scan_active_pages(pageStats, despairLevel);
2940 }
2941 
2942 
2943 static status_t
2944 page_daemon(void* /*unused*/)
2945 {
2946 	int32 despairLevel = 0;
2947 
2948 	while (true) {
2949 		sPageDaemonCondition.ClearActivated();
2950 
2951 		// evaluate the free pages situation
2952 		page_stats pageStats;
2953 		get_page_stats(pageStats);
2954 
2955 		if (!do_active_paging(pageStats)) {
2956 			// Things look good -- just maintain statistics and keep the pool
2957 			// of actually free pages full enough.
2958 			despairLevel = 0;
2959 			page_daemon_idle_scan(pageStats);
2960 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
2961 		} else {
2962 			// Not enough free pages. We need to do some real work.
2963 			despairLevel = std::min(despairLevel + 1, (int32)3);
2964 			page_daemon_full_scan(pageStats, despairLevel);
2965 
2966 			// Don't wait after the first full scan, but rather immediately
2967 			// check whether we were successful in freeing enough pages and
2968 			// re-run with increased despair level. The first scan is
2969 			// conservative with respect to moving inactive modified pages to
2970 			// the modified list to avoid thrashing. The second scan, however,
2971 			// will not hold back.
2972 			if (despairLevel > 1)
2973 				snooze(kBusyScanWaitInterval);
2974 		}
2975 	}
2976 
2977 	return B_OK;
2978 }
2979 
2980 
2981 /*!	Returns how many pages could *not* be reserved.
2982 */
2983 static uint32
2984 reserve_pages(uint32 count, int priority, bool dontWait)
2985 {
2986 	int32 dontTouch = kPageReserveForPriority[priority];
2987 
2988 	while (true) {
2989 		count -= reserve_some_pages(count, dontTouch);
2990 		if (count == 0)
2991 			return 0;
2992 
2993 		if (sUnsatisfiedPageReservations == 0) {
2994 			count -= free_cached_pages(count, dontWait);
2995 			if (count == 0)
2996 				return count;
2997 		}
2998 
2999 		if (dontWait)
3000 			return count;
3001 
3002 		// we need to wait for pages to become available
3003 
3004 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3005 
3006 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3007 		sUnsatisfiedPageReservations += count;
3008 
3009 		if (sUnreservedFreePages > dontTouch) {
3010 			// the situation changed
3011 			sUnsatisfiedPageReservations -= count;
3012 			continue;
3013 		}
3014 
3015 		PageReservationWaiter waiter;
3016 		waiter.dontTouch = dontTouch;
3017 		waiter.missing = count;
3018 		waiter.thread = thread_get_current_thread();
3019 		waiter.threadPriority = waiter.thread->priority;
3020 
3021 		// insert ordered (i.e. after all waiters with higher or equal priority)
3022 		PageReservationWaiter* otherWaiter = NULL;
3023 		for (PageReservationWaiterList::Iterator it
3024 				= sPageReservationWaiters.GetIterator();
3025 			(otherWaiter = it.Next()) != NULL;) {
3026 			if (waiter < *otherWaiter)
3027 				break;
3028 		}
3029 
3030 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3031 
3032 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3033 			"waiting for pages");
3034 
3035 		if (notifyDaemon)
3036 			sPageDaemonCondition.WakeUp();
3037 
3038 		pageDeficitLocker.Unlock();
3039 
3040 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3041 		thread_block();
3042 
3043 		pageDeficitLocker.Lock();
3044 
3045 		return 0;
3046 	}
3047 }
3048 
3049 
3050 //	#pragma mark - private kernel API
3051 
3052 
3053 /*!	Writes a range of modified pages of a cache to disk.
3054 	You need to hold the VMCache lock when calling this function.
3055 	Note that the cache lock is released in this function.
3056 	\param cache The cache.
3057 	\param firstPage Offset (in page size units) of the first page in the range.
3058 	\param endPage End offset (in page size units) of the page range. The page
3059 		at this offset is not included.
3060 */
3061 status_t
3062 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3063 	uint32 endPage)
3064 {
3065 	static const int32 kMaxPages = 256;
3066 	int32 maxPages = cache->MaxPagesPerWrite();
3067 	if (maxPages < 0 || maxPages > kMaxPages)
3068 		maxPages = kMaxPages;
3069 
3070 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3071 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3072 
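	// Allocate the wrapper pool and the pointer array on the heap. If that
	// fails, fall back to the small on-stack arrays and write only one page
	// at a time. The pool holds one wrapper more than maxPages, since the
	// page that no longer fits into the current transfer already needs a
	// wrapper while the previous transfer's wrappers are still in use.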
3073 	PageWriteWrapper stackWrappersPool[2];
3074 	PageWriteWrapper* stackWrappers[1];
3075 	PageWriteWrapper* wrapperPool
3076 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3077 	PageWriteWrapper** wrappers
3078 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3079 	if (wrapperPool == NULL || wrappers == NULL) {
3080 		// don't fail, just limit our capabilities
3081 		free(wrapperPool);
3082 		free(wrappers);
3083 		wrapperPool = stackWrappersPool;
3084 		wrappers = stackWrappers;
3085 		maxPages = 1;
3086 	}
3087 
3088 	int32 nextWrapper = 0;
3089 	int32 usedWrappers = 0;
3090 
3091 	PageWriteTransfer transfer;
3092 	bool transferEmpty = true;
3093 
3094 	VMCachePagesTree::Iterator it
3095 		= cache->pages.GetIterator(firstPage, true, true);
3096 
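	// Iterate over the pages in the given range. A page that is busy, not
	// modified, or that doesn't fit into the current transfer ends the run
	// collected so far: the pending transfer is scheduled and its wrappers
	// are completed before a new transfer is started.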
3097 	while (true) {
3098 		vm_page* page = it.Next();
3099 		if (page == NULL || page->cache_offset >= endPage) {
3100 			if (transferEmpty)
3101 				break;
3102 
3103 			page = NULL;
3104 		}
3105 
3106 		if (page != NULL) {
3107 			if (page->busy
3108 				|| (page->State() != PAGE_STATE_MODIFIED
3109 					&& !vm_test_map_modification(page))) {
3110 				page = NULL;
3111 			}
3112 		}
3113 
3114 		PageWriteWrapper* wrapper = NULL;
3115 		if (page != NULL) {
3116 			wrapper = &wrapperPool[nextWrapper++];
3117 			if (nextWrapper > maxPages)
3118 				nextWrapper = 0;
3119 
3120 			DEBUG_PAGE_ACCESS_START(page);
3121 
3122 			wrapper->SetTo(page);
3123 
3124 			if (transferEmpty || transfer.AddPage(page)) {
3125 				if (transferEmpty) {
3126 					transfer.SetTo(NULL, page, maxPages);
3127 					transferEmpty = false;
3128 				}
3129 
3130 				DEBUG_PAGE_ACCESS_END(page);
3131 
3132 				wrappers[usedWrappers++] = wrapper;
3133 				continue;
3134 			}
3135 
3136 			DEBUG_PAGE_ACCESS_END(page);
3137 		}
3138 
3139 		if (transferEmpty)
3140 			continue;
3141 
3142 		cache->Unlock();
3143 		status_t status = transfer.Schedule(0);
3144 		cache->Lock();
3145 
3146 		for (int32 i = 0; i < usedWrappers; i++)
3147 			wrappers[i]->Done(status);
3148 
3149 		usedWrappers = 0;
3150 
3151 		if (page != NULL) {
3152 			transfer.SetTo(NULL, page, maxPages);
3153 			wrappers[usedWrappers++] = wrapper;
3154 		} else
3155 			transferEmpty = true;
3156 	}
3157 
3158 	if (wrapperPool != stackWrappersPool) {
3159 		delete[] wrapperPool;
3160 		delete[] wrappers;
3161 	}
3162 
3163 	return B_OK;
3164 }
3165 
3166 
3167 /*!	You need to hold the VMCache lock when calling this function.
3168 	Note that the cache lock is released in this function.
3169 */
3170 status_t
3171 vm_page_write_modified_pages(VMCache *cache)
3172 {
3173 	return vm_page_write_modified_page_range(cache, 0,
3174 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3175 }
3176 
3177 
3178 /*!	Schedules the page writer to write back the specified \a page.
3179 	Note, however, that it might not do this immediately, and it can well
3180 	take several seconds until the page is actually written out.
3181 */
3182 void
3183 vm_page_schedule_write_page(vm_page *page)
3184 {
3185 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3186 
3187 	vm_page_requeue(page, false);
3188 
3189 	sPageWriterCondition.WakeUp();
3190 }
3191 
3192 
3193 /*!	Cache must be locked.
3194 */
3195 void
3196 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3197 	uint32 endPage)
3198 {
3199 	uint32 modified = 0;
3200 	for (VMCachePagesTree::Iterator it
3201 				= cache->pages.GetIterator(firstPage, true, true);
3202 			vm_page *page = it.Next();) {
3203 		if (page->cache_offset >= endPage)
3204 			break;
3205 
3206 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3207 			DEBUG_PAGE_ACCESS_START(page);
3208 			vm_page_requeue(page, false);
3209 			modified++;
3210 			DEBUG_PAGE_ACCESS_END(page);
3211 		}
3212 	}
3213 
3214 	if (modified > 0)
3215 		sPageWriterCondition.WakeUp();
3216 }
3217 
3218 
3219 void
3220 vm_page_init_num_pages(kernel_args *args)
3221 {
3222 	// calculate the size of memory by looking at the physical_memory_range array
3223 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3224 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3225 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3226 
3227 	sNonExistingPages = 0;
3228 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3229 
3230 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3231 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3232 		if (start > physicalPagesEnd)
3233 			sNonExistingPages += start - physicalPagesEnd;
3234 		physicalPagesEnd = start
3235 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3236 
3237 #ifdef LIMIT_AVAILABLE_MEMORY
3238 		page_num_t available
3239 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3240 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3241 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3242 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3243 			break;
3244 		}
3245 #endif
3246 	}
3247 
3248 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3249 		sPhysicalPageOffset, physicalPagesEnd));
3250 
3251 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3252 }
3253 
3254 
3255 status_t
3256 vm_page_init(kernel_args *args)
3257 {
3258 	TRACE(("vm_page_init: entry\n"));
3259 
3260 	// init page queues
3261 	sModifiedPageQueue.Init("modified pages queue");
3262 	sInactivePageQueue.Init("inactive pages queue");
3263 	sActivePageQueue.Init("active pages queue");
3264 	sCachedPageQueue.Init("cached pages queue");
3265 	sFreePageQueue.Init("free pages queue");
3266 	sClearPageQueue.Init("clear pages queue");
3267 
3268 	new (&sPageReservationWaiters) PageReservationWaiterList;
3269 
3270 	// map in the new free page table
3271 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3272 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3273 
3274 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3275 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3276 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3277 
3278 	// initialize the free page table
3279 	for (uint32 i = 0; i < sNumPages; i++) {
3280 		sPages[i].Init(sPhysicalPageOffset + i);
3281 		sFreePageQueue.Append(&sPages[i]);
3282 
3283 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3284 		sPages[i].allocation_tracking_info.Clear();
3285 #endif
3286 	}
3287 
3288 	sUnreservedFreePages = sNumPages;
3289 
3290 	TRACE(("initialized table\n"));
3291 
3292 	// mark the ranges between usable physical memory unused
3293 	phys_addr_t previousEnd = 0;
3294 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3295 		phys_addr_t base = args->physical_memory_range[i].start;
3296 		phys_size_t size = args->physical_memory_range[i].size;
3297 		if (base > previousEnd) {
3298 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3299 				(base - previousEnd) / B_PAGE_SIZE, false);
3300 		}
3301 		previousEnd = base + size;
3302 	}
3303 
3304 	// mark the allocated physical page ranges wired
3305 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3306 		mark_page_range_in_use(
3307 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3308 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3309 	}
3310 
3311 	// The target of actually free pages. This must be at least the system
3312 	// reserve, but should be a few more pages, so we don't have to extract
3313 	// a cached page with each allocation.
3314 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3315 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3316 
3317 	// The target of free + cached and inactive pages. On low-memory machines
3318 	// keep things tight. free + cached is the pool of immediately allocatable
3319 	// pages. We want a few inactive pages, so when we're actually paging, we
3320 	// have a reasonably large set of pages to work with.
3321 	if (sUnreservedFreePages < 16 * 1024) {
3322 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3323 		sInactivePagesTarget = sFreePagesTarget / 3;
3324 	} else {
3325 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3326 		sInactivePagesTarget = sFreePagesTarget / 2;
3327 	}
3328 
3329 	TRACE(("vm_page_init: exit\n"));
3330 
3331 	return B_OK;
3332 }
3333 
3334 
3335 status_t
3336 vm_page_init_post_area(kernel_args *args)
3337 {
3338 	void *dummy;
3339 
3340 	dummy = sPages;
3341 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3342 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3343 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3344 
3345 	add_debugger_command("page_stats", &dump_page_stats,
3346 		"Dump statistics about page usage");
3347 	add_debugger_command_etc("page", &dump_page,
3348 		"Dump page info",
3349 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3350 		"Prints information for the physical page. If neither \"-p\" nor\n"
3351 		"\"-v\" are given, the provided address is interpreted as address of\n"
3352 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3353 		"given, the address is the physical address of the page. If \"-v\" is\n"
3354 		"given, the address is interpreted as virtual address in the current\n"
3355 		"thread's address space and for the page it is mapped to (if any)\n"
3356 		"information are printed. If \"-m\" is specified, the command will\n"
3357 		"search all known address spaces for mappings to that page and print\n"
3358 		"them.\n", 0);
3359 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3360 	add_debugger_command("find_page", &find_page,
3361 		"Find out which queue a page is actually in");
3362 
3363 #ifdef TRACK_PAGE_USAGE_STATS
3364 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3365 		"Dumps statistics about page usage counts",
3366 		"\n"
3367 		"Dumps statistics about page usage counts.\n",
3368 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3369 #endif
3370 
3371 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3372 	add_debugger_command_etc("page_allocations_per_caller",
3373 		&dump_page_allocations_per_caller,
3374 		"Dump current page allocations summed up per caller",
3375 		"[ -d <caller> ] [ -r ]\n"
3376 		"The current allocations will by summed up by caller (their count)\n"
3377 		"printed in decreasing order by count.\n"
3378 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3379 		"including the respective stack trace.\n"
3380 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3381 		"the information, so the next command invocation will only show the\n"
3382 		"allocations made after the reset.\n", 0);
3383 	add_debugger_command_etc("page_allocation_infos",
3384 		&dump_page_allocation_infos,
3385 		"Dump current page allocations",
3386 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3387 		"[ --thread <thread ID> ]\n"
3388 		"The current allocations filtered by optional values will be printed.\n"
3389 		"The optional \"-p\" page number filters for a specific page,\n"
3390 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3391 		"and/or threads can be filtered (these only work if a corresponding\n"
3392 		"tracing entry is still available).\n"
3393 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3394 		"callers are printed, where available\n", 0);
3395 #endif
3396 
3397 	return B_OK;
3398 }
3399 
3400 
3401 status_t
3402 vm_page_init_post_thread(kernel_args *args)
3403 {
3404 	new (&sFreePageCondition) ConditionVariable;
3405 	sFreePageCondition.Publish(&sFreePageQueue, "free page");
3406 
3407 	// create a kernel thread to clear out pages
3408 
3409 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3410 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3411 	resume_thread(thread);
3412 
3413 	// start page writer
3414 
3415 	sPageWriterCondition.Init("page writer");
3416 
3417 	thread = spawn_kernel_thread(&page_writer, "page writer",
3418 		B_NORMAL_PRIORITY + 1, NULL);
3419 	resume_thread(thread);
3420 
3421 	// start page daemon
3422 
3423 	sPageDaemonCondition.Init("page daemon");
3424 
3425 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3426 		B_NORMAL_PRIORITY, NULL);
3427 	resume_thread(thread);
3428 
3429 	return B_OK;
3430 }
3431 
3432 
3433 status_t
3434 vm_mark_page_inuse(page_num_t page)
3435 {
3436 	return vm_mark_page_range_inuse(page, 1);
3437 }
3438 
3439 
3440 status_t
3441 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3442 {
3443 	return mark_page_range_in_use(startPage, length, false);
3444 }
3445 
3446 
3447 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3448 */
3449 void
3450 vm_page_unreserve_pages(vm_page_reservation* reservation)
3451 {
3452 	uint32 count = reservation->count;
3453 	reservation->count = 0;
3454 
3455 	if (count == 0)
3456 		return;
3457 
3458 	TA(UnreservePages(count));
3459 
3460 	unreserve_pages(count);
3461 }
3462 
3463 
3464 /*!	With this call, you can reserve a number of free pages in the system.
3465 	They will only be handed out to someone who has actually reserved them.
3466 	This call returns as soon as the number of requested pages has been
3467 	reached.
3468 	The caller must not hold any cache lock or the function might deadlock.
3469 */
3470 void
3471 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3472 	int priority)
3473 {
3474 	reservation->count = count;
3475 
3476 	if (count == 0)
3477 		return;
3478 
3479 	TA(ReservePages(count));
3480 
3481 	reserve_pages(count, priority, false);
3482 }
3483 
3484 
3485 bool
3486 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3487 	int priority)
3488 {
3489 	if (count == 0) {
3490 		reservation->count = count;
3491 		return true;
3492 	}
3493 
3494 	uint32 remaining = reserve_pages(count, priority, true);
3495 	if (remaining == 0) {
3496 		TA(ReservePages(count));
3497 		reservation->count = count;
3498 		return true;
3499 	}
3500 
3501 	unreserve_pages(count - remaining);
3502 
3503 	return false;
3504 }
3505 
3506 
3507 vm_page *
3508 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3509 {
3510 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3511 	ASSERT(pageState != PAGE_STATE_FREE);
3512 	ASSERT(pageState != PAGE_STATE_CLEAR);
3513 
3514 	ASSERT(reservation->count > 0);
3515 	reservation->count--;
3516 
3517 	VMPageQueue* queue;
3518 	VMPageQueue* otherQueue;
3519 
3520 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3521 		queue = &sClearPageQueue;
3522 		otherQueue = &sFreePageQueue;
3523 	} else {
3524 		queue = &sFreePageQueue;
3525 		otherQueue = &sClearPageQueue;
3526 	}
3527 
3528 	ReadLocker locker(sFreePageQueuesLock);
3529 
3530 	vm_page* page = queue->RemoveHeadUnlocked();
3531 	if (page == NULL) {
3532 		// if the primary queue was empty, grab the page from the
3533 		// secondary queue
3534 		page = otherQueue->RemoveHeadUnlocked();
3535 
3536 		if (page == NULL) {
3537 			// Unlikely, but possible: the page we have reserved has moved
3538 			// between the queues after we checked the first queue. Grab the
3539 			// write locker to make sure this doesn't happen again.
3540 			locker.Unlock();
3541 			WriteLocker writeLocker(sFreePageQueuesLock);
3542 
3543 			page = queue->RemoveHead();
3544 			if (page == NULL)
3545 				page = otherQueue->RemoveHead();
3546 
3547 			if (page == NULL) {
3548 				panic("Had reserved page, but there is none!");
3549 				return NULL;
3550 			}
3551 
3552 			// downgrade to read lock
3553 			locker.Lock();
3554 		}
3555 	}
3556 
3557 	if (page->CacheRef() != NULL)
3558 		panic("supposed to be free page %p has cache\n", page);
3559 
3560 	DEBUG_PAGE_ACCESS_START(page);
3561 
3562 	int oldPageState = page->State();
3563 	page->SetState(pageState);
3564 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3565 	page->usage_count = 0;
3566 	page->accessed = false;
3567 	page->modified = false;
3568 
3569 	locker.Unlock();
3570 
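	// If the target state has an associated queue, i.e. it is not one of the
	// unqueued states, append the page to it right away.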
3571 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3572 		sPageQueues[pageState].AppendUnlocked(page);
3573 
3574 	// clear the page, if we had to take it from the free queue and a clear
3575 	// page was requested
3576 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3577 		clear_page(page);
3578 
3579 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3580 	page->allocation_tracking_info.Init(
3581 		TA(AllocatePage(page->physical_page_number)));
3582 #else
3583 	TA(AllocatePage(page->physical_page_number));
3584 #endif
3585 
3586 	return page;
3587 }
3588 
3589 
3590 static void
3591 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3592 	VMPageQueue::PageList& clearPages)
3593 {
3594 	while (vm_page* page = freePages.RemoveHead()) {
3595 		page->busy = false;
3596 		page->SetState(PAGE_STATE_FREE);
3597 		DEBUG_PAGE_ACCESS_END(page);
3598 		sFreePageQueue.PrependUnlocked(page);
3599 	}
3600 
3601 	while (vm_page* page = clearPages.RemoveHead()) {
3602 		page->busy = false;
3603 		page->SetState(PAGE_STATE_CLEAR);
3604 		DEBUG_PAGE_ACCESS_END(page);
3605 		sClearPageQueue.PrependUnlocked(page);
3606 	}
3607 }
3608 
3609 
3610 /*!	Tries to allocate a contiguous run of \a length pages starting at
3611 	index \a start.
3612 
3613 	The caller must have write-locked the free/clear page queues. The function
3614 	will unlock regardless of whether it succeeds or fails.
3615 
3616 	If the function fails, it cleans up after itself, i.e. it will free all
3617 	pages it managed to allocate.
3618 
3619 	\param start The start index (into \c sPages) of the run.
3620 	\param length The number of pages to allocate.
3621 	\param flags Page allocation flags. Encodes the state the function shall
3622 		set the allocated pages to, whether the pages shall be marked busy
3623 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3624 		(VM_PAGE_ALLOC_CLEAR).
3625 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3626 		in locked state. Will be unlocked by the function.
3627 	\return The index of the first page that could not be allocated. \a length
3628 		is returned when the function was successful.
3629 */
3630 static page_num_t
3631 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3632 	WriteLocker& freeClearQueueLocker)
3633 {
3634 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3635 	ASSERT(pageState != PAGE_STATE_FREE);
3636 	ASSERT(pageState != PAGE_STATE_CLEAR);
3637 	ASSERT(start + length <= sNumPages);
3638 
3639 	// Pull the free/clear pages out of their respective queues. Cached pages
3640 	// are allocated later.
3641 	page_num_t cachedPages = 0;
3642 	VMPageQueue::PageList freePages;
3643 	VMPageQueue::PageList clearPages;
3644 	page_num_t i = 0;
3645 	for (; i < length; i++) {
3646 		bool pageAllocated = true;
3647 		bool noPage = false;
3648 		vm_page& page = sPages[start + i];
3649 		switch (page.State()) {
3650 			case PAGE_STATE_CLEAR:
3651 				DEBUG_PAGE_ACCESS_START(&page);
3652 				sClearPageQueue.Remove(&page);
3653 				clearPages.Add(&page);
3654 				break;
3655 			case PAGE_STATE_FREE:
3656 				DEBUG_PAGE_ACCESS_START(&page);
3657 				sFreePageQueue.Remove(&page);
3658 				freePages.Add(&page);
3659 				break;
3660 			case PAGE_STATE_CACHED:
3661 				// We allocate cached pages later.
3662 				cachedPages++;
3663 				pageAllocated = false;
3664 				break;
3665 
3666 			default:
3667 				// Probably a page was cached when our caller checked. Now it's
3668 				// gone and we have to abort.
3669 				noPage = true;
3670 				break;
3671 		}
3672 
3673 		if (noPage)
3674 			break;
3675 
3676 		if (pageAllocated) {
3677 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3678 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3679 			page.usage_count = 0;
3680 			page.accessed = false;
3681 			page.modified = false;
3682 		}
3683 	}
3684 
3685 	if (i < length) {
3686 		// failed to allocate a page -- free all that we've got
3687 		allocate_page_run_cleanup(freePages, clearPages);
3688 		return i;
3689 	}
3690 
3691 	freeClearQueueLocker.Unlock();
3692 
3693 	if (cachedPages > 0) {
3694 		// allocate the pages that weren't free but cached
3695 		page_num_t freedCachedPages = 0;
3696 		page_num_t nextIndex = start;
3697 		vm_page* freePage = freePages.Head();
3698 		vm_page* clearPage = clearPages.Head();
3699 		while (cachedPages > 0) {
3700 			// skip, if we've already got the page
3701 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3702 				freePage = freePages.GetNext(freePage);
3703 				nextIndex++;
3704 				continue;
3705 			}
3706 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3707 				clearPage = clearPages.GetNext(clearPage);
3708 				nextIndex++;
3709 				continue;
3710 			}
3711 
3712 			// free the page, if it is still cached
3713 			vm_page& page = sPages[nextIndex];
3714 			if (!free_cached_page(&page, false)) {
3715 				// TODO: if the page turns out to have been freed already,
3716 				// there would be no need to fail
3717 				break;
3718 			}
3719 
3720 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3721 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3722 			page.usage_count = 0;
3723 			page.accessed = false;
3724 			page.modified = false;
3725 
3726 			freePages.InsertBefore(freePage, &page);
3727 			freedCachedPages++;
3728 			cachedPages--;
3729 			nextIndex++;
3730 		}
3731 
3732 		// If we have freed cached pages, we need to balance things.
3733 		if (freedCachedPages > 0)
3734 			unreserve_pages(freedCachedPages);
3735 
3736 		if (nextIndex - start < length) {
3737 			// failed to allocate all cached pages -- free all that we've got
3738 			freeClearQueueLocker.Lock();
3739 			allocate_page_run_cleanup(freePages, clearPages);
3740 			freeClearQueueLocker.Unlock();
3741 
3742 			return nextIndex - start;
3743 		}
3744 	}
3745 
3746 	// clear pages, if requested
3747 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3748 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3749 				vm_page* page = it.Next();) {
3750 			clear_page(page);
3751 		}
3752 	}
3753 
3754 	// add pages to target queue
3755 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3756 		freePages.MoveFrom(&clearPages);
3757 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3758 	}
3759 
3760 	// Note: We don't unreserve the pages since we pulled them out of the
3761 	// free/clear queues without adjusting sUnreservedFreePages.
3762 
3763 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3764 	AbstractTraceEntryWithStackTrace* traceEntry
3765 		= TA(AllocatePageRun(start, length));
3766 
3767 	for (page_num_t i = start; i < start + length; i++)
3768 		sPages[i].allocation_tracking_info.Init(traceEntry);
3769 #else
3770 	TA(AllocatePageRun(start, length));
3771 #endif
3772 
3773 	return length;
3774 }
3775 
3776 
3777 /*! Allocate a physically contiguous range of pages.
3778 
3779 	\param flags Page allocation flags. Encodes the state the function shall
3780 		set the allocated pages to, whether the pages shall be marked busy
3781 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3782 		(VM_PAGE_ALLOC_CLEAR).
3783 	\param length The number of contiguous pages to allocate.
3784 	\param restrictions Restrictions to the physical addresses of the page run
3785 		to allocate, including \c low_address, the first acceptable physical
3786 		address where the page run may start, \c high_address, the last
3787 		acceptable physical address where the page run may end (i.e. it must
3788 		hold \code runStartAddress + length <= high_address \endcode),
3789 		\c alignment, the alignment of the page run start address, and
3790 		\c boundary, multiples of which the page run must not cross.
3791 		Values set to \c 0 are ignored.
3792 	\param priority The page reservation priority (as passed to
3793 		vm_page_reserve_pages()).
3794 	\return The first page of the allocated page run on success; \c NULL
3795 		when the allocation failed.
3796 */
3797 vm_page*
3798 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3799 	const physical_address_restrictions* restrictions, int priority)
3800 {
3801 	// compute start and end page index
3802 	page_num_t requestedStart
3803 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3804 			- sPhysicalPageOffset;
3805 	page_num_t start = requestedStart;
3806 	page_num_t end;
3807 	if (restrictions->high_address > 0) {
3808 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3809 				sPhysicalPageOffset)
3810 			- sPhysicalPageOffset;
3811 		end = std::min(end, sNumPages);
3812 	} else
3813 		end = sNumPages;
3814 
3815 	// compute alignment mask
3816 	page_num_t alignmentMask
3817 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3818 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3819 		// alignment must be a power of 2
3820 
3821 	// compute the boundary mask
3822 	page_num_t boundaryMask = 0;
3823 	if (restrictions->boundary != 0) {
3824 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3825 		// boundary must be a power of two and not less than alignment and
3826 		// length
3827 		ASSERT(((boundary - 1) & boundary) == 0);
3828 		ASSERT(boundary >= alignmentMask + 1);
3829 		ASSERT(boundary >= length);
3830 
3831 		boundaryMask = -boundary;
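		// E.g. for a boundary of 16 pages this yields a mask with all bits
		// set except the low four, so "page & boundaryMask" rounds the page
		// number down to the start of its 16 page boundary window.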
3832 	}
3833 
3834 	vm_page_reservation reservation;
3835 	vm_page_reserve_pages(&reservation, length, priority);
3836 
3837 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3838 
3839 	// First we try to get a run with free pages only. If that fails, we also
3840 	// consider cached pages. If there are only a few free pages and many cached
3841 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3842 	// the first iteration in this case.
3843 	int32 freePages = sUnreservedFreePages;
3844 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3845 
3846 	for (;;) {
3847 		if (alignmentMask != 0 || boundaryMask != 0) {
3848 			page_num_t offsetStart = start + sPhysicalPageOffset;
3849 
3850 			// enforce alignment
3851 			if ((offsetStart & alignmentMask) != 0)
3852 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3853 
3854 			// enforce boundary
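			// (The run stays within a single boundary window iff its first
			// and last page agree in all bits selected by boundaryMask; if
			// they don't, move the run up so it starts at the crossed
			// boundary.)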
3855 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3856 				+ length - 1)) & boundaryMask) != 0) {
3857 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3858 			}
3859 
3860 			start = offsetStart - sPhysicalPageOffset;
3861 		}
3862 
3863 		if (start + length > end) {
3864 			if (useCached == 0) {
3865 				// The first iteration with free pages only was unsuccessful.
3866 				// Try again also considering cached pages.
3867 				useCached = 1;
3868 				start = requestedStart;
3869 				continue;
3870 			}
3871 
3872 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3873 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3874 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3875 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3876 				end, restrictions->alignment, restrictions->boundary);
3877 
3878 			freeClearQueueLocker.Unlock();
3879 			vm_page_unreserve_pages(&reservation);
3880 			return NULL;
3881 		}
3882 
3883 		bool foundRun = true;
3884 		page_num_t i;
3885 		for (i = 0; i < length; i++) {
3886 			uint32 pageState = sPages[start + i].State();
3887 			if (pageState != PAGE_STATE_FREE
3888 				&& pageState != PAGE_STATE_CLEAR
3889 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3890 				foundRun = false;
3891 				break;
3892 			}
3893 		}
3894 
3895 		if (foundRun) {
3896 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3897 			if (i == length)
3898 				return &sPages[start];
3899 
3900 			// apparently a cached page couldn't be allocated -- skip it and
3901 			// continue
3902 			freeClearQueueLocker.Lock();
3903 		}
3904 
3905 		start += i + 1;
3906 	}
3907 }
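
/*	Usage sketch: a caller might allocate a 16 page, 64 kB aligned run below
	16 MB roughly as follows (variable names and values are illustrative
	only; error handling is reduced to the NULL check):

		physical_address_restrictions restrictions = {};
		restrictions.high_address = 16 * 1024 * 1024;
		restrictions.alignment = 64 * 1024;

		vm_page* firstPage = vm_page_allocate_page_run(
			PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR, 16, &restrictions,
			VM_PRIORITY_SYSTEM);
		if (firstPage == NULL)
			return B_NO_MEMORY;

		phys_addr_t physicalBase
			= (phys_addr_t)firstPage->physical_page_number * B_PAGE_SIZE;
*/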
3908 
3909 
3910 vm_page *
3911 vm_page_at_index(int32 index)
3912 {
3913 	return &sPages[index];
3914 }
3915 
3916 
3917 vm_page *
3918 vm_lookup_page(page_num_t pageNumber)
3919 {
3920 	if (pageNumber < sPhysicalPageOffset)
3921 		return NULL;
3922 
3923 	pageNumber -= sPhysicalPageOffset;
3924 	if (pageNumber >= sNumPages)
3925 		return NULL;
3926 
3927 	return &sPages[pageNumber];
3928 }
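
// Usage sketch (illustrative): callers pass a page number, not an address,
// e.g. vm_lookup_page(physicalAddress / B_PAGE_SIZE), and must handle a
// NULL result for addresses outside the known physical memory range.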
3929 
3930 
3931 bool
3932 vm_page_is_dummy(struct vm_page *page)
3933 {
3934 	return page < sPages || page >= sPages + sNumPages;
3935 }
3936 
3937 
3938 /*!	Free the page that belonged to a certain cache.
3939 	You can use vm_page_set_state() manually if you prefer, but only
3940 	if the page's state is not PAGE_STATE_MODIFIED.
3941 */
3942 void
3943 vm_page_free(VMCache *cache, vm_page *page)
3944 {
3945 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3946 		&& page->State() != PAGE_STATE_CLEAR);
3947 
3948 	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
3949 		atomic_add(&sModifiedTemporaryPages, -1);
3950 
3951 	free_page(page, false);
3952 }
3953 
3954 
3955 void
3956 vm_page_set_state(vm_page *page, int pageState)
3957 {
3958 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3959 		&& page->State() != PAGE_STATE_CLEAR);
3960 
3961 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR)
3962 		free_page(page, pageState == PAGE_STATE_CLEAR);
3963 	else
3964 		set_page_state(page, pageState);
3965 }
3966 
3967 
3968 /*!	Moves a page to either the head or the tail of its current queue,
3969 	depending on \a tail.
3970 	The page must have a cache and the cache must be locked!
3971 */
3972 void
3973 vm_page_requeue(struct vm_page *page, bool tail)
3974 {
3975 	PAGE_ASSERT(page, page->Cache() != NULL);
3976 	page->Cache()->AssertLocked();
3977 	// DEBUG_PAGE_ACCESS_CHECK(page);
3978 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
3979 		// when it requeues busy pages. The reason is that vm_soft_fault()
3980 		// (more precisely, fault_get_page()) and the file cache keep newly
3981 		// allocated pages accessed while they are reading them from disk. It
3982 		// would probably be better to change that code and reenable this
3983 		// check.
3984 
3985 	VMPageQueue *queue = NULL;
3986 
3987 	switch (page->State()) {
3988 		case PAGE_STATE_ACTIVE:
3989 			queue = &sActivePageQueue;
3990 			break;
3991 		case PAGE_STATE_INACTIVE:
3992 			queue = &sInactivePageQueue;
3993 			break;
3994 		case PAGE_STATE_MODIFIED:
3995 			queue = &sModifiedPageQueue;
3996 			break;
3997 		case PAGE_STATE_CACHED:
3998 			queue = &sCachedPageQueue;
3999 			break;
4000 		case PAGE_STATE_FREE:
4001 		case PAGE_STATE_CLEAR:
4002 			panic("vm_page_requeue() called for free/clear page %p", page);
4003 			return;
4004 		case PAGE_STATE_WIRED:
4005 		case PAGE_STATE_UNUSED:
4006 			return;
4007 		default:
4008 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
4009 				page, page->State());
4010 			break;
4011 	}
4012 
4013 	queue->RequeueUnlocked(page, tail);
4014 }
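
// Usage sketch (illustrative): the page's cache must be locked around the
// call, for example:
//
//		VMCache* cache = page->Cache();
//		cache->Lock();
//		vm_page_requeue(page, true);
//			// move the page to the tail of its current queue
//		cache->Unlock();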
4015 
4016 
4017 page_num_t
4018 vm_page_num_pages(void)
4019 {
4020 	return sNumPages - sNonExistingPages;
4021 }
4022 
4023 
4024 /*! There is a subtle distinction between the page counts returned by
4025 	this function and vm_page_num_free_pages():
4026 	The latter returns the number of pages that are completely uncommitted,
4027 	whereas this one also counts pages that can be made available by
4028 	being reclaimed (in other words, it factors in things like cached
4029 	pages as available).
4030 */
4031 page_num_t
4032 vm_page_num_available_pages(void)
4033 {
4034 	return vm_available_memory() / B_PAGE_SIZE;
4035 }
4036 
4037 
4038 page_num_t
4039 vm_page_num_free_pages(void)
4040 {
4041 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4042 	return count > 0 ? count : 0;
4043 }
4044 
4045 
4046 page_num_t
4047 vm_page_num_unused_pages(void)
4048 {
4049 	int32 count = sUnreservedFreePages;
4050 	return count > 0 ? count : 0;
4051 }
4052 
4053 
4054 void
4055 vm_page_get_stats(system_info *info)
4056 {
4057 	// Note: there's no locking protecting any of the queues or counters here,
4058 	// so we run the risk of getting bogus values when evaluating them
4059 	// throughout this function. As these stats are for informational purposes
4060 	// only, it is not really worth introducing such locking. Therefore we just
4061 	// ensure that we don't under- or overflow any of the values.
4062 
4063 	// The pages used for the block cache buffers. Those should not be counted
4064 	// as used but as cached pages.
4065 	// TODO: We should subtract the blocks that are in use ATM, since those
4066 	// can't really be freed in a low memory situation.
4067 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4068 
4069 	// Non-temporary modified pages are special as they represent pages that
4070 	// can be written back, so they could be freed if necessary, which
4071 	// basically makes them cached pages with a higher overhead. The
4072 	// modified queue count is therefore split into temporary and non-temporary
4073 	// counts that are then added to the corresponding number.
4074 	page_num_t modifiedNonTemporaryPages
4075 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4076 
4077 	info->max_pages = vm_page_num_pages();
4078 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4079 		+ blockCachePages;
4080 
4081 	// max_pages is composed of:
4082 	//	active + inactive + unused + wired + modified + cached + free + clear
4083 	// So taking out the cached (including modified non-temporary), free and
4084 	// clear ones leaves us with all used pages.
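	// (E.g. with max_pages == 1000, cached_pages == 300 and 200 free plus
	// clear pages, used_pages comes out as 1000 - (300 + 200) == 500.)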
4085 	int32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4086 		+ sClearPageQueue.Count();
4087 	info->used_pages = subtractPages > info->max_pages
4088 		? 0 : info->max_pages - subtractPages;
4089 
4090 	if (info->used_pages + info->cached_pages > info->max_pages) {
4091 		// Something was shuffled around while we were summing up the counts.
4092 		// Make the values sane, preferring the worse case of more used pages.
4093 		info->cached_pages = info->max_pages - info->used_pages;
4094 	}
4095 
4096 	info->page_faults = vm_num_page_faults();
4097 	info->ignored_pages = sIgnoredPages;
4098 
4099 	// TODO: We don't consider pages used for page directories/tables yet.
4100 }
4101 
4102 
4103 /*!	Returns the greatest address within the last page of accessible physical
4104 	memory.
4105 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4106 	means that the last page ends at exactly 4 GB.
4107 */
4108 phys_addr_t
4109 vm_page_max_address()
4110 {
4111 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4112 }
4113 
4114 
4115 RANGE_MARKER_FUNCTION_END(vm_page)
4116