xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 32
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserve that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
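/*	The array is indexed by the allocation's VM priority (VM_PRIORITY_USER,
	VM_PRIORITY_SYSTEM, VM_PRIORITY_VIP) and yields the number of unreserved
	pages that must be left for more important allocations; VIP allocations may
	use everything, hence the 0. A hypothetical lookup, just for illustration:

		uint32 dontTouch = kPageReserveForPriority[VM_PRIORITY_USER];
			// == VM_PAGE_RESERVE_USER; a user-priority reservation has to
			// leave this many unreserved pages untouched
*/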
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The vm_page::usage_count increase an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The vm_page::usage_count decrease an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
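/*	A minimal usage sketch of this rule, mirroring what free_page() and the
	page scrubber below do when only one of the two queues is involved:

		ReadLocker locker(sFreePageQueuesLock);
		page->SetState(PAGE_STATE_FREE);
		sFreePageQueue.PrependUnlocked(page);
			// the *Unlocked() variants still take the queue's own spinlock
		locker.Unlock();

	Code that works on the free and clear queues at the same time, such as
	mark_page_range_in_use() below, takes a WriteLocker on sFreePageQueuesLock
	instead.
*/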
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
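/*	For example, a VIP waiter (dontTouch == 0) sorts before a system waiter
	(dontTouch == VM_PAGE_RESERVE_SYSTEM), which in turn sorts before a user
	waiter; among waiters with the same dontTouch, the higher thread priority
	wins. wake_up_page_reservation_waiters() below presumably relies on this
	order when it serves the head of the list first.
*/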
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
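/*	A simplified sketch of how the two daemon conditions below are driven (the
	real loops are in the page writer and page daemon further down in this
	file; kInterval is just a placeholder):

		// daemon thread: sleep until woken or until the interval expires
		while (true) {
			sPageWriterCondition.Wait(kInterval, true);
				// 'true' clears the activated flag before going to sleep
			// ... do one round of work ...
		}

		// some other thread: request a run
		sPageWriterCondition.WakeUp();
			// returns early if the condition is already activated, so
			// repeated wake-ups between runs coalesce into one
*/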
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRIu32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRIu32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRIu32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static void
764 list_page(vm_page* page)
765 {
766 	kprintf("0x%08" B_PRIxADDR " ",
767 		(addr_t)(page->physical_page_number * B_PAGE_SIZE));
768 	switch (page->State()) {
769 		case PAGE_STATE_ACTIVE:   kprintf("A"); break;
770 		case PAGE_STATE_INACTIVE: kprintf("I"); break;
771 		case PAGE_STATE_MODIFIED: kprintf("M"); break;
772 		case PAGE_STATE_CACHED:   kprintf("C"); break;
773 		case PAGE_STATE_FREE:     kprintf("F"); break;
774 		case PAGE_STATE_CLEAR:    kprintf("L"); break;
775 		case PAGE_STATE_WIRED:    kprintf("W"); break;
776 		case PAGE_STATE_UNUSED:   kprintf("-"); break;
777 	}
778 	kprintf(" ");
779 	if (page->busy)         kprintf("B"); else kprintf("-");
780 	if (page->busy_writing) kprintf("W"); else kprintf("-");
781 	if (page->accessed)     kprintf("A"); else kprintf("-");
782 	if (page->modified)     kprintf("M"); else kprintf("-");
783 	kprintf("-");
784 
785 	kprintf(" usage:%3u", page->usage_count);
786 	kprintf(" wired:%5u", page->WiredCount());
787 
788 	bool first = true;
789 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
790 	vm_page_mapping* mapping;
791 	while ((mapping = iterator.Next()) != NULL) {
792 		if (first) {
793 			kprintf(": ");
794 			first = false;
795 		} else
796 			kprintf(", ");
797 
798 		kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
799 		mapping = mapping->page_link.next;
800 	}
801 }
802 
803 
804 static int
805 dump_page_list(int argc, char **argv)
806 {
807 	kprintf("page table:\n");
808 	for (page_num_t i = 0; i < sNumPages; i++) {
809 		if (sPages[i].State() != PAGE_STATE_UNUSED) {
810 			list_page(&sPages[i]);
811 			kprintf("\n");
812 		}
813 	}
814 	kprintf("end of page table\n");
815 
816 	return 0;
817 }
818 
819 
820 static int
821 find_page(int argc, char **argv)
822 {
823 	struct vm_page *page;
824 	addr_t address;
825 	int32 index = 1;
826 	int i;
827 
828 	struct {
829 		const char*	name;
830 		VMPageQueue*	queue;
831 	} pageQueueInfos[] = {
832 		{ "free",		&sFreePageQueue },
833 		{ "clear",		&sClearPageQueue },
834 		{ "modified",	&sModifiedPageQueue },
835 		{ "active",		&sActivePageQueue },
836 		{ "inactive",	&sInactivePageQueue },
837 		{ "cached",		&sCachedPageQueue },
838 		{ NULL, NULL }
839 	};
840 
841 	if (argc < 2
842 		|| strlen(argv[index]) <= 2
843 		|| argv[index][0] != '0'
844 		|| argv[index][1] != 'x') {
845 		kprintf("usage: find_page <address>\n");
846 		return 0;
847 	}
848 
849 	address = strtoul(argv[index], NULL, 0);
850 	page = (vm_page*)address;
851 
852 	for (i = 0; pageQueueInfos[i].name; i++) {
853 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
854 		while (vm_page* p = it.Next()) {
855 			if (p == page) {
856 				kprintf("found page %p in queue %p (%s)\n", page,
857 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
858 				return 0;
859 			}
860 		}
861 	}
862 
863 	kprintf("page %p isn't in any queue\n", page);
864 
865 	return 0;
866 }
867 
868 
869 const char *
870 page_state_to_string(int state)
871 {
872 	switch(state) {
873 		case PAGE_STATE_ACTIVE:
874 			return "active";
875 		case PAGE_STATE_INACTIVE:
876 			return "inactive";
877 		case PAGE_STATE_MODIFIED:
878 			return "modified";
879 		case PAGE_STATE_CACHED:
880 			return "cached";
881 		case PAGE_STATE_FREE:
882 			return "free";
883 		case PAGE_STATE_CLEAR:
884 			return "clear";
885 		case PAGE_STATE_WIRED:
886 			return "wired";
887 		case PAGE_STATE_UNUSED:
888 			return "unused";
889 		default:
890 			return "unknown";
891 	}
892 }
893 
894 
895 static int
896 dump_page_long(int argc, char **argv)
897 {
898 	bool addressIsPointer = true;
899 	bool physical = false;
900 	bool searchMappings = false;
901 	int32 index = 1;
902 
903 	while (index < argc) {
904 		if (argv[index][0] != '-')
905 			break;
906 
907 		if (!strcmp(argv[index], "-p")) {
908 			addressIsPointer = false;
909 			physical = true;
910 		} else if (!strcmp(argv[index], "-v")) {
911 			addressIsPointer = false;
912 		} else if (!strcmp(argv[index], "-m")) {
913 			searchMappings = true;
914 		} else {
915 			print_debugger_command_usage(argv[0]);
916 			return 0;
917 		}
918 
919 		index++;
920 	}
921 
922 	if (index + 1 != argc) {
923 		print_debugger_command_usage(argv[0]);
924 		return 0;
925 	}
926 
927 	uint64 value;
928 	if (!evaluate_debug_expression(argv[index], &value, false))
929 		return 0;
930 
931 	uint64 pageAddress = value;
932 	struct vm_page* page;
933 
934 	if (addressIsPointer) {
935 		page = (struct vm_page *)(addr_t)pageAddress;
936 	} else {
937 		if (!physical) {
938 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
939 
940 			if (debug_get_debugged_thread()->team->address_space != NULL)
941 				addressSpace = debug_get_debugged_thread()->team->address_space;
942 
943 			uint32 flags = 0;
944 			phys_addr_t physicalAddress;
945 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
946 					&physicalAddress, &flags) != B_OK
947 				|| (flags & PAGE_PRESENT) == 0) {
948 				kprintf("Virtual address not mapped to a physical page in this "
949 					"address space.\n");
950 				return 0;
951 			}
952 			pageAddress = physicalAddress;
953 		}
954 
955 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
956 	}
957 
958 	if (page == NULL) {
959 		kprintf("Page not found.\n");
960 		return 0;
961 	}
962 
963 	kprintf("PAGE: %p\n", page);
964 
965 	const off_t pageOffset = (addr_t)page - (addr_t)sPages;
966 	const off_t pageIndex = pageOffset / (off_t)sizeof(vm_page);
967 	if (pageIndex < 0) {
968 		kprintf("\taddress is before start of page array!"
969 			" (offset %" B_PRIdOFF ")\n", pageOffset);
970 	} else if ((page_num_t)pageIndex >= sNumPages) {
971 		kprintf("\taddress is after end of page array!"
972 			" (offset %" B_PRIdOFF ")\n", pageOffset);
973 	} else if ((pageIndex * (off_t)sizeof(vm_page)) != pageOffset) {
974 		kprintf("\taddress isn't a multiple of page structure size!"
975 			" (offset %" B_PRIdOFF ", expected align %" B_PRIuSIZE ")\n",
976 			pageOffset, sizeof(vm_page));
977 	}
978 
979 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
980 		page->queue_link.previous);
981 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", page->physical_page_number);
982 	kprintf("cache:           %p\n", page->Cache());
983 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
984 	kprintf("cache_next:      %p\n", page->cache_next);
985 	kprintf("state:           %s\n", page_state_to_string(page->State()));
986 	kprintf("wired_count:     %d\n", page->WiredCount());
987 	kprintf("usage_count:     %d\n", page->usage_count);
988 	kprintf("busy:            %d\n", page->busy);
989 	kprintf("busy_writing:    %d\n", page->busy_writing);
990 	kprintf("accessed:        %d\n", page->accessed);
991 	kprintf("modified:        %d\n", page->modified);
992 #if DEBUG_PAGE_QUEUE
993 	kprintf("queue:           %p\n", page->queue);
994 #endif
995 #if DEBUG_PAGE_ACCESS
996 	kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
997 #endif
998 
999 	if (pageIndex < 0 || (page_num_t)pageIndex >= sNumPages) {
1000 		// Don't try to read the mappings.
1001 		return 0;
1002 	}
1003 
1004 	kprintf("area mappings:\n");
1005 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
1006 	vm_page_mapping *mapping;
1007 	while ((mapping = iterator.Next()) != NULL) {
1008 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
1009 		mapping = mapping->page_link.next;
1010 	}
1011 
1012 	if (searchMappings) {
1013 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
1014 			VMAddressSpace*	fAddressSpace;
1015 
1016 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
1017 			{
1018 				phys_addr_t physicalAddress;
1019 				uint32 flags = 0;
1020 				if (fAddressSpace->TranslationMap()->QueryInterrupt(virtualAddress,
1021 						&physicalAddress, &flags) != B_OK) {
1022 					kprintf(" aspace %" B_PRId32 ": %#"	B_PRIxADDR " (querying failed)\n",
1023 						fAddressSpace->ID(), virtualAddress);
1024 					return false;
1025 				}
1026 				VMArea* area = fAddressSpace->LookupArea(virtualAddress);
1027 				kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
1028 					B_PRIxADDR " (%c%c%s%s)\n", fAddressSpace->ID(),
1029 					area != NULL ? area->id : -1, virtualAddress,
1030 					(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
1031 					(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
1032 					(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
1033 					(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
1034 				return false;
1035 			}
1036 		} callback;
1037 
1038 		kprintf("all mappings:\n");
1039 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
1040 		while (addressSpace != NULL) {
1041 			callback.fAddressSpace = addressSpace;
1042 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
1043 				page->physical_page_number * B_PAGE_SIZE, callback);
1044 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
1045 		}
1046 	}
1047 
1048 	set_debug_variable("_cache", (addr_t)page->Cache());
1049 #if DEBUG_PAGE_ACCESS
1050 	set_debug_variable("_accessor", page->accessing_thread);
1051 #endif
1052 
1053 	return 0;
1054 }
1055 
1056 
1057 static int
1058 dump_page_queue(int argc, char **argv)
1059 {
1060 	struct VMPageQueue *queue;
1061 
1062 	if (argc < 2) {
1063 		kprintf("usage: page_queue <address/name> [list]\n");
1064 		return 0;
1065 	}
1066 
1067 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
1068 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
1069 	else if (!strcmp(argv[1], "free"))
1070 		queue = &sFreePageQueue;
1071 	else if (!strcmp(argv[1], "clear"))
1072 		queue = &sClearPageQueue;
1073 	else if (!strcmp(argv[1], "modified"))
1074 		queue = &sModifiedPageQueue;
1075 	else if (!strcmp(argv[1], "active"))
1076 		queue = &sActivePageQueue;
1077 	else if (!strcmp(argv[1], "inactive"))
1078 		queue = &sInactivePageQueue;
1079 	else if (!strcmp(argv[1], "cached"))
1080 		queue = &sCachedPageQueue;
1081 	else {
1082 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
1083 		return 0;
1084 	}
1085 
1086 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
1087 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
1088 		queue->Count());
1089 
1090 	if (argc == 3) {
1091 		struct vm_page *page = queue->Head();
1092 
1093 		kprintf("page        cache       type       state  wired  usage\n");
1094 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1095 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1096 				vm_cache_type_to_string(page->Cache()->type),
1097 				page_state_to_string(page->State()),
1098 				page->WiredCount(), page->usage_count);
1099 		}
1100 	}
1101 	return 0;
1102 }
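/*	Example invocations from the kernel debugger, following the usage text
	above:

		page_queue modified
		page_queue modified list
*/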
1103 
1104 
1105 static int
1106 dump_page_stats(int argc, char **argv)
1107 {
1108 	page_num_t swappableModified = 0;
1109 	page_num_t swappableModifiedInactive = 0;
1110 
1111 	size_t counter[8];
1112 	size_t busyCounter[8];
1113 	memset(counter, 0, sizeof(counter));
1114 	memset(busyCounter, 0, sizeof(busyCounter));
1115 
1116 	struct page_run {
1117 		page_num_t	start;
1118 		page_num_t	end;
1119 
1120 		page_num_t Length() const	{ return end - start; }
1121 	};
1122 
1123 	page_run currentFreeRun = { 0, 0 };
1124 	page_run currentCachedRun = { 0, 0 };
1125 	page_run longestFreeRun = { 0, 0 };
1126 	page_run longestCachedRun = { 0, 0 };
1127 
1128 	for (page_num_t i = 0; i < sNumPages; i++) {
1129 		if (sPages[i].State() > 7) {
1130 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1131 				&sPages[i]);
1132 		}
1133 
1134 		uint32 pageState = sPages[i].State();
1135 
1136 		counter[pageState]++;
1137 		if (sPages[i].busy)
1138 			busyCounter[pageState]++;
1139 
1140 		if (pageState == PAGE_STATE_MODIFIED
1141 			&& sPages[i].Cache() != NULL
1142 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1143 			swappableModified++;
1144 			if (sPages[i].usage_count == 0)
1145 				swappableModifiedInactive++;
1146 		}
1147 
1148 		// track free and cached pages runs
1149 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1150 			currentFreeRun.end = i + 1;
1151 			currentCachedRun.end = i + 1;
1152 		} else {
1153 			if (currentFreeRun.Length() > longestFreeRun.Length())
1154 				longestFreeRun = currentFreeRun;
1155 			currentFreeRun.start = currentFreeRun.end = i + 1;
1156 
1157 			if (pageState == PAGE_STATE_CACHED) {
1158 				currentCachedRun.end = i + 1;
1159 			} else {
1160 				if (currentCachedRun.Length() > longestCachedRun.Length())
1161 					longestCachedRun = currentCachedRun;
1162 				currentCachedRun.start = currentCachedRun.end = i + 1;
1163 			}
1164 		}
1165 	}
1166 
1167 	kprintf("page stats:\n");
1168 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1169 
1170 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1171 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1172 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1173 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1174 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1175 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1176 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1177 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1178 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1179 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1180 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1181 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1182 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1183 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1184 
1185 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1186 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1187 		sUnsatisfiedPageReservations);
1188 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1189 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1190 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1191 		sPages[longestFreeRun.start].physical_page_number);
1192 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1193 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1194 		sPages[longestCachedRun.start].physical_page_number);
1195 
1196 	kprintf("waiting threads:\n");
1197 	for (PageReservationWaiterList::Iterator it
1198 			= sPageReservationWaiters.GetIterator();
1199 		PageReservationWaiter* waiter = it.Next();) {
1200 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1201 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1202 			waiter->missing, waiter->dontTouch);
1203 	}
1204 
1205 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1206 		sFreePageQueue.Count());
1207 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1208 		sClearPageQueue.Count());
1209 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1210 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1211 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1212 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1213 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1214 		&sActivePageQueue, sActivePageQueue.Count());
1215 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1216 		&sInactivePageQueue, sInactivePageQueue.Count());
1217 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1218 		&sCachedPageQueue, sCachedPageQueue.Count());
1219 	return 0;
1220 }
1221 
1222 
1223 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1224 
1225 static caller_info*
1226 get_caller_info(addr_t caller)
1227 {
1228 	// find the caller info
1229 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1230 		if (caller == sCallerInfoTable[i].caller)
1231 			return &sCallerInfoTable[i];
1232 	}
1233 
1234 	// not found, add a new entry, if there are free slots
1235 	if (sCallerInfoCount >= kCallerInfoTableSize)
1236 		return NULL;
1237 
1238 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1239 	info->caller = caller;
1240 	info->count = 0;
1241 
1242 	return info;
1243 }
1244 
1245 
1246 static int
1247 caller_info_compare_count(const void* _a, const void* _b)
1248 {
1249 	const caller_info* a = (const caller_info*)_a;
1250 	const caller_info* b = (const caller_info*)_b;
1251 	return (int)(b->count - a->count);
1252 }
1253 
1254 
1255 static int
1256 dump_page_allocations_per_caller(int argc, char** argv)
1257 {
1258 	bool resetAllocationInfos = false;
1259 	bool printDetails = false;
1260 	addr_t caller = 0;
1261 
1262 	for (int32 i = 1; i < argc; i++) {
1263 		if (strcmp(argv[i], "-d") == 0) {
1264 			uint64 callerAddress;
1265 			if (++i >= argc
1266 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1267 				print_debugger_command_usage(argv[0]);
1268 				return 0;
1269 			}
1270 
1271 			caller = callerAddress;
1272 			printDetails = true;
1273 		} else if (strcmp(argv[i], "-r") == 0) {
1274 			resetAllocationInfos = true;
1275 		} else {
1276 			print_debugger_command_usage(argv[0]);
1277 			return 0;
1278 		}
1279 	}
1280 
1281 	sCallerInfoCount = 0;
1282 
1283 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1284 	AllocationDetailPrinterCallback detailsCallback(caller);
1285 	AllocationTrackingCallback& callback = printDetails
1286 		? (AllocationTrackingCallback&)detailsCallback
1287 		: (AllocationTrackingCallback&)collectorCallback;
1288 
1289 	for (page_num_t i = 0; i < sNumPages; i++)
1290 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1291 
1292 	if (printDetails)
1293 		return 0;
1294 
1295 	// sort the array
1296 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1297 		&caller_info_compare_count);
1298 
1299 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1300 
1301 	size_t totalAllocationCount = 0;
1302 
1303 	kprintf("     count      caller\n");
1304 	kprintf("----------------------------------\n");
1305 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1306 		caller_info& info = sCallerInfoTable[i];
1307 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1308 
1309 		const char* symbol;
1310 		const char* imageName;
1311 		bool exactMatch;
1312 		addr_t baseAddress;
1313 
1314 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1315 				&imageName, &exactMatch) == B_OK) {
1316 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1317 				info.caller - baseAddress, imageName,
1318 				exactMatch ? "" : " (nearest)");
1319 		} else
1320 			kprintf("\n");
1321 
1322 		totalAllocationCount += info.count;
1323 	}
1324 
1325 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1326 		totalAllocationCount);
1327 
1328 	return 0;
1329 }
1330 
1331 
1332 static int
1333 dump_page_allocation_infos(int argc, char** argv)
1334 {
1335 	page_num_t pageFilter = 0;
1336 	team_id teamFilter = -1;
1337 	thread_id threadFilter = -1;
1338 	bool printStackTraces = false;
1339 
1340 	for (int32 i = 1; i < argc; i++) {
1341 		if (strcmp(argv[i], "--stacktrace") == 0)
1342 			printStackTraces = true;
1343 		else if (strcmp(argv[i], "-p") == 0) {
1344 			uint64 pageNumber;
1345 			if (++i >= argc
1346 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1347 				print_debugger_command_usage(argv[0]);
1348 				return 0;
1349 			}
1350 
1351 			pageFilter = pageNumber;
1352 		} else if (strcmp(argv[i], "--team") == 0) {
1353 			uint64 team;
1354 			if (++i >= argc
1355 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1356 				print_debugger_command_usage(argv[0]);
1357 				return 0;
1358 			}
1359 
1360 			teamFilter = team;
1361 		} else if (strcmp(argv[i], "--thread") == 0) {
1362 			uint64 thread;
1363 			if (++i >= argc
1364 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1365 				print_debugger_command_usage(argv[0]);
1366 				return 0;
1367 			}
1368 
1369 			threadFilter = thread;
1370 		} else {
1371 			print_debugger_command_usage(argv[0]);
1372 			return 0;
1373 		}
1374 	}
1375 
1376 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1377 		teamFilter, threadFilter);
1378 
1379 	for (page_num_t i = 0; i < sNumPages; i++)
1380 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1381 
1382 	return 0;
1383 }
1384 
1385 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1386 
1387 
1388 #ifdef TRACK_PAGE_USAGE_STATS
1389 
1390 static void
1391 track_page_usage(vm_page* page)
1392 {
1393 	if (page->WiredCount() == 0) {
1394 		sNextPageUsage[(int32)page->usage_count + 128]++;
1395 		sNextPageUsagePageCount++;
1396 	}
1397 }
1398 
1399 
1400 static void
1401 update_page_usage_stats()
1402 {
1403 	std::swap(sPageUsage, sNextPageUsage);
1404 	sPageUsagePageCount = sNextPageUsagePageCount;
1405 
1406 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1407 	sNextPageUsagePageCount = 0;
1408 
1409 	// compute average
1410 	if (sPageUsagePageCount > 0) {
1411 		int64 sum = 0;
1412 		for (int32 i = 0; i < 256; i++)
1413 			sum += (int64)sPageUsage[i] * (i - 128);
1414 
1415 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1416 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1417 	}
1418 }
1419 
1420 
1421 static int
1422 dump_page_usage_stats(int argc, char** argv)
1423 {
1424 	kprintf("distribution of page usage counts (%lu pages):",
1425 		sPageUsagePageCount);
1426 
1427 	int64 sum = 0;
1428 	for (int32 i = 0; i < 256; i++) {
1429 		if (i % 8 == 0)
1430 			kprintf("\n%4ld:", i - 128);
1431 
1432 		int64 count = sPageUsage[i];
1433 		sum += count * (i - 128);
1434 
1435 		kprintf("  %9llu", count);
1436 	}
1437 
1438 	kprintf("\n\n");
1439 
1440 	kprintf("average usage count: %f\n",
1441 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1442 
1443 	return 0;
1444 }
1445 
1446 #endif	// TRACK_PAGE_USAGE_STATS
1447 
1448 
1449 // #pragma mark - vm_page
1450 
1451 
1452 inline void
1453 vm_page::InitState(uint8 newState)
1454 {
1455 	state = newState;
1456 }
1457 
1458 
1459 inline void
1460 vm_page::SetState(uint8 newState)
1461 {
1462 	TPS(SetPageState(this, newState));
1463 
1464 	state = newState;
1465 }
1466 
1467 
1468 // #pragma mark -
1469 
1470 
1471 static void
1472 get_page_stats(page_stats& _pageStats)
1473 {
1474 	_pageStats.totalFreePages = sUnreservedFreePages;
1475 	_pageStats.cachedPages = sCachedPageQueue.Count();
1476 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1477 	// TODO: We don't get an actual snapshot here!
1478 }
1479 
1480 
1481 static bool
1482 do_active_paging(const page_stats& pageStats)
1483 {
1484 	return pageStats.totalFreePages + pageStats.cachedPages
1485 		< pageStats.unsatisfiedReservations
1486 			+ (int32)sFreeOrCachedPagesTarget;
1487 }
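/*	A worked example with made-up numbers: for totalFreePages = 1000,
	cachedPages = 200, unsatisfiedReservations = 50 and
	sFreeOrCachedPagesTarget = 2048 we get 1200 < 50 + 2048, so the page daemon
	considers the system to be paging actively and scans more aggressively.
*/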
1488 
1489 
1490 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1491 	\a count. Doesn't touch the last \a dontTouch pages of
1492 	\c sUnreservedFreePages, though.
1493 	\return The number of actually reserved pages.
1494 */
1495 static uint32
1496 reserve_some_pages(uint32 count, uint32 dontTouch)
1497 {
1498 	while (true) {
1499 		int32 freePages = atomic_get(&sUnreservedFreePages);
1500 		if (freePages <= (int32)dontTouch)
1501 			return 0;
1502 
1503 		int32 toReserve = std::min(count, freePages - dontTouch);
1504 		if (atomic_test_and_set(&sUnreservedFreePages,
1505 					freePages - toReserve, freePages)
1506 				== freePages) {
1507 			return toReserve;
1508 		}
1509 
1510 		// the count changed in the meantime -- retry
1511 	}
1512 }
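/*	Sketch of the typical pairing (cf. the page scrubber below): whatever this
	function hands out must eventually be returned via unreserve_pages(),
	defined a little further down.

		int32 reserved = reserve_some_pages(SCRUB_SIZE,
			kPageReserveForPriority[VM_PRIORITY_USER]);
		if (reserved > 0) {
			// ... temporarily take up to 'reserved' pages off the free queue ...
			unreserve_pages(reserved);
		}
*/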
1513 
1514 
1515 static void
1516 wake_up_page_reservation_waiters()
1517 {
1518 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1519 
1520 	// TODO: If this is a low priority thread, we might want to disable
1521 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1522 	// high priority threads will be kept waiting while a medium priority thread
1523 	// prevents us from running.
1524 
1525 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1526 		int32 reserved = reserve_some_pages(waiter->missing,
1527 			waiter->dontTouch);
1528 		if (reserved == 0)
1529 			return;
1530 
1531 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1532 		waiter->missing -= reserved;
1533 
1534 		if (waiter->missing > 0)
1535 			return;
1536 
1537 		sPageReservationWaiters.Remove(waiter);
1538 
1539 		thread_unblock(waiter->thread, B_OK);
1540 	}
1541 }
1542 
1543 
1544 static inline void
1545 unreserve_pages(uint32 count)
1546 {
1547 	atomic_add(&sUnreservedFreePages, count);
1548 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1549 		wake_up_page_reservation_waiters();
1550 }
1551 
1552 
1553 static void
1554 free_page(vm_page* page, bool clear)
1555 {
1556 	DEBUG_PAGE_ACCESS_CHECK(page);
1557 
1558 	PAGE_ASSERT(page, !page->IsMapped());
1559 
1560 	VMPageQueue* fromQueue;
1561 
1562 	switch (page->State()) {
1563 		case PAGE_STATE_ACTIVE:
1564 			fromQueue = &sActivePageQueue;
1565 			break;
1566 		case PAGE_STATE_INACTIVE:
1567 			fromQueue = &sInactivePageQueue;
1568 			break;
1569 		case PAGE_STATE_MODIFIED:
1570 			fromQueue = &sModifiedPageQueue;
1571 			break;
1572 		case PAGE_STATE_CACHED:
1573 			fromQueue = &sCachedPageQueue;
1574 			break;
1575 		case PAGE_STATE_FREE:
1576 		case PAGE_STATE_CLEAR:
1577 			panic("free_page(): page %p already free", page);
1578 			return;
1579 		case PAGE_STATE_WIRED:
1580 		case PAGE_STATE_UNUSED:
1581 			fromQueue = NULL;
1582 			break;
1583 		default:
1584 			panic("free_page(): page %p in invalid state %d",
1585 				page, page->State());
1586 			return;
1587 	}
1588 
1589 	if (page->CacheRef() != NULL)
1590 		panic("to be freed page %p has cache", page);
1591 	if (page->IsMapped())
1592 		panic("to be freed page %p has mappings", page);
1593 
1594 	if (fromQueue != NULL)
1595 		fromQueue->RemoveUnlocked(page);
1596 
1597 	TA(FreePage(page->physical_page_number));
1598 
1599 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1600 	page->allocation_tracking_info.Clear();
1601 #endif
1602 
1603 	ReadLocker locker(sFreePageQueuesLock);
1604 
1605 	DEBUG_PAGE_ACCESS_END(page);
1606 
1607 	if (clear) {
1608 		page->SetState(PAGE_STATE_CLEAR);
1609 		sClearPageQueue.PrependUnlocked(page);
1610 	} else {
1611 		page->SetState(PAGE_STATE_FREE);
1612 		sFreePageQueue.PrependUnlocked(page);
1613 		sFreePageCondition.NotifyAll();
1614 	}
1615 
1616 	locker.Unlock();
1617 }
1618 
1619 
1620 /*!	The caller must make sure that no-one else tries to change the page's state
1621 	while the function is called. If the page has a cache, this can be done by
1622 	locking the cache.
1623 */
1624 static void
1625 set_page_state(vm_page *page, int pageState)
1626 {
1627 	DEBUG_PAGE_ACCESS_CHECK(page);
1628 
1629 	if (pageState == page->State())
1630 		return;
1631 
1632 	VMPageQueue* fromQueue;
1633 
1634 	switch (page->State()) {
1635 		case PAGE_STATE_ACTIVE:
1636 			fromQueue = &sActivePageQueue;
1637 			break;
1638 		case PAGE_STATE_INACTIVE:
1639 			fromQueue = &sInactivePageQueue;
1640 			break;
1641 		case PAGE_STATE_MODIFIED:
1642 			fromQueue = &sModifiedPageQueue;
1643 			break;
1644 		case PAGE_STATE_CACHED:
1645 			fromQueue = &sCachedPageQueue;
1646 			break;
1647 		case PAGE_STATE_FREE:
1648 		case PAGE_STATE_CLEAR:
1649 			panic("set_page_state(): page %p is free/clear", page);
1650 			return;
1651 		case PAGE_STATE_WIRED:
1652 		case PAGE_STATE_UNUSED:
1653 			fromQueue = NULL;
1654 			break;
1655 		default:
1656 			panic("set_page_state(): page %p in invalid state %d",
1657 				page, page->State());
1658 			return;
1659 	}
1660 
1661 	VMPageQueue* toQueue;
1662 
1663 	switch (pageState) {
1664 		case PAGE_STATE_ACTIVE:
1665 			toQueue = &sActivePageQueue;
1666 			break;
1667 		case PAGE_STATE_INACTIVE:
1668 			toQueue = &sInactivePageQueue;
1669 			break;
1670 		case PAGE_STATE_MODIFIED:
1671 			toQueue = &sModifiedPageQueue;
1672 			break;
1673 		case PAGE_STATE_CACHED:
1674 			PAGE_ASSERT(page, !page->IsMapped());
1675 			PAGE_ASSERT(page, !page->modified);
1676 			toQueue = &sCachedPageQueue;
1677 			break;
1678 		case PAGE_STATE_FREE:
1679 		case PAGE_STATE_CLEAR:
1680 			panic("set_page_state(): target state is free/clear");
1681 			return;
1682 		case PAGE_STATE_WIRED:
1683 		case PAGE_STATE_UNUSED:
1684 			toQueue = NULL;
1685 			break;
1686 		default:
1687 			panic("set_page_state(): invalid target state %d", pageState);
1688 			return;
1689 	}
1690 
1691 	VMCache* cache = page->Cache();
1692 	if (cache != NULL && cache->temporary) {
1693 		if (pageState == PAGE_STATE_MODIFIED)
1694 			atomic_add(&sModifiedTemporaryPages, 1);
1695 		else if (page->State() == PAGE_STATE_MODIFIED)
1696 			atomic_add(&sModifiedTemporaryPages, -1);
1697 	}
1698 
1699 	// move the page
1700 	if (toQueue == fromQueue) {
1701 		// Note: Theoretically we are required to lock when changing the page
1702 		// state, even if we don't change the queue. We actually don't have to
1703 		// do this, though, since only for the active queue there are different
1704 		// page states and active pages have a cache that must be locked at
1705 		// this point. So we rely on the fact that everyone must lock the cache
1706 		// before trying to change/interpret the page state.
1707 		PAGE_ASSERT(page, cache != NULL);
1708 		cache->AssertLocked();
1709 		page->SetState(pageState);
1710 	} else {
1711 		if (fromQueue != NULL)
1712 			fromQueue->RemoveUnlocked(page);
1713 
1714 		page->SetState(pageState);
1715 
1716 		if (toQueue != NULL)
1717 			toQueue->AppendUnlocked(page);
1718 	}
1719 }
1720 
1721 
1722 /*! Moves a previously modified page into a now appropriate queue.
1723 	The page queues must not be locked.
1724 */
1725 static void
1726 move_page_to_appropriate_queue(vm_page *page)
1727 {
1728 	DEBUG_PAGE_ACCESS_CHECK(page);
1729 
1730 	// Note, this logic must be in sync with what the page daemon does.
1731 	int32 state;
1732 	if (page->IsMapped())
1733 		state = PAGE_STATE_ACTIVE;
1734 	else if (page->modified)
1735 		state = PAGE_STATE_MODIFIED;
1736 	else
1737 		state = PAGE_STATE_CACHED;
1738 
1739 // TODO: If free + cached pages are low, we might want to free the page
1740 // directly.
1741 	set_page_state(page, state);
1742 }
1743 
1744 
1745 static void
1746 clear_page(struct vm_page *page)
1747 {
1748 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1749 		B_PAGE_SIZE);
1750 }
1751 
1752 
1753 static status_t
1754 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1755 {
1756 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1757 		B_PRIxPHYSADDR "\n", startPage, length));
1758 
1759 	if (sPhysicalPageOffset > startPage) {
1760 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1761 			"): start page is before free list\n", startPage, length);
1762 		if (sPhysicalPageOffset - startPage >= length)
1763 			return B_OK;
1764 		length -= sPhysicalPageOffset - startPage;
1765 		startPage = sPhysicalPageOffset;
1766 	}
1767 
1768 	startPage -= sPhysicalPageOffset;
1769 
1770 	if (startPage + length > sNumPages) {
1771 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1772 			"): range would extend past free list\n", startPage, length);
1773 		if (startPage >= sNumPages)
1774 			return B_OK;
1775 		length = sNumPages - startPage;
1776 	}
1777 
1778 	WriteLocker locker(sFreePageQueuesLock);
1779 
1780 	for (page_num_t i = 0; i < length; i++) {
1781 		vm_page *page = &sPages[startPage + i];
1782 		switch (page->State()) {
1783 			case PAGE_STATE_FREE:
1784 			case PAGE_STATE_CLEAR:
1785 			{
1786 				// This violates the page reservation policy, since we remove pages
1787 				// from the free/clear queues without having reserved them before.
1788 				// This should happen in the early boot process only, though.
1789 				ASSERT(gKernelStartup);
1790 
1791 				DEBUG_PAGE_ACCESS_START(page);
1792 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1793 					? sFreePageQueue : sClearPageQueue;
1794 				queue.Remove(page);
1795 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1796 				page->busy = false;
1797 				atomic_add(&sUnreservedFreePages, -1);
1798 				DEBUG_PAGE_ACCESS_END(page);
1799 				break;
1800 			}
1801 			case PAGE_STATE_WIRED:
1802 			case PAGE_STATE_UNUSED:
1803 				break;
1804 			case PAGE_STATE_ACTIVE:
1805 			case PAGE_STATE_INACTIVE:
1806 			case PAGE_STATE_MODIFIED:
1807 			case PAGE_STATE_CACHED:
1808 			default:
1809 				// uh
1810 				panic("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1811 					" in non-free state %d!\n", startPage + i, page->State());
1812 				break;
1813 		}
1814 	}
1815 
1816 	return B_OK;
1817 }
1818 
1819 
1820 /*!
1821 	This is a background thread that wakes up when its condition is notified
1822 	and moves some pages from the free queue over to the clear queue.
1823 	Given enough time, it will clear out all pages from the free queue - we
1824 	could probably slow it down after having reached a certain threshold.
1825 */
1826 static int32
1827 page_scrubber(void *unused)
1828 {
1829 	(void)(unused);
1830 
1831 	TRACE(("page_scrubber starting...\n"));
1832 
1833 	ConditionVariableEntry entry;
1834 	for (;;) {
1835 		while (sFreePageQueue.Count() == 0
1836 				|| atomic_get(&sUnreservedFreePages)
1837 					< (int32)sFreePagesTarget) {
1838 			sFreePageCondition.Add(&entry);
1839 			entry.Wait();
1840 		}
1841 
1842 		// Since we temporarily remove pages from the free pages reserve,
1843 		// we must make sure we don't cause a violation of the page
1844 		// reservation guarantee. The following is usually stricter than
1845 		// necessary, because we don't have information on how many of the
1846 		// reserved pages have already been allocated.
1847 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1848 			kPageReserveForPriority[VM_PRIORITY_USER]);
1849 		if (reserved == 0)
1850 			continue;
1851 
1852 		// get some pages from the free queue, mostly sorted
1853 		ReadLocker locker(sFreePageQueuesLock);
1854 
1855 		vm_page *page[SCRUB_SIZE];
1856 		int32 scrubCount = 0;
1857 		for (int32 i = 0; i < reserved; i++) {
1858 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1859 			if (page[i] == NULL)
1860 				break;
1861 
1862 			DEBUG_PAGE_ACCESS_START(page[i]);
1863 
1864 			page[i]->SetState(PAGE_STATE_ACTIVE);
1865 			page[i]->busy = true;
1866 			scrubCount++;
1867 		}
1868 
1869 		locker.Unlock();
1870 
1871 		if (scrubCount == 0) {
1872 			unreserve_pages(reserved);
1873 			continue;
1874 		}
1875 
1876 		TA(ScrubbingPages(scrubCount));
1877 
1878 		// clear them
1879 		for (int32 i = 0; i < scrubCount; i++)
1880 			clear_page(page[i]);
1881 
1882 		locker.Lock();
1883 
1884 		// and put them into the clear queue
1885 		// process the array reversed when prepending to preserve sequential order
1886 		for (int32 i = scrubCount - 1; i >= 0; i--) {
1887 			page[i]->SetState(PAGE_STATE_CLEAR);
1888 			page[i]->busy = false;
1889 			DEBUG_PAGE_ACCESS_END(page[i]);
1890 			sClearPageQueue.PrependUnlocked(page[i]);
1891 		}
1892 
1893 		locker.Unlock();
1894 
1895 		unreserve_pages(reserved);
1896 
1897 		TA(ScrubbedPages(scrubCount));
1898 
1899 		// wait at least 100ms between runs
1900 		snooze(100 * 1000);
1901 	}
1902 
1903 	return 0;
1904 }
1905 
1906 
1907 static void
1908 init_page_marker(vm_page &marker)
1909 {
1910 	marker.SetCacheRef(NULL);
1911 	marker.InitState(PAGE_STATE_UNUSED);
1912 	marker.busy = true;
1913 #if DEBUG_PAGE_QUEUE
1914 	marker.queue = NULL;
1915 #endif
1916 #if DEBUG_PAGE_ACCESS
1917 	marker.accessing_thread = thread_get_current_thread_id();
1918 #endif
1919 }
1920 
1921 
1922 static void
1923 remove_page_marker(struct vm_page &marker)
1924 {
1925 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1926 
1927 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1928 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1929 
1930 	marker.SetState(PAGE_STATE_UNUSED);
1931 }
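/*	Sketch of the marker pattern used by the queue scanners further down in
	this file (simplified; findNextCandidate() is a hypothetical stand-in for
	the scanning logic, and the queue locking is omitted):

		vm_page marker;
		init_page_marker(marker);

		while (vm_page* page = findNextCandidate(queue, marker)) {
			// remember our position in the queue, then drop the queue lock
			marker.SetState(queueState);
			queue.InsertAfter(page, &marker);

			// ... work on 'page' without holding the queue lock ...

			// the next findNextCandidate() call resumes right after the
			// marker and pulls it back out of the queue
		}

		remove_page_marker(marker);
*/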
1932 
1933 
1934 static vm_page*
1935 next_modified_page(page_num_t& maxPagesToSee)
1936 {
1937 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1938 
1939 	while (maxPagesToSee > 0) {
1940 		vm_page* page = sModifiedPageQueue.Head();
1941 		if (page == NULL)
1942 			return NULL;
1943 
1944 		sModifiedPageQueue.Requeue(page, true);
1945 
1946 		maxPagesToSee--;
1947 
1948 		if (!page->busy)
1949 			return page;
1950 	}
1951 
1952 	return NULL;
1953 }
1954 
1955 
1956 // #pragma mark -
1957 
1958 
1959 class PageWriteTransfer;
1960 class PageWriteWrapper;
1961 
1962 
1963 class PageWriterRun {
1964 public:
1965 	status_t Init(uint32 maxPages);
1966 
1967 	void PrepareNextRun();
1968 	void AddPage(vm_page* page);
1969 	uint32 Go();
1970 
1971 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1972 		bool partialTransfer, size_t bytesTransferred);
1973 
1974 private:
1975 	uint32				fMaxPages;
1976 	uint32				fWrapperCount;
1977 	uint32				fTransferCount;
1978 	int32				fPendingTransfers;
1979 	PageWriteWrapper*	fWrappers;
1980 	PageWriteTransfer*	fTransfers;
1981 	ConditionVariable	fAllFinishedCondition;
1982 };
1983 
1984 
1985 class PageWriteTransfer : public AsyncIOCallback {
1986 public:
1987 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1988 	bool AddPage(vm_page* page);
1989 
1990 	status_t Schedule(uint32 flags);
1991 
1992 	void SetStatus(status_t status, size_t transferred);
1993 
1994 	status_t Status() const	{ return fStatus; }
1995 	struct VMCache* Cache() const { return fCache; }
1996 	uint32 PageCount() const { return fPageCount; }
1997 
1998 	virtual void IOFinished(status_t status, bool partialTransfer,
1999 		generic_size_t bytesTransferred);
2000 
2001 private:
2002 	PageWriterRun*		fRun;
2003 	struct VMCache*		fCache;
2004 	off_t				fOffset;
2005 	uint32				fPageCount;
2006 	int32				fMaxPages;
2007 	status_t			fStatus;
2008 	uint32				fVecCount;
2009 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
2010 };
2011 
2012 
2013 class PageWriteWrapper {
2014 public:
2015 	PageWriteWrapper();
2016 	~PageWriteWrapper();
2017 	void SetTo(vm_page* page);
2018 	bool Done(status_t result);
2019 
2020 private:
2021 	vm_page*			fPage;
2022 	struct VMCache*		fCache;
2023 	bool				fIsActive;
2024 };
2025 
2026 
2027 PageWriteWrapper::PageWriteWrapper()
2028 	:
2029 	fIsActive(false)
2030 {
2031 }
2032 
2033 
2034 PageWriteWrapper::~PageWriteWrapper()
2035 {
2036 	if (fIsActive)
2037 		panic("page write wrapper going out of scope but isn't completed");
2038 }
2039 
2040 
2041 /*!	The page's cache must be locked.
2042 */
2043 void
2044 PageWriteWrapper::SetTo(vm_page* page)
2045 {
2046 	DEBUG_PAGE_ACCESS_CHECK(page);
2047 
2048 	if (page->busy)
2049 		panic("setting page write wrapper to busy page");
2050 
2051 	if (fIsActive)
2052 		panic("re-setting page write wrapper that isn't completed");
2053 
2054 	fPage = page;
2055 	fCache = page->Cache();
2056 	fIsActive = true;
2057 
2058 	fPage->busy = true;
2059 	fPage->busy_writing = true;
2060 
2061 	// We have a modified page -- however, while we're writing it back,
2062 	// the page might still be mapped. In order not to lose any changes to the
2063 	// page, we mark it clean before actually writing it back; if
2064 	// writing the page fails for some reason, we'll just keep it in the
2065 	// modified page list, but that should happen only rarely.
2066 
2067 	// If the page is changed after we cleared the dirty flag, but before we
2068 	// had the chance to write it back, then we'll write it again later -- that
2069 	// will probably not happen that often, though.
2070 
2071 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
2072 }
2073 
2074 
2075 /*!	The page's cache must be locked.
2076 	The page queues must not be locked.
2077 	\return \c true if the page was written successfully or could otherwise be
2078 		handled, \c false otherwise.
2079 */
2080 bool
2081 PageWriteWrapper::Done(status_t result)
2082 {
2083 	if (!fIsActive)
2084 		panic("completing page write wrapper that is not active");
2085 
2086 	DEBUG_PAGE_ACCESS_START(fPage);
2087 
2088 	fPage->busy = false;
2089 		// Set unbusy and notify later by hand, since we might free the page.
2090 
2091 	bool success = true;
2092 
2093 	if (result == B_OK) {
2094 		// put it into the active/inactive queue
2095 		move_page_to_appropriate_queue(fPage);
2096 		fPage->busy_writing = false;
2097 		DEBUG_PAGE_ACCESS_END(fPage);
2098 	} else {
2099 		// Writing the page failed. One reason would be that the cache has been
2100 		// shrunk and the page no longer belongs to the file. Otherwise the
2101 		// actual I/O failed, in which case we'll simply keep the page modified.
2102 
2103 		if (!fPage->busy_writing) {
2104 			// The busy_writing flag was cleared. That means the cache has been
2105 			// shrunk while we were trying to write the page and we have to free
2106 			// it now.
2107 			vm_remove_all_page_mappings(fPage);
2108 // TODO: Unmapping should already happen when resizing the cache!
2109 			fCache->RemovePage(fPage);
2110 			free_page(fPage, false);
2111 			unreserve_pages(1);
2112 		} else {
2113 			// Writing the page failed -- mark the page modified again. For
2114 			// temporary caches we move it to a queue other than the modified
2115 			// queue, so we don't keep trying to write it over and over again.
2116 			// Non-temporary pages stay in the modified queue, though, so they
2117 			// don't get lost in the inactive queue.
2118 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2119 				strerror(result));
2120 
2121 			fPage->modified = true;
2122 			if (!fCache->temporary)
2123 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2124 			else if (fPage->IsMapped())
2125 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2126 			else
2127 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2128 
2129 			fPage->busy_writing = false;
2130 			DEBUG_PAGE_ACCESS_END(fPage);
2131 
2132 			success = false;
2133 		}
2134 	}
2135 
2136 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2137 	fIsActive = false;
2138 
2139 	return success;
2140 }
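
/*	Usage sketch (illustrative only; not part of the original source): the
	wrapper protocol as the page writer code below drives it. This assumes
	"page" is a non-busy modified page whose cache is locked and "transfer"
	is a PageWriteTransfer already set up for that page.

		PageWriteWrapper wrapper;
		wrapper.SetTo(page);
			// marks the page busy/busy_writing and clears PAGE_MODIFIED
		status_t status = transfer.Schedule(0);
			// without a run, Schedule() performs the write synchronously
		wrapper.Done(status);
			// unbusies the page and requeues or frees it, depending on status
*/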
2141 
2142 
2143 /*!	The page's cache must be locked.
2144 */
2145 void
2146 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2147 {
2148 	fRun = run;
2149 	fCache = page->Cache();
2150 	fOffset = page->cache_offset;
2151 	fPageCount = 1;
2152 	fMaxPages = maxPages;
2153 	fStatus = B_OK;
2154 
2155 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2156 	fVecs[0].length = B_PAGE_SIZE;
2157 	fVecCount = 1;
2158 }
2159 
2160 
2161 /*!	The page's cache must be locked.
2162 */
2163 bool
2164 PageWriteTransfer::AddPage(vm_page* page)
2165 {
2166 	if (page->Cache() != fCache
2167 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2168 		return false;
2169 
2170 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2171 		+ fVecs[fVecCount - 1].length;
2172 
2173 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2174 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2175 		// append to last iovec
2176 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2177 		fPageCount++;
2178 		return true;
2179 	}
2180 
2181 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2182 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2183 		&& (off_t)page->cache_offset == fOffset - 1) {
2184 		// prepend to first iovec and adjust offset
2185 		fVecs[0].base = nextBase;
2186 		fVecs[0].length += B_PAGE_SIZE;
2187 		fOffset = page->cache_offset;
2188 		fPageCount++;
2189 		return true;
2190 	}
2191 
2192 	if (((off_t)page->cache_offset == fOffset + fPageCount
2193 			|| (off_t)page->cache_offset == fOffset - 1)
2194 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2195 		// not physically contiguous or not in the right order
2196 		uint32 vectorIndex;
2197 		if ((off_t)page->cache_offset < fOffset) {
2198 			// we are pre-pending another vector, move the other vecs
2199 			for (uint32 i = fVecCount; i > 0; i--)
2200 				fVecs[i] = fVecs[i - 1];
2201 
2202 			fOffset = page->cache_offset;
2203 			vectorIndex = 0;
2204 		} else
2205 			vectorIndex = fVecCount;
2206 
2207 		fVecs[vectorIndex].base
2208 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2209 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2210 
2211 		fVecCount++;
2212 		fPageCount++;
2213 		return true;
2214 	}
2215 
2216 	return false;
2217 }
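
/*	Worked example (hypothetical page layout): assume the transfer was set up
	with a page at cache offset 10 backed by physical frame 0x200. A page at
	offset 11/frame 0x201 is contiguous both logically and physically, so it
	only extends fVecs[0] to 2 * B_PAGE_SIZE. A page at offset 12/frame 0x300
	is logically but not physically contiguous and therefore starts a second
	iovec. A page at offset 20 is not adjacent at all, so AddPage() returns
	false and the page has to go into a separate transfer.
*/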
2218 
2219 
2220 status_t
2221 PageWriteTransfer::Schedule(uint32 flags)
2222 {
2223 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2224 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2225 
2226 	if (fRun != NULL) {
2227 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2228 			flags | B_PHYSICAL_IO_REQUEST, this);
2229 	}
2230 
2231 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2232 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2233 
2234 	SetStatus(status, writeLength);
2235 	return fStatus;
2236 }
2237 
2238 
2239 void
2240 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2241 {
2242 	// only succeed if all pages up to the last one have been written fully
2243 	// and the last page has at least been written partially
2244 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2245 		status = B_ERROR;
2246 
2247 	fStatus = status;
2248 }
2249 
2250 
2251 void
2252 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2253 	generic_size_t bytesTransferred)
2254 {
2255 	SetStatus(status, bytesTransferred);
2256 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2257 }
2258 
2259 
2260 status_t
2261 PageWriterRun::Init(uint32 maxPages)
2262 {
2263 	fMaxPages = maxPages;
2264 	fWrapperCount = 0;
2265 	fTransferCount = 0;
2266 	fPendingTransfers = 0;
2267 
2268 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2269 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2270 	if (fWrappers == NULL || fTransfers == NULL)
2271 		return B_NO_MEMORY;
2272 
2273 	return B_OK;
2274 }
2275 
2276 
2277 void
2278 PageWriterRun::PrepareNextRun()
2279 {
2280 	fWrapperCount = 0;
2281 	fTransferCount = 0;
2282 	fPendingTransfers = 0;
2283 }
2284 
2285 
2286 /*!	The page's cache must be locked.
2287 */
2288 void
2289 PageWriterRun::AddPage(vm_page* page)
2290 {
2291 	fWrappers[fWrapperCount++].SetTo(page);
2292 
2293 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2294 		fTransfers[fTransferCount++].SetTo(this, page,
2295 			page->Cache()->MaxPagesPerAsyncWrite());
2296 	}
2297 }
2298 
2299 
2300 /*!	Writes all pages previously added.
2301 	\return The number of pages that could not be written or otherwise handled.
2302 */
2303 uint32
2304 PageWriterRun::Go()
2305 {
2306 	atomic_set(&fPendingTransfers, fTransferCount);
2307 
2308 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2309 	ConditionVariableEntry waitEntry;
2310 	fAllFinishedCondition.Add(&waitEntry);
2311 
2312 	// schedule writes
2313 	for (uint32 i = 0; i < fTransferCount; i++)
2314 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2315 
2316 	// wait until all pages have been written
2317 	waitEntry.Wait();
2318 
2319 	// mark pages depending on whether they could be written or not
2320 
2321 	uint32 failedPages = 0;
2322 	uint32 wrapperIndex = 0;
2323 	for (uint32 i = 0; i < fTransferCount; i++) {
2324 		PageWriteTransfer& transfer = fTransfers[i];
2325 		transfer.Cache()->Lock();
2326 
2327 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2328 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2329 				failedPages++;
2330 		}
2331 
2332 		transfer.Cache()->Unlock();
2333 	}
2334 
2335 	ASSERT(wrapperIndex == fWrapperCount);
2336 
2337 	for (uint32 i = 0; i < fTransferCount; i++) {
2338 		PageWriteTransfer& transfer = fTransfers[i];
2339 		struct VMCache* cache = transfer.Cache();
2340 
2341 		// We've acquired a cache reference and a store reference for each page
2342 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2343 			// We release the cache references after all pages were made
2344 			// unbusy again - otherwise releasing a vnode could deadlock.
2345 			cache->ReleaseStoreRef();
2346 			cache->ReleaseRef();
2347 		}
2348 	}
2349 
2350 	return failedPages;
2351 }
2352 
2353 
2354 void
2355 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2356 	bool partialTransfer, size_t bytesTransferred)
2357 {
2358 	if (atomic_add(&fPendingTransfers, -1) == 1)
2359 		fAllFinishedCondition.NotifyAll();
2360 }
2361 
2362 
2363 /*!	The page writer continuously takes some pages from the modified
2364 	queue, writes them back, and moves them back to the active queue.
2365 	It runs in its own thread, and is only there to keep the number
2366 	of modified pages low, so that more pages can be reused with
2367 	fewer costs.
2368 */
2369 status_t
2370 page_writer(void* /*unused*/)
2371 {
2372 	const uint32 kNumPages = 256;
2373 #ifdef TRACE_VM_PAGE
2374 	uint32 writtenPages = 0;
2375 	bigtime_t lastWrittenTime = 0;
2376 	bigtime_t pageCollectionTime = 0;
2377 	bigtime_t pageWritingTime = 0;
2378 #endif
2379 
2380 	PageWriterRun run;
2381 	if (run.Init(kNumPages) != B_OK) {
2382 		panic("page writer: Failed to init PageWriterRun!");
2383 		return B_ERROR;
2384 	}
2385 
2386 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2387 
2388 	while (true) {
2389 // TODO: Maybe wait shorter when memory is low!
2390 		if (sModifiedPageQueue.Count() < kNumPages) {
2391 			sPageWriterCondition.Wait(3000000, true);
2392 				// every 3 seconds when no one triggers us
2393 		}
2394 
2395 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2396 		if (modifiedPages == 0)
2397 			continue;
2398 
2399 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2400 			// We ran through the whole queue without being able to write a
2401 			// single page. Take a break.
2402 			snooze(500000);
2403 			pagesSinceLastSuccessfulWrite = 0;
2404 		}
2405 
2406 #if ENABLE_SWAP_SUPPORT
2407 		page_stats pageStats;
2408 		get_page_stats(pageStats);
2409 		bool activePaging = do_active_paging(pageStats);
2410 #endif
2411 
2412 		// depending on how urgent it becomes to get pages to disk, we adjust
2413 		// our I/O priority
2414 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2415 		int32 ioPriority = B_IDLE_PRIORITY;
2416 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2417 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2418 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2419 		} else {
2420 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2421 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2422 		}
2423 
2424 		thread_set_io_priority(ioPriority);
2425 
2426 		uint32 numPages = 0;
2427 		run.PrepareNextRun();
2428 
2429 		// TODO: make this laptop friendly, too (ie. only start doing
2430 		// something if someone else did something or there is really
2431 		// enough to do).
2432 
2433 		// collect pages to be written
2434 #ifdef TRACE_VM_PAGE
2435 		pageCollectionTime -= system_time();
2436 #endif
2437 
2438 		page_num_t maxPagesToSee = modifiedPages;
2439 
2440 		while (numPages < kNumPages && maxPagesToSee > 0) {
2441 			vm_page *page = next_modified_page(maxPagesToSee);
2442 			if (page == NULL)
2443 				break;
2444 
2445 			PageCacheLocker cacheLocker(page, false);
2446 			if (!cacheLocker.IsLocked())
2447 				continue;
2448 
2449 			VMCache *cache = page->Cache();
2450 
2451 			// If the page is busy or its state has changed while we were
2452 			// locking the cache, just ignore it.
2453 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2454 				continue;
2455 
2456 			DEBUG_PAGE_ACCESS_START(page);
2457 
2458 			// Don't write back wired (locked) pages.
2459 			if (page->WiredCount() > 0) {
2460 				set_page_state(page, PAGE_STATE_ACTIVE);
2461 				DEBUG_PAGE_ACCESS_END(page);
2462 				continue;
2463 			}
2464 
2465 			// Write back temporary pages only when we're actively paging.
2466 			if (cache->temporary
2467 #if ENABLE_SWAP_SUPPORT
2468 				&& (!activePaging
2469 					|| !cache->CanWritePage(
2470 							(off_t)page->cache_offset << PAGE_SHIFT))
2471 #endif
2472 				) {
2473 				// We can't/don't want to do anything with this page, so move it
2474 				// to one of the other queues.
2475 				if (page->mappings.IsEmpty())
2476 					set_page_state(page, PAGE_STATE_INACTIVE);
2477 				else
2478 					set_page_state(page, PAGE_STATE_ACTIVE);
2479 
2480 				DEBUG_PAGE_ACCESS_END(page);
2481 				continue;
2482 			}
2483 
2484 			// We need our own reference to the store, as it might currently be
2485 			// in the process of being destroyed.
2486 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2487 				DEBUG_PAGE_ACCESS_END(page);
2488 				cacheLocker.Unlock();
2489 				thread_yield();
2490 				continue;
2491 			}
2492 
2493 			run.AddPage(page);
2494 				// TODO: We're possibly adding pages of different caches and
2495 				// thus maybe of different underlying file systems here. This
2496 				// is a potential problem for loop file systems/devices, since
2497 				// we could mark a page busy that would need to be accessed
2498 				// when writing back another page, thus causing a deadlock.
2499 
2500 			DEBUG_PAGE_ACCESS_END(page);
2501 
2502 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2503 			TPW(WritePage(page));
2504 
2505 			cache->AcquireRefLocked();
2506 			numPages++;
2507 		}
2508 
2509 #ifdef TRACE_VM_PAGE
2510 		pageCollectionTime += system_time();
2511 #endif
2512 		if (numPages == 0)
2513 			continue;
2514 
2515 		// write pages to disk and do all the cleanup
2516 #ifdef TRACE_VM_PAGE
2517 		pageWritingTime -= system_time();
2518 #endif
2519 		uint32 failedPages = run.Go();
2520 #ifdef TRACE_VM_PAGE
2521 		pageWritingTime += system_time();
2522 
2523 		// debug output only...
2524 		writtenPages += numPages;
2525 		if (writtenPages >= 1024) {
2526 			bigtime_t now = system_time();
2527 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2528 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2529 				(now - lastWrittenTime) / 1000,
2530 				pageCollectionTime / 1000, pageWritingTime / 1000));
2531 			lastWrittenTime = now;
2532 
2533 			writtenPages -= 1024;
2534 			pageCollectionTime = 0;
2535 			pageWritingTime = 0;
2536 		}
2537 #endif
2538 
2539 		if (failedPages == numPages)
2540 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2541 		else
2542 			pagesSinceLastSuccessfulWrite = 0;
2543 	}
2544 
2545 	return B_OK;
2546 }
2547 
2548 
2549 // #pragma mark -
2550 
2551 
2552 // TODO: This should be done in the page daemon!
2553 #if 0
2554 #if ENABLE_SWAP_SUPPORT
2555 static bool
2556 free_page_swap_space(int32 index)
2557 {
2558 	vm_page *page = vm_page_at_index(index);
2559 	PageCacheLocker locker(page);
2560 	if (!locker.IsLocked())
2561 		return false;
2562 
2563 	DEBUG_PAGE_ACCESS_START(page);
2564 
2565 	VMCache* cache = page->Cache();
2566 	if (cache->temporary && page->WiredCount() == 0
2567 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2568 			&& page->usage_count > 0) {
2569 		// TODO: how to judge a page is highly active?
2570 		if (swap_free_page_swap_space(page)) {
2571 			// We need to mark the page modified, since otherwise it could be
2572 			// stolen and we'd lose its data.
2573 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2574 			TD(FreedPageSwap(page));
2575 			DEBUG_PAGE_ACCESS_END(page);
2576 			return true;
2577 		}
2578 	}
2579 	DEBUG_PAGE_ACCESS_END(page);
2580 	return false;
2581 }
2582 #endif
2583 #endif	// 0
2584 
2585 
2586 static vm_page *
2587 find_cached_page_candidate(struct vm_page &marker)
2588 {
2589 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2590 
2591 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2592 	vm_page *page;
2593 
2594 	if (marker.State() == PAGE_STATE_UNUSED) {
2595 		// Get the first page of the cached queue
2596 		page = sCachedPageQueue.Head();
2597 	} else {
2598 		// Get the next page of the current queue
2599 		if (marker.State() != PAGE_STATE_CACHED) {
2600 			panic("invalid marker %p state", &marker);
2601 			return NULL;
2602 		}
2603 
2604 		page = sCachedPageQueue.Next(&marker);
2605 		sCachedPageQueue.Remove(&marker);
2606 		marker.SetState(PAGE_STATE_UNUSED);
2607 	}
2608 
2609 	while (page != NULL) {
2610 		if (!page->busy) {
2611 			// we found a candidate, insert marker
2612 			marker.SetState(PAGE_STATE_CACHED);
2613 			sCachedPageQueue.InsertAfter(page, &marker);
2614 			return page;
2615 		}
2616 
2617 		page = sCachedPageQueue.Next(page);
2618 	}
2619 
2620 	return NULL;
2621 }
2622 
2623 
2624 static bool
2625 free_cached_page(vm_page *page, bool dontWait)
2626 {
2627 	// try to lock the page's cache
2628 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2629 		return false;
2630 	VMCache* cache = page->Cache();
2631 
2632 	AutoLocker<VMCache> cacheLocker(cache, true);
2633 	MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2634 
2635 	// check again if that page is still a candidate
2636 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2637 		return false;
2638 
2639 	DEBUG_PAGE_ACCESS_START(page);
2640 
2641 	PAGE_ASSERT(page, !page->IsMapped());
2642 	PAGE_ASSERT(page, !page->modified);
2643 
2644 	// we can now steal this page
2645 
2646 	cache->RemovePage(page);
2647 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2648 		// vm_page_allocate_page_run()) can pick it up, since they would be
2649 		// required to lock the cache first, which would fail.
2650 
2651 	sCachedPageQueue.RemoveUnlocked(page);
2652 	return true;
2653 }
2654 
2655 
2656 static uint32
2657 free_cached_pages(uint32 pagesToFree, bool dontWait)
2658 {
2659 	vm_page marker;
2660 	init_page_marker(marker);
2661 
2662 	uint32 pagesFreed = 0;
2663 
2664 	while (pagesFreed < pagesToFree) {
2665 		vm_page *page = find_cached_page_candidate(marker);
2666 		if (page == NULL)
2667 			break;
2668 
2669 		if (free_cached_page(page, dontWait)) {
2670 			ReadLocker locker(sFreePageQueuesLock);
2671 			page->SetState(PAGE_STATE_FREE);
2672 			DEBUG_PAGE_ACCESS_END(page);
2673 			sFreePageQueue.PrependUnlocked(page);
2674 			locker.Unlock();
2675 
2676 			TA(StolenPage());
2677 
2678 			pagesFreed++;
2679 		}
2680 	}
2681 
2682 	remove_page_marker(marker);
2683 
2684 	sFreePageCondition.NotifyAll();
2685 
2686 	return pagesFreed;
2687 }
2688 
2689 
2690 static void
2691 idle_scan_active_pages(page_stats& pageStats)
2692 {
2693 	VMPageQueue& queue = sActivePageQueue;
2694 
2695 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2696 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2697 
2698 	while (maxToScan > 0) {
2699 		maxToScan--;
2700 
2701 		// Get the next page. Note that we don't bother to lock here. We go with
2702 		// the assumption that on all architectures reading/writing pointers is
2703 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2704 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2705 		vm_page* page = queue.Head();
2706 		if (page == NULL)
2707 			break;
2708 
2709 		// lock the page's cache
2710 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2711 		if (cache == NULL)
2712 			continue;
2713 
2714 		if (page->State() != PAGE_STATE_ACTIVE) {
2715 			// page is no longer in the cache or in this queue
2716 			cache->ReleaseRefAndUnlock();
2717 			continue;
2718 		}
2719 
2720 		if (page->busy) {
2721 			// page is busy -- requeue at the end
2722 			vm_page_requeue(page, true);
2723 			cache->ReleaseRefAndUnlock();
2724 			continue;
2725 		}
2726 
2727 		DEBUG_PAGE_ACCESS_START(page);
2728 
2729 		// Get the page active/modified flags and update the page's usage count.
2730 		// We completely unmap inactive temporary pages. This saves us from
2731 		// having to iterate through the inactive list as well, since we'll be
2732 		// notified via page fault whenever such an inactive page is used again.
2733 		// We don't remove the mappings of non-temporary pages, since we
2734 		// wouldn't notice when they become unused and could thus be moved to
2735 		// the cached list.
2736 		int32 usageCount;
2737 		if (page->WiredCount() > 0 || page->usage_count > 0
2738 			|| !cache->temporary) {
2739 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2740 		} else
2741 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2742 
2743 		if (usageCount > 0) {
2744 			usageCount += page->usage_count + kPageUsageAdvance;
2745 			if (usageCount > kPageUsageMax)
2746 				usageCount = kPageUsageMax;
2747 // TODO: This would probably also be the place to reclaim swap space.
2748 		} else {
2749 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2750 			if (usageCount < 0) {
2751 				usageCount = 0;
2752 				set_page_state(page, PAGE_STATE_INACTIVE);
2753 			}
2754 		}
2755 
2756 		page->usage_count = usageCount;
2757 
2758 		DEBUG_PAGE_ACCESS_END(page);
2759 
2760 		cache->ReleaseRefAndUnlock();
2761 	}
2762 }
2763 
2764 
2765 static void
2766 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2767 {
2768 	int32 pagesToFree = pageStats.unsatisfiedReservations
2769 		+ sFreeOrCachedPagesTarget
2770 		- (pageStats.totalFreePages + pageStats.cachedPages);
2771 	if (pagesToFree <= 0)
2772 		return;
2773 
2774 	bigtime_t time = system_time();
2775 	uint32 pagesScanned = 0;
2776 	uint32 pagesToCached = 0;
2777 	uint32 pagesToModified = 0;
2778 	uint32 pagesToActive = 0;
2779 
2780 	// Determine how many pages at maximum to send to the modified queue. Since
2781 	// it is relatively expensive to page out pages, we do that on a grander
2782 	// scale only when things get desperate.
2783 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2784 
2785 	vm_page marker;
2786 	init_page_marker(marker);
2787 
2788 	VMPageQueue& queue = sInactivePageQueue;
2789 	InterruptsSpinLocker queueLocker(queue.GetLock());
2790 	uint32 maxToScan = queue.Count();
2791 
2792 	vm_page* nextPage = queue.Head();
2793 
2794 	while (pagesToFree > 0 && maxToScan > 0) {
2795 		maxToScan--;
2796 
2797 		// get the next page
2798 		vm_page* page = nextPage;
2799 		if (page == NULL)
2800 			break;
2801 		nextPage = queue.Next(page);
2802 
2803 		if (page->busy)
2804 			continue;
2805 
2806 		// mark the position
2807 		queue.InsertAfter(page, &marker);
2808 		queueLocker.Unlock();
2809 
2810 		// lock the page's cache
2811 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2812 		if (cache == NULL || page->busy
2813 				|| page->State() != PAGE_STATE_INACTIVE) {
2814 			if (cache != NULL)
2815 				cache->ReleaseRefAndUnlock();
2816 			queueLocker.Lock();
2817 			nextPage = queue.Next(&marker);
2818 			queue.Remove(&marker);
2819 			continue;
2820 		}
2821 
2822 		pagesScanned++;
2823 
2824 		DEBUG_PAGE_ACCESS_START(page);
2825 
2826 		// Get the accessed count, clear the accessed/modified flags and
2827 		// unmap the page, if it hasn't been accessed.
2828 		int32 usageCount;
2829 		if (page->WiredCount() > 0)
2830 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2831 		else
2832 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2833 
2834 		// update usage count
2835 		if (usageCount > 0) {
2836 			usageCount += page->usage_count + kPageUsageAdvance;
2837 			if (usageCount > kPageUsageMax)
2838 				usageCount = kPageUsageMax;
2839 		} else {
2840 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2841 			if (usageCount < 0)
2842 				usageCount = 0;
2843 		}
2844 
2845 		page->usage_count = usageCount;
2846 
2847 		// Move to fitting queue or requeue:
2848 		// * Active mapped pages go to the active queue.
2849 		// * Inactive mapped (i.e. wired) pages are requeued.
2850 		// * The remaining pages are cacheable. Thus, if unmodified they go to
2851 		//   the cached queue, otherwise to the modified queue (up to a limit).
2852 		//   Note that, unlike in the idle scan, we don't exempt pages of
2853 		//   temporary caches here. Apparently we really need memory, so we
2854 		//   better page out memory as well.
2855 		bool isMapped = page->IsMapped();
2856 		if (usageCount > 0) {
2857 			if (isMapped) {
2858 				set_page_state(page, PAGE_STATE_ACTIVE);
2859 				pagesToActive++;
2860 			} else
2861 				vm_page_requeue(page, true);
2862 		} else if (isMapped) {
2863 			vm_page_requeue(page, true);
2864 		} else if (!page->modified) {
2865 			set_page_state(page, PAGE_STATE_CACHED);
2866 			pagesToFree--;
2867 			pagesToCached++;
2868 		} else if (maxToFlush > 0) {
2869 			set_page_state(page, PAGE_STATE_MODIFIED);
2870 			maxToFlush--;
2871 			pagesToModified++;
2872 		} else
2873 			vm_page_requeue(page, true);
2874 
2875 		DEBUG_PAGE_ACCESS_END(page);
2876 
2877 		cache->ReleaseRefAndUnlock();
2878 
2879 		// remove the marker
2880 		queueLocker.Lock();
2881 		nextPage = queue.Next(&marker);
2882 		queue.Remove(&marker);
2883 	}
2884 
2885 	queueLocker.Unlock();
2886 
2887 	time = system_time() - time;
2888 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2889 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2890 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2891 		pagesToModified, pagesToActive);
2892 
2893 	// wake up the page writer, if we tossed it some pages
2894 	if (pagesToModified > 0)
2895 		sPageWriterCondition.WakeUp();
2896 }
2897 
2898 
2899 static void
2900 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2901 {
2902 	vm_page marker;
2903 	init_page_marker(marker);
2904 
2905 	VMPageQueue& queue = sActivePageQueue;
2906 	InterruptsSpinLocker queueLocker(queue.GetLock());
2907 	uint32 maxToScan = queue.Count();
2908 
2909 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2910 		+ sFreeOrCachedPagesTarget
2911 		- (pageStats.totalFreePages + pageStats.cachedPages)
2912 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2913 	if (pagesToDeactivate <= 0)
2914 		return;
2915 
2916 	bigtime_t time = system_time();
2917 	uint32 pagesAccessed = 0;
2918 	uint32 pagesToInactive = 0;
2919 	uint32 pagesScanned = 0;
2920 
2921 	vm_page* nextPage = queue.Head();
2922 
2923 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2924 		maxToScan--;
2925 
2926 		// get the next page
2927 		vm_page* page = nextPage;
2928 		if (page == NULL)
2929 			break;
2930 		nextPage = queue.Next(page);
2931 
2932 		if (page->busy)
2933 			continue;
2934 
2935 		// mark the position
2936 		queue.InsertAfter(page, &marker);
2937 		queueLocker.Unlock();
2938 
2939 		// lock the page's cache
2940 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2941 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2942 			if (cache != NULL)
2943 				cache->ReleaseRefAndUnlock();
2944 			queueLocker.Lock();
2945 			nextPage = queue.Next(&marker);
2946 			queue.Remove(&marker);
2947 			continue;
2948 		}
2949 
2950 		pagesScanned++;
2951 
2952 		DEBUG_PAGE_ACCESS_START(page);
2953 
2954 		// Get the page active/modified flags and update the page's usage count.
2955 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2956 
2957 		if (usageCount > 0) {
2958 			usageCount += page->usage_count + kPageUsageAdvance;
2959 			if (usageCount > kPageUsageMax)
2960 				usageCount = kPageUsageMax;
2961 			pagesAccessed++;
2962 // TODO: This would probably also be the place to reclaim swap space.
2963 		} else {
2964 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2965 			if (usageCount <= 0) {
2966 				usageCount = 0;
2967 				set_page_state(page, PAGE_STATE_INACTIVE);
2968 				pagesToInactive++;
2969 			}
2970 		}
2971 
2972 		page->usage_count = usageCount;
2973 
2974 		DEBUG_PAGE_ACCESS_END(page);
2975 
2976 		cache->ReleaseRefAndUnlock();
2977 
2978 		// remove the marker
2979 		queueLocker.Lock();
2980 		nextPage = queue.Next(&marker);
2981 		queue.Remove(&marker);
2982 	}
2983 
2984 	time = system_time() - time;
2985 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2986 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2987 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2988 }
2989 
2990 
2991 static void
2992 page_daemon_idle_scan(page_stats& pageStats)
2993 {
2994 	TRACE_DAEMON("page daemon: idle run\n");
2995 
2996 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2997 		// We want more actually free pages, so free some from the cached
2998 		// ones.
2999 		uint32 freed = free_cached_pages(
3000 			sFreePagesTarget - pageStats.totalFreePages, false);
3001 		if (freed > 0)
3002 			unreserve_pages(freed);
3003 		get_page_stats(pageStats);
3004 	}
3005 
3006 	// Walk the active list and move pages to the inactive queue.
3007 	get_page_stats(pageStats);
3008 	idle_scan_active_pages(pageStats);
3009 }
3010 
3011 
3012 static void
3013 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
3014 {
3015 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
3016 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
3017 		pageStats.cachedPages, pageStats.unsatisfiedReservations
3018 			+ sFreeOrCachedPagesTarget
3019 			- (pageStats.totalFreePages + pageStats.cachedPages));
3020 
3021 	// Walk the inactive list and transfer pages to the cached and modified
3022 	// queues.
3023 	full_scan_inactive_pages(pageStats, despairLevel);
3024 
3025 	// Free cached pages. Also wake up reservation waiters.
3026 	get_page_stats(pageStats);
3027 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
3028 		- (pageStats.totalFreePages);
3029 	if (pagesToFree > 0) {
3030 		uint32 freed = free_cached_pages(pagesToFree, true);
3031 		if (freed > 0)
3032 			unreserve_pages(freed);
3033 	}
3034 
3035 	// Walk the active list and move pages to the inactive queue.
3036 	get_page_stats(pageStats);
3037 	full_scan_active_pages(pageStats, despairLevel);
3038 }
3039 
3040 
3041 static status_t
3042 page_daemon(void* /*unused*/)
3043 {
3044 	int32 despairLevel = 0;
3045 
3046 	while (true) {
3047 		sPageDaemonCondition.ClearActivated();
3048 
3049 		// evaluate the free pages situation
3050 		page_stats pageStats;
3051 		get_page_stats(pageStats);
3052 
3053 		if (!do_active_paging(pageStats)) {
3054 			// Things look good -- just maintain statistics and keep the pool
3055 			// of actually free pages full enough.
3056 			despairLevel = 0;
3057 			page_daemon_idle_scan(pageStats);
3058 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
3059 		} else {
3060 			// Not enough free pages. We need to do some real work.
3061 			despairLevel = std::min(despairLevel + 1, (int32)3);
3062 			page_daemon_full_scan(pageStats, despairLevel);
3063 
3064 			// Don't wait after the first full scan, but rather immediately
3065 			// check whether we were successful in freeing enough pages and
3066 			// re-run with increased despair level. The first scan is
3067 			// conservative with respect to moving inactive modified pages to
3068 			// the modified list to avoid thrashing. The second scan, however,
3069 			// will not hold back.
3070 			if (despairLevel > 1)
3071 				snooze(kBusyScanWaitInterval);
3072 		}
3073 	}
3074 
3075 	return B_OK;
3076 }
3077 
3078 
3079 /*!	Returns how many pages could *not* be reserved.
3080 */
3081 static uint32
3082 reserve_pages(uint32 count, int priority, bool dontWait)
3083 {
3084 	int32 dontTouch = kPageReserveForPriority[priority];
3085 
3086 	while (true) {
3087 		count -= reserve_some_pages(count, dontTouch);
3088 		if (count == 0)
3089 			return 0;
3090 
3091 		if (sUnsatisfiedPageReservations == 0) {
3092 			count -= free_cached_pages(count, dontWait);
3093 			if (count == 0)
3094 				return count;
3095 		}
3096 
3097 		if (dontWait)
3098 			return count;
3099 
3100 		// we need to wait for pages to become available
3101 
3102 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3103 
3104 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3105 		sUnsatisfiedPageReservations += count;
3106 
3107 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3108 			// the situation changed
3109 			sUnsatisfiedPageReservations -= count;
3110 			continue;
3111 		}
3112 
3113 		PageReservationWaiter waiter;
3114 		waiter.dontTouch = dontTouch;
3115 		waiter.missing = count;
3116 		waiter.thread = thread_get_current_thread();
3117 		waiter.threadPriority = waiter.thread->priority;
3118 
3119 		// insert ordered (i.e. after all waiters with higher or equal priority)
3120 		PageReservationWaiter* otherWaiter = NULL;
3121 		for (PageReservationWaiterList::Iterator it
3122 				= sPageReservationWaiters.GetIterator();
3123 			(otherWaiter = it.Next()) != NULL;) {
3124 			if (waiter < *otherWaiter)
3125 				break;
3126 		}
3127 
3128 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3129 
3130 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3131 			"waiting for pages");
3132 
3133 		if (notifyDaemon)
3134 			sPageDaemonCondition.WakeUp();
3135 
3136 		pageDeficitLocker.Unlock();
3137 
3138 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3139 		thread_block();
3140 
3141 		pageDeficitLocker.Lock();
3142 
3143 		return 0;
3144 	}
3145 }
3146 
3147 
3148 //	#pragma mark - private kernel API
3149 
3150 
3151 /*!	Writes a range of modified pages of a cache to disk.
3152 	You need to hold the VMCache lock when calling this function.
3153 	Note that the cache lock is released in this function.
3154 	\param cache The cache.
3155 	\param firstPage Offset (in page size units) of the first page in the range.
3156 	\param endPage End offset (in page size units) of the page range. The page
3157 		at this offset is not included.
3158 */
3159 status_t
3160 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3161 	uint32 endPage)
3162 {
3163 	static const int32 kMaxPages = 256;
3164 	int32 maxPages = cache->MaxPagesPerWrite();
3165 	if (maxPages < 0 || maxPages > kMaxPages)
3166 		maxPages = kMaxPages;
3167 
3168 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3169 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3170 
3171 	PageWriteWrapper stackWrappersPool[2];
3172 	PageWriteWrapper* stackWrappers[1];
3173 	PageWriteWrapper* wrapperPool
3174 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3175 	PageWriteWrapper** wrappers
3176 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3177 	if (wrapperPool == NULL || wrappers == NULL) {
3178 		// don't fail, just limit our capabilities
3179 		delete[] wrapperPool;
3180 		delete[] wrappers;
3181 		wrapperPool = stackWrappersPool;
3182 		wrappers = stackWrappers;
3183 		maxPages = 1;
3184 	}
3185 
3186 	int32 nextWrapper = 0;
3187 	int32 usedWrappers = 0;
3188 
3189 	PageWriteTransfer transfer;
3190 	bool transferEmpty = true;
3191 
3192 	VMCachePagesTree::Iterator it
3193 		= cache->pages.GetIterator(firstPage, true, true);
3194 
3195 	while (true) {
3196 		vm_page* page = it.Next();
3197 		if (page == NULL || page->cache_offset >= endPage) {
3198 			if (transferEmpty)
3199 				break;
3200 
3201 			page = NULL;
3202 		}
3203 
3204 		if (page != NULL) {
3205 			if (page->busy
3206 				|| (page->State() != PAGE_STATE_MODIFIED
3207 					&& !vm_test_map_modification(page))) {
3208 				page = NULL;
3209 			}
3210 		}
3211 
3212 		PageWriteWrapper* wrapper = NULL;
3213 		if (page != NULL) {
3214 			wrapper = &wrapperPool[nextWrapper++];
3215 			if (nextWrapper > maxPages)
3216 				nextWrapper = 0;
3217 
3218 			DEBUG_PAGE_ACCESS_START(page);
3219 
3220 			wrapper->SetTo(page);
3221 
3222 			if (transferEmpty || transfer.AddPage(page)) {
3223 				if (transferEmpty) {
3224 					transfer.SetTo(NULL, page, maxPages);
3225 					transferEmpty = false;
3226 				}
3227 
3228 				DEBUG_PAGE_ACCESS_END(page);
3229 
3230 				wrappers[usedWrappers++] = wrapper;
3231 				continue;
3232 			}
3233 
3234 			DEBUG_PAGE_ACCESS_END(page);
3235 		}
3236 
3237 		if (transferEmpty)
3238 			continue;
3239 
3240 		cache->Unlock();
3241 		status_t status = transfer.Schedule(0);
3242 		cache->Lock();
3243 
3244 		for (int32 i = 0; i < usedWrappers; i++)
3245 			wrappers[i]->Done(status);
3246 
3247 		usedWrappers = 0;
3248 
3249 		if (page != NULL) {
3250 			transfer.SetTo(NULL, page, maxPages);
3251 			wrappers[usedWrappers++] = wrapper;
3252 		} else
3253 			transferEmpty = true;
3254 	}
3255 
3256 	if (wrapperPool != stackWrappersPool) {
3257 		delete[] wrapperPool;
3258 		delete[] wrappers;
3259 	}
3260 
3261 	return B_OK;
3262 }
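
/*	Usage sketch (illustrative only): flushing the first 16 pages of a cache.
	The caller provides and keeps the cache lock; the function drops and
	re-acquires it internally while the writes are scheduled.

		cache->Lock();
		status_t status = vm_page_write_modified_page_range(cache, 0, 16);
		cache->Unlock();
*/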
3263 
3264 
3265 /*!	You need to hold the VMCache lock when calling this function.
3266 	Note that the cache lock is released in this function.
3267 */
3268 status_t
3269 vm_page_write_modified_pages(VMCache *cache)
3270 {
3271 	return vm_page_write_modified_page_range(cache, 0,
3272 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3273 }
3274 
3275 
3276 /*!	Schedules the page writer to write back the specified \a page.
3277 	Note, however, that it might not do this immediately, and it can well
3278 	take several seconds until the page is actually written out.
3279 */
3280 void
3281 vm_page_schedule_write_page(vm_page *page)
3282 {
3283 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3284 
3285 	vm_page_requeue(page, false);
3286 
3287 	sPageWriterCondition.WakeUp();
3288 }
3289 
3290 
3291 /*!	Cache must be locked.
3292 */
3293 void
3294 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3295 	uint32 endPage)
3296 {
3297 	uint32 modified = 0;
3298 	for (VMCachePagesTree::Iterator it
3299 				= cache->pages.GetIterator(firstPage, true, true);
3300 			vm_page *page = it.Next();) {
3301 		if (page->cache_offset >= endPage)
3302 			break;
3303 
3304 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3305 			DEBUG_PAGE_ACCESS_START(page);
3306 			vm_page_requeue(page, false);
3307 			modified++;
3308 			DEBUG_PAGE_ACCESS_END(page);
3309 		}
3310 	}
3311 
3312 	if (modified > 0)
3313 		sPageWriterCondition.WakeUp();
3314 }
3315 
3316 
3317 void
3318 vm_page_init_num_pages(kernel_args *args)
3319 {
3320 	// calculate the size of memory by looking at the physical_memory_range array
3321 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3322 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3323 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3324 
3325 	sNonExistingPages = 0;
3326 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3327 
3328 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3329 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3330 		if (start > physicalPagesEnd)
3331 			sNonExistingPages += start - physicalPagesEnd;
3332 		physicalPagesEnd = start
3333 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3334 
3335 #ifdef LIMIT_AVAILABLE_MEMORY
3336 		page_num_t available
3337 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3338 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3339 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3340 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3341 			break;
3342 		}
3343 #endif
3344 	}
3345 
3346 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3347 		sPhysicalPageOffset, physicalPagesEnd));
3348 
3349 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3350 }
3351 
3352 
3353 status_t
3354 vm_page_init(kernel_args *args)
3355 {
3356 	TRACE(("vm_page_init: entry\n"));
3357 
3358 	// init page queues
3359 	sModifiedPageQueue.Init("modified pages queue");
3360 	sInactivePageQueue.Init("inactive pages queue");
3361 	sActivePageQueue.Init("active pages queue");
3362 	sCachedPageQueue.Init("cached pages queue");
3363 	sFreePageQueue.Init("free pages queue");
3364 	sClearPageQueue.Init("clear pages queue");
3365 
3366 	new (&sPageReservationWaiters) PageReservationWaiterList;
3367 
3368 	// map in the new free page table
3369 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3370 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3371 
3372 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3373 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3374 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3375 
3376 	// initialize the free page table
3377 	for (uint32 i = 0; i < sNumPages; i++) {
3378 		sPages[i].Init(sPhysicalPageOffset + i);
3379 		sFreePageQueue.Append(&sPages[i]);
3380 
3381 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3382 		sPages[i].allocation_tracking_info.Clear();
3383 #endif
3384 	}
3385 
3386 	sUnreservedFreePages = sNumPages;
3387 
3388 	TRACE(("initialized table\n"));
3389 
3390 	// mark the ranges between usable physical memory ranges unused
3391 	phys_addr_t previousEnd = 0;
3392 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3393 		phys_addr_t base = args->physical_memory_range[i].start;
3394 		phys_size_t size = args->physical_memory_range[i].size;
3395 		if (base > previousEnd) {
3396 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3397 				(base - previousEnd) / B_PAGE_SIZE, false);
3398 		}
3399 		previousEnd = base + size;
3400 	}
3401 
3402 	// mark the allocated physical page ranges wired
3403 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3404 		mark_page_range_in_use(
3405 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3406 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3407 	}
3408 
3409 	// prevent future allocations from the kernel args ranges
3410 	args->num_physical_allocated_ranges = 0;
3411 
3412 	// The target of actually free pages. This must be at least the system
3413 	// reserve, but should be a few more pages, so we don't have to extract
3414 	// a cached page with each allocation.
3415 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3416 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3417 
3418 	// The target of free + cached and inactive pages. On low-memory machines
3419 	// keep things tight. free + cached is the pool of immediately allocatable
3420 	// pages. We want a few inactive pages, so when we're actually paging, we
3421 	// have a reasonably large set of pages to work with.
3422 	if (sUnreservedFreePages < 16 * 1024) {
3423 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3424 		sInactivePagesTarget = sFreePagesTarget / 3;
3425 	} else {
3426 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3427 		sInactivePagesTarget = sFreePagesTarget / 2;
3428 	}
3429 
3430 	TRACE(("vm_page_init: exit\n"));
3431 
3432 	return B_OK;
3433 }
3434 
3435 
3436 status_t
3437 vm_page_init_post_area(kernel_args *args)
3438 {
3439 	void *dummy;
3440 
3441 	dummy = sPages;
3442 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3443 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3444 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3445 
3446 	add_debugger_command("list_pages", &dump_page_list,
3447 		"List physical pages");
3448 	add_debugger_command("page_stats", &dump_page_stats,
3449 		"Dump statistics about page usage");
3450 	add_debugger_command_etc("page", &dump_page_long,
3451 		"Dump page info",
3452 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3453 		"Prints information for the physical page. If neither \"-p\" nor\n"
3454 		"\"-v\" are given, the provided address is interpreted as address of\n"
3455 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3456 		"given, the address is the physical address of the page. If \"-v\" is\n"
3457 		"given, the address is interpreted as virtual address in the current\n"
3458 		"thread's address space and for the page it is mapped to (if any)\n"
3459 		"information are printed. If \"-m\" is specified, the command will\n"
3460 		"search all known address spaces for mappings to that page and print\n"
3461 		"them.\n", 0);
3462 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3463 	add_debugger_command("find_page", &find_page,
3464 		"Find out which queue a page is actually in");
3465 
3466 #ifdef TRACK_PAGE_USAGE_STATS
3467 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3468 		"Dumps statistics about page usage counts",
3469 		"\n"
3470 		"Dumps statistics about page usage counts.\n",
3471 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3472 #endif
3473 
3474 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3475 	add_debugger_command_etc("page_allocations_per_caller",
3476 		&dump_page_allocations_per_caller,
3477 		"Dump current page allocations summed up per caller",
3478 		"[ -d <caller> ] [ -r ]\n"
3479 		"The current allocations will by summed up by caller (their count)\n"
3480 		"printed in decreasing order by count.\n"
3481 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3482 		"including the respective stack trace.\n"
3483 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3484 		"the information, so the next command invocation will only show the\n"
3485 		"allocations made after the reset.\n", 0);
3486 	add_debugger_command_etc("page_allocation_infos",
3487 		&dump_page_allocation_infos,
3488 		"Dump current page allocations",
3489 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3490 		"[ --thread <thread ID> ]\n"
3491 		"The current allocations filtered by optional values will be printed.\n"
3492 		"The optional \"-p\" page number filters for a specific page,\n"
3493 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3494 		"and/or threads can be filtered (these only work if a corresponding\n"
3495 		"tracing entry is still available).\n"
3496 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3497 		"callers are printed, where available\n", 0);
3498 #endif
3499 
3500 	return B_OK;
3501 }
3502 
3503 
3504 status_t
3505 vm_page_init_post_thread(kernel_args *args)
3506 {
3507 	new (&sFreePageCondition) ConditionVariable;
3508 
3509 	// create a kernel thread to clear out pages
3510 
3511 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3512 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3513 	resume_thread(thread);
3514 
3515 	// start page writer
3516 
3517 	sPageWriterCondition.Init("page writer");
3518 
3519 	thread = spawn_kernel_thread(&page_writer, "page writer",
3520 		B_NORMAL_PRIORITY + 1, NULL);
3521 	resume_thread(thread);
3522 
3523 	// start page daemon
3524 
3525 	sPageDaemonCondition.Init("page daemon");
3526 
3527 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3528 		B_NORMAL_PRIORITY, NULL);
3529 	resume_thread(thread);
3530 
3531 	return B_OK;
3532 }
3533 
3534 
3535 status_t
3536 vm_mark_page_inuse(page_num_t page)
3537 {
3538 	return vm_mark_page_range_inuse(page, 1);
3539 }
3540 
3541 
3542 status_t
3543 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3544 {
3545 	return mark_page_range_in_use(startPage, length, false);
3546 }
3547 
3548 
3549 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3550 */
3551 void
3552 vm_page_unreserve_pages(vm_page_reservation* reservation)
3553 {
3554 	uint32 count = reservation->count;
3555 	reservation->count = 0;
3556 
3557 	if (count == 0)
3558 		return;
3559 
3560 	TA(UnreservePages(count));
3561 
3562 	unreserve_pages(count);
3563 }
3564 
3565 
3566 /*!	With this call, you can reserve a number of free pages in the system.
3567 	They will only be handed out to someone who has actually reserved them.
3568 	This call returns as soon as the number of requested pages has been
3569 	reached.
3570 	The caller must not hold any cache lock or the function might deadlock.
3571 */
3572 void
3573 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3574 	int priority)
3575 {
3576 	reservation->count = count;
3577 
3578 	if (count == 0)
3579 		return;
3580 
3581 	TA(ReservePages(count));
3582 
3583 	reserve_pages(count, priority, false);
3584 }
3585 
3586 
3587 bool
3588 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3589 	int priority)
3590 {
3591 	if (count == 0) {
3592 		reservation->count = count;
3593 		return true;
3594 	}
3595 
3596 	uint32 remaining = reserve_pages(count, priority, true);
3597 	if (remaining == 0) {
3598 		TA(ReservePages(count));
3599 		reservation->count = count;
3600 		return true;
3601 	}
3602 
3603 	unreserve_pages(count - remaining);
3604 
3605 	return false;
3606 }
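
/*	Usage sketch (illustrative only): a caller that would rather fail than
	block on the page daemon. VM_PRIORITY_USER is the usual priority for
	allocations made on behalf of userland.

		vm_page_reservation reservation;
		if (!vm_page_try_reserve_pages(&reservation, 4, VM_PRIORITY_USER))
			return B_NO_MEMORY;

		// ... allocate up to 4 pages via vm_page_allocate_page() ...

		vm_page_unreserve_pages(&reservation);
			// returns whatever part of the reservation was not consumed
*/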
3607 
3608 
3609 vm_page *
3610 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3611 {
3612 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3613 	ASSERT(pageState != PAGE_STATE_FREE);
3614 	ASSERT(pageState != PAGE_STATE_CLEAR);
3615 
3616 	ASSERT(reservation->count > 0);
3617 	reservation->count--;
3618 
3619 	VMPageQueue* queue;
3620 	VMPageQueue* otherQueue;
3621 
3622 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3623 		queue = &sClearPageQueue;
3624 		otherQueue = &sFreePageQueue;
3625 	} else {
3626 		queue = &sFreePageQueue;
3627 		otherQueue = &sClearPageQueue;
3628 	}
3629 
3630 	ReadLocker locker(sFreePageQueuesLock);
3631 
3632 	vm_page* page = queue->RemoveHeadUnlocked();
3633 	if (page == NULL) {
3634 		// if the primary queue was empty, grab the page from the
3635 		// secondary queue
3636 		page = otherQueue->RemoveHeadUnlocked();
3637 
3638 		if (page == NULL) {
3639 			// Unlikely, but possible: the page we have reserved has moved
3640 			// between the queues after we checked the first queue. Grab the
3641 			// write locker to make sure this doesn't happen again.
3642 			locker.Unlock();
3643 			WriteLocker writeLocker(sFreePageQueuesLock);
3644 
3645 			page = queue->RemoveHead();
3646 			if (page == NULL)
3647 				page = otherQueue->RemoveHead();
3648 
3649 			if (page == NULL) {
3650 				panic("Had reserved page, but there is none!");
3651 				return NULL;
3652 			}
3653 
3654 			// downgrade to read lock
3655 			locker.Lock();
3656 		}
3657 	}
3658 
3659 	if (page->CacheRef() != NULL)
3660 		panic("supposed to be free page %p has cache @! page %p; cache _cache", page, page);
3661 
3662 	DEBUG_PAGE_ACCESS_START(page);
3663 
3664 	int oldPageState = page->State();
3665 	page->SetState(pageState);
3666 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3667 	page->usage_count = 0;
3668 	page->accessed = false;
3669 	page->modified = false;
3670 
3671 	locker.Unlock();
3672 
3673 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3674 		sPageQueues[pageState].AppendUnlocked(page);
3675 
3676 	// clear the page, if we had to take it from the free queue and a clear
3677 	// page was requested
3678 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3679 		clear_page(page);
3680 
3681 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3682 	page->allocation_tracking_info.Init(
3683 		TA(AllocatePage(page->physical_page_number)));
3684 #else
3685 	TA(AllocatePage(page->physical_page_number));
3686 #endif
3687 
3688 	return page;
3689 }
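
/*	Usage sketch (illustrative only): allocating a single zeroed page from a
	blocking reservation, as a kernel-internal caller typically would.
	PAGE_STATE_WIRED keeps the page out of the paging queues, and
	VM_PAGE_ALLOC_CLEAR prefers the clear queue, clearing the page otherwise.

		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);

		vm_page* page = vm_page_allocate_page(&reservation,
			PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		phys_addr_t physicalAddress
			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;

		vm_page_unreserve_pages(&reservation);
			// a no-op here, since the single reserved page was allocated
*/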
3690 
3691 
3692 static void
3693 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3694 	VMPageQueue::PageList& clearPages)
3695 {
3696 	// Page lists are sorted, so remove tails before prepending to the respective queue.
3697 
3698 	while (vm_page* page = freePages.RemoveTail()) {
3699 		page->busy = false;
3700 		page->SetState(PAGE_STATE_FREE);
3701 		DEBUG_PAGE_ACCESS_END(page);
3702 		sFreePageQueue.PrependUnlocked(page);
3703 	}
3704 
3705 	while (vm_page* page = clearPages.RemoveTail()) {
3706 		page->busy = false;
3707 		page->SetState(PAGE_STATE_CLEAR);
3708 		DEBUG_PAGE_ACCESS_END(page);
3709 		sClearPageQueue.PrependUnlocked(page);
3710 	}
3711 
3712 	sFreePageCondition.NotifyAll();
3713 }
3714 
3715 
3716 /*!	Tries to allocate a contiguous run of \a length pages starting at
3717 	index \a start.
3718 
3719 	The caller must have write-locked the free/clear page queues. The function
3720 	will unlock regardless of whether it succeeds or fails.
3721 
3722 	If the function fails, it cleans up after itself, i.e. it will free all
3723 	pages it managed to allocate.
3724 
3725 	\param start The start index (into \c sPages) of the run.
3726 	\param length The number of pages to allocate.
3727 	\param flags Page allocation flags. Encodes the state the function shall
3728 		set the allocated pages to, whether the pages shall be marked busy
3729 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3730 		(VM_PAGE_ALLOC_CLEAR).
3731 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3732 		held in locked state. Will be unlocked by the function.
3733 	\return The index of the first page that could not be allocated. \a length
3734 		is returned when the function was successful.
3735 */
3736 static page_num_t
3737 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3738 	WriteLocker& freeClearQueueLocker)
3739 {
3740 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3741 	ASSERT(pageState != PAGE_STATE_FREE);
3742 	ASSERT(pageState != PAGE_STATE_CLEAR);
3743 	ASSERT(start + length <= sNumPages);
3744 
3745 	// Pull the free/clear pages out of their respective queues. Cached pages
3746 	// are allocated later.
3747 	page_num_t cachedPages = 0;
3748 	VMPageQueue::PageList freePages;
3749 	VMPageQueue::PageList clearPages;
3750 	page_num_t i = 0;
3751 	for (; i < length; i++) {
3752 		bool pageAllocated = true;
3753 		bool noPage = false;
3754 		vm_page& page = sPages[start + i];
3755 		switch (page.State()) {
3756 			case PAGE_STATE_CLEAR:
3757 				DEBUG_PAGE_ACCESS_START(&page);
3758 				sClearPageQueue.Remove(&page);
3759 				clearPages.Add(&page);
3760 				break;
3761 			case PAGE_STATE_FREE:
3762 				DEBUG_PAGE_ACCESS_START(&page);
3763 				sFreePageQueue.Remove(&page);
3764 				freePages.Add(&page);
3765 				break;
3766 			case PAGE_STATE_CACHED:
3767 				// We allocate cached pages later.
3768 				cachedPages++;
3769 				pageAllocated = false;
3770 				break;
3771 
3772 			default:
3773 				// Probably a page was cached when our caller checked. Now it's
3774 				// gone and we have to abort.
3775 				noPage = true;
3776 				break;
3777 		}
3778 
3779 		if (noPage)
3780 			break;
3781 
3782 		if (pageAllocated) {
3783 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3784 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3785 			page.usage_count = 0;
3786 			page.accessed = false;
3787 			page.modified = false;
3788 		}
3789 	}
3790 
3791 	if (i < length) {
3792 		// failed to allocate a page -- free all that we've got
3793 		allocate_page_run_cleanup(freePages, clearPages);
3794 		return i;
3795 	}
3796 
3797 	freeClearQueueLocker.Unlock();
3798 
3799 	if (cachedPages > 0) {
3800 		// allocate the pages that weren't free but cached
3801 		page_num_t freedCachedPages = 0;
3802 		page_num_t nextIndex = start;
3803 		vm_page* freePage = freePages.Head();
3804 		vm_page* clearPage = clearPages.Head();
3805 		while (cachedPages > 0) {
3806 			// skip, if we've already got the page
3807 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3808 				freePage = freePages.GetNext(freePage);
3809 				nextIndex++;
3810 				continue;
3811 			}
3812 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3813 				clearPage = clearPages.GetNext(clearPage);
3814 				nextIndex++;
3815 				continue;
3816 			}
3817 
3818 			// free the page, if it is still cached
3819 			vm_page& page = sPages[nextIndex];
3820 			if (!free_cached_page(&page, false)) {
3821 				// TODO: if the page turns out to have been freed already,
3822 				// there would be no need to fail
3823 				break;
3824 			}
3825 
3826 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3827 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3828 			page.usage_count = 0;
3829 			page.accessed = false;
3830 			page.modified = false;
3831 
3832 			freePages.InsertBefore(freePage, &page);
3833 			freedCachedPages++;
3834 			cachedPages--;
3835 			nextIndex++;
3836 		}
3837 
3838 		// If we have freed cached pages, we need to balance the unreserved
		// pages count, since they didn't come out of the free/clear queues.
3839 		if (freedCachedPages > 0)
3840 			unreserve_pages(freedCachedPages);
3841 
3842 		if (nextIndex - start < length) {
3843 			// failed to allocate all cached pages -- free all that we've got
3844 			freeClearQueueLocker.Lock();
3845 			allocate_page_run_cleanup(freePages, clearPages);
3846 			freeClearQueueLocker.Unlock();
3847 
3848 			return nextIndex - start;
3849 		}
3850 	}
3851 
3852 	// clear pages, if requested (clear queue pages are already zeroed)
3853 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3854 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3855 				vm_page* page = it.Next();) {
3856 			clear_page(page);
3857 		}
3858 	}
3859 
3860 	// add pages to target queue
3861 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3862 		freePages.MoveFrom(&clearPages);
3863 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3864 	}
3865 
3866 	// Note: We don't unreserve the pages since we pulled them out of the
3867 	// free/clear queues without adjusting sUnreservedFreePages.
3868 
3869 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3870 	AbstractTraceEntryWithStackTrace* traceEntry
3871 		= TA(AllocatePageRun(start, length));
3872 
3873 	for (page_num_t i = start; i < start + length; i++)
3874 		sPages[i].allocation_tracking_info.Init(traceEntry);
3875 #else
3876 	TA(AllocatePageRun(start, length));
3877 #endif
3878 
3879 	return length;
3880 }
3881 
3882 
3883 /*! Allocate a physically contiguous range of pages.
3884 
3885 	\param flags Page allocation flags. Encodes the state the function shall
3886 		set the allocated pages to, whether the pages shall be marked busy
3887 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3888 		(VM_PAGE_ALLOC_CLEAR).
3889 	\param length The number of contiguous pages to allocate.
3890 	\param restrictions Restrictions to the physical addresses of the page run
3891 		to allocate, including \c low_address, the first acceptable physical
3892 		address where the page run may start, \c high_address, the last
3893 		acceptable physical address where the page run may end (i.e. it must
3894 		hold \code runStartAddress + length * B_PAGE_SIZE <= high_address
3895 		\endcode), \c alignment, the alignment of the page run start address,
3896 		and \c boundary, multiples of which the page run must not cross.
3897 		Values set to \c 0 are ignored.
3898 	\param priority The page reservation priority (as passed to
3899 		vm_page_reserve_pages()).
3900 	\return The first page of the allocated page run on success; \c NULL
3901 		when the allocation failed.
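
	A rough usage sketch; the restriction values and flags are illustrative
	only:
	\code
	physical_address_restrictions restrictions = {};
	restrictions.high_address = 16 * 1024 * 1024;
		// e.g. a device that can only address the first 16 MB
	restrictions.alignment = 64 * 1024;
	vm_page* firstPage = vm_page_allocate_page_run(
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_BUSY | VM_PAGE_ALLOC_CLEAR, 4,
		&restrictions, VM_PRIORITY_SYSTEM);
	if (firstPage == NULL)
		return B_NO_MEMORY;
	\endcode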
3902 */
3903 vm_page*
3904 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3905 	const physical_address_restrictions* restrictions, int priority)
3906 {
3907 	// compute start and end page index
3908 	page_num_t requestedStart
3909 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3910 			- sPhysicalPageOffset;
3911 	page_num_t start = requestedStart;
3912 	page_num_t end;
3913 	if (restrictions->high_address > 0) {
3914 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3915 				sPhysicalPageOffset)
3916 			- sPhysicalPageOffset;
3917 		end = std::min(end, sNumPages);
3918 	} else
3919 		end = sNumPages;
3920 
3921 	// compute alignment mask
3922 	page_num_t alignmentMask
3923 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3924 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3925 		// alignment must be a power of 2
3926 
3927 	// compute the boundary mask
3928 	page_num_t boundaryMask = 0;
3929 	if (restrictions->boundary != 0) {
3930 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3931 		// boundary must be a power of two and not less than alignment and
3932 		// length
3933 		ASSERT(((boundary - 1) & boundary) == 0);
3934 		ASSERT(boundary >= alignmentMask + 1);
3935 		ASSERT(boundary >= length);
3936 
3937 		boundaryMask = -boundary;
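		// E.g. a boundary of 16 pages yields a mask with all bits set except
		// the low four; the run crosses a boundary iff its first and last
		// page indices differ in the masked bits, which the check in the
		// loop below detects via XOR.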
3938 	}
3939 
3940 	vm_page_reservation reservation;
3941 	vm_page_reserve_pages(&reservation, length, priority);
3942 
3943 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3944 
3945 	// First we try to get a run with free pages only. If that fails, we also
3946 	// consider cached pages. If there are only a few free pages and many cached
3947 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3948 	// the first iteration in this case.
3949 	int32 freePages = sUnreservedFreePages;
3950 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
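	// E.g. with 100 unreserved free pages and length 64 the free-only pass is
	// skipped, as 2 * 64 > 100 and enough contiguous free pages are unlikely.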
3951 
3952 	for (;;) {
3953 		if (alignmentMask != 0 || boundaryMask != 0) {
3954 			page_num_t offsetStart = start + sPhysicalPageOffset;
3955 
3956 			// enforce alignment
3957 			if ((offsetStart & alignmentMask) != 0)
3958 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3959 
3960 			// enforce boundary
3961 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3962 				+ length - 1)) & boundaryMask) != 0) {
3963 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3964 			}
3965 
3966 			start = offsetStart - sPhysicalPageOffset;
3967 		}
3968 
3969 		if (start + length > end) {
3970 			if (useCached == 0) {
3971 				// The first iteration with free pages only was unsuccessful.
3972 				// Try again also considering cached pages.
3973 				useCached = 1;
3974 				start = requestedStart;
3975 				continue;
3976 			}
3977 
3978 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3979 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3980 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3981 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3982 				end, restrictions->alignment, restrictions->boundary);
3983 
3984 			freeClearQueueLocker.Unlock();
3985 			vm_page_unreserve_pages(&reservation);
3986 			return NULL;
3987 		}
3988 
3989 		bool foundRun = true;
3990 		page_num_t i;
3991 		for (i = 0; i < length; i++) {
3992 			uint32 pageState = sPages[start + i].State();
3993 			if (pageState != PAGE_STATE_FREE
3994 				&& pageState != PAGE_STATE_CLEAR
3995 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3996 				foundRun = false;
3997 				break;
3998 			}
3999 		}
4000 
4001 		if (foundRun) {
4002 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
4003 			if (i == length) {
4004 				reservation.count = 0;
4005 				return &sPages[start];
4006 			}
4007 
4008 			// apparently a cached page couldn't be allocated -- skip it and
4009 			// continue
4010 			freeClearQueueLocker.Lock();
4011 		}
4012 
4013 		start += i + 1;
4014 	}
4015 }
4016 
4017 
4018 vm_page *
4019 vm_page_at_index(int32 index)
4020 {
4021 	return &sPages[index];
4022 }
4023 
4024 
4025 vm_page *
4026 vm_lookup_page(page_num_t pageNumber)
4027 {
4028 	if (pageNumber < sPhysicalPageOffset)
4029 		return NULL;
4030 
4031 	pageNumber -= sPhysicalPageOffset;
4032 	if (pageNumber >= sNumPages)
4033 		return NULL;
4034 
4035 	return &sPages[pageNumber];
4036 }
4037 
4038 
4039 bool
4040 vm_page_is_dummy(struct vm_page *page)
4041 {
4042 	return page < sPages || page >= sPages + sNumPages;
4043 }
4044 
4045 
4046 /*!	Free the page that belonged to a certain cache.
4047 	You can use vm_page_set_state() manually if you prefer, but only
4048 	if the page's state is not PAGE_STATE_MODIFIED.
4049 
4050 	\param cache The cache the page was previously owned by or NULL. The page
4051 		must have been removed from its cache before calling this method in
4052 		either case.
4053 	\param page The page to free.
4054 	\param reservation If not NULL, the page count of the reservation will be
4055 		incremented, allowing another page to be allocated for the freed one
4056 		at a later time.
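
	A minimal usage sketch, assuming the caller has locked \a page's cache:
	\code
	cache->RemovePage(page);
	vm_page_free_etc(cache, page, NULL);
		// a NULL reservation returns the page to the unreserved free pages
	\endcode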
4057 */
4058 void
4059 vm_page_free_etc(VMCache* cache, vm_page* page,
4060 	vm_page_reservation* reservation)
4061 {
4062 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4063 		&& page->State() != PAGE_STATE_CLEAR);
4064 
4065 	if (page->State() == PAGE_STATE_MODIFIED && cache != NULL
		&& cache->temporary) {
4066 		atomic_add(&sModifiedTemporaryPages, -1);
	}
4067 
4068 	free_page(page, false);
4069 	if (reservation == NULL)
4070 		unreserve_pages(1);
4071 }
4072 
4073 
4074 void
4075 vm_page_set_state(vm_page *page, int pageState)
4076 {
4077 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4078 		&& page->State() != PAGE_STATE_CLEAR);
4079 
4080 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
4081 		free_page(page, pageState == PAGE_STATE_CLEAR);
4082 		unreserve_pages(1);
4083 	} else
4084 		set_page_state(page, pageState);
4085 }
4086 
4087 
4088 /*!	Moves a page to either the tail or the head of its current queue,
4089 	depending on \a tail.
4090 	The page must have a cache and the cache must be locked!
4091 */
4092 void
4093 vm_page_requeue(struct vm_page *page, bool tail)
4094 {
4095 	PAGE_ASSERT(page, page->Cache() != NULL);
4096 	page->Cache()->AssertLocked();
4097 	// DEBUG_PAGE_ACCESS_CHECK(page);
4098 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
4099 		// when it requeues busy pages. The reason is that vm_soft_fault()
4100 		// (respectively fault_get_page()) and the file cache keep newly
4101 		// allocated pages accessed while they are reading them from disk. It
4102 		// would probably be better to change that code and reenable this
4103 		// check.
4104 
4105 	VMPageQueue *queue = NULL;
4106 
4107 	switch (page->State()) {
4108 		case PAGE_STATE_ACTIVE:
4109 			queue = &sActivePageQueue;
4110 			break;
4111 		case PAGE_STATE_INACTIVE:
4112 			queue = &sInactivePageQueue;
4113 			break;
4114 		case PAGE_STATE_MODIFIED:
4115 			queue = &sModifiedPageQueue;
4116 			break;
4117 		case PAGE_STATE_CACHED:
4118 			queue = &sCachedPageQueue;
4119 			break;
4120 		case PAGE_STATE_FREE:
4121 		case PAGE_STATE_CLEAR:
4122 			panic("vm_page_requeue() called for free/clear page %p", page);
4123 			return;
4124 		case PAGE_STATE_WIRED:
4125 		case PAGE_STATE_UNUSED:
4126 			return;
4127 		default:
4128 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
4129 				page, page->State());
4130 			break;
4131 	}
4132 
4133 	queue->RequeueUnlocked(page, tail);
4134 }
4135 
4136 
4137 page_num_t
4138 vm_page_num_pages(void)
4139 {
4140 	return sNumPages - sNonExistingPages;
4141 }
4142 
4143 
4144 /*! There is a subtle distinction between the page counts returned by
4145 	this function and vm_page_num_free_pages():
4146 	The latter returns the number of pages that are completely uncommitted,
4147 	whereas this one also counts pages that can be made available by being
4148 	reclaimed (IOW it factors in things like cached pages as available).
4150 */
4151 page_num_t
4152 vm_page_num_available_pages(void)
4153 {
4154 	return vm_available_memory() / B_PAGE_SIZE;
4155 }
4156 
4157 
4158 page_num_t
4159 vm_page_num_free_pages(void)
4160 {
4161 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4162 	return count > 0 ? count : 0;
4163 }
4164 
4165 
4166 page_num_t
4167 vm_page_num_unused_pages(void)
4168 {
4169 	int32 count = sUnreservedFreePages;
4170 	return count > 0 ? count : 0;
4171 }
4172 
4173 
4174 void
4175 vm_page_get_stats(system_info *info)
4176 {
4177 	// Note: there's no locking protecting any of the queues or counters here,
4178 	// so we run the risk of getting bogus values when evaluating them
4179 	// throughout this function. As these stats are for informational purposes
4180 	// only, it is not really worth introducing such locking. Therefore we just
4181 	// ensure that we don't under- or overflow any of the values.
4182 
4183 	// The pages used for the block cache buffers. Those should not be counted
4184 	// as used but as cached pages.
4185 	// TODO: We should subtract the blocks that are in use ATM, since those
4186 	// can't really be freed in a low memory situation.
4187 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4188 	info->block_cache_pages = blockCachePages;
4189 
4190 	// Non-temporary modified pages are special: they can be written back and
4191 	// thus freed if necessary, which basically makes them cached pages with a
4192 	// higher overhead. The modified queue count is therefore split into
4193 	// temporary and non-temporary counts, and the non-temporary count is
4194 	// added to the cached page count.
4195 	page_num_t modifiedNonTemporaryPages
4196 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4197 
4198 	info->max_pages = vm_page_num_pages();
4199 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4200 		+ blockCachePages;
4201 
4202 	// max_pages is composed of:
4203 	//	active + inactive + unused + wired + modified + cached + free + clear
4204 	// So taking out the cached (including modified non-temporary), free and
4205 	// clear ones leaves us with all used pages.
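	// I.e. used_pages = max_pages - (cached_pages + free + clear), clamped to
	// zero below.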
4206 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4207 		+ sClearPageQueue.Count();
4208 	info->used_pages = subtractPages > info->max_pages
4209 		? 0 : info->max_pages - subtractPages;
4210 
4211 	if (info->used_pages + info->cached_pages > info->max_pages) {
4212 		// Something was shuffled around while we were summing up the counts.
4213 		// Make the values sane, preferring the worse case of more used pages.
4214 		info->cached_pages = info->max_pages - info->used_pages;
4215 	}
4216 
4217 	info->page_faults = vm_num_page_faults();
4218 	info->ignored_pages = sIgnoredPages;
4219 
4220 	// TODO: We don't consider pages used for page directories/tables yet.
4221 }
4222 
4223 
4224 /*!	Returns the greatest address within the last page of accessible physical
4225 	memory.
4226 	The value is inclusive, i.e. in the case of a 32 bit phys_addr_t,
4227 	0xffffffff means that the last page ends at exactly 4 GB.
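	For example, assuming a 4 KB \c B_PAGE_SIZE, \c sPhysicalPageOffset 0, and
	\c sNumPages 0x100000 (4 GB of physical memory), the result is 0xffffffff.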
4228 */
4229 phys_addr_t
4230 vm_page_max_address()
4231 {
4232 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4233 }
4234 
4235 
4236 RANGE_MARKER_FUNCTION_END(vm_page)
4237