xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 909af08f4328301fbdef1ffb41f566c3b5bec0c7)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 32
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserve that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
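// (The array is indexed by the VM priority constants; e.g. the page scrubber
// below reserves using kPageReserveForPriority[VM_PRIORITY_USER].)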
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The vm_page::usage_count increase an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The vm_page::usage_count decrease an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
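		// Unlocked fast path: if the condition already appears activated,
		// skip taking the lock and notifying.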
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRIu32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRIu32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRIu32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->Cache()),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->Cache()),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->Cache()),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static void
764 list_page(vm_page* page)
765 {
766 	kprintf("0x%08" B_PRIxADDR " ",
767 		(addr_t)(page->physical_page_number * B_PAGE_SIZE));
768 	switch (page->State()) {
769 		case PAGE_STATE_ACTIVE:   kprintf("A"); break;
770 		case PAGE_STATE_INACTIVE: kprintf("I"); break;
771 		case PAGE_STATE_MODIFIED: kprintf("M"); break;
772 		case PAGE_STATE_CACHED:   kprintf("C"); break;
773 		case PAGE_STATE_FREE:     kprintf("F"); break;
774 		case PAGE_STATE_CLEAR:    kprintf("L"); break;
775 		case PAGE_STATE_WIRED:    kprintf("W"); break;
776 		case PAGE_STATE_UNUSED:   kprintf("-"); break;
777 	}
778 	kprintf(" ");
779 	if (page->busy)         kprintf("B"); else kprintf("-");
780 	if (page->busy_writing) kprintf("W"); else kprintf("-");
781 	if (page->accessed)     kprintf("A"); else kprintf("-");
782 	if (page->modified)     kprintf("M"); else kprintf("-");
783 	kprintf("-");
784 
785 	kprintf(" usage:%3u", page->usage_count);
786 	kprintf(" wired:%5u", page->WiredCount());
787 
788 	bool first = true;
789 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
790 	vm_page_mapping* mapping;
791 	while ((mapping = iterator.Next()) != NULL) {
792 		if (first) {
793 			kprintf(": ");
794 			first = false;
795 		} else
796 			kprintf(", ");
797 
798 		kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
799 		mapping = mapping->page_link.next;
800 	}
801 }
802 
803 
804 static int
805 dump_page_list(int argc, char **argv)
806 {
807 	kprintf("page table:\n");
808 	for (page_num_t i = 0; i < sNumPages; i++) {
809 		if (sPages[i].State() != PAGE_STATE_UNUSED) {
810 			list_page(&sPages[i]);
811 			kprintf("\n");
812 		}
813 	}
814 	kprintf("end of page table\n");
815 
816 	return 0;
817 }
818 
819 
820 static int
821 find_page(int argc, char **argv)
822 {
823 	struct vm_page *page;
824 	addr_t address;
825 	int32 index = 1;
826 	int i;
827 
828 	struct {
829 		const char*	name;
830 		VMPageQueue*	queue;
831 	} pageQueueInfos[] = {
832 		{ "free",		&sFreePageQueue },
833 		{ "clear",		&sClearPageQueue },
834 		{ "modified",	&sModifiedPageQueue },
835 		{ "active",		&sActivePageQueue },
836 		{ "inactive",	&sInactivePageQueue },
837 		{ "cached",		&sCachedPageQueue },
838 		{ NULL, NULL }
839 	};
840 
841 	if (argc < 2
842 		|| strlen(argv[index]) <= 2
843 		|| argv[index][0] != '0'
844 		|| argv[index][1] != 'x') {
845 		kprintf("usage: find_page <address>\n");
846 		return 0;
847 	}
848 
849 	address = strtoul(argv[index], NULL, 0);
850 	page = (vm_page*)address;
851 
852 	for (i = 0; pageQueueInfos[i].name; i++) {
853 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
854 		while (vm_page* p = it.Next()) {
855 			if (p == page) {
856 				kprintf("found page %p in queue %p (%s)\n", page,
857 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
858 				return 0;
859 			}
860 		}
861 	}
862 
863 	kprintf("page %p isn't in any queue\n", page);
864 
865 	return 0;
866 }
867 
868 
869 const char *
870 page_state_to_string(int state)
871 {
872 	switch(state) {
873 		case PAGE_STATE_ACTIVE:
874 			return "active";
875 		case PAGE_STATE_INACTIVE:
876 			return "inactive";
877 		case PAGE_STATE_MODIFIED:
878 			return "modified";
879 		case PAGE_STATE_CACHED:
880 			return "cached";
881 		case PAGE_STATE_FREE:
882 			return "free";
883 		case PAGE_STATE_CLEAR:
884 			return "clear";
885 		case PAGE_STATE_WIRED:
886 			return "wired";
887 		case PAGE_STATE_UNUSED:
888 			return "unused";
889 		default:
890 			return "unknown";
891 	}
892 }
893 
894 
895 static int
896 dump_page_long(int argc, char **argv)
897 {
898 	bool addressIsPointer = true;
899 	bool physical = false;
900 	bool searchMappings = false;
901 	int32 index = 1;
902 
903 	while (index < argc) {
904 		if (argv[index][0] != '-')
905 			break;
906 
907 		if (!strcmp(argv[index], "-p")) {
908 			addressIsPointer = false;
909 			physical = true;
910 		} else if (!strcmp(argv[index], "-v")) {
911 			addressIsPointer = false;
912 		} else if (!strcmp(argv[index], "-m")) {
913 			searchMappings = true;
914 		} else {
915 			print_debugger_command_usage(argv[0]);
916 			return 0;
917 		}
918 
919 		index++;
920 	}
921 
922 	if (index + 1 != argc) {
923 		print_debugger_command_usage(argv[0]);
924 		return 0;
925 	}
926 
927 	uint64 value;
928 	if (!evaluate_debug_expression(argv[index], &value, false))
929 		return 0;
930 
931 	uint64 pageAddress = value;
932 	struct vm_page* page;
933 
934 	if (addressIsPointer) {
935 		page = (struct vm_page *)(addr_t)pageAddress;
936 	} else {
937 		if (!physical) {
938 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
939 
940 			if (debug_get_debugged_thread()->team->address_space != NULL)
941 				addressSpace = debug_get_debugged_thread()->team->address_space;
942 
943 			uint32 flags = 0;
944 			phys_addr_t physicalAddress;
945 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
946 					&physicalAddress, &flags) != B_OK
947 				|| (flags & PAGE_PRESENT) == 0) {
948 				kprintf("Virtual address not mapped to a physical page in this "
949 					"address space.\n");
950 				return 0;
951 			}
952 			pageAddress = physicalAddress;
953 		}
954 
955 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
956 	}
957 
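	// A page's physical page number should equal its index in the sPages
	// array plus the physical offset of the first managed page; the check
	// below flags a mismatch.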
958 	const page_num_t expected = sPhysicalPageOffset + (page - sPages);
959 
960 	kprintf("PAGE: %p\n", page);
961 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
962 		page->queue_link.previous);
963 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", page->physical_page_number);
964 	if (page->physical_page_number != expected)
965 		kprintf("\t(expected %#" B_PRIxPHYSADDR ")!\n", expected);
966 	kprintf("cache:           %p\n", page->Cache());
967 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
968 	kprintf("cache_next:      %p\n", page->cache_next);
969 	kprintf("state:           %s\n", page_state_to_string(page->State()));
970 	kprintf("wired_count:     %d\n", page->WiredCount());
971 	kprintf("usage_count:     %d\n", page->usage_count);
972 	kprintf("busy:            %d\n", page->busy);
973 	kprintf("busy_writing:    %d\n", page->busy_writing);
974 	kprintf("accessed:        %d\n", page->accessed);
975 	kprintf("modified:        %d\n", page->modified);
976 	#if DEBUG_PAGE_QUEUE
977 		kprintf("queue:           %p\n", page->queue);
978 	#endif
979 	#if DEBUG_PAGE_ACCESS
980 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
981 	#endif
982 	kprintf("area mappings:\n");
983 
984 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
985 	vm_page_mapping *mapping;
986 	while ((mapping = iterator.Next()) != NULL) {
987 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
988 		mapping = mapping->page_link.next;
989 	}
990 
991 	if (searchMappings) {
992 		kprintf("all mappings:\n");
993 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
994 		while (addressSpace != NULL) {
995 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
996 			for (addr_t address = addressSpace->Base(); pageCount != 0;
997 					address += B_PAGE_SIZE, pageCount--) {
998 				phys_addr_t physicalAddress;
999 				uint32 flags = 0;
1000 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
1001 						&physicalAddress, &flags) == B_OK
1002 					&& (flags & PAGE_PRESENT) != 0
1003 					&& physicalAddress / B_PAGE_SIZE
1004 						== page->physical_page_number) {
1005 					VMArea* area = addressSpace->LookupArea(address);
1006 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
1007 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
1008 						area != NULL ? area->id : -1, address,
1009 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
1010 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
1011 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
1012 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
1013 				}
1014 			}
1015 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
1016 		}
1017 	}
1018 
1019 	set_debug_variable("_cache", (addr_t)page->Cache());
1020 	#if DEBUG_PAGE_ACCESS
1021 		set_debug_variable("_accessor", page->accessing_thread);
1022 	#endif
1023 
1024 	return 0;
1025 }
1026 
1027 
1028 static int
1029 dump_page_queue(int argc, char **argv)
1030 {
1031 	struct VMPageQueue *queue;
1032 
1033 	if (argc < 2) {
1034 		kprintf("usage: page_queue <address/name> [list]\n");
1035 		return 0;
1036 	}
1037 
1038 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
1039 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
1040 	else if (!strcmp(argv[1], "free"))
1041 		queue = &sFreePageQueue;
1042 	else if (!strcmp(argv[1], "clear"))
1043 		queue = &sClearPageQueue;
1044 	else if (!strcmp(argv[1], "modified"))
1045 		queue = &sModifiedPageQueue;
1046 	else if (!strcmp(argv[1], "active"))
1047 		queue = &sActivePageQueue;
1048 	else if (!strcmp(argv[1], "inactive"))
1049 		queue = &sInactivePageQueue;
1050 	else if (!strcmp(argv[1], "cached"))
1051 		queue = &sCachedPageQueue;
1052 	else {
1053 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
1054 		return 0;
1055 	}
1056 
1057 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
1058 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
1059 		queue->Count());
1060 
1061 	if (argc == 3) {
1062 		struct vm_page *page = queue->Head();
1063 
1064 		kprintf("page        cache       type       state  wired  usage\n");
1065 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1066 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1067 				vm_cache_type_to_string(page->Cache()->type),
1068 				page_state_to_string(page->State()),
1069 				page->WiredCount(), page->usage_count);
1070 		}
1071 	}
1072 	return 0;
1073 }
1074 
1075 
1076 static int
1077 dump_page_stats(int argc, char **argv)
1078 {
1079 	page_num_t swappableModified = 0;
1080 	page_num_t swappableModifiedInactive = 0;
1081 
1082 	size_t counter[8];
1083 	size_t busyCounter[8];
1084 	memset(counter, 0, sizeof(counter));
1085 	memset(busyCounter, 0, sizeof(busyCounter));
1086 
1087 	struct page_run {
1088 		page_num_t	start;
1089 		page_num_t	end;
1090 
1091 		page_num_t Length() const	{ return end - start; }
1092 	};
1093 
1094 	page_run currentFreeRun = { 0, 0 };
1095 	page_run currentCachedRun = { 0, 0 };
1096 	page_run longestFreeRun = { 0, 0 };
1097 	page_run longestCachedRun = { 0, 0 };
1098 
1099 	for (page_num_t i = 0; i < sNumPages; i++) {
1100 		if (sPages[i].State() > 7) {
1101 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1102 				&sPages[i]);
1103 		}
1104 
1105 		uint32 pageState = sPages[i].State();
1106 
1107 		counter[pageState]++;
1108 		if (sPages[i].busy)
1109 			busyCounter[pageState]++;
1110 
1111 		if (pageState == PAGE_STATE_MODIFIED
1112 			&& sPages[i].Cache() != NULL
1113 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1114 			swappableModified++;
1115 			if (sPages[i].usage_count == 0)
1116 				swappableModifiedInactive++;
1117 		}
1118 
1119 		// track free and cached pages runs
1120 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1121 			currentFreeRun.end = i + 1;
1122 			currentCachedRun.end = i + 1;
1123 		} else {
1124 			if (currentFreeRun.Length() > longestFreeRun.Length())
1125 				longestFreeRun = currentFreeRun;
1126 			currentFreeRun.start = currentFreeRun.end = i + 1;
1127 
1128 			if (pageState == PAGE_STATE_CACHED) {
1129 				currentCachedRun.end = i + 1;
1130 			} else {
1131 				if (currentCachedRun.Length() > longestCachedRun.Length())
1132 					longestCachedRun = currentCachedRun;
1133 				currentCachedRun.start = currentCachedRun.end = i + 1;
1134 			}
1135 		}
1136 	}
1137 
1138 	kprintf("page stats:\n");
1139 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1140 
1141 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1142 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1143 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1144 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1145 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1146 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1147 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1148 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1149 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1150 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1151 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1152 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1153 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1154 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1155 
1156 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1157 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1158 		sUnsatisfiedPageReservations);
1159 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1160 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1161 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1162 		sPages[longestFreeRun.start].physical_page_number);
1163 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1164 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1165 		sPages[longestCachedRun.start].physical_page_number);
1166 
1167 	kprintf("waiting threads:\n");
1168 	for (PageReservationWaiterList::Iterator it
1169 			= sPageReservationWaiters.GetIterator();
1170 		PageReservationWaiter* waiter = it.Next();) {
1171 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1172 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1173 			waiter->missing, waiter->dontTouch);
1174 	}
1175 
1176 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1177 		sFreePageQueue.Count());
1178 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1179 		sClearPageQueue.Count());
1180 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1181 		" temporary, %" B_PRIuPHYSADDR " swappable, inactive: %"
1182 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1183 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1184 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1185 		&sActivePageQueue, sActivePageQueue.Count());
1186 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1187 		&sInactivePageQueue, sInactivePageQueue.Count());
1188 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1189 		&sCachedPageQueue, sCachedPageQueue.Count());
1190 	return 0;
1191 }
1192 
1193 
1194 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1195 
1196 static caller_info*
1197 get_caller_info(addr_t caller)
1198 {
1199 	// find the caller info
1200 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1201 		if (caller == sCallerInfoTable[i].caller)
1202 			return &sCallerInfoTable[i];
1203 	}
1204 
1205 	// not found, add a new entry, if there are free slots
1206 	if (sCallerInfoCount >= kCallerInfoTableSize)
1207 		return NULL;
1208 
1209 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1210 	info->caller = caller;
1211 	info->count = 0;
1212 
1213 	return info;
1214 }
1215 
1216 
1217 static int
1218 caller_info_compare_count(const void* _a, const void* _b)
1219 {
1220 	const caller_info* a = (const caller_info*)_a;
1221 	const caller_info* b = (const caller_info*)_b;
1222 	return (int)(b->count - a->count);
1223 }
1224 
1225 
1226 static int
1227 dump_page_allocations_per_caller(int argc, char** argv)
1228 {
1229 	bool resetAllocationInfos = false;
1230 	bool printDetails = false;
1231 	addr_t caller = 0;
1232 
1233 	for (int32 i = 1; i < argc; i++) {
1234 		if (strcmp(argv[i], "-d") == 0) {
1235 			uint64 callerAddress;
1236 			if (++i >= argc
1237 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1238 				print_debugger_command_usage(argv[0]);
1239 				return 0;
1240 			}
1241 
1242 			caller = callerAddress;
1243 			printDetails = true;
1244 		} else if (strcmp(argv[i], "-r") == 0) {
1245 			resetAllocationInfos = true;
1246 		} else {
1247 			print_debugger_command_usage(argv[0]);
1248 			return 0;
1249 		}
1250 	}
1251 
1252 	sCallerInfoCount = 0;
1253 
1254 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1255 	AllocationDetailPrinterCallback detailsCallback(caller);
1256 	AllocationTrackingCallback& callback = printDetails
1257 		? (AllocationTrackingCallback&)detailsCallback
1258 		: (AllocationTrackingCallback&)collectorCallback;
1259 
1260 	for (page_num_t i = 0; i < sNumPages; i++)
1261 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1262 
1263 	if (printDetails)
1264 		return 0;
1265 
1266 	// sort the array
1267 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1268 		&caller_info_compare_count);
1269 
1270 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1271 
1272 	size_t totalAllocationCount = 0;
1273 
1274 	kprintf("     count      caller\n");
1275 	kprintf("----------------------------------\n");
1276 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1277 		caller_info& info = sCallerInfoTable[i];
1278 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1279 
1280 		const char* symbol;
1281 		const char* imageName;
1282 		bool exactMatch;
1283 		addr_t baseAddress;
1284 
1285 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1286 				&imageName, &exactMatch) == B_OK) {
1287 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1288 				info.caller - baseAddress, imageName,
1289 				exactMatch ? "" : " (nearest)");
1290 		} else
1291 			kprintf("\n");
1292 
1293 		totalAllocationCount += info.count;
1294 	}
1295 
1296 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1297 		totalAllocationCount);
1298 
1299 	return 0;
1300 }
1301 
1302 
1303 static int
1304 dump_page_allocation_infos(int argc, char** argv)
1305 {
1306 	page_num_t pageFilter = 0;
1307 	team_id teamFilter = -1;
1308 	thread_id threadFilter = -1;
1309 	bool printStackTraces = false;
1310 
1311 	for (int32 i = 1; i < argc; i++) {
1312 		if (strcmp(argv[i], "--stacktrace") == 0)
1313 			printStackTraces = true;
1314 		else if (strcmp(argv[i], "-p") == 0) {
1315 			uint64 pageNumber;
1316 			if (++i >= argc
1317 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1318 				print_debugger_command_usage(argv[0]);
1319 				return 0;
1320 			}
1321 
1322 			pageFilter = pageNumber;
1323 		} else if (strcmp(argv[i], "--team") == 0) {
1324 			uint64 team;
1325 			if (++i >= argc
1326 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1327 				print_debugger_command_usage(argv[0]);
1328 				return 0;
1329 			}
1330 
1331 			teamFilter = team;
1332 		} else if (strcmp(argv[i], "--thread") == 0) {
1333 			uint64 thread;
1334 			if (++i >= argc
1335 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1336 				print_debugger_command_usage(argv[0]);
1337 				return 0;
1338 			}
1339 
1340 			threadFilter = thread;
1341 		} else {
1342 			print_debugger_command_usage(argv[0]);
1343 			return 0;
1344 		}
1345 	}
1346 
1347 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1348 		teamFilter, threadFilter);
1349 
1350 	for (page_num_t i = 0; i < sNumPages; i++)
1351 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1352 
1353 	return 0;
1354 }
1355 
1356 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1357 
1358 
1359 #ifdef TRACK_PAGE_USAGE_STATS
1360 
1361 static void
1362 track_page_usage(vm_page* page)
1363 {
1364 	if (page->WiredCount() == 0) {
1365 		sNextPageUsage[(int32)page->usage_count + 128]++;
1366 		sNextPageUsagePageCount++;
1367 	}
1368 }
1369 
1370 
1371 static void
1372 update_page_usage_stats()
1373 {
1374 	std::swap(sPageUsage, sNextPageUsage);
1375 	sPageUsagePageCount = sNextPageUsagePageCount;
1376 
1377 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1378 	sNextPageUsagePageCount = 0;
1379 
1380 	// compute average
1381 	if (sPageUsagePageCount > 0) {
1382 		int64 sum = 0;
1383 		for (int32 i = 0; i < 256; i++)
1384 			sum += (int64)sPageUsage[i] * (i - 128);
1385 
1386 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1387 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1388 	}
1389 }
1390 
1391 
1392 static int
1393 dump_page_usage_stats(int argc, char** argv)
1394 {
1395 	kprintf("distribution of page usage counts (%lu pages):",
1396 		sPageUsagePageCount);
1397 
1398 	int64 sum = 0;
1399 	for (int32 i = 0; i < 256; i++) {
1400 		if (i % 8 == 0)
1401 			kprintf("\n%4ld:", i - 128);
1402 
1403 		int64 count = sPageUsage[i];
1404 		sum += count * (i - 128);
1405 
1406 		kprintf("  %9llu", count);
1407 	}
1408 
1409 	kprintf("\n\n");
1410 
1411 	kprintf("average usage count: %f\n",
1412 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1413 
1414 	return 0;
1415 }
1416 
1417 #endif	// TRACK_PAGE_USAGE_STATS
1418 
1419 
1420 // #pragma mark - vm_page
1421 
1422 
1423 inline void
1424 vm_page::InitState(uint8 newState)
1425 {
1426 	state = newState;
1427 }
1428 
1429 
1430 inline void
1431 vm_page::SetState(uint8 newState)
1432 {
1433 	TPS(SetPageState(this, newState));
1434 
1435 	state = newState;
1436 }
1437 
1438 
1439 // #pragma mark -
1440 
1441 
1442 static void
1443 get_page_stats(page_stats& _pageStats)
1444 {
1445 	_pageStats.totalFreePages = sUnreservedFreePages;
1446 	_pageStats.cachedPages = sCachedPageQueue.Count();
1447 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1448 	// TODO: We don't get an actual snapshot here!
1449 }
1450 
1451 
1452 static bool
1453 do_active_paging(const page_stats& pageStats)
1454 {
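	// The system needs active paging when free + cached pages no longer cover
	// the unsatisfied reservations plus the free/cached pages target.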
1455 	return pageStats.totalFreePages + pageStats.cachedPages
1456 		< pageStats.unsatisfiedReservations
1457 			+ (int32)sFreeOrCachedPagesTarget;
1458 }
1459 
1460 
1461 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1462 	\a count. Doesn't touch the last \a dontTouch pages of
1463 	\c sUnreservedFreePages, though.
1464 	\return The number of actually reserved pages.
1465 */
1466 static uint32
1467 reserve_some_pages(uint32 count, uint32 dontTouch)
1468 {
1469 	while (true) {
1470 		int32 freePages = atomic_get(&sUnreservedFreePages);
1471 		if (freePages <= (int32)dontTouch)
1472 			return 0;
1473 
1474 		int32 toReserve = std::min(count, freePages - dontTouch);
1475 		if (atomic_test_and_set(&sUnreservedFreePages,
1476 					freePages - toReserve, freePages)
1477 				== freePages) {
1478 			return toReserve;
1479 		}
1480 
1481 		// the count changed in the meantime -- retry
1482 	}
1483 }
1484 
1485 
1486 static void
1487 wake_up_page_reservation_waiters()
1488 {
1489 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1490 
1491 	// TODO: If this is a low priority thread, we might want to disable
1492 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1493 	// high priority threads will be kept waiting while a medium priority thread
1494 	// prevents us from running.
1495 
1496 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1497 		int32 reserved = reserve_some_pages(waiter->missing,
1498 			waiter->dontTouch);
1499 		if (reserved == 0)
1500 			return;
1501 
1502 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1503 		waiter->missing -= reserved;
1504 
1505 		if (waiter->missing > 0)
1506 			return;
1507 
1508 		sPageReservationWaiters.Remove(waiter);
1509 
1510 		thread_unblock(waiter->thread, B_OK);
1511 	}
1512 }
1513 
1514 
1515 static inline void
1516 unreserve_pages(uint32 count)
1517 {
1518 	atomic_add(&sUnreservedFreePages, count);
1519 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1520 		wake_up_page_reservation_waiters();
1521 }
1522 
1523 
1524 static void
1525 free_page(vm_page* page, bool clear)
1526 {
1527 	DEBUG_PAGE_ACCESS_CHECK(page);
1528 
1529 	PAGE_ASSERT(page, !page->IsMapped());
1530 
1531 	VMPageQueue* fromQueue;
1532 
1533 	switch (page->State()) {
1534 		case PAGE_STATE_ACTIVE:
1535 			fromQueue = &sActivePageQueue;
1536 			break;
1537 		case PAGE_STATE_INACTIVE:
1538 			fromQueue = &sInactivePageQueue;
1539 			break;
1540 		case PAGE_STATE_MODIFIED:
1541 			fromQueue = &sModifiedPageQueue;
1542 			break;
1543 		case PAGE_STATE_CACHED:
1544 			fromQueue = &sCachedPageQueue;
1545 			break;
1546 		case PAGE_STATE_FREE:
1547 		case PAGE_STATE_CLEAR:
1548 			panic("free_page(): page %p already free", page);
1549 			return;
1550 		case PAGE_STATE_WIRED:
1551 		case PAGE_STATE_UNUSED:
1552 			fromQueue = NULL;
1553 			break;
1554 		default:
1555 			panic("free_page(): page %p in invalid state %d",
1556 				page, page->State());
1557 			return;
1558 	}
1559 
1560 	if (page->CacheRef() != NULL)
1561 		panic("to be freed page %p has cache", page);
1562 	if (page->IsMapped())
1563 		panic("to be freed page %p has mappings", page);
1564 
1565 	if (fromQueue != NULL)
1566 		fromQueue->RemoveUnlocked(page);
1567 
1568 	TA(FreePage(page->physical_page_number));
1569 
1570 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1571 	page->allocation_tracking_info.Clear();
1572 #endif
1573 
1574 	ReadLocker locker(sFreePageQueuesLock);
1575 
1576 	DEBUG_PAGE_ACCESS_END(page);
1577 
1578 	if (clear) {
1579 		page->SetState(PAGE_STATE_CLEAR);
1580 		sClearPageQueue.PrependUnlocked(page);
1581 	} else {
1582 		page->SetState(PAGE_STATE_FREE);
1583 		sFreePageQueue.PrependUnlocked(page);
1584 		sFreePageCondition.NotifyAll();
1585 	}
1586 
1587 	locker.Unlock();
1588 }
1589 
1590 
1591 /*!	The caller must make sure that no-one else tries to change the page's state
1592 	while the function is called. If the page has a cache, this can be done by
1593 	locking the cache.
1594 */
1595 static void
1596 set_page_state(vm_page *page, int pageState)
1597 {
1598 	DEBUG_PAGE_ACCESS_CHECK(page);
1599 
1600 	if (pageState == page->State())
1601 		return;
1602 
1603 	VMPageQueue* fromQueue;
1604 
1605 	switch (page->State()) {
1606 		case PAGE_STATE_ACTIVE:
1607 			fromQueue = &sActivePageQueue;
1608 			break;
1609 		case PAGE_STATE_INACTIVE:
1610 			fromQueue = &sInactivePageQueue;
1611 			break;
1612 		case PAGE_STATE_MODIFIED:
1613 			fromQueue = &sModifiedPageQueue;
1614 			break;
1615 		case PAGE_STATE_CACHED:
1616 			fromQueue = &sCachedPageQueue;
1617 			break;
1618 		case PAGE_STATE_FREE:
1619 		case PAGE_STATE_CLEAR:
1620 			panic("set_page_state(): page %p is free/clear", page);
1621 			return;
1622 		case PAGE_STATE_WIRED:
1623 		case PAGE_STATE_UNUSED:
1624 			fromQueue = NULL;
1625 			break;
1626 		default:
1627 			panic("set_page_state(): page %p in invalid state %d",
1628 				page, page->State());
1629 			return;
1630 	}
1631 
1632 	VMPageQueue* toQueue;
1633 
1634 	switch (pageState) {
1635 		case PAGE_STATE_ACTIVE:
1636 			toQueue = &sActivePageQueue;
1637 			break;
1638 		case PAGE_STATE_INACTIVE:
1639 			toQueue = &sInactivePageQueue;
1640 			break;
1641 		case PAGE_STATE_MODIFIED:
1642 			toQueue = &sModifiedPageQueue;
1643 			break;
1644 		case PAGE_STATE_CACHED:
1645 			PAGE_ASSERT(page, !page->IsMapped());
1646 			PAGE_ASSERT(page, !page->modified);
1647 			toQueue = &sCachedPageQueue;
1648 			break;
1649 		case PAGE_STATE_FREE:
1650 		case PAGE_STATE_CLEAR:
1651 			panic("set_page_state(): target state is free/clear");
1652 			return;
1653 		case PAGE_STATE_WIRED:
1654 		case PAGE_STATE_UNUSED:
1655 			toQueue = NULL;
1656 			break;
1657 		default:
1658 			panic("set_page_state(): invalid target state %d", pageState);
1659 			return;
1660 	}
1661 
1662 	VMCache* cache = page->Cache();
1663 	if (cache != NULL && cache->temporary) {
1664 		if (pageState == PAGE_STATE_MODIFIED)
1665 			atomic_add(&sModifiedTemporaryPages, 1);
1666 		else if (page->State() == PAGE_STATE_MODIFIED)
1667 			atomic_add(&sModifiedTemporaryPages, -1);
1668 	}
1669 
1670 	// move the page
1671 	if (toQueue == fromQueue) {
1672 		// Note: Theoretically we are required to lock when changing the page
1673 		// state, even if we don't change the queue. We actually don't have to
1674 		// do this, though, since only for the active queue there are different
1675 		// page states and active pages have a cache that must be locked at
1676 		// this point. So we rely on the fact that everyone must lock the cache
1677 		// before trying to change/interpret the page state.
1678 		PAGE_ASSERT(page, cache != NULL);
1679 		cache->AssertLocked();
1680 		page->SetState(pageState);
1681 	} else {
1682 		if (fromQueue != NULL)
1683 			fromQueue->RemoveUnlocked(page);
1684 
1685 		page->SetState(pageState);
1686 
1687 		if (toQueue != NULL)
1688 			toQueue->AppendUnlocked(page);
1689 	}
1690 }
1691 
1692 
1693 /*! Moves a previously modified page into the queue now appropriate for it.
1694 	The page queues must not be locked.
1695 */
1696 static void
1697 move_page_to_appropriate_queue(vm_page *page)
1698 {
1699 	DEBUG_PAGE_ACCESS_CHECK(page);
1700 
1701 	// Note, this logic must be in sync with what the page daemon does.
1702 	int32 state;
1703 	if (page->IsMapped())
1704 		state = PAGE_STATE_ACTIVE;
1705 	else if (page->modified)
1706 		state = PAGE_STATE_MODIFIED;
1707 	else
1708 		state = PAGE_STATE_CACHED;
1709 
1710 // TODO: If free + cached pages are low, we might directly want to free the
1711 // page.
1712 	set_page_state(page, state);
1713 }
1714 
1715 
1716 static void
1717 clear_page(struct vm_page *page)
1718 {
1719 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1720 		B_PAGE_SIZE);
1721 }
1722 
1723 
1724 static status_t
1725 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1726 {
1727 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1728 		B_PRIxPHYSADDR "\n", startPage, length));
1729 
1730 	if (sPhysicalPageOffset > startPage) {
1731 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1732 			"): start page is before free list\n", startPage, length);
1733 		if (sPhysicalPageOffset - startPage >= length)
1734 			return B_OK;
1735 		length -= sPhysicalPageOffset - startPage;
1736 		startPage = sPhysicalPageOffset;
1737 	}
1738 
1739 	startPage -= sPhysicalPageOffset;
1740 
1741 	if (startPage + length > sNumPages) {
1742 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1743 			"): range would extend past free list\n", startPage, length);
1744 		if (startPage >= sNumPages)
1745 			return B_OK;
1746 		length = sNumPages - startPage;
1747 	}
1748 
1749 	WriteLocker locker(sFreePageQueuesLock);
1750 
1751 	for (page_num_t i = 0; i < length; i++) {
1752 		vm_page *page = &sPages[startPage + i];
1753 		switch (page->State()) {
1754 			case PAGE_STATE_FREE:
1755 			case PAGE_STATE_CLEAR:
1756 			{
1757 // TODO: This violates the page reservation policy, since we remove pages from
1758 // the free/clear queues without having reserved them before. This should happen
1759 // in the early boot process only, though.
1760 				DEBUG_PAGE_ACCESS_START(page);
1761 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1762 					? sFreePageQueue : sClearPageQueue;
1763 				queue.Remove(page);
1764 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1765 				page->busy = false;
1766 				atomic_add(&sUnreservedFreePages, -1);
1767 				DEBUG_PAGE_ACCESS_END(page);
1768 				break;
1769 			}
1770 			case PAGE_STATE_WIRED:
1771 			case PAGE_STATE_UNUSED:
1772 				break;
1773 			case PAGE_STATE_ACTIVE:
1774 			case PAGE_STATE_INACTIVE:
1775 			case PAGE_STATE_MODIFIED:
1776 			case PAGE_STATE_CACHED:
1777 			default:
1778 				// uh
1779 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1780 					" in non-free state %d!\n", startPage + i, page->State());
1781 				break;
1782 		}
1783 	}
1784 
1785 	return B_OK;
1786 }
1787 
1788 
1789 /*!
1790 	This is a background thread that wakes up when its condition is notified
1791 	and moves some pages from the free queue over to the clear queue.
1792 	Given enough time, it will clear out all pages from the free queue - we
1793 	could probably slow it down after having reached a certain threshold.
1794 */
1795 static int32
1796 page_scrubber(void *unused)
1797 {
1798 	(void)(unused);
1799 
1800 	TRACE(("page_scrubber starting...\n"));
1801 
1802 	ConditionVariableEntry entry;
1803 	for (;;) {
1804 		while (sFreePageQueue.Count() == 0
1805 				|| atomic_get(&sUnreservedFreePages)
1806 					< (int32)sFreePagesTarget) {
1807 			sFreePageCondition.Add(&entry);
1808 			entry.Wait();
1809 		}
1810 
1811 		// Since we temporarily remove pages from the free pages reserve,
1812 		// we must make sure we don't cause a violation of the page
1813 		// reservation guarantee. The following is usually stricter than
1814 		// necessary, because we don't have information on how many of the
1815 		// reserved pages have already been allocated.
1816 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1817 			kPageReserveForPriority[VM_PRIORITY_USER]);
1818 		if (reserved == 0)
1819 			continue;
1820 
1821 		// get some pages from the free queue, mostly sorted
1822 		ReadLocker locker(sFreePageQueuesLock);
1823 
1824 		vm_page *page[SCRUB_SIZE];
1825 		int32 scrubCount = 0;
1826 		for (int32 i = 0; i < reserved; i++) {
1827 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1828 			if (page[i] == NULL)
1829 				break;
1830 
1831 			DEBUG_PAGE_ACCESS_START(page[i]);
1832 
1833 			page[i]->SetState(PAGE_STATE_ACTIVE);
1834 			page[i]->busy = true;
1835 			scrubCount++;
1836 		}
1837 
1838 		locker.Unlock();
1839 
1840 		if (scrubCount == 0) {
1841 			unreserve_pages(reserved);
1842 			continue;
1843 		}
1844 
1845 		TA(ScrubbingPages(scrubCount));
1846 
1847 		// clear them
1848 		for (int32 i = 0; i < scrubCount; i++)
1849 			clear_page(page[i]);
1850 
1851 		locker.Lock();
1852 
1853 		// and put them into the clear queue
1854 		// process the array in reverse when prepending, to preserve sequential order
1855 		for (int32 i = scrubCount - 1; i >= 0; i--) {
1856 			page[i]->SetState(PAGE_STATE_CLEAR);
1857 			page[i]->busy = false;
1858 			DEBUG_PAGE_ACCESS_END(page[i]);
1859 			sClearPageQueue.PrependUnlocked(page[i]);
1860 		}
1861 
1862 		locker.Unlock();
1863 
1864 		unreserve_pages(reserved);
1865 
1866 		TA(ScrubbedPages(scrubCount));
1867 
1868 		// wait at least 100ms between runs
1869 		snooze(100 * 1000);
1870 	}
1871 
1872 	return 0;
1873 }
1874 
1875 
1876 static void
1877 init_page_marker(vm_page &marker)
1878 {
1879 	marker.SetCacheRef(NULL);
1880 	marker.InitState(PAGE_STATE_UNUSED);
1881 	marker.busy = true;
1882 #if DEBUG_PAGE_QUEUE
1883 	marker.queue = NULL;
1884 #endif
1885 #if DEBUG_PAGE_ACCESS
1886 	marker.accessing_thread = thread_get_current_thread_id();
1887 #endif
1888 }
1889 
1890 
1891 static void
1892 remove_page_marker(struct vm_page &marker)
1893 {
1894 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1895 
1896 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1897 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1898 
1899 	marker.SetState(PAGE_STATE_UNUSED);
1900 }
1901 
1902 
1903 static vm_page*
1904 next_modified_page(page_num_t& maxPagesToSee)
1905 {
1906 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1907 
1908 	while (maxPagesToSee > 0) {
1909 		vm_page* page = sModifiedPageQueue.Head();
1910 		if (page == NULL)
1911 			return NULL;
1912 
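		// Rotate the page to the tail of the modified queue, so that repeated
		// calls walk the whole queue instead of revisiting the same (possibly
		// busy) head page.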
1913 		sModifiedPageQueue.Requeue(page, true);
1914 
1915 		maxPagesToSee--;
1916 
1917 		if (!page->busy)
1918 			return page;
1919 	}
1920 
1921 	return NULL;
1922 }
1923 
1924 
1925 // #pragma mark -
1926 
1927 
1928 class PageWriteTransfer;
1929 class PageWriteWrapper;
1930 
1931 
1932 class PageWriterRun {
1933 public:
1934 	status_t Init(uint32 maxPages);
1935 
1936 	void PrepareNextRun();
1937 	void AddPage(vm_page* page);
1938 	uint32 Go();
1939 
1940 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1941 		bool partialTransfer, size_t bytesTransferred);
1942 
1943 private:
1944 	uint32				fMaxPages;
1945 	uint32				fWrapperCount;
1946 	uint32				fTransferCount;
1947 	int32				fPendingTransfers;
1948 	PageWriteWrapper*	fWrappers;
1949 	PageWriteTransfer*	fTransfers;
1950 	ConditionVariable	fAllFinishedCondition;
1951 };
1952 
1953 
1954 class PageWriteTransfer : public AsyncIOCallback {
1955 public:
1956 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1957 	bool AddPage(vm_page* page);
1958 
1959 	status_t Schedule(uint32 flags);
1960 
1961 	void SetStatus(status_t status, size_t transferred);
1962 
1963 	status_t Status() const	{ return fStatus; }
1964 	struct VMCache* Cache() const { return fCache; }
1965 	uint32 PageCount() const { return fPageCount; }
1966 
1967 	virtual void IOFinished(status_t status, bool partialTransfer,
1968 		generic_size_t bytesTransferred);
1969 
1970 private:
1971 	PageWriterRun*		fRun;
1972 	struct VMCache*		fCache;
1973 	off_t				fOffset;
1974 	uint32				fPageCount;
1975 	int32				fMaxPages;
1976 	status_t			fStatus;
1977 	uint32				fVecCount;
1978 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1979 };
1980 
1981 
1982 class PageWriteWrapper {
1983 public:
1984 	PageWriteWrapper();
1985 	~PageWriteWrapper();
1986 	void SetTo(vm_page* page);
1987 	bool Done(status_t result);
1988 
1989 private:
1990 	vm_page*			fPage;
1991 	struct VMCache*		fCache;
1992 	bool				fIsActive;
1993 };
1994 
1995 
1996 PageWriteWrapper::PageWriteWrapper()
1997 	:
1998 	fIsActive(false)
1999 {
2000 }
2001 
2002 
2003 PageWriteWrapper::~PageWriteWrapper()
2004 {
2005 	if (fIsActive)
2006 		panic("page write wrapper going out of scope but isn't completed");
2007 }
2008 
2009 
2010 /*!	The page's cache must be locked.
2011 */
2012 void
2013 PageWriteWrapper::SetTo(vm_page* page)
2014 {
2015 	DEBUG_PAGE_ACCESS_CHECK(page);
2016 
2017 	if (page->busy)
2018 		panic("setting page write wrapper to busy page");
2019 
2020 	if (fIsActive)
2021 		panic("re-setting page write wrapper that isn't completed");
2022 
2023 	fPage = page;
2024 	fCache = page->Cache();
2025 	fIsActive = true;
2026 
2027 	fPage->busy = true;
2028 	fPage->busy_writing = true;
2029 
2030 	// We have a modified page -- however, while we're writing it back,
2031 	// the page might still be mapped. In order not to lose any changes to the
2032 	// page, we mark it clean before actually writing it back; if
2033 	// writing the page fails for some reason, we'll just keep it in the
2034 	// modified page list, but that should happen only rarely.
2035 
2036 	// If the page is changed after we cleared the dirty flag, but before we
2037 	// had the chance to write it back, then we'll write it again later -- that
2038 	// will probably not happen that often, though.
2039 
2040 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
2041 }
2042 
2043 
2044 /*!	The page's cache must be locked.
2045 	The page queues must not be locked.
2046 	\return \c true if the page was written successfully or could otherwise
2047 		be handled somehow, \c false otherwise.
2048 */
2049 bool
2050 PageWriteWrapper::Done(status_t result)
2051 {
2052 	if (!fIsActive)
2053 		panic("completing page write wrapper that is not active");
2054 
2055 	DEBUG_PAGE_ACCESS_START(fPage);
2056 
2057 	fPage->busy = false;
2058 		// Set unbusy and notify later by hand, since we might free the page.
2059 
2060 	bool success = true;
2061 
2062 	if (result == B_OK) {
2063 		// put it into the active/inactive queue
2064 		move_page_to_appropriate_queue(fPage);
2065 		fPage->busy_writing = false;
2066 		DEBUG_PAGE_ACCESS_END(fPage);
2067 	} else {
2068 		// Writing the page failed. One reason would be that the cache has been
2069 		// shrunk and the page no longer belongs to the file. Otherwise the
2070 		// actual I/O failed, in which case we'll simply keep the page modified.
2071 
2072 		if (!fPage->busy_writing) {
2073 			// The busy_writing flag was cleared. That means the cache has been
2074 			// shrunk while we were trying to write the page and we have to free
2075 			// it now.
2076 			vm_remove_all_page_mappings(fPage);
2077 // TODO: Unmapping should already happen when resizing the cache!
2078 			fCache->RemovePage(fPage);
2079 			free_page(fPage, false);
2080 			unreserve_pages(1);
2081 		} else {
2082 			// Writing the page failed -- mark the page modified and move it to
2083 			// an appropriate queue other than the modified queue, so we don't
2084 			// keep trying to write it over and over again. We keep
2085 			// non-temporary pages in the modified queue, though, so they don't
2086 			// get lost in the inactive queue.
2087 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2088 				strerror(result));
2089 
2090 			fPage->modified = true;
2091 			if (!fCache->temporary)
2092 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2093 			else if (fPage->IsMapped())
2094 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2095 			else
2096 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2097 
2098 			fPage->busy_writing = false;
2099 			DEBUG_PAGE_ACCESS_END(fPage);
2100 
2101 			success = false;
2102 		}
2103 	}
2104 
2105 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2106 	fIsActive = false;
2107 
2108 	return success;
2109 }
2110 
2111 
2112 /*!	The page's cache must be locked.
2113 */
2114 void
2115 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2116 {
2117 	fRun = run;
2118 	fCache = page->Cache();
2119 	fOffset = page->cache_offset;
2120 	fPageCount = 1;
2121 	fMaxPages = maxPages;
2122 	fStatus = B_OK;
2123 
2124 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2125 	fVecs[0].length = B_PAGE_SIZE;
2126 	fVecCount = 1;
2127 }
2128 
2129 
2130 /*!	The page's cache must be locked.
2131 */
2132 bool
2133 PageWriteTransfer::AddPage(vm_page* page)
2134 {
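	// A page is only accepted if it belongs to the same cache and extends the
	// transfer by exactly one page at either end of its cache offset range.
	// Physically contiguous pages are merged into the adjacent io vec; all
	// others get a vec of their own, as long as fVecs has a free slot.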
2135 	if (page->Cache() != fCache
2136 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2137 		return false;
2138 
2139 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2140 		+ fVecs[fVecCount - 1].length;
2141 
2142 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2143 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2144 		// append to last iovec
2145 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2146 		fPageCount++;
2147 		return true;
2148 	}
2149 
2150 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2151 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2152 		&& (off_t)page->cache_offset == fOffset - 1) {
2153 		// prepend to first iovec and adjust offset
2154 		fVecs[0].base = nextBase;
2155 		fVecs[0].length += B_PAGE_SIZE;
2156 		fOffset = page->cache_offset;
2157 		fPageCount++;
2158 		return true;
2159 	}
2160 
2161 	if (((off_t)page->cache_offset == fOffset + fPageCount
2162 			|| (off_t)page->cache_offset == fOffset - 1)
2163 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2164 		// not physically contiguous or not in the right order
2165 		uint32 vectorIndex;
2166 		if ((off_t)page->cache_offset < fOffset) {
2167 			// we are pre-pending another vector, move the other vecs
2168 			for (uint32 i = fVecCount; i > 0; i--)
2169 				fVecs[i] = fVecs[i - 1];
2170 
2171 			fOffset = page->cache_offset;
2172 			vectorIndex = 0;
2173 		} else
2174 			vectorIndex = fVecCount;
2175 
2176 		fVecs[vectorIndex].base
2177 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2178 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2179 
2180 		fVecCount++;
2181 		fPageCount++;
2182 		return true;
2183 	}
2184 
2185 	return false;
2186 }
2187 
2188 
2189 status_t
2190 PageWriteTransfer::Schedule(uint32 flags)
2191 {
2192 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2193 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2194 
2195 	if (fRun != NULL) {
2196 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2197 			flags | B_PHYSICAL_IO_REQUEST, this);
2198 	}
2199 
2200 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2201 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2202 
2203 	SetStatus(status, writeLength);
2204 	return fStatus;
2205 }
2206 
2207 
2208 void
2209 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2210 {
2211 	// only succeed if all pages up to the last one have been written fully
2212 	// and the last page has at least been written partially
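	// E.g. for a four page transfer anything up to 3 * B_PAGE_SIZE
	// transferred bytes counts as a failure of the whole transfer.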
2213 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2214 		status = B_ERROR;
2215 
2216 	fStatus = status;
2217 }
2218 
2219 
2220 void
2221 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2222 	generic_size_t bytesTransferred)
2223 {
2224 	SetStatus(status, bytesTransferred);
2225 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2226 }
2227 
2228 
2229 status_t
2230 PageWriterRun::Init(uint32 maxPages)
2231 {
2232 	fMaxPages = maxPages;
2233 	fWrapperCount = 0;
2234 	fTransferCount = 0;
2235 	fPendingTransfers = 0;
2236 
2237 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2238 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2239 	if (fWrappers == NULL || fTransfers == NULL)
2240 		return B_NO_MEMORY;
2241 
2242 	return B_OK;
2243 }
2244 
2245 
2246 void
2247 PageWriterRun::PrepareNextRun()
2248 {
2249 	fWrapperCount = 0;
2250 	fTransferCount = 0;
2251 	fPendingTransfers = 0;
2252 }
2253 
2254 
2255 /*!	The page's cache must be locked.
2256 */
2257 void
2258 PageWriterRun::AddPage(vm_page* page)
2259 {
2260 	fWrappers[fWrapperCount++].SetTo(page);
2261 
2262 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2263 		fTransfers[fTransferCount++].SetTo(this, page,
2264 			page->Cache()->MaxPagesPerAsyncWrite());
2265 	}
2266 }
2267 
2268 
2269 /*!	Writes all pages previously added.
2270 	\return The number of pages that could not be written or otherwise handled.
2271 */
2272 uint32
2273 PageWriterRun::Go()
2274 {
2275 	atomic_set(&fPendingTransfers, fTransferCount);
2276 
2277 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2278 	ConditionVariableEntry waitEntry;
2279 	fAllFinishedCondition.Add(&waitEntry);
2280 
2281 	// schedule writes
2282 	for (uint32 i = 0; i < fTransferCount; i++)
2283 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2284 
2285 	// wait until all pages have been written
2286 	waitEntry.Wait();
2287 
2288 	// mark pages depending on whether they could be written or not
2289 
2290 	uint32 failedPages = 0;
2291 	uint32 wrapperIndex = 0;
2292 	for (uint32 i = 0; i < fTransferCount; i++) {
2293 		PageWriteTransfer& transfer = fTransfers[i];
2294 		transfer.Cache()->Lock();
2295 
2296 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2297 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2298 				failedPages++;
2299 		}
2300 
2301 		transfer.Cache()->Unlock();
2302 	}
2303 
2304 	ASSERT(wrapperIndex == fWrapperCount);
2305 
2306 	for (uint32 i = 0; i < fTransferCount; i++) {
2307 		PageWriteTransfer& transfer = fTransfers[i];
2308 		struct VMCache* cache = transfer.Cache();
2309 
2310 		// We've acquired a reference for each page
2311 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2312 			// We release the cache references after all pages have been made
2313 			// unbusy again - otherwise releasing a vnode could deadlock.
2314 			cache->ReleaseStoreRef();
2315 			cache->ReleaseRef();
2316 		}
2317 	}
2318 
2319 	return failedPages;
2320 }
2321 
2322 
2323 void
2324 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2325 	bool partialTransfer, size_t bytesTransferred)
2326 {
2327 	if (atomic_add(&fPendingTransfers, -1) == 1)
2328 		fAllFinishedCondition.NotifyAll();
2329 }
2330 
2331 
2332 /*!	The page writer continuously takes some pages from the modified
2333 	queue, writes them back, and moves them back to the active queue.
2334 	It runs in its own thread, and is only there to keep the number
2335 	of modified pages low, so that more pages can be reused with
2336 	of modified pages low, so that more pages can be reused at a
2337 	lower cost.
2338 status_t
2339 page_writer(void* /*unused*/)
2340 {
2341 	const uint32 kNumPages = 256;
2342 #ifdef TRACE_VM_PAGE
2343 	uint32 writtenPages = 0;
2344 	bigtime_t lastWrittenTime = 0;
2345 	bigtime_t pageCollectionTime = 0;
2346 	bigtime_t pageWritingTime = 0;
2347 #endif
2348 
2349 	PageWriterRun run;
2350 	if (run.Init(kNumPages) != B_OK) {
2351 		panic("page writer: Failed to init PageWriterRun!");
2352 		return B_ERROR;
2353 	}
2354 
2355 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2356 
2357 	while (true) {
2358 // TODO: Maybe wait for a shorter time when memory is low!
2359 		if (sModifiedPageQueue.Count() < kNumPages) {
2360 			sPageWriterCondition.Wait(3000000, true);
2361 				// every 3 seconds when no one triggers us
2362 		}
2363 
2364 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2365 		if (modifiedPages == 0)
2366 			continue;
2367 
2368 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2369 			// We ran through the whole queue without being able to write a
2370 			// single page. Take a break.
2371 			snooze(500000);
2372 			pagesSinceLastSuccessfulWrite = 0;
2373 		}
2374 
2375 #if ENABLE_SWAP_SUPPORT
2376 		page_stats pageStats;
2377 		get_page_stats(pageStats);
2378 		bool activePaging = do_active_paging(pageStats);
2379 #endif
2380 
2381 		// depending on how urgent it becomes to get pages to disk, we adjust
2382 		// our I/O priority
2383 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2384 		int32 ioPriority = B_IDLE_PRIORITY;
2385 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2386 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2387 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2388 		} else {
2389 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2390 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2391 		}
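		// The priority thus scales linearly with the backlog: with the
		// default threshold of 10000, for instance, 5000 modified pages yield
		// roughly half of MAX_PAGE_WRITER_IO_PRIORITY.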
2392 
2393 		thread_set_io_priority(ioPriority);
2394 
2395 		uint32 numPages = 0;
2396 		run.PrepareNextRun();
2397 
2398 		// TODO: make this laptop friendly, too (i.e. only start doing
2399 		// something if someone else did something or there is really
2400 		// enough to do).
2401 
2402 		// collect pages to be written
2403 #ifdef TRACE_VM_PAGE
2404 		pageCollectionTime -= system_time();
2405 #endif
2406 
2407 		page_num_t maxPagesToSee = modifiedPages;
2408 
2409 		while (numPages < kNumPages && maxPagesToSee > 0) {
2410 			vm_page *page = next_modified_page(maxPagesToSee);
2411 			if (page == NULL)
2412 				break;
2413 
2414 			PageCacheLocker cacheLocker(page, false);
2415 			if (!cacheLocker.IsLocked())
2416 				continue;
2417 
2418 			VMCache *cache = page->Cache();
2419 
2420 			// If the page is busy or its state has changed while we were
2421 			// locking the cache, just ignore it.
2422 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2423 				continue;
2424 
2425 			DEBUG_PAGE_ACCESS_START(page);
2426 
2427 			// Don't write back wired (locked) pages.
2428 			if (page->WiredCount() > 0) {
2429 				set_page_state(page, PAGE_STATE_ACTIVE);
2430 				DEBUG_PAGE_ACCESS_END(page);
2431 				continue;
2432 			}
2433 
2434 			// Write back temporary pages only when we're actively paging.
2435 			if (cache->temporary
2436 #if ENABLE_SWAP_SUPPORT
2437 				&& (!activePaging
2438 					|| !cache->CanWritePage(
2439 							(off_t)page->cache_offset << PAGE_SHIFT))
2440 #endif
2441 				) {
2442 				// We can't/don't want to do anything with this page, so move it
2443 				// to one of the other queues.
2444 				if (page->mappings.IsEmpty())
2445 					set_page_state(page, PAGE_STATE_INACTIVE);
2446 				else
2447 					set_page_state(page, PAGE_STATE_ACTIVE);
2448 
2449 				DEBUG_PAGE_ACCESS_END(page);
2450 				continue;
2451 			}
2452 
2453 			// We need our own reference to the store, as it might currently be
2454 			// destroyed.
2455 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2456 				DEBUG_PAGE_ACCESS_END(page);
2457 				cacheLocker.Unlock();
2458 				thread_yield();
2459 				continue;
2460 			}
2461 
2462 			run.AddPage(page);
2463 				// TODO: We're possibly adding pages of different caches and
2464 				// thus maybe of different underlying file systems here. This
2465 				// is a potential problem for loop file systems/devices, since
2466 				// we could mark a page busy that would need to be accessed
2467 				// when writing back another page, thus causing a deadlock.
2468 
2469 			DEBUG_PAGE_ACCESS_END(page);
2470 
2471 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2472 			TPW(WritePage(page));
2473 
2474 			cache->AcquireRefLocked();
2475 			numPages++;
2476 		}
2477 
2478 #ifdef TRACE_VM_PAGE
2479 		pageCollectionTime += system_time();
2480 #endif
2481 		if (numPages == 0)
2482 			continue;
2483 
2484 		// write pages to disk and do all the cleanup
2485 #ifdef TRACE_VM_PAGE
2486 		pageWritingTime -= system_time();
2487 #endif
2488 		uint32 failedPages = run.Go();
2489 #ifdef TRACE_VM_PAGE
2490 		pageWritingTime += system_time();
2491 
2492 		// debug output only...
2493 		writtenPages += numPages;
2494 		if (writtenPages >= 1024) {
2495 			bigtime_t now = system_time();
2496 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2497 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2498 				(now - lastWrittenTime) / 1000,
2499 				pageCollectionTime / 1000, pageWritingTime / 1000));
2500 			lastWrittenTime = now;
2501 
2502 			writtenPages -= 1024;
2503 			pageCollectionTime = 0;
2504 			pageWritingTime = 0;
2505 		}
2506 #endif
2507 
2508 		if (failedPages == numPages)
2509 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2510 		else
2511 			pagesSinceLastSuccessfulWrite = 0;
2512 	}
2513 
2514 	return B_OK;
2515 }
2516 
2517 
2518 // #pragma mark -
2519 
2520 
2521 // TODO: This should be done in the page daemon!
2522 #if 0
2523 #if ENABLE_SWAP_SUPPORT
2524 static bool
2525 free_page_swap_space(int32 index)
2526 {
2527 	vm_page *page = vm_page_at_index(index);
2528 	PageCacheLocker locker(page);
2529 	if (!locker.IsLocked())
2530 		return false;
2531 
2532 	DEBUG_PAGE_ACCESS_START(page);
2533 
2534 	VMCache* cache = page->Cache();
2535 	if (cache->temporary && page->WiredCount() == 0
2536 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2537 			&& page->usage_count > 0) {
2538 		// TODO: how do we judge whether a page is highly active?
2539 		if (swap_free_page_swap_space(page)) {
2540 			// We need to mark the page modified, since otherwise it could be
2541 			// stolen and we'd lose its data.
2542 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2543 			TD(FreedPageSwap(page));
2544 			DEBUG_PAGE_ACCESS_END(page);
2545 			return true;
2546 		}
2547 	}
2548 	DEBUG_PAGE_ACCESS_END(page);
2549 	return false;
2550 }
2551 #endif
2552 #endif	// 0
2553 
2554 
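/*!	Returns the next non-busy page of the cached queue, using \a marker to
	remember the position between calls: the marker is (re)inserted right
	after the returned candidate, so the caller may drop the queue lock while
	working on the page and still resume the scan afterwards.
*/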
2555 static vm_page *
2556 find_cached_page_candidate(struct vm_page &marker)
2557 {
2558 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2559 
2560 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2561 	vm_page *page;
2562 
2563 	if (marker.State() == PAGE_STATE_UNUSED) {
2564 		// Get the first page of the cached queue
2565 		page = sCachedPageQueue.Head();
2566 	} else {
2567 		// Get the next page of the current queue
2568 		if (marker.State() != PAGE_STATE_CACHED) {
2569 			panic("invalid marker %p state", &marker);
2570 			return NULL;
2571 		}
2572 
2573 		page = sCachedPageQueue.Next(&marker);
2574 		sCachedPageQueue.Remove(&marker);
2575 		marker.SetState(PAGE_STATE_UNUSED);
2576 	}
2577 
2578 	while (page != NULL) {
2579 		if (!page->busy) {
2580 			// we found a candidate, insert marker
2581 			marker.SetState(PAGE_STATE_CACHED);
2582 			sCachedPageQueue.InsertAfter(page, &marker);
2583 			return page;
2584 		}
2585 
2586 		page = sCachedPageQueue.Next(page);
2587 	}
2588 
2589 	return NULL;
2590 }
2591 
2592 
2593 static bool
2594 free_cached_page(vm_page *page, bool dontWait)
2595 {
2596 	// try to lock the page's cache
2597 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2598 		return false;
2599 	VMCache* cache = page->Cache();
2600 
2601 	AutoLocker<VMCache> cacheLocker(cache, true);
2602 	MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2603 
2604 	// check again if that page is still a candidate
2605 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2606 		return false;
2607 
2608 	DEBUG_PAGE_ACCESS_START(page);
2609 
2610 	PAGE_ASSERT(page, !page->IsMapped());
2611 	PAGE_ASSERT(page, !page->modified);
2612 
2613 	// we can now steal this page
2614 
2615 	cache->RemovePage(page);
2616 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2617 		// vm_page_allocate_page_run()) can pick it up, since they would be
2618 		// required to lock the cache first, which would fail.
2619 
2620 	sCachedPageQueue.RemoveUnlocked(page);
2621 	return true;
2622 }
2623 
2624 
2625 static uint32
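/*!	Tries to steal up to \a pagesToFree pages from the cached queue and moves
	them to the free queue. Returns the number of pages actually freed.
*/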
2626 free_cached_pages(uint32 pagesToFree, bool dontWait)
2627 {
2628 	vm_page marker;
2629 	init_page_marker(marker);
2630 
2631 	uint32 pagesFreed = 0;
2632 
2633 	while (pagesFreed < pagesToFree) {
2634 		vm_page *page = find_cached_page_candidate(marker);
2635 		if (page == NULL)
2636 			break;
2637 
2638 		if (free_cached_page(page, dontWait)) {
2639 			ReadLocker locker(sFreePageQueuesLock);
2640 			page->SetState(PAGE_STATE_FREE);
2641 			DEBUG_PAGE_ACCESS_END(page);
2642 			sFreePageQueue.PrependUnlocked(page);
2643 			locker.Unlock();
2644 
2645 			TA(StolenPage());
2646 
2647 			pagesFreed++;
2648 		}
2649 	}
2650 
2651 	remove_page_marker(marker);
2652 
2653 	sFreePageCondition.NotifyAll();
2654 
2655 	return pagesFreed;
2656 }
2657 
2658 
2659 static void
2660 idle_scan_active_pages(page_stats& pageStats)
2661 {
2662 	VMPageQueue& queue = sActivePageQueue;
2663 
2664 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2665 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2666 
2667 	while (maxToScan > 0) {
2668 		maxToScan--;
2669 
2670 		// Get the next page. Note that we don't bother to lock here. We go with
2671 		// the assumption that on all architectures reading/writing pointers is
2672 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2673 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2674 		vm_page* page = queue.Head();
2675 		if (page == NULL)
2676 			break;
2677 
2678 		// lock the page's cache
2679 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2680 		if (cache == NULL)
2681 			continue;
2682 
2683 		if (page->State() != PAGE_STATE_ACTIVE) {
2684 			// page is no longer in the cache or in this queue
2685 			cache->ReleaseRefAndUnlock();
2686 			continue;
2687 		}
2688 
2689 		if (page->busy) {
2690 			// page is busy -- requeue at the end
2691 			vm_page_requeue(page, true);
2692 			cache->ReleaseRefAndUnlock();
2693 			continue;
2694 		}
2695 
2696 		DEBUG_PAGE_ACCESS_START(page);
2697 
2698 		// Get the page active/modified flags and update the page's usage count.
2699 		// We completely unmap inactive temporary pages. This saves us from
2700 		// having to iterate through the inactive list as well, since we'll be
2701 		// notified via page fault whenever such an inactive page is used again.
2702 		// We don't remove the mappings of non-temporary pages, since we
2703 		// wouldn't notice when those become unused and could thus be moved to
2704 		// the cached list.
2705 		int32 usageCount;
2706 		if (page->WiredCount() > 0 || page->usage_count > 0
2707 			|| !cache->temporary) {
2708 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2709 		} else
2710 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2711 
2712 		if (usageCount > 0) {
2713 			usageCount += page->usage_count + kPageUsageAdvance;
2714 			if (usageCount > kPageUsageMax)
2715 				usageCount = kPageUsageMax;
2716 // TODO: This would probably also be the place to reclaim swap space.
2717 		} else {
2718 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2719 			if (usageCount < 0) {
2720 				usageCount = 0;
2721 				set_page_state(page, PAGE_STATE_INACTIVE);
2722 			}
2723 		}
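		// Sketch of the arithmetic: if the accessed check above returned 1
		// for a page with usage_count 10, the new count is
		// 1 + 10 + kPageUsageAdvance = 14 (capped at kPageUsageMax); if it
		// returned 0, the count drops to 10 - kPageUsageDecline = 9, and the
		// page is only deactivated once the count reaches 0.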
2724 
2725 		page->usage_count = usageCount;
2726 
2727 		DEBUG_PAGE_ACCESS_END(page);
2728 
2729 		cache->ReleaseRefAndUnlock();
2730 	}
2731 }
2732 
2733 
2734 static void
2735 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2736 {
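	// The goal is to bring free + cached pages back up to
	// sFreeOrCachedPagesTarget plus the still unsatisfied reservations; if
	// that target is already met, there is nothing to do here.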
2737 	int32 pagesToFree = pageStats.unsatisfiedReservations
2738 		+ sFreeOrCachedPagesTarget
2739 		- (pageStats.totalFreePages + pageStats.cachedPages);
2740 	if (pagesToFree <= 0)
2741 		return;
2742 
2743 	bigtime_t time = system_time();
2744 	uint32 pagesScanned = 0;
2745 	uint32 pagesToCached = 0;
2746 	uint32 pagesToModified = 0;
2747 	uint32 pagesToActive = 0;
2748 
2749 	// Determine the maximum number of pages to send to the modified queue.
2750 	// Since it is relatively expensive to page out pages, we do that on a
2751 	// grander scale only when things get desperate.
2752 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2753 
2754 	vm_page marker;
2755 	init_page_marker(marker);
2756 
2757 	VMPageQueue& queue = sInactivePageQueue;
2758 	InterruptsSpinLocker queueLocker(queue.GetLock());
2759 	uint32 maxToScan = queue.Count();
2760 
2761 	vm_page* nextPage = queue.Head();
2762 
2763 	while (pagesToFree > 0 && maxToScan > 0) {
2764 		maxToScan--;
2765 
2766 		// get the next page
2767 		vm_page* page = nextPage;
2768 		if (page == NULL)
2769 			break;
2770 		nextPage = queue.Next(page);
2771 
2772 		if (page->busy)
2773 			continue;
2774 
2775 		// mark the position
2776 		queue.InsertAfter(page, &marker);
2777 		queueLocker.Unlock();
2778 
2779 		// lock the page's cache
2780 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2781 		if (cache == NULL || page->busy
2782 				|| page->State() != PAGE_STATE_INACTIVE) {
2783 			if (cache != NULL)
2784 				cache->ReleaseRefAndUnlock();
2785 			queueLocker.Lock();
2786 			nextPage = queue.Next(&marker);
2787 			queue.Remove(&marker);
2788 			continue;
2789 		}
2790 
2791 		pagesScanned++;
2792 
2793 		DEBUG_PAGE_ACCESS_START(page);
2794 
2795 		// Get the accessed count, clear the accessed/modified flags and
2796 		// unmap the page, if it hasn't been accessed.
2797 		int32 usageCount;
2798 		if (page->WiredCount() > 0)
2799 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2800 		else
2801 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2802 
2803 		// update usage count
2804 		if (usageCount > 0) {
2805 			usageCount += page->usage_count + kPageUsageAdvance;
2806 			if (usageCount > kPageUsageMax)
2807 				usageCount = kPageUsageMax;
2808 		} else {
2809 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2810 			if (usageCount < 0)
2811 				usageCount = 0;
2812 		}
2813 
2814 		page->usage_count = usageCount;
2815 
2816 		// Move to fitting queue or requeue:
2817 		// * Active mapped pages go to the active queue.
2818 		// * Inactive mapped (i.e. wired) pages are requeued.
2819 		// * The remaining pages are cacheable. Thus, if unmodified they go to
2820 		//   the cached queue, otherwise to the modified queue (up to a limit).
2821 		//   Note that, unlike in the idle scanning, we don't exempt pages of
2822 		//   temporary caches here. Apparently we really need memory, so we'd
2823 		//   better page out memory as well.
2824 		bool isMapped = page->IsMapped();
2825 		if (usageCount > 0) {
2826 			if (isMapped) {
2827 				set_page_state(page, PAGE_STATE_ACTIVE);
2828 				pagesToActive++;
2829 			} else
2830 				vm_page_requeue(page, true);
2831 		} else if (isMapped) {
2832 			vm_page_requeue(page, true);
2833 		} else if (!page->modified) {
2834 			set_page_state(page, PAGE_STATE_CACHED);
2835 			pagesToFree--;
2836 			pagesToCached++;
2837 		} else if (maxToFlush > 0) {
2838 			set_page_state(page, PAGE_STATE_MODIFIED);
2839 			maxToFlush--;
2840 			pagesToModified++;
2841 		} else
2842 			vm_page_requeue(page, true);
2843 
2844 		DEBUG_PAGE_ACCESS_END(page);
2845 
2846 		cache->ReleaseRefAndUnlock();
2847 
2848 		// remove the marker
2849 		queueLocker.Lock();
2850 		nextPage = queue.Next(&marker);
2851 		queue.Remove(&marker);
2852 	}
2853 
2854 	queueLocker.Unlock();
2855 
2856 	time = system_time() - time;
2857 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2858 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2859 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2860 		pagesToModified, pagesToActive);
2861 
2862 	// wake up the page writer, if we tossed it some pages
2863 	if (pagesToModified > 0)
2864 		sPageWriterCondition.WakeUp();
2865 }
2866 
2867 
2868 static void
2869 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2870 {
2871 	vm_page marker;
2872 	init_page_marker(marker);
2873 
2874 	VMPageQueue& queue = sActivePageQueue;
2875 	InterruptsSpinLocker queueLocker(queue.GetLock());
2876 	uint32 maxToScan = queue.Count();
2877 
2878 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2879 		+ sFreeOrCachedPagesTarget
2880 		- (pageStats.totalFreePages + pageStats.cachedPages)
2881 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2882 	if (pagesToDeactivate <= 0)
2883 		return;
2884 
2885 	bigtime_t time = system_time();
2886 	uint32 pagesAccessed = 0;
2887 	uint32 pagesToInactive = 0;
2888 	uint32 pagesScanned = 0;
2889 
2890 	vm_page* nextPage = queue.Head();
2891 
2892 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2893 		maxToScan--;
2894 
2895 		// get the next page
2896 		vm_page* page = nextPage;
2897 		if (page == NULL)
2898 			break;
2899 		nextPage = queue.Next(page);
2900 
2901 		if (page->busy)
2902 			continue;
2903 
2904 		// mark the position
2905 		queue.InsertAfter(page, &marker);
2906 		queueLocker.Unlock();
2907 
2908 		// lock the page's cache
2909 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2910 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2911 			if (cache != NULL)
2912 				cache->ReleaseRefAndUnlock();
2913 			queueLocker.Lock();
2914 			nextPage = queue.Next(&marker);
2915 			queue.Remove(&marker);
2916 			continue;
2917 		}
2918 
2919 		pagesScanned++;
2920 
2921 		DEBUG_PAGE_ACCESS_START(page);
2922 
2923 		// Get the page active/modified flags and update the page's usage count.
2924 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2925 
2926 		if (usageCount > 0) {
2927 			usageCount += page->usage_count + kPageUsageAdvance;
2928 			if (usageCount > kPageUsageMax)
2929 				usageCount = kPageUsageMax;
2930 			pagesAccessed++;
2931 // TODO: This would probably also be the place to reclaim swap space.
2932 		} else {
2933 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2934 			if (usageCount <= 0) {
2935 				usageCount = 0;
2936 				set_page_state(page, PAGE_STATE_INACTIVE);
2937 				pagesToInactive++;
2938 			}
2939 		}
2940 
2941 		page->usage_count = usageCount;
2942 
2943 		DEBUG_PAGE_ACCESS_END(page);
2944 
2945 		cache->ReleaseRefAndUnlock();
2946 
2947 		// remove the marker
2948 		queueLocker.Lock();
2949 		nextPage = queue.Next(&marker);
2950 		queue.Remove(&marker);
2951 	}
2952 
2953 	time = system_time() - time;
2954 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2955 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2956 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2957 }
2958 
2959 
2960 static void
2961 page_daemon_idle_scan(page_stats& pageStats)
2962 {
2963 	TRACE_DAEMON("page daemon: idle run\n");
2964 
2965 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2966 		// We want more actually free pages, so free some from the cached
2967 		// ones.
2968 		uint32 freed = free_cached_pages(
2969 			sFreePagesTarget - pageStats.totalFreePages, false);
2970 		if (freed > 0)
2971 			unreserve_pages(freed);
2972 		get_page_stats(pageStats);
2973 	}
2974 
2975 	// Walk the active list and move pages to the inactive queue.
2976 	get_page_stats(pageStats);
2977 	idle_scan_active_pages(pageStats);
2978 }
2979 
2980 
2981 static void
2982 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2983 {
2984 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
2985 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
2986 		pageStats.cachedPages, pageStats.unsatisfiedReservations
2987 			+ sFreeOrCachedPagesTarget
2988 			- (pageStats.totalFreePages + pageStats.cachedPages));
2989 
2990 	// Walk the inactive list and transfer pages to the cached and modified
2991 	// queues.
2992 	full_scan_inactive_pages(pageStats, despairLevel);
2993 
2994 	// Free cached pages. Also wake up reservation waiters.
2995 	get_page_stats(pageStats);
2996 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2997 		- (pageStats.totalFreePages);
2998 	if (pagesToFree > 0) {
2999 		uint32 freed = free_cached_pages(pagesToFree, true);
3000 		if (freed > 0)
3001 			unreserve_pages(freed);
3002 	}
3003 
3004 	// Walk the active list and move pages to the inactive queue.
3005 	get_page_stats(pageStats);
3006 	full_scan_active_pages(pageStats, despairLevel);
3007 }
3008 
3009 
3010 static status_t
3011 page_daemon(void* /*unused*/)
3012 {
3013 	int32 despairLevel = 0;
3014 
3015 	while (true) {
3016 		sPageDaemonCondition.ClearActivated();
3017 
3018 		// evaluate the free pages situation
3019 		page_stats pageStats;
3020 		get_page_stats(pageStats);
3021 
3022 		if (!do_active_paging(pageStats)) {
3023 			// Things look good -- just maintain statistics and keep the pool
3024 			// of actually free pages full enough.
3025 			despairLevel = 0;
3026 			page_daemon_idle_scan(pageStats);
3027 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
3028 		} else {
3029 			// Not enough free pages. We need to do some real work.
3030 			despairLevel = std::min(despairLevel + 1, (int32)3);
3031 			page_daemon_full_scan(pageStats, despairLevel);
3032 
3033 			// Don't wait after the first full scan, but rather immediately
3034 			// check whether we were successful in freeing enough pages and
3035 			// re-run with increased despair level. The first scan is
3036 			// conservative with respect to moving inactive modified pages to
3037 			// the modified list to avoid thrashing. The second scan, however,
3038 			// will not hold back.
3039 			if (despairLevel > 1)
3040 				snooze(kBusyScanWaitInterval);
3041 		}
3042 	}
3043 
3044 	return B_OK;
3045 }
3046 
3047 
3048 /*!	Returns how many pages could *not* be reserved.
3049 */
3050 static uint32
3051 reserve_pages(uint32 count, int priority, bool dontWait)
3052 {
3053 	int32 dontTouch = kPageReserveForPriority[priority];
3054 
3055 	while (true) {
3056 		count -= reserve_some_pages(count, dontTouch);
3057 		if (count == 0)
3058 			return 0;
3059 
3060 		if (sUnsatisfiedPageReservations == 0) {
3061 			count -= free_cached_pages(count, dontWait);
3062 			if (count == 0)
3063 				return count;
3064 		}
3065 
3066 		if (dontWait)
3067 			return count;
3068 
3069 		// we need to wait for pages to become available
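		// We register ourselves as a reservation waiter (waking the page
		// daemon if we're the first one) and block; whoever satisfies the
		// reservation unblocks us, at which point the missing pages have
		// already been credited to this reservation and we can return 0.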
3070 
3071 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3072 
3073 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3074 		sUnsatisfiedPageReservations += count;
3075 
3076 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3077 			// the situation changed
3078 			sUnsatisfiedPageReservations -= count;
3079 			continue;
3080 		}
3081 
3082 		PageReservationWaiter waiter;
3083 		waiter.dontTouch = dontTouch;
3084 		waiter.missing = count;
3085 		waiter.thread = thread_get_current_thread();
3086 		waiter.threadPriority = waiter.thread->priority;
3087 
3088 		// insert ordered (i.e. after all waiters with higher or equal priority)
3089 		PageReservationWaiter* otherWaiter = NULL;
3090 		for (PageReservationWaiterList::Iterator it
3091 				= sPageReservationWaiters.GetIterator();
3092 			(otherWaiter = it.Next()) != NULL;) {
3093 			if (waiter < *otherWaiter)
3094 				break;
3095 		}
3096 
3097 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3098 
3099 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3100 			"waiting for pages");
3101 
3102 		if (notifyDaemon)
3103 			sPageDaemonCondition.WakeUp();
3104 
3105 		pageDeficitLocker.Unlock();
3106 
3107 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3108 		thread_block();
3109 
3110 		pageDeficitLocker.Lock();
3111 
3112 		return 0;
3113 	}
3114 }
3115 
3116 
3117 //	#pragma mark - private kernel API
3118 
3119 
3120 /*!	Writes a range of modified pages of a cache to disk.
3121 	You need to hold the VMCache lock when calling this function.
3122 	Note that the cache lock is released in this function.
3123 	\param cache The cache.
3124 	\param firstPage Offset (in page size units) of the first page in the range.
3125 	\param endPage End offset (in page size units) of the page range. The page
3126 		at this offset is not included.
3127 */
3128 status_t
3129 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3130 	uint32 endPage)
3131 {
3132 	static const int32 kMaxPages = 256;
3133 	int32 maxPages = cache->MaxPagesPerWrite();
3134 	if (maxPages < 0 || maxPages > kMaxPages)
3135 		maxPages = kMaxPages;
3136 
3137 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3138 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3139 
3140 	PageWriteWrapper stackWrappersPool[2];
3141 	PageWriteWrapper* stackWrappers[1];
3142 	PageWriteWrapper* wrapperPool
3143 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3144 	PageWriteWrapper** wrappers
3145 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3146 	if (wrapperPool == NULL || wrappers == NULL) {
3147 		// don't fail, just limit our capabilities
3148 		delete[] wrapperPool;
3149 		delete[] wrappers;
3150 		wrapperPool = stackWrappersPool;
3151 		wrappers = stackWrappers;
3152 		maxPages = 1;
3153 	}
3154 
3155 	int32 nextWrapper = 0;
3156 	int32 usedWrappers = 0;
3157 
3158 	PageWriteTransfer transfer;
3159 	bool transferEmpty = true;
3160 
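	// Iterate over the pages in the given range. Consecutive eligible pages
	// are collected into a single PageWriteTransfer; whenever a page doesn't
	// fit the current transfer (or the range ends), the transfer is written
	// out synchronously and the wrappers of its pages are completed with the
	// resulting status.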
3161 	VMCachePagesTree::Iterator it
3162 		= cache->pages.GetIterator(firstPage, true, true);
3163 
3164 	while (true) {
3165 		vm_page* page = it.Next();
3166 		if (page == NULL || page->cache_offset >= endPage) {
3167 			if (transferEmpty)
3168 				break;
3169 
3170 			page = NULL;
3171 		}
3172 
3173 		if (page != NULL) {
3174 			if (page->busy
3175 				|| (page->State() != PAGE_STATE_MODIFIED
3176 					&& !vm_test_map_modification(page))) {
3177 				page = NULL;
3178 			}
3179 		}
3180 
3181 		PageWriteWrapper* wrapper = NULL;
3182 		if (page != NULL) {
3183 			wrapper = &wrapperPool[nextWrapper++];
3184 			if (nextWrapper > maxPages)
3185 				nextWrapper = 0;
3186 
3187 			DEBUG_PAGE_ACCESS_START(page);
3188 
3189 			wrapper->SetTo(page);
3190 
3191 			if (transferEmpty || transfer.AddPage(page)) {
3192 				if (transferEmpty) {
3193 					transfer.SetTo(NULL, page, maxPages);
3194 					transferEmpty = false;
3195 				}
3196 
3197 				DEBUG_PAGE_ACCESS_END(page);
3198 
3199 				wrappers[usedWrappers++] = wrapper;
3200 				continue;
3201 			}
3202 
3203 			DEBUG_PAGE_ACCESS_END(page);
3204 		}
3205 
3206 		if (transferEmpty)
3207 			continue;
3208 
3209 		cache->Unlock();
3210 		status_t status = transfer.Schedule(0);
3211 		cache->Lock();
3212 
3213 		for (int32 i = 0; i < usedWrappers; i++)
3214 			wrappers[i]->Done(status);
3215 
3216 		usedWrappers = 0;
3217 
3218 		if (page != NULL) {
3219 			transfer.SetTo(NULL, page, maxPages);
3220 			wrappers[usedWrappers++] = wrapper;
3221 		} else
3222 			transferEmpty = true;
3223 	}
3224 
3225 	if (wrapperPool != stackWrappersPool) {
3226 		delete[] wrapperPool;
3227 		delete[] wrappers;
3228 	}
3229 
3230 	return B_OK;
3231 }
3232 
3233 
3234 /*!	You need to hold the VMCache lock when calling this function.
3235 	Note that the cache lock is released in this function.
3236 */
3237 status_t
3238 vm_page_write_modified_pages(VMCache *cache)
3239 {
3240 	return vm_page_write_modified_page_range(cache, 0,
3241 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3242 }
3243 
3244 
3245 /*!	Schedules the page writer to write back the specified \a page.
3246 	Note, however, that it might not do this immediately, and it can well
3247 	take several seconds until the page is actually written out.
3248 */
3249 void
3250 vm_page_schedule_write_page(vm_page *page)
3251 {
3252 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3253 
3254 	vm_page_requeue(page, false);
3255 
3256 	sPageWriterCondition.WakeUp();
3257 }
3258 
3259 
3260 /*!	Cache must be locked.
3261 */
3262 void
3263 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3264 	uint32 endPage)
3265 {
3266 	uint32 modified = 0;
3267 	for (VMCachePagesTree::Iterator it
3268 				= cache->pages.GetIterator(firstPage, true, true);
3269 			vm_page *page = it.Next();) {
3270 		if (page->cache_offset >= endPage)
3271 			break;
3272 
3273 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3274 			DEBUG_PAGE_ACCESS_START(page);
3275 			vm_page_requeue(page, false);
3276 			modified++;
3277 			DEBUG_PAGE_ACCESS_END(page);
3278 		}
3279 	}
3280 
3281 	if (modified > 0)
3282 		sPageWriterCondition.WakeUp();
3283 }
3284 
3285 
3286 void
3287 vm_page_init_num_pages(kernel_args *args)
3288 {
3289 	// calculate the size of memory by looking at the physical_memory_range array
3290 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3291 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3292 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3293 
3294 	sNonExistingPages = 0;
3295 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3296 
3297 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3298 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3299 		if (start > physicalPagesEnd)
3300 			sNonExistingPages += start - physicalPagesEnd;
3301 		physicalPagesEnd = start
3302 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3303 
3304 #ifdef LIMIT_AVAILABLE_MEMORY
3305 		page_num_t available
3306 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3307 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3308 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3309 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3310 			break;
3311 		}
3312 #endif
3313 	}
3314 
3315 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3316 		sPhysicalPageOffset, physicalPagesEnd));
3317 
3318 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3319 }
3320 
3321 
3322 status_t
3323 vm_page_init(kernel_args *args)
3324 {
3325 	TRACE(("vm_page_init: entry\n"));
3326 
3327 	// init page queues
3328 	sModifiedPageQueue.Init("modified pages queue");
3329 	sInactivePageQueue.Init("inactive pages queue");
3330 	sActivePageQueue.Init("active pages queue");
3331 	sCachedPageQueue.Init("cached pages queue");
3332 	sFreePageQueue.Init("free pages queue");
3333 	sClearPageQueue.Init("clear pages queue");
3334 
3335 	new (&sPageReservationWaiters) PageReservationWaiterList;
3336 
3337 	// map in the new free page table
3338 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3339 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3340 
3341 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3342 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3343 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3344 
3345 	// initialize the free page table
3346 	for (uint32 i = 0; i < sNumPages; i++) {
3347 		sPages[i].Init(sPhysicalPageOffset + i);
3348 		sFreePageQueue.Append(&sPages[i]);
3349 
3350 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3351 		sPages[i].allocation_tracking_info.Clear();
3352 #endif
3353 	}
3354 
3355 	sUnreservedFreePages = sNumPages;
3356 
3357 	TRACE(("initialized table\n"));
3358 
3359 	// mark the ranges between usable physical memory unused
3360 	phys_addr_t previousEnd = 0;
3361 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3362 		phys_addr_t base = args->physical_memory_range[i].start;
3363 		phys_size_t size = args->physical_memory_range[i].size;
3364 		if (base > previousEnd) {
3365 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3366 				(base - previousEnd) / B_PAGE_SIZE, false);
3367 		}
3368 		previousEnd = base + size;
3369 	}
3370 
3371 	// mark the allocated physical page ranges wired
3372 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3373 		mark_page_range_in_use(
3374 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3375 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3376 	}
3377 
3378 	// prevent future allocations from the kernel args ranges
3379 	args->num_physical_allocated_ranges = 0;
3380 
3381 	// The target of actually free pages. This must be at least the system
3382 	// reserve, but should be a few more pages, so we don't have to extract
3383 	// a cached page with each allocation.
3384 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3385 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
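	// E.g. on a machine with 1 GiB of existing memory and 4 KiB pages this
	// adds max(32, 262144 / 1024) = 256 pages on top of the user reserve.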
3386 
3387 	// The target of free + cached and inactive pages. On low-memory machines
3388 	// keep things tight. free + cached is the pool of immediately allocatable
3389 	// pages. We want a few inactive pages, so when we're actually paging, we
3390 	// have a reasonably large set of pages to work with.
3391 	if (sUnreservedFreePages < 16 * 1024) {
3392 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3393 		sInactivePagesTarget = sFreePagesTarget / 3;
3394 	} else {
3395 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3396 		sInactivePagesTarget = sFreePagesTarget / 2;
3397 	}
3398 
3399 	TRACE(("vm_page_init: exit\n"));
3400 
3401 	return B_OK;
3402 }
3403 
3404 
3405 status_t
3406 vm_page_init_post_area(kernel_args *args)
3407 {
3408 	void *dummy;
3409 
3410 	dummy = sPages;
3411 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3412 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3413 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3414 
3415 	add_debugger_command("list_pages", &dump_page_list,
3416 		"List physical pages");
3417 	add_debugger_command("page_stats", &dump_page_stats,
3418 		"Dump statistics about page usage");
3419 	add_debugger_command_etc("page", &dump_page_long,
3420 		"Dump page info",
3421 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3422 		"Prints information for the physical page. If neither \"-p\" nor\n"
3423 		"\"-v\" are given, the provided address is interpreted as address of\n"
3424 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3425 		"given, the address is the physical address of the page. If \"-v\" is\n"
3426 		"given, the address is interpreted as virtual address in the current\n"
3427 		"thread's address space and for the page it is mapped to (if any)\n"
3428 		"information are printed. If \"-m\" is specified, the command will\n"
3429 		"search all known address spaces for mappings to that page and print\n"
3430 		"them.\n", 0);
3431 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3432 	add_debugger_command("find_page", &find_page,
3433 		"Find out which queue a page is actually in");
3434 
3435 #ifdef TRACK_PAGE_USAGE_STATS
3436 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3437 		"Dumps statistics about page usage counts",
3438 		"\n"
3439 		"Dumps statistics about page usage counts.\n",
3440 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3441 #endif
3442 
3443 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3444 	add_debugger_command_etc("page_allocations_per_caller",
3445 		&dump_page_allocations_per_caller,
3446 		"Dump current page allocations summed up per caller",
3447 		"[ -d <caller> ] [ -r ]\n"
3448 		"The current allocations will by summed up by caller (their count)\n"
3449 		"printed in decreasing order by count.\n"
3450 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3451 		"including the respective stack trace.\n"
3452 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3453 		"the information, so the next command invocation will only show the\n"
3454 		"allocations made after the reset.\n", 0);
3455 	add_debugger_command_etc("page_allocation_infos",
3456 		&dump_page_allocation_infos,
3457 		"Dump current page allocations",
3458 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3459 		"[ --thread <thread ID> ]\n"
3460 		"The current allocations filtered by optional values will be printed.\n"
3461 		"The optional \"-p\" page number filters for a specific page,\n"
3462 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3463 		"and/or threads can be filtered (these only work if a corresponding\n"
3464 		"tracing entry is still available).\n"
3465 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3466 		"callers are printed, where available\n", 0);
3467 #endif
3468 
3469 	return B_OK;
3470 }
3471 
3472 
3473 status_t
3474 vm_page_init_post_thread(kernel_args *args)
3475 {
3476 	new (&sFreePageCondition) ConditionVariable;
3477 
3478 	// create a kernel thread to clear out pages
3479 
3480 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3481 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3482 	resume_thread(thread);
3483 
3484 	// start page writer
3485 
3486 	sPageWriterCondition.Init("page writer");
3487 
3488 	thread = spawn_kernel_thread(&page_writer, "page writer",
3489 		B_NORMAL_PRIORITY + 1, NULL);
3490 	resume_thread(thread);
3491 
3492 	// start page daemon
3493 
3494 	sPageDaemonCondition.Init("page daemon");
3495 
3496 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3497 		B_NORMAL_PRIORITY, NULL);
3498 	resume_thread(thread);
3499 
3500 	return B_OK;
3501 }
3502 
3503 
3504 status_t
3505 vm_mark_page_inuse(page_num_t page)
3506 {
3507 	return vm_mark_page_range_inuse(page, 1);
3508 }
3509 
3510 
3511 status_t
3512 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3513 {
3514 	return mark_page_range_in_use(startPage, length, false);
3515 }
3516 
3517 
3518 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3519 */
3520 void
3521 vm_page_unreserve_pages(vm_page_reservation* reservation)
3522 {
3523 	uint32 count = reservation->count;
3524 	reservation->count = 0;
3525 
3526 	if (count == 0)
3527 		return;
3528 
3529 	TA(UnreservePages(count));
3530 
3531 	unreserve_pages(count);
3532 }
3533 
3534 
3535 /*!	With this call, you can reserve a number of free pages in the system.
3536 	They will only be handed out to someone who has actually reserved them.
3537 	This call returns as soon as the number of requested pages has been
3538 	reached.
3539 	The caller must not hold any cache lock or the function might deadlock.
3540 */
3541 void
3542 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3543 	int priority)
3544 {
3545 	reservation->count = count;
3546 
3547 	if (count == 0)
3548 		return;
3549 
3550 	TA(ReservePages(count));
3551 
3552 	reserve_pages(count, priority, false);
3553 }
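// Typical caller pattern (a sketch only, not part of this file):
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, count, VM_PRIORITY_USER);
//	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_ACTIVE);
//	...
//	vm_page_unreserve_pages(&reservation);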
3554 
3555 
3556 bool
3557 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3558 	int priority)
3559 {
3560 	if (count == 0) {
3561 		reservation->count = count;
3562 		return true;
3563 	}
3564 
3565 	uint32 remaining = reserve_pages(count, priority, true);
3566 	if (remaining == 0) {
3567 		TA(ReservePages(count));
3568 		reservation->count = count;
3569 		return true;
3570 	}
3571 
3572 	unreserve_pages(count - remaining);
3573 
3574 	return false;
3575 }
3576 
3577 
3578 vm_page *
3579 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3580 {
3581 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3582 	ASSERT(pageState != PAGE_STATE_FREE);
3583 	ASSERT(pageState != PAGE_STATE_CLEAR);
3584 
3585 	ASSERT(reservation->count > 0);
3586 	reservation->count--;
3587 
3588 	VMPageQueue* queue;
3589 	VMPageQueue* otherQueue;
3590 
3591 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3592 		queue = &sClearPageQueue;
3593 		otherQueue = &sFreePageQueue;
3594 	} else {
3595 		queue = &sFreePageQueue;
3596 		otherQueue = &sClearPageQueue;
3597 	}
3598 
3599 	ReadLocker locker(sFreePageQueuesLock);
3600 
3601 	vm_page* page = queue->RemoveHeadUnlocked();
3602 	if (page == NULL) {
3603 		// if the primary queue was empty, grab the page from the
3604 		// secondary queue
3605 		page = otherQueue->RemoveHeadUnlocked();
3606 
3607 		if (page == NULL) {
3608 			// Unlikely, but possible: the page we have reserved has moved
3609 			// between the queues after we checked the first queue. Grab the
3610 			// write locker to make sure this doesn't happen again.
3611 			locker.Unlock();
3612 			WriteLocker writeLocker(sFreePageQueuesLock);
3613 
3614 			page = queue->RemoveHead();
3615 			if (page == NULL)
3616 				otherQueue->RemoveHead();
3617 				page = otherQueue->RemoveHead();
3618 			if (page == NULL) {
3619 				panic("Had reserved page, but there is none!");
3620 				return NULL;
3621 			}
3622 
3623 			// downgrade to read lock
3624 			locker.Lock();
3625 		}
3626 	}
3627 
3628 	if (page->CacheRef() != NULL)
3629 		panic("supposed to be free page %p has cache\n", page);
3630 
3631 	DEBUG_PAGE_ACCESS_START(page);
3632 
3633 	int oldPageState = page->State();
3634 	page->SetState(pageState);
3635 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3636 	page->usage_count = 0;
3637 	page->accessed = false;
3638 	page->modified = false;
3639 
3640 	locker.Unlock();
3641 
3642 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3643 		sPageQueues[pageState].AppendUnlocked(page);
3644 
3645 	// clear the page, if we had to take it from the free queue and a clear
3646 	// page was requested
3647 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3648 		clear_page(page);
3649 
3650 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3651 	page->allocation_tracking_info.Init(
3652 		TA(AllocatePage(page->physical_page_number)));
3653 #else
3654 	TA(AllocatePage(page->physical_page_number));
3655 #endif
3656 
3657 	return page;
3658 }
3659 
3660 
3661 static void
3662 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3663 	VMPageQueue::PageList& clearPages)
3664 {
3665 	// Page lists are sorted, so remove tails before prepending to the respective queue.
3666 
3667 	while (vm_page* page = freePages.RemoveTail()) {
3668 		page->busy = false;
3669 		page->SetState(PAGE_STATE_FREE);
3670 		DEBUG_PAGE_ACCESS_END(page);
3671 		sFreePageQueue.PrependUnlocked(page);
3672 	}
3673 
3674 	while (vm_page* page = clearPages.RemoveTail()) {
3675 		page->busy = false;
3676 		page->SetState(PAGE_STATE_CLEAR);
3677 		DEBUG_PAGE_ACCESS_END(page);
3678 		sClearPageQueue.PrependUnlocked(page);
3679 	}
3680 
3681 	sFreePageCondition.NotifyAll();
3682 }
3683 
3684 
3685 /*!	Tries to allocate a contiguous run of \a length pages starting at
3686 	index \a start.
3687 
3688 	The caller must have write-locked the free/clear page queues. The function
3689 	will unlock regardless of whether it succeeds or fails.
3690 
3691 	If the function fails, it cleans up after itself, i.e. it will free all
3692 	pages it managed to allocate.
3693 
3694 	\param start The start index (into \c sPages) of the run.
3695 	\param length The number of pages to allocate.
3696 	\param flags Page allocation flags. Encodes the state the function shall
3697 		set the allocated pages to, whether the pages shall be marked busy
3698 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3699 		(VM_PAGE_ALLOC_CLEAR).
3700 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3701 		in locked state. Will be unlocked by the function.
3702 	\return The index of the first page that could not be allocated. \a length
3703 		is returned when the function was successful.
3704 */
3705 static page_num_t
3706 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3707 	WriteLocker& freeClearQueueLocker)
3708 {
3709 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3710 	ASSERT(pageState != PAGE_STATE_FREE);
3711 	ASSERT(pageState != PAGE_STATE_CLEAR);
3712 	ASSERT(start + length <= sNumPages);
3713 
3714 	// Pull the free/clear pages out of their respective queues. Cached pages
3715 	// are allocated later.
3716 	page_num_t cachedPages = 0;
3717 	VMPageQueue::PageList freePages;
3718 	VMPageQueue::PageList clearPages;
3719 	page_num_t i = 0;
3720 	for (; i < length; i++) {
3721 		bool pageAllocated = true;
3722 		bool noPage = false;
3723 		vm_page& page = sPages[start + i];
3724 		switch (page.State()) {
3725 			case PAGE_STATE_CLEAR:
3726 				DEBUG_PAGE_ACCESS_START(&page);
3727 				sClearPageQueue.Remove(&page);
3728 				clearPages.Add(&page);
3729 				break;
3730 			case PAGE_STATE_FREE:
3731 				DEBUG_PAGE_ACCESS_START(&page);
3732 				sFreePageQueue.Remove(&page);
3733 				freePages.Add(&page);
3734 				break;
3735 			case PAGE_STATE_CACHED:
3736 				// We allocate cached pages later.
3737 				cachedPages++;
3738 				pageAllocated = false;
3739 				break;
3740 
3741 			default:
3742 				// Probably a page was cached when our caller checked. Now it's
3743 				// gone and we have to abort.
3744 				noPage = true;
3745 				break;
3746 		}
3747 
3748 		if (noPage)
3749 			break;
3750 
3751 		if (pageAllocated) {
3752 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3753 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3754 			page.usage_count = 0;
3755 			page.accessed = false;
3756 			page.modified = false;
3757 		}
3758 	}
3759 
3760 	if (i < length) {
3761 		// failed to allocate a page -- free all that we've got
3762 		allocate_page_run_cleanup(freePages, clearPages);
3763 		return i;
3764 	}
3765 
3766 	freeClearQueueLocker.Unlock();
3767 
3768 	if (cachedPages > 0) {
3769 		// allocate the pages that weren't free but cached
3770 		page_num_t freedCachedPages = 0;
3771 		page_num_t nextIndex = start;
3772 		vm_page* freePage = freePages.Head();
3773 		vm_page* clearPage = clearPages.Head();
3774 		while (cachedPages > 0) {
3775 			// skip, if we've already got the page
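			// Both lists were filled in ascending page index order above, so
			// comparing the heads' indices (pointer difference to sPages)
			// with nextIndex tells whether the page at nextIndex was already
			// pulled out of the free/clear queues.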
3776 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3777 				freePage = freePages.GetNext(freePage);
3778 				nextIndex++;
3779 				continue;
3780 			}
3781 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3782 				clearPage = clearPages.GetNext(clearPage);
3783 				nextIndex++;
3784 				continue;
3785 			}
3786 
3787 			// free the page, if it is still cached
3788 			vm_page& page = sPages[nextIndex];
3789 			if (!free_cached_page(&page, false)) {
3790 				// TODO: if the page turns out to have been freed already,
3791 				// there would be no need to fail
3792 				break;
3793 			}
3794 
3795 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3796 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3797 			page.usage_count = 0;
3798 			page.accessed = false;
3799 			page.modified = false;
3800 
3801 			freePages.InsertBefore(freePage, &page);
3802 			freedCachedPages++;
3803 			cachedPages--;
3804 			nextIndex++;
3805 		}
3806 
3807 		// If we have freed cached pages, we need to balance things.
3808 		if (freedCachedPages > 0)
3809 			unreserve_pages(freedCachedPages);
3810 
3811 		if (nextIndex - start < length) {
3812 			// failed to allocate all cached pages -- free all that we've got
3813 			freeClearQueueLocker.Lock();
3814 			allocate_page_run_cleanup(freePages, clearPages);
3815 			freeClearQueueLocker.Unlock();
3816 
3817 			return nextIndex - start;
3818 		}
3819 	}
3820 
3821 	// clear pages, if requested
3822 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3823 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3824 				vm_page* page = it.Next();) {
3825 			clear_page(page);
3826 		}
3827 	}
3828 
3829 	// add pages to target queue
3830 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3831 		freePages.MoveFrom(&clearPages);
3832 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3833 	}
3834 
3835 	// Note: We don't unreserve the pages since we pulled them out of the
3836 	// free/clear queues without adjusting sUnreservedFreePages.
3837 
3838 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3839 	AbstractTraceEntryWithStackTrace* traceEntry
3840 		= TA(AllocatePageRun(start, length));
3841 
3842 	for (page_num_t i = start; i < start + length; i++)
3843 		sPages[i].allocation_tracking_info.Init(traceEntry);
3844 #else
3845 	TA(AllocatePageRun(start, length));
3846 #endif
3847 
3848 	return length;
3849 }
3850 
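// An illustrative sketch, not taken from this file: a caller that has already
// reserved enough pages and write-locked sFreePageQueuesLock could request a
// run of 16 busy, cleared, wired pages roughly like this ("start" is assumed
// to have been validated against sNumPages beforehand):
//
//	WriteLocker locker(sFreePageQueuesLock);
//	uint32 flags = PAGE_STATE_WIRED | VM_PAGE_ALLOC_BUSY | VM_PAGE_ALLOC_CLEAR;
//	page_num_t allocated = allocate_page_run(start, 16, flags, locker);
//		// the locker has been unlocked at this point, success or not
//	if (allocated < 16) {
//		// the run could not be completed; the pages grabbed along the way
//		// have already been freed again by allocate_page_run()
//	}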
3851 
3852 /*! Allocate a physically contiguous range of pages.
3853 
3854 	\param flags Page allocation flags. Encodes the state the function shall
3855 		set the allocated pages to, whether the pages shall be marked busy
3856 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3857 		(VM_PAGE_ALLOC_CLEAR).
3858 	\param length The number of contiguous pages to allocate.
3859 	\param restrictions Restrictions to the physical addresses of the page run
3860 		to allocate, including \c low_address, the first acceptable physical
3861 		address where the page run may start, \c high_address, the last
3862 		address where the page run may start, \c high_address, the last
3863 		acceptable physical address where the page run may end (i.e. it must hold
3864 		\code runStartAddress + length * B_PAGE_SIZE <= high_address \endcode),
3865 		\c boundary, multiples of which the page run must not cross.
3866 		Values set to \c 0 are ignored.
3867 	\param priority The page reservation priority (as passed to
3868 		vm_page_reserve_pages()).
3869 	\return The first page of the allocated page run on success; \c NULL
3870 		when the allocation failed.
3871 */
3872 vm_page*
3873 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3874 	const physical_address_restrictions* restrictions, int priority)
3875 {
3876 	// compute start and end page index
3877 	page_num_t requestedStart
3878 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3879 			- sPhysicalPageOffset;
3880 	page_num_t start = requestedStart;
3881 	page_num_t end;
3882 	if (restrictions->high_address > 0) {
3883 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3884 				sPhysicalPageOffset)
3885 			- sPhysicalPageOffset;
3886 		end = std::min(end, sNumPages);
3887 	} else
3888 		end = sNumPages;
3889 
3890 	// compute alignment mask
3891 	page_num_t alignmentMask
3892 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3893 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3894 		// alignment must be a power of 2
3895 
3896 	// compute the boundary mask
3897 	page_num_t boundaryMask = 0;
3898 	if (restrictions->boundary != 0) {
3899 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3900 		// boundary must be a power of two and not less than alignment and
3901 		// length
3902 		ASSERT(((boundary - 1) & boundary) == 0);
3903 		ASSERT(boundary >= alignmentMask + 1);
3904 		ASSERT(boundary >= length);
3905 
3906 		boundaryMask = -boundary;
3907 	}
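	// Worked example (illustrative values, assuming B_PAGE_SIZE == 4096): an
	// alignment of 64 KB yields alignmentMask == 15, and a boundary of 64 KB
	// yields a boundary of 16 pages and boundaryMask == -16 (all mask bits set
	// except the low four). Below, offsetStart is then rounded up to a
	// multiple of 16 pages, and the first and last page of the run must fall
	// into the same 16-page block.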
3908 
3909 	vm_page_reservation reservation;
3910 	vm_page_reserve_pages(&reservation, length, priority);
3911 
3912 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3913 
3914 	// First we try to get a run with free pages only. If that fails, we also
3915 	// consider cached pages. If there are only a few free pages and many cached
3916 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3917 	// the first iteration in this case.
3918 	int32 freePages = sUnreservedFreePages;
3919 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3920 
3921 	for (;;) {
3922 		if (alignmentMask != 0 || boundaryMask != 0) {
3923 			page_num_t offsetStart = start + sPhysicalPageOffset;
3924 
3925 			// enforce alignment
3926 			if ((offsetStart & alignmentMask) != 0)
3927 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3928 
3929 			// enforce boundary
3930 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3931 				+ length - 1)) & boundaryMask) != 0) {
3932 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3933 			}
3934 
3935 			start = offsetStart - sPhysicalPageOffset;
3936 		}
3937 
3938 		if (start + length > end) {
3939 			if (useCached == 0) {
3940 				// The first iteration with free pages only was unsuccessful.
3941 				// Try again also considering cached pages.
3942 				useCached = 1;
3943 				start = requestedStart;
3944 				continue;
3945 			}
3946 
3947 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3948 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3949 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3950 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3951 				end, restrictions->alignment, restrictions->boundary);
3952 
3953 			freeClearQueueLocker.Unlock();
3954 			vm_page_unreserve_pages(&reservation);
3955 			return NULL;
3956 		}
3957 
3958 		bool foundRun = true;
3959 		page_num_t i;
3960 		for (i = 0; i < length; i++) {
3961 			uint32 pageState = sPages[start + i].State();
3962 			if (pageState != PAGE_STATE_FREE
3963 				&& pageState != PAGE_STATE_CLEAR
3964 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3965 				foundRun = false;
3966 				break;
3967 			}
3968 		}
3969 
3970 		if (foundRun) {
3971 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3972 			if (i == length)
3973 				return &sPages[start];
3974 
3975 			// apparently a cached page couldn't be allocated -- skip it and
3976 			// continue
3977 			freeClearQueueLocker.Lock();
3978 		}
3979 
3980 		start += i + 1;
3981 	}
3982 }
3983 
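// An illustrative sketch (hypothetical caller; the constants and the
// VM_PRIORITY_SYSTEM priority are assumptions for the example): allocating a
// 16 page physically contiguous buffer below 16 MB, 64 KB aligned and not
// crossing a 64 KB boundary, could look roughly like this:
//
//	physical_address_restrictions restrictions = {};
//	restrictions.high_address = 16 * 1024 * 1024;
//	restrictions.alignment = 64 * 1024;
//	restrictions.boundary = 64 * 1024;
//	vm_page* firstPage = vm_page_allocate_page_run(
//		PAGE_STATE_WIRED | VM_PAGE_ALLOC_BUSY, 16, &restrictions,
//		VM_PRIORITY_SYSTEM);
//	if (firstPage == NULL) {
//		// no suitable contiguous run could be allocated
//	}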
3984 
3985 vm_page *
3986 vm_page_at_index(int32 index)
3987 {
3988 	return &sPages[index];
3989 }
3990 
3991 
3992 vm_page *
3993 vm_lookup_page(page_num_t pageNumber)
3994 {
3995 	if (pageNumber < sPhysicalPageOffset)
3996 		return NULL;
3997 
3998 	pageNumber -= sPhysicalPageOffset;
3999 	if (pageNumber >= sNumPages)
4000 		return NULL;
4001 
4002 	return &sPages[pageNumber];
4003 }
4004 
4005 
4006 bool
4007 vm_page_is_dummy(struct vm_page *page)
4008 {
4009 	return page < sPages || page >= sPages + sNumPages;
4010 }
4011 
4012 
4013 /*!	Free the page that belonged to a certain cache.
4014 	You can use vm_page_set_state() manually if you prefer, but only
4015 	if the page's state is not PAGE_STATE_MODIFIED.
4016 
4017 	\param cache The cache the page was previously owned by or NULL. The page
4018 		must have been removed from its cache before calling this method in
4019 		either case.
4020 	\param page The page to free.
4021 	\param reservation If not NULL, the page count of the reservation will be
4022 		incremented, thus allowing the caller to allocate another page in place
4023 		of the freed one at a later time.
4024 */
4025 void
4026 vm_page_free_etc(VMCache* cache, vm_page* page,
4027 	vm_page_reservation* reservation)
4028 {
4029 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4030 		&& page->State() != PAGE_STATE_CLEAR);
4031 
4032 	if (page->State() == PAGE_STATE_MODIFIED && cache != NULL && cache->temporary)
4033 		atomic_add(&sModifiedTemporaryPages, -1);
4034 
4035 	free_page(page, false);
4036 	if (reservation == NULL)
4037 		unreserve_pages(1);
4038 }
4039 
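// An illustrative sketch (hypothetical caller; "cache", "page" and
// "reservation" are assumed to exist): with the cache locked, the page is
// first removed from the cache and then handed back, optionally crediting an
// existing reservation so another page can be allocated for it later:
//
//	cache->RemovePage(page);
//	vm_page_free_etc(cache, page, &reservation);
//		// pass NULL instead of &reservation if no reservation is kept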
4040 
4041 void
4042 vm_page_set_state(vm_page *page, int pageState)
4043 {
4044 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4045 		&& page->State() != PAGE_STATE_CLEAR);
4046 
4047 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
4048 		free_page(page, pageState == PAGE_STATE_CLEAR);
4049 		unreserve_pages(1);
4050 	} else
4051 		set_page_state(page, pageState);
4052 }
4053 
4054 
4055 /*!	Moves a page to either the tail or the head of its current queue,
4056 	depending on \a tail.
4057 	The page must have a cache and the cache must be locked!
4058 */
4059 void
4060 vm_page_requeue(struct vm_page *page, bool tail)
4061 {
4062 	PAGE_ASSERT(page, page->Cache() != NULL);
4063 	page->Cache()->AssertLocked();
4064 	// DEBUG_PAGE_ACCESS_CHECK(page);
4065 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
4066 		// when it requeues busy pages. The reason is that vm_soft_fault()
4067 		// (respectively fault_get_page()) and the file cache keep newly
4068 		// allocated pages accessed while they are reading them from disk. It
4069 		// would probably be better to change that code and reenable this
4070 		// check.
4071 
4072 	VMPageQueue *queue = NULL;
4073 
4074 	switch (page->State()) {
4075 		case PAGE_STATE_ACTIVE:
4076 			queue = &sActivePageQueue;
4077 			break;
4078 		case PAGE_STATE_INACTIVE:
4079 			queue = &sInactivePageQueue;
4080 			break;
4081 		case PAGE_STATE_MODIFIED:
4082 			queue = &sModifiedPageQueue;
4083 			break;
4084 		case PAGE_STATE_CACHED:
4085 			queue = &sCachedPageQueue;
4086 			break;
4087 		case PAGE_STATE_FREE:
4088 		case PAGE_STATE_CLEAR:
4089 			panic("vm_page_requeue() called for free/clear page %p", page);
4090 			return;
4091 		case PAGE_STATE_WIRED:
4092 		case PAGE_STATE_UNUSED:
4093 			return;
4094 		default:
4095 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
4096 				page, page->State());
4097 			break;
4098 	}
4099 
4100 	queue->RequeueUnlocked(page, tail);
4101 }
4102 
4103 
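// An illustrative sketch (hypothetical caller that already holds the page's
// cache lock, e.g. via a PageCacheLocker): move the page to the tail of its
// current queue so that it is treated as recently used:
//
//	vm_page_requeue(page, true);
//		// true == requeue at the tail; false == requeue at the head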
4104 page_num_t
4105 vm_page_num_pages(void)
4106 {
4107 	return sNumPages - sNonExistingPages;
4108 }
4109 
4110 
4111 /*! There is a subtle distinction between the page counts returned by
4112 	this function and vm_page_num_free_pages():
4113 	The latter returns the number of pages that are completely uncommitted,
4114 	whereas this one also counts pages that can be made available by
4115 	reclaiming them (IOW it factors in things like cached pages as
4116 	available).
4117 */
4118 page_num_t
4119 vm_page_num_available_pages(void)
4120 {
4121 	return vm_available_memory() / B_PAGE_SIZE;
4122 }
4123 
4124 
4125 page_num_t
4126 vm_page_num_free_pages(void)
4127 {
4128 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4129 	return count > 0 ? count : 0;
4130 }
4131 
4132 
4133 page_num_t
4134 vm_page_num_unused_pages(void)
4135 {
4136 	int32 count = sUnreservedFreePages;
4137 	return count > 0 ? count : 0;
4138 }
4139 
4140 
4141 void
4142 vm_page_get_stats(system_info *info)
4143 {
4144 	// Note: there's no locking protecting any of the queues or counters here,
4145 	// so we run the risk of getting bogus values when evaluating them
4146 	// throughout this function. As these stats are for informational purposes
4147 	// only, it is not really worth introducing such locking. Therefore we just
4148 	// ensure that we don't under- or overflow any of the values.
4149 
4150 	// The pages used for the block cache buffers. Those should not be counted
4151 	// as used but as cached pages.
4152 	// TODO: We should subtract the blocks that are in use ATM, since those
4153 	// can't really be freed in a low memory situation.
4154 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4155 	info->block_cache_pages = blockCachePages;
4156 
4157 	// Non-temporary modified pages are special as they represent pages that
4158 	// can be written back, so they could be freed if necessary, which
4159 	// basically makes them cached pages with a higher overhead. The
4160 	// modified queue count is therefore split into temporary and non-temporary
4161 	// counts that are then added to the corresponding number.
4162 	page_num_t modifiedNonTemporaryPages
4163 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4164 
4165 	info->max_pages = vm_page_num_pages();
4166 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4167 		+ blockCachePages;
4168 
4169 	// max_pages is composed of:
4170 	//	active + inactive + unused + wired + modified + cached + free + clear
4171 	// So taking out the cached (including modified non-temporary), free and
4172 	// clear ones leaves us with all used pages.
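	// For instance (illustrative numbers only): with max_pages == 1000,
	// cached_pages == 300, 100 free and 50 clear pages, subtractPages is 450
	// and used_pages becomes 550.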
4173 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4174 		+ sClearPageQueue.Count();
4175 	info->used_pages = subtractPages > info->max_pages
4176 		? 0 : info->max_pages - subtractPages;
4177 
4178 	if (info->used_pages + info->cached_pages > info->max_pages) {
4179 		// Something was shuffled around while we were summing up the counts.
4180 		// Make the values sane, preferring the worse case of more used pages.
4181 		info->cached_pages = info->max_pages - info->used_pages;
4182 	}
4183 
4184 	info->page_faults = vm_num_page_faults();
4185 	info->ignored_pages = sIgnoredPages;
4186 
4187 	// TODO: We don't consider pages used for page directories/tables yet.
4188 }
4189 
4190 
4191 /*!	Returns the greatest address within the last page of accessible physical
4192 	memory.
4193 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4194 	means that the last page ends at exactly 4 GB.
4195 */
4196 phys_addr_t
4197 vm_page_max_address()
4198 {
4199 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4200 }
4201 
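// Worked example (illustrative): with sPhysicalPageOffset == 0 and
// sNumPages == 0x100000 (4 GB worth of 4 KB pages), the result is
// 0x100000000 - 1 == 0xffffffff, i.e. the last byte of the 4 GB range.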
4202 
4203 RANGE_MARKER_FUNCTION_END(vm_page)
4204