xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 68d37cfb3a755a7270d772b505ee15c8b18aa5e0)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 32
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
76 // The page reserves that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
82 
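// Illustrative note (not from the original sources): the array is indexed by
// the VM allocation priority, so a caller acting on behalf of a user-priority
// allocation must leave the system and VIP reserves alone, e.g.:
//
//	uint32 dontTouch = kPageReserveForPriority[VM_PRIORITY_USER];
//	uint32 reserved = reserve_some_pages(count, dontTouch);
//
// This is the pattern the page scrubber below uses.
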
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The vm_page::usage_count increase an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The vm_page::usage_count decrease an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
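// Illustrative sketch of the aging scheme these constants parameterize (the
// page daemon code applying them lies outside this excerpt, so the details
// here are assumed): a page found accessed in a scan gains kPageUsageAdvance
// up to kPageUsageMax, an unaccessed page loses kPageUsageDecline, e.g.:
//
//	if (accessed) {
//		page->usage_count = min_c(page->usage_count + kPageUsageAdvance,
//			kPageUsageMax);
//	} else
//		page->usage_count -= kPageUsageDecline;
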
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
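// Illustrative use of the lock, mirroring the code below: touching a single
// queue only needs the shared lock, manipulating both (or keeping them
// consistent) needs it exclusively:
//
//	ReadLocker locker(sFreePageQueuesLock);		// e.g. free_page(), page_scrubber()
//	WriteLocker locker(sFreePageQueuesLock);	// e.g. mark_page_range_in_use()
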
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
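// Illustrative usage sketch, assumed from the interface above (the daemon
// threads that use these conditions come further down in the original file):
// producers call WakeUp() whenever there is work, while the daemon blocks with
// a timeout and decides per iteration whether to clear the activation flag:
//
//	for (;;) {
//		bool activated = sPageDaemonCondition.Wait(kIdleScanWaitInterval, true);
//		// ... scan more aggressively when 'activated' is true ...
//	}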
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRId32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRId32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRId32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static int
764 find_page(int argc, char **argv)
765 {
766 	struct vm_page *page;
767 	addr_t address;
768 	int32 index = 1;
769 	int i;
770 
771 	struct {
772 		const char*	name;
773 		VMPageQueue*	queue;
774 	} pageQueueInfos[] = {
775 		{ "free",		&sFreePageQueue },
776 		{ "clear",		&sClearPageQueue },
777 		{ "modified",	&sModifiedPageQueue },
778 		{ "active",		&sActivePageQueue },
779 		{ "inactive",	&sInactivePageQueue },
780 		{ "cached",		&sCachedPageQueue },
781 		{ NULL, NULL }
782 	};
783 
784 	if (argc < 2
785 		|| strlen(argv[index]) <= 2
786 		|| argv[index][0] != '0'
787 		|| argv[index][1] != 'x') {
788 		kprintf("usage: find_page <address>\n");
789 		return 0;
790 	}
791 
792 	address = strtoul(argv[index], NULL, 0);
793 	page = (vm_page*)address;
794 
795 	for (i = 0; pageQueueInfos[i].name; i++) {
796 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
797 		while (vm_page* p = it.Next()) {
798 			if (p == page) {
799 				kprintf("found page %p in queue %p (%s)\n", page,
800 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
801 				return 0;
802 			}
803 		}
804 	}
805 
806 	kprintf("page %p isn't in any queue\n", page);
807 
808 	return 0;
809 }
810 
811 
812 const char *
813 page_state_to_string(int state)
814 {
815 	switch(state) {
816 		case PAGE_STATE_ACTIVE:
817 			return "active";
818 		case PAGE_STATE_INACTIVE:
819 			return "inactive";
820 		case PAGE_STATE_MODIFIED:
821 			return "modified";
822 		case PAGE_STATE_CACHED:
823 			return "cached";
824 		case PAGE_STATE_FREE:
825 			return "free";
826 		case PAGE_STATE_CLEAR:
827 			return "clear";
828 		case PAGE_STATE_WIRED:
829 			return "wired";
830 		case PAGE_STATE_UNUSED:
831 			return "unused";
832 		default:
833 			return "unknown";
834 	}
835 }
836 
837 
838 static int
839 dump_page(int argc, char **argv)
840 {
841 	bool addressIsPointer = true;
842 	bool physical = false;
843 	bool searchMappings = false;
844 	int32 index = 1;
845 
846 	while (index < argc) {
847 		if (argv[index][0] != '-')
848 			break;
849 
850 		if (!strcmp(argv[index], "-p")) {
851 			addressIsPointer = false;
852 			physical = true;
853 		} else if (!strcmp(argv[index], "-v")) {
854 			addressIsPointer = false;
855 		} else if (!strcmp(argv[index], "-m")) {
856 			searchMappings = true;
857 		} else {
858 			print_debugger_command_usage(argv[0]);
859 			return 0;
860 		}
861 
862 		index++;
863 	}
864 
865 	if (index + 1 != argc) {
866 		print_debugger_command_usage(argv[0]);
867 		return 0;
868 	}
869 
870 	uint64 value;
871 	if (!evaluate_debug_expression(argv[index], &value, false))
872 		return 0;
873 
874 	uint64 pageAddress = value;
875 	struct vm_page* page;
876 
877 	if (addressIsPointer) {
878 		page = (struct vm_page *)(addr_t)pageAddress;
879 	} else {
880 		if (!physical) {
881 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
882 
883 			if (debug_get_debugged_thread()->team->address_space != NULL)
884 				addressSpace = debug_get_debugged_thread()->team->address_space;
885 
886 			uint32 flags = 0;
887 			phys_addr_t physicalAddress;
888 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
889 					&physicalAddress, &flags) != B_OK
890 				|| (flags & PAGE_PRESENT) == 0) {
891 				kprintf("Virtual address not mapped to a physical page in this "
892 					"address space.\n");
893 				return 0;
894 			}
895 			pageAddress = physicalAddress;
896 		}
897 
898 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
899 	}
900 
901 	kprintf("PAGE: %p\n", page);
902 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
903 		page->queue_link.previous);
904 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
905 		page->physical_page_number);
906 	kprintf("cache:           %p\n", page->Cache());
907 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
908 	kprintf("cache_next:      %p\n", page->cache_next);
909 	kprintf("state:           %s\n", page_state_to_string(page->State()));
910 	kprintf("wired_count:     %d\n", page->WiredCount());
911 	kprintf("usage_count:     %d\n", page->usage_count);
912 	kprintf("busy:            %d\n", page->busy);
913 	kprintf("busy_writing:    %d\n", page->busy_writing);
914 	kprintf("accessed:        %d\n", page->accessed);
915 	kprintf("modified:        %d\n", page->modified);
916 	#if DEBUG_PAGE_QUEUE
917 		kprintf("queue:           %p\n", page->queue);
918 	#endif
919 	#if DEBUG_PAGE_ACCESS
920 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
921 	#endif
922 	kprintf("area mappings:\n");
923 
924 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
925 	vm_page_mapping *mapping;
926 	while ((mapping = iterator.Next()) != NULL) {
927 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
929 	}
930 
931 	if (searchMappings) {
932 		kprintf("all mappings:\n");
933 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
934 		while (addressSpace != NULL) {
935 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
936 			for (addr_t address = addressSpace->Base(); pageCount != 0;
937 					address += B_PAGE_SIZE, pageCount--) {
938 				phys_addr_t physicalAddress;
939 				uint32 flags = 0;
940 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
941 						&physicalAddress, &flags) == B_OK
942 					&& (flags & PAGE_PRESENT) != 0
943 					&& physicalAddress / B_PAGE_SIZE
944 						== page->physical_page_number) {
945 					VMArea* area = addressSpace->LookupArea(address);
946 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
947 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
948 						area != NULL ? area->id : -1, address,
949 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
950 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
951 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
952 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
953 				}
954 			}
955 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
956 		}
957 	}
958 
959 	set_debug_variable("_cache", (addr_t)page->Cache());
960 	#if DEBUG_PAGE_ACCESS
961 		set_debug_variable("_accessor", page->accessing_thread);
962 	#endif
963 
964 	return 0;
965 }
966 
967 
968 static int
969 dump_page_queue(int argc, char **argv)
970 {
971 	struct VMPageQueue *queue;
972 
973 	if (argc < 2) {
974 		kprintf("usage: page_queue <address/name> [list]\n");
975 		return 0;
976 	}
977 
978 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
979 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
980 	else if (!strcmp(argv[1], "free"))
981 		queue = &sFreePageQueue;
982 	else if (!strcmp(argv[1], "clear"))
983 		queue = &sClearPageQueue;
984 	else if (!strcmp(argv[1], "modified"))
985 		queue = &sModifiedPageQueue;
986 	else if (!strcmp(argv[1], "active"))
987 		queue = &sActivePageQueue;
988 	else if (!strcmp(argv[1], "inactive"))
989 		queue = &sInactivePageQueue;
990 	else if (!strcmp(argv[1], "cached"))
991 		queue = &sCachedPageQueue;
992 	else {
993 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
994 		return 0;
995 	}
996 
997 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
998 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
999 		queue->Count());
1000 
1001 	if (argc == 3) {
1002 		struct vm_page *page = queue->Head();
1003 
1004 		kprintf("page        cache       type       state  wired  usage\n");
1005 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1006 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1007 				vm_cache_type_to_string(page->Cache()->type),
1008 				page_state_to_string(page->State()),
1009 				page->WiredCount(), page->usage_count);
1010 		}
1011 	}
1012 	return 0;
1013 }
1014 
1015 
1016 static int
1017 dump_page_stats(int argc, char **argv)
1018 {
1019 	page_num_t swappableModified = 0;
1020 	page_num_t swappableModifiedInactive = 0;
1021 
1022 	size_t counter[8];
1023 	size_t busyCounter[8];
1024 	memset(counter, 0, sizeof(counter));
1025 	memset(busyCounter, 0, sizeof(busyCounter));
1026 
1027 	struct page_run {
1028 		page_num_t	start;
1029 		page_num_t	end;
1030 
1031 		page_num_t Length() const	{ return end - start; }
1032 	};
1033 
1034 	page_run currentFreeRun = { 0, 0 };
1035 	page_run currentCachedRun = { 0, 0 };
1036 	page_run longestFreeRun = { 0, 0 };
1037 	page_run longestCachedRun = { 0, 0 };
1038 
1039 	for (page_num_t i = 0; i < sNumPages; i++) {
1040 		if (sPages[i].State() > 7) {
1041 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1042 				&sPages[i]);
1043 		}
1044 
1045 		uint32 pageState = sPages[i].State();
1046 
1047 		counter[pageState]++;
1048 		if (sPages[i].busy)
1049 			busyCounter[pageState]++;
1050 
1051 		if (pageState == PAGE_STATE_MODIFIED
1052 			&& sPages[i].Cache() != NULL
1053 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1054 			swappableModified++;
1055 			if (sPages[i].usage_count == 0)
1056 				swappableModifiedInactive++;
1057 		}
1058 
1059 		// track free and cached pages runs
1060 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1061 			currentFreeRun.end = i + 1;
1062 			currentCachedRun.end = i + 1;
1063 		} else {
1064 			if (currentFreeRun.Length() > longestFreeRun.Length())
1065 				longestFreeRun = currentFreeRun;
1066 			currentFreeRun.start = currentFreeRun.end = i + 1;
1067 
1068 			if (pageState == PAGE_STATE_CACHED) {
1069 				currentCachedRun.end = i + 1;
1070 			} else {
1071 				if (currentCachedRun.Length() > longestCachedRun.Length())
1072 					longestCachedRun = currentCachedRun;
1073 				currentCachedRun.start = currentCachedRun.end = i + 1;
1074 			}
1075 		}
1076 	}
1077 
1078 	kprintf("page stats:\n");
1079 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1080 
1081 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1082 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1083 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1084 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1085 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1086 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1087 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1088 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1089 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1090 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1091 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1092 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1093 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1094 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1095 
1096 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1097 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1098 		sUnsatisfiedPageReservations);
1099 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1100 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1101 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1102 		sPages[longestFreeRun.start].physical_page_number);
1103 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1104 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1105 		sPages[longestCachedRun.start].physical_page_number);
1106 
1107 	kprintf("waiting threads:\n");
1108 	for (PageReservationWaiterList::Iterator it
1109 			= sPageReservationWaiters.GetIterator();
1110 		PageReservationWaiter* waiter = it.Next();) {
1111 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1112 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1113 			waiter->missing, waiter->dontTouch);
1114 	}
1115 
1116 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1117 		sFreePageQueue.Count());
1118 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1119 		sClearPageQueue.Count());
1120 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1121 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1122 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1123 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1124 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1125 		&sActivePageQueue, sActivePageQueue.Count());
1126 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1127 		&sInactivePageQueue, sInactivePageQueue.Count());
1128 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1129 		&sCachedPageQueue, sCachedPageQueue.Count());
1130 	return 0;
1131 }
1132 
1133 
1134 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1135 
1136 static caller_info*
1137 get_caller_info(addr_t caller)
1138 {
1139 	// find the caller info
1140 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1141 		if (caller == sCallerInfoTable[i].caller)
1142 			return &sCallerInfoTable[i];
1143 	}
1144 
1145 	// not found, add a new entry, if there are free slots
1146 	if (sCallerInfoCount >= kCallerInfoTableSize)
1147 		return NULL;
1148 
1149 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1150 	info->caller = caller;
1151 	info->count = 0;
1152 
1153 	return info;
1154 }
1155 
1156 
1157 static int
1158 caller_info_compare_count(const void* _a, const void* _b)
1159 {
1160 	const caller_info* a = (const caller_info*)_a;
1161 	const caller_info* b = (const caller_info*)_b;
1162 	return (int)(b->count - a->count);
1163 }
1164 
1165 
1166 static int
1167 dump_page_allocations_per_caller(int argc, char** argv)
1168 {
1169 	bool resetAllocationInfos = false;
1170 	bool printDetails = false;
1171 	addr_t caller = 0;
1172 
1173 	for (int32 i = 1; i < argc; i++) {
1174 		if (strcmp(argv[i], "-d") == 0) {
1175 			uint64 callerAddress;
1176 			if (++i >= argc
1177 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1178 				print_debugger_command_usage(argv[0]);
1179 				return 0;
1180 			}
1181 
1182 			caller = callerAddress;
1183 			printDetails = true;
1184 		} else if (strcmp(argv[i], "-r") == 0) {
1185 			resetAllocationInfos = true;
1186 		} else {
1187 			print_debugger_command_usage(argv[0]);
1188 			return 0;
1189 		}
1190 	}
1191 
1192 	sCallerInfoCount = 0;
1193 
1194 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1195 	AllocationDetailPrinterCallback detailsCallback(caller);
1196 	AllocationTrackingCallback& callback = printDetails
1197 		? (AllocationTrackingCallback&)detailsCallback
1198 		: (AllocationTrackingCallback&)collectorCallback;
1199 
1200 	for (page_num_t i = 0; i < sNumPages; i++)
1201 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1202 
1203 	if (printDetails)
1204 		return 0;
1205 
1206 	// sort the array
1207 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1208 		&caller_info_compare_count);
1209 
1210 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1211 
1212 	size_t totalAllocationCount = 0;
1213 
1214 	kprintf("     count      caller\n");
1215 	kprintf("----------------------------------\n");
1216 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1217 		caller_info& info = sCallerInfoTable[i];
1218 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1219 
1220 		const char* symbol;
1221 		const char* imageName;
1222 		bool exactMatch;
1223 		addr_t baseAddress;
1224 
1225 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1226 				&imageName, &exactMatch) == B_OK) {
1227 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1228 				info.caller - baseAddress, imageName,
1229 				exactMatch ? "" : " (nearest)");
1230 		} else
1231 			kprintf("\n");
1232 
1233 		totalAllocationCount += info.count;
1234 	}
1235 
1236 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1237 		totalAllocationCount);
1238 
1239 	return 0;
1240 }
1241 
1242 
1243 static int
1244 dump_page_allocation_infos(int argc, char** argv)
1245 {
1246 	page_num_t pageFilter = 0;
1247 	team_id teamFilter = -1;
1248 	thread_id threadFilter = -1;
1249 	bool printStackTraces = false;
1250 
1251 	for (int32 i = 1; i < argc; i++) {
1252 		if (strcmp(argv[i], "--stacktrace") == 0)
1253 			printStackTraces = true;
1254 		else if (strcmp(argv[i], "-p") == 0) {
1255 			uint64 pageNumber;
1256 			if (++i >= argc
1257 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1258 				print_debugger_command_usage(argv[0]);
1259 				return 0;
1260 			}
1261 
1262 			pageFilter = pageNumber;
1263 		} else if (strcmp(argv[i], "--team") == 0) {
1264 			uint64 team;
1265 			if (++i >= argc
1266 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1267 				print_debugger_command_usage(argv[0]);
1268 				return 0;
1269 			}
1270 
1271 			teamFilter = team;
1272 		} else if (strcmp(argv[i], "--thread") == 0) {
1273 			uint64 thread;
1274 			if (++i >= argc
1275 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1276 				print_debugger_command_usage(argv[0]);
1277 				return 0;
1278 			}
1279 
1280 			threadFilter = thread;
1281 		} else {
1282 			print_debugger_command_usage(argv[0]);
1283 			return 0;
1284 		}
1285 	}
1286 
1287 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1288 		teamFilter, threadFilter);
1289 
1290 	for (page_num_t i = 0; i < sNumPages; i++)
1291 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1292 
1293 	return 0;
1294 }
1295 
1296 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1297 
1298 
1299 #ifdef TRACK_PAGE_USAGE_STATS
1300 
1301 static void
1302 track_page_usage(vm_page* page)
1303 {
1304 	if (page->WiredCount() == 0) {
1305 		sNextPageUsage[(int32)page->usage_count + 128]++;
1306 		sNextPageUsagePageCount++;
1307 	}
1308 }
1309 
1310 
1311 static void
1312 update_page_usage_stats()
1313 {
1314 	std::swap(sPageUsage, sNextPageUsage);
1315 	sPageUsagePageCount = sNextPageUsagePageCount;
1316 
1317 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1318 	sNextPageUsagePageCount = 0;
1319 
1320 	// compute average
1321 	if (sPageUsagePageCount > 0) {
1322 		int64 sum = 0;
1323 		for (int32 i = 0; i < 256; i++)
1324 			sum += (int64)sPageUsage[i] * (i - 128);
1325 
1326 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1327 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1328 	}
1329 }
1330 
1331 
1332 static int
1333 dump_page_usage_stats(int argc, char** argv)
1334 {
1335 	kprintf("distribution of page usage counts (%lu pages):",
1336 		sPageUsagePageCount);
1337 
1338 	int64 sum = 0;
1339 	for (int32 i = 0; i < 256; i++) {
1340 		if (i % 8 == 0)
1341 			kprintf("\n%4ld:", i - 128);
1342 
1343 		int64 count = sPageUsage[i];
1344 		sum += count * (i - 128);
1345 
1346 		kprintf("  %9llu", count);
1347 	}
1348 
1349 	kprintf("\n\n");
1350 
1351 	kprintf("average usage count: %f\n",
1352 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1353 
1354 	return 0;
1355 }
1356 
1357 #endif	// TRACK_PAGE_USAGE_STATS
1358 
1359 
1360 // #pragma mark - vm_page
1361 
1362 
1363 inline void
1364 vm_page::InitState(uint8 newState)
1365 {
1366 	state = newState;
1367 }
1368 
1369 
1370 inline void
1371 vm_page::SetState(uint8 newState)
1372 {
1373 	TPS(SetPageState(this, newState));
1374 
1375 	state = newState;
1376 }
1377 
1378 
1379 // #pragma mark -
1380 
1381 
1382 static void
1383 get_page_stats(page_stats& _pageStats)
1384 {
1385 	_pageStats.totalFreePages = sUnreservedFreePages;
1386 	_pageStats.cachedPages = sCachedPageQueue.Count();
1387 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1388 	// TODO: We don't get an actual snapshot here!
1389 }
1390 
1391 
1392 static bool
1393 do_active_paging(const page_stats& pageStats)
1394 {
1395 	return pageStats.totalFreePages + pageStats.cachedPages
1396 		< pageStats.unsatisfiedReservations
1397 			+ (int32)sFreeOrCachedPagesTarget;
1398 }
1399 
1400 
1401 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1402 	\a count. Doesn't touch the last \a dontTouch pages of
1403 	\c sUnreservedFreePages, though.
1404 	\return The number of actually reserved pages.
1405 */
1406 static uint32
1407 reserve_some_pages(uint32 count, uint32 dontTouch)
1408 {
1409 	while (true) {
1410 		int32 freePages = atomic_get(&sUnreservedFreePages);
1411 		if (freePages <= (int32)dontTouch)
1412 			return 0;
1413 
1414 		int32 toReserve = std::min(count, freePages - dontTouch);
1415 		if (atomic_test_and_set(&sUnreservedFreePages,
1416 					freePages - toReserve, freePages)
1417 				== freePages) {
1418 			return toReserve;
1419 		}
1420 
1421 		// the count changed in the meantime -- retry
1422 	}
1423 }
1424 
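// Illustrative pairing, matching what the page scrubber below does: whatever
// reserve_some_pages() hands out must eventually go back via
// unreserve_pages(), whether or not the pages were actually taken off a queue:
//
//	uint32 reserved = reserve_some_pages(SCRUB_SIZE,
//		kPageReserveForPriority[VM_PRIORITY_USER]);
//	if (reserved > 0) {
//		// ... pull up to 'reserved' pages off the free queue and use them ...
//		unreserve_pages(reserved);
//	}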
1425 
1426 static void
1427 wake_up_page_reservation_waiters()
1428 {
1429 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1430 
1431 	// TODO: If this is a low priority thread, we might want to disable
1432 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1433 	// high priority threads will be kept waiting while a medium priority thread
1434 	// prevents us from running.
1435 
1436 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1437 		int32 reserved = reserve_some_pages(waiter->missing,
1438 			waiter->dontTouch);
1439 		if (reserved == 0)
1440 			return;
1441 
1442 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1443 		waiter->missing -= reserved;
1444 
1445 		if (waiter->missing > 0)
1446 			return;
1447 
1448 		sPageReservationWaiters.Remove(waiter);
1449 
1450 		thread_unblock(waiter->thread, B_OK);
1451 	}
1452 }
1453 
1454 
1455 static inline void
1456 unreserve_pages(uint32 count)
1457 {
1458 	atomic_add(&sUnreservedFreePages, count);
1459 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1460 		wake_up_page_reservation_waiters();
1461 }
1462 
1463 
1464 static void
1465 free_page(vm_page* page, bool clear)
1466 {
1467 	DEBUG_PAGE_ACCESS_CHECK(page);
1468 
1469 	PAGE_ASSERT(page, !page->IsMapped());
1470 
1471 	VMPageQueue* fromQueue;
1472 
1473 	switch (page->State()) {
1474 		case PAGE_STATE_ACTIVE:
1475 			fromQueue = &sActivePageQueue;
1476 			break;
1477 		case PAGE_STATE_INACTIVE:
1478 			fromQueue = &sInactivePageQueue;
1479 			break;
1480 		case PAGE_STATE_MODIFIED:
1481 			fromQueue = &sModifiedPageQueue;
1482 			break;
1483 		case PAGE_STATE_CACHED:
1484 			fromQueue = &sCachedPageQueue;
1485 			break;
1486 		case PAGE_STATE_FREE:
1487 		case PAGE_STATE_CLEAR:
1488 			panic("free_page(): page %p already free", page);
1489 			return;
1490 		case PAGE_STATE_WIRED:
1491 		case PAGE_STATE_UNUSED:
1492 			fromQueue = NULL;
1493 			break;
1494 		default:
1495 			panic("free_page(): page %p in invalid state %d",
1496 				page, page->State());
1497 			return;
1498 	}
1499 
1500 	if (page->CacheRef() != NULL)
1501 		panic("to be freed page %p has cache", page);
1502 	if (page->IsMapped())
1503 		panic("to be freed page %p has mappings", page);
1504 
1505 	if (fromQueue != NULL)
1506 		fromQueue->RemoveUnlocked(page);
1507 
1508 	TA(FreePage(page->physical_page_number));
1509 
1510 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1511 	page->allocation_tracking_info.Clear();
1512 #endif
1513 
1514 	ReadLocker locker(sFreePageQueuesLock);
1515 
1516 	DEBUG_PAGE_ACCESS_END(page);
1517 
1518 	if (clear) {
1519 		page->SetState(PAGE_STATE_CLEAR);
1520 		sClearPageQueue.PrependUnlocked(page);
1521 	} else {
1522 		page->SetState(PAGE_STATE_FREE);
1523 		sFreePageQueue.PrependUnlocked(page);
1524 		sFreePageCondition.NotifyAll();
1525 	}
1526 
1527 	locker.Unlock();
1528 }
1529 
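// Illustrative note: free_page() only returns the page to the free/clear
// queues; it does not give the page back to the reservation pool. A caller
// that still holds a reservation for it follows up with unreserve_pages(),
// as PageWriteWrapper::Done() does below:
//
//	free_page(page, false);
//	unreserve_pages(1);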
1530 
1531 /*!	The caller must make sure that no-one else tries to change the page's state
1532 	while the function is called. If the page has a cache, this can be done by
1533 	locking the cache.
1534 */
1535 static void
1536 set_page_state(vm_page *page, int pageState)
1537 {
1538 	DEBUG_PAGE_ACCESS_CHECK(page);
1539 
1540 	if (pageState == page->State())
1541 		return;
1542 
1543 	VMPageQueue* fromQueue;
1544 
1545 	switch (page->State()) {
1546 		case PAGE_STATE_ACTIVE:
1547 			fromQueue = &sActivePageQueue;
1548 			break;
1549 		case PAGE_STATE_INACTIVE:
1550 			fromQueue = &sInactivePageQueue;
1551 			break;
1552 		case PAGE_STATE_MODIFIED:
1553 			fromQueue = &sModifiedPageQueue;
1554 			break;
1555 		case PAGE_STATE_CACHED:
1556 			fromQueue = &sCachedPageQueue;
1557 			break;
1558 		case PAGE_STATE_FREE:
1559 		case PAGE_STATE_CLEAR:
1560 			panic("set_page_state(): page %p is free/clear", page);
1561 			return;
1562 		case PAGE_STATE_WIRED:
1563 		case PAGE_STATE_UNUSED:
1564 			fromQueue = NULL;
1565 			break;
1566 		default:
1567 			panic("set_page_state(): page %p in invalid state %d",
1568 				page, page->State());
1569 			return;
1570 	}
1571 
1572 	VMPageQueue* toQueue;
1573 
1574 	switch (pageState) {
1575 		case PAGE_STATE_ACTIVE:
1576 			toQueue = &sActivePageQueue;
1577 			break;
1578 		case PAGE_STATE_INACTIVE:
1579 			toQueue = &sInactivePageQueue;
1580 			break;
1581 		case PAGE_STATE_MODIFIED:
1582 			toQueue = &sModifiedPageQueue;
1583 			break;
1584 		case PAGE_STATE_CACHED:
1585 			PAGE_ASSERT(page, !page->IsMapped());
1586 			PAGE_ASSERT(page, !page->modified);
1587 			toQueue = &sCachedPageQueue;
1588 			break;
1589 		case PAGE_STATE_FREE:
1590 		case PAGE_STATE_CLEAR:
1591 			panic("set_page_state(): target state is free/clear");
1592 			return;
1593 		case PAGE_STATE_WIRED:
1594 		case PAGE_STATE_UNUSED:
1595 			toQueue = NULL;
1596 			break;
1597 		default:
1598 			panic("set_page_state(): invalid target state %d", pageState);
1599 			return;
1600 	}
1601 
1602 	VMCache* cache = page->Cache();
1603 	if (cache != NULL && cache->temporary) {
1604 		if (pageState == PAGE_STATE_MODIFIED)
1605 			atomic_add(&sModifiedTemporaryPages, 1);
1606 		else if (page->State() == PAGE_STATE_MODIFIED)
1607 			atomic_add(&sModifiedTemporaryPages, -1);
1608 	}
1609 
1610 	// move the page
1611 	if (toQueue == fromQueue) {
1612 		// Note: Theoretically we are required to lock when changing the page
1613 		// state, even if we don't change the queue. We actually don't have to
1614 		// do this, though, since only for the active queue there are different
1615 		// page states and active pages have a cache that must be locked at
1616 		// this point. So we rely on the fact that everyone must lock the cache
1617 		// before trying to change/interpret the page state.
1618 		PAGE_ASSERT(page, cache != NULL);
1619 		cache->AssertLocked();
1620 		page->SetState(pageState);
1621 	} else {
1622 		if (fromQueue != NULL)
1623 			fromQueue->RemoveUnlocked(page);
1624 
1625 		page->SetState(pageState);
1626 
1627 		if (toQueue != NULL)
1628 			toQueue->AppendUnlocked(page);
1629 	}
1630 }
1631 
1632 
1633 /*! Moves a previously modified page into a now appropriate queue.
1634 	The page queues must not be locked.
1635 */
1636 static void
1637 move_page_to_appropriate_queue(vm_page *page)
1638 {
1639 	DEBUG_PAGE_ACCESS_CHECK(page);
1640 
1641 	// Note, this logic must be in sync with what the page daemon does.
1642 	int32 state;
1643 	if (page->IsMapped())
1644 		state = PAGE_STATE_ACTIVE;
1645 	else if (page->modified)
1646 		state = PAGE_STATE_MODIFIED;
1647 	else
1648 		state = PAGE_STATE_CACHED;
1649 
1650 // TODO: If free + cached pages are low, we might directly want to free the
1651 // page.
1652 	set_page_state(page, state);
1653 }
1654 
1655 
1656 static void
1657 clear_page(struct vm_page *page)
1658 {
1659 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1660 		B_PAGE_SIZE);
1661 }
1662 
1663 
1664 static status_t
1665 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1666 {
1667 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1668 		B_PRIxPHYSADDR "\n", startPage, length));
1669 
1670 	if (sPhysicalPageOffset > startPage) {
1671 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1672 			"): start page is before free list\n", startPage, length);
1673 		if (sPhysicalPageOffset - startPage >= length)
1674 			return B_OK;
1675 		length -= sPhysicalPageOffset - startPage;
1676 		startPage = sPhysicalPageOffset;
1677 	}
1678 
1679 	startPage -= sPhysicalPageOffset;
1680 
1681 	if (startPage + length > sNumPages) {
1682 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1683 			"): range would extend past free list\n", startPage, length);
1684 		if (startPage >= sNumPages)
1685 			return B_OK;
1686 		length = sNumPages - startPage;
1687 	}
1688 
1689 	WriteLocker locker(sFreePageQueuesLock);
1690 
1691 	for (page_num_t i = 0; i < length; i++) {
1692 		vm_page *page = &sPages[startPage + i];
1693 		switch (page->State()) {
1694 			case PAGE_STATE_FREE:
1695 			case PAGE_STATE_CLEAR:
1696 			{
1697 // TODO: This violates the page reservation policy, since we remove pages from
1698 // the free/clear queues without having reserved them before. This should happen
1699 // in the early boot process only, though.
1700 				DEBUG_PAGE_ACCESS_START(page);
1701 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1702 					? sFreePageQueue : sClearPageQueue;
1703 				queue.Remove(page);
1704 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1705 				page->busy = false;
1706 				atomic_add(&sUnreservedFreePages, -1);
1707 				DEBUG_PAGE_ACCESS_END(page);
1708 				break;
1709 			}
1710 			case PAGE_STATE_WIRED:
1711 			case PAGE_STATE_UNUSED:
1712 				break;
1713 			case PAGE_STATE_ACTIVE:
1714 			case PAGE_STATE_INACTIVE:
1715 			case PAGE_STATE_MODIFIED:
1716 			case PAGE_STATE_CACHED:
1717 			default:
1718 				// uh
1719 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1720 					" in non-free state %d!\n", startPage + i, page->State());
1721 				break;
1722 		}
1723 	}
1724 
1725 	return B_OK;
1726 }
1727 
1728 
1729 /*!
1730 	This is a background thread that wakes up when its condition is notified
1731 	and moves some pages from the free queue over to the clear queue.
1732 	Given enough time, it will clear out all pages from the free queue - we
1733 	could probably slow it down after having reached a certain threshold.
1734 */
1735 static int32
1736 page_scrubber(void *unused)
1737 {
1738 	(void)(unused);
1739 
1740 	TRACE(("page_scrubber starting...\n"));
1741 
1742 	ConditionVariableEntry entry;
1743 	for (;;) {
1744 		while (sFreePageQueue.Count() == 0
1745 				|| atomic_get(&sUnreservedFreePages)
1746 					< (int32)sFreePagesTarget) {
1747 			sFreePageCondition.Add(&entry);
1748 			entry.Wait();
1749 		}
1750 
1751 		// Since we temporarily remove pages from the free pages reserve,
1752 		// we must make sure we don't cause a violation of the page
1753 		// reservation guarantee. The following is usually stricter than
1754 		// necessary, because we don't have information on how many of the
1755 		// reserved pages have already been allocated.
1756 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1757 			kPageReserveForPriority[VM_PRIORITY_USER]);
1758 		if (reserved == 0)
1759 			continue;
1760 
1761 		// get some pages from the free queue
1762 		ReadLocker locker(sFreePageQueuesLock);
1763 
1764 		vm_page *page[SCRUB_SIZE];
1765 		int32 scrubCount = 0;
1766 		for (int32 i = 0; i < reserved; i++) {
1767 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1768 			if (page[i] == NULL)
1769 				break;
1770 
1771 			DEBUG_PAGE_ACCESS_START(page[i]);
1772 
1773 			page[i]->SetState(PAGE_STATE_ACTIVE);
1774 			page[i]->busy = true;
1775 			scrubCount++;
1776 		}
1777 
1778 		locker.Unlock();
1779 
1780 		if (scrubCount == 0) {
1781 			unreserve_pages(reserved);
1782 			continue;
1783 		}
1784 
1785 		TA(ScrubbingPages(scrubCount));
1786 
1787 		// clear them
1788 		for (int32 i = 0; i < scrubCount; i++)
1789 			clear_page(page[i]);
1790 
1791 		locker.Lock();
1792 
1793 		// and put them into the clear queue
1794 		for (int32 i = 0; i < scrubCount; i++) {
1795 			page[i]->SetState(PAGE_STATE_CLEAR);
1796 			page[i]->busy = false;
1797 			DEBUG_PAGE_ACCESS_END(page[i]);
1798 			sClearPageQueue.PrependUnlocked(page[i]);
1799 		}
1800 
1801 		locker.Unlock();
1802 
1803 		unreserve_pages(reserved);
1804 
1805 		TA(ScrubbedPages(scrubCount));
1806 
1807 		// wait at least 100ms between runs
1808 		snooze(100 * 1000);
1809 	}
1810 
1811 	return 0;
1812 }
1813 
1814 
1815 static void
1816 init_page_marker(vm_page &marker)
1817 {
1818 	marker.SetCacheRef(NULL);
1819 	marker.InitState(PAGE_STATE_UNUSED);
1820 	marker.busy = true;
1821 #if DEBUG_PAGE_QUEUE
1822 	marker.queue = NULL;
1823 #endif
1824 #if DEBUG_PAGE_ACCESS
1825 	marker.accessing_thread = thread_get_current_thread_id();
1826 #endif
1827 }
1828 
1829 
1830 static void
1831 remove_page_marker(struct vm_page &marker)
1832 {
1833 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1834 
1835 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1836 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1837 
1838 	marker.SetState(PAGE_STATE_UNUSED);
1839 }
1840 
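// Illustrative sketch of the marker pattern (the queue-scanning code that
// relies on it lies outside this excerpt, so the middle step is assumed): a
// dummy busy page remembers the scan position across unlock/relock cycles:
//
//	vm_page marker;
//	init_page_marker(marker);
//	// ... insert 'marker' behind the current page, drop the queue lock ...
//	remove_page_marker(marker);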
1841 
1842 static vm_page*
1843 next_modified_page(page_num_t& maxPagesToSee)
1844 {
1845 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1846 
1847 	while (maxPagesToSee > 0) {
1848 		vm_page* page = sModifiedPageQueue.Head();
1849 		if (page == NULL)
1850 			return NULL;
1851 
1852 		sModifiedPageQueue.Requeue(page, true);
1853 
1854 		maxPagesToSee--;
1855 
1856 		if (!page->busy)
1857 			return page;
1858 	}
1859 
1860 	return NULL;
1861 }
1862 
1863 
1864 // #pragma mark -
1865 
1866 
1867 class PageWriteTransfer;
1868 class PageWriteWrapper;
1869 
1870 
1871 class PageWriterRun {
1872 public:
1873 	status_t Init(uint32 maxPages);
1874 
1875 	void PrepareNextRun();
1876 	void AddPage(vm_page* page);
1877 	uint32 Go();
1878 
1879 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1880 		bool partialTransfer, size_t bytesTransferred);
1881 
1882 private:
1883 	uint32				fMaxPages;
1884 	uint32				fWrapperCount;
1885 	uint32				fTransferCount;
1886 	int32				fPendingTransfers;
1887 	PageWriteWrapper*	fWrappers;
1888 	PageWriteTransfer*	fTransfers;
1889 	ConditionVariable	fAllFinishedCondition;
1890 };
1891 
1892 
1893 class PageWriteTransfer : public AsyncIOCallback {
1894 public:
1895 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1896 	bool AddPage(vm_page* page);
1897 
1898 	status_t Schedule(uint32 flags);
1899 
1900 	void SetStatus(status_t status, size_t transferred);
1901 
1902 	status_t Status() const	{ return fStatus; }
1903 	struct VMCache* Cache() const { return fCache; }
1904 	uint32 PageCount() const { return fPageCount; }
1905 
1906 	virtual void IOFinished(status_t status, bool partialTransfer,
1907 		generic_size_t bytesTransferred);
1908 private:
1909 	PageWriterRun*		fRun;
1910 	struct VMCache*		fCache;
1911 	off_t				fOffset;
1912 	uint32				fPageCount;
1913 	int32				fMaxPages;
1914 	status_t			fStatus;
1915 	uint32				fVecCount;
1916 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1917 };
1918 
1919 
1920 class PageWriteWrapper {
1921 public:
1922 	PageWriteWrapper();
1923 	~PageWriteWrapper();
1924 	void SetTo(vm_page* page);
1925 	bool Done(status_t result);
1926 
1927 private:
1928 	vm_page*			fPage;
1929 	struct VMCache*		fCache;
1930 	bool				fIsActive;
1931 };
1932 
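// Illustrative life cycle, matching the assertions in SetTo() and Done()
// below: a wrapper is armed once with a non-busy page of a locked cache and
// must be completed exactly once with the write result:
//
//	PageWriteWrapper wrapper;
//	wrapper.SetTo(page);	// marks the page busy/busy_writing, clears dirty flag
//	// ... write the page back ...
//	wrapper.Done(status);	// requeues or frees the page, notifies waiters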
1933 
1934 PageWriteWrapper::PageWriteWrapper()
1935 	:
1936 	fIsActive(false)
1937 {
1938 }
1939 
1940 
1941 PageWriteWrapper::~PageWriteWrapper()
1942 {
1943 	if (fIsActive)
1944 		panic("page write wrapper going out of scope but isn't completed");
1945 }
1946 
1947 
1948 /*!	The page's cache must be locked.
1949 */
1950 void
1951 PageWriteWrapper::SetTo(vm_page* page)
1952 {
1953 	DEBUG_PAGE_ACCESS_CHECK(page);
1954 
1955 	if (page->busy)
1956 		panic("setting page write wrapper to busy page");
1957 
1958 	if (fIsActive)
1959 		panic("re-setting page write wrapper that isn't completed");
1960 
1961 	fPage = page;
1962 	fCache = page->Cache();
1963 	fIsActive = true;
1964 
1965 	fPage->busy = true;
1966 	fPage->busy_writing = true;
1967 
1968 	// We have a modified page -- however, while we're writing it back,
1969 	// the page might still be mapped. In order not to lose any changes to the
1970 	// page, we mark it clean before actually writing it back; if
1971 	// writing the page fails for some reason, we'll just keep it in the
1972 	// modified page list, but that should happen only rarely.
1973 
1974 	// If the page is changed after we cleared the dirty flag, but before we
1975 	// had the chance to write it back, then we'll write it again later -- that
1976 	// will probably not happen that often, though.
1977 
1978 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
1979 }
1980 
1981 
1982 /*!	The page's cache must be locked.
1983 	The page queues must not be locked.
1984 	\return \c true if the page was written successfully or could otherwise be
1985 		handled, \c false otherwise.
1986 */
1987 bool
1988 PageWriteWrapper::Done(status_t result)
1989 {
1990 	if (!fIsActive)
1991 		panic("completing page write wrapper that is not active");
1992 
1993 	DEBUG_PAGE_ACCESS_START(fPage);
1994 
1995 	fPage->busy = false;
1996 		// Set unbusy and notify later by hand, since we might free the page.
1997 
1998 	bool success = true;
1999 
2000 	if (result == B_OK) {
2001 		// put it into the active/inactive queue
2002 		move_page_to_appropriate_queue(fPage);
2003 		fPage->busy_writing = false;
2004 		DEBUG_PAGE_ACCESS_END(fPage);
2005 	} else {
2006 		// Writing the page failed. One reason would be that the cache has been
2007 		// shrunk and the page no longer belongs to the file. Otherwise the
2008 		// actual I/O failed, in which case we'll simply keep the page modified.
2009 
2010 		if (!fPage->busy_writing) {
2011 			// The busy_writing flag was cleared. That means the cache has been
2012 			// shrunk while we were trying to write the page and we have to free
2013 			// it now.
2014 			vm_remove_all_page_mappings(fPage);
2015 // TODO: Unmapping should already happen when resizing the cache!
2016 			fCache->RemovePage(fPage);
2017 			free_page(fPage, false);
2018 			unreserve_pages(1);
2019 		} else {
2020 			// Writing the page failed -- mark the page modified and move it to
2021 			// an appropriate queue other than the modified queue, so we don't
2022 			// keep trying to write it over and over again. We keep
2023 			// non-temporary pages in the modified queue, though, so they don't
2024 			// get lost in the inactive queue.
2025 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2026 				strerror(result));
2027 
2028 			fPage->modified = true;
2029 			if (!fCache->temporary)
2030 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2031 			else if (fPage->IsMapped())
2032 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2033 			else
2034 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2035 
2036 			fPage->busy_writing = false;
2037 			DEBUG_PAGE_ACCESS_END(fPage);
2038 
2039 			success = false;
2040 		}
2041 	}
2042 
2043 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2044 	fIsActive = false;
2045 
2046 	return success;
2047 }
2048 
2049 
2050 /*!	The page's cache must be locked.
2051 */
2052 void
2053 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2054 {
2055 	fRun = run;
2056 	fCache = page->Cache();
2057 	fOffset = page->cache_offset;
2058 	fPageCount = 1;
2059 	fMaxPages = maxPages;
2060 	fStatus = B_OK;
2061 
2062 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2063 	fVecs[0].length = B_PAGE_SIZE;
2064 	fVecCount = 1;
2065 }
2066 
2067 
2068 /*!	The page's cache must be locked.
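
	For illustration (hypothetical values): with \c fOffset == 8, \c fPageCount
	== 2, and a single vector covering the physical frames of cache offsets 8
	and 9, a physically adjacent page at cache offset 10 extends that vector,
	a physically adjacent page at offset 7 is prepended (adjusting \c fOffset),
	a page at offset 7 or 10 that is not physically adjacent starts a new
	vector (if one is still free), and any other page is rejected.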
2069 */
2070 bool
2071 PageWriteTransfer::AddPage(vm_page* page)
2072 {
2073 	if (page->Cache() != fCache
2074 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2075 		return false;
2076 
2077 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2078 		+ fVecs[fVecCount - 1].length;
2079 
2080 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2081 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2082 		// append to last iovec
2083 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2084 		fPageCount++;
2085 		return true;
2086 	}
2087 
2088 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2089 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2090 		&& (off_t)page->cache_offset == fOffset - 1) {
2091 		// prepend to first iovec and adjust offset
2092 		fVecs[0].base = nextBase;
2093 		fVecs[0].length += B_PAGE_SIZE;
2094 		fOffset = page->cache_offset;
2095 		fPageCount++;
2096 		return true;
2097 	}
2098 
2099 	if (((off_t)page->cache_offset == fOffset + fPageCount
2100 			|| (off_t)page->cache_offset == fOffset - 1)
2101 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2102 		// not physically contiguous or not in the right order
2103 		uint32 vectorIndex;
2104 		if ((off_t)page->cache_offset < fOffset) {
2105 			// we are pre-pending another vector, move the other vecs
2106 			for (uint32 i = fVecCount; i > 0; i--)
2107 				fVecs[i] = fVecs[i - 1];
2108 
2109 			fOffset = page->cache_offset;
2110 			vectorIndex = 0;
2111 		} else
2112 			vectorIndex = fVecCount;
2113 
2114 		fVecs[vectorIndex].base
2115 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2116 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2117 
2118 		fVecCount++;
2119 		fPageCount++;
2120 		return true;
2121 	}
2122 
2123 	return false;
2124 }
2125 
2126 
2127 status_t
2128 PageWriteTransfer::Schedule(uint32 flags)
2129 {
2130 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2131 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2132 
2133 	if (fRun != NULL) {
2134 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2135 			flags | B_PHYSICAL_IO_REQUEST, this);
2136 	}
2137 
2138 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2139 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2140 
2141 	SetStatus(status, writeLength);
2142 	return fStatus;
2143 }
2144 
2145 
2146 void
2147 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2148 {
2149 	// only succeed if all pages up to the last one have been written fully
2150 	// and the last page has at least been written partially
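	// For example, for a four-page transfer (16 KiB with 4 KiB pages), a
	// reported transfer of 12 KiB or less is turned into an error, while
	// anything beyond 12 KiB leaves a B_OK status untouched.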
2151 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2152 		status = B_ERROR;
2153 
2154 	fStatus = status;
2155 }
2156 
2157 
2158 void
2159 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2160 	generic_size_t bytesTransferred)
2161 {
2162 	SetStatus(status, bytesTransferred);
2163 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2164 }
2165 
2166 
2167 status_t
2168 PageWriterRun::Init(uint32 maxPages)
2169 {
2170 	fMaxPages = maxPages;
2171 	fWrapperCount = 0;
2172 	fTransferCount = 0;
2173 	fPendingTransfers = 0;
2174 
2175 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2176 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2177 	if (fWrappers == NULL || fTransfers == NULL)
2178 		return B_NO_MEMORY;
2179 
2180 	return B_OK;
2181 }
2182 
2183 
2184 void
2185 PageWriterRun::PrepareNextRun()
2186 {
2187 	fWrapperCount = 0;
2188 	fTransferCount = 0;
2189 	fPendingTransfers = 0;
2190 }
2191 
2192 
2193 /*!	The page's cache must be locked.
2194 */
2195 void
2196 PageWriterRun::AddPage(vm_page* page)
2197 {
2198 	fWrappers[fWrapperCount++].SetTo(page);
2199 
2200 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2201 		fTransfers[fTransferCount++].SetTo(this, page,
2202 			page->Cache()->MaxPagesPerAsyncWrite());
2203 	}
2204 }
2205 
2206 
2207 /*!	Writes all pages previously added.
2208 	\return The number of pages that could not be written or otherwise handled.
2209 */
2210 uint32
2211 PageWriterRun::Go()
2212 {
2213 	atomic_set(&fPendingTransfers, fTransferCount);
2214 
2215 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2216 	ConditionVariableEntry waitEntry;
2217 	fAllFinishedCondition.Add(&waitEntry);
2218 
2219 	// schedule writes
2220 	for (uint32 i = 0; i < fTransferCount; i++)
2221 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2222 
2223 	// wait until all pages have been written
2224 	waitEntry.Wait();
2225 
2226 	// mark pages depending on whether they could be written or not
2227 
2228 	uint32 failedPages = 0;
2229 	uint32 wrapperIndex = 0;
2230 	for (uint32 i = 0; i < fTransferCount; i++) {
2231 		PageWriteTransfer& transfer = fTransfers[i];
2232 		transfer.Cache()->Lock();
2233 
2234 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2235 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2236 				failedPages++;
2237 		}
2238 
2239 		transfer.Cache()->Unlock();
2240 	}
2241 
2242 	ASSERT(wrapperIndex == fWrapperCount);
2243 
2244 	for (uint32 i = 0; i < fTransferCount; i++) {
2245 		PageWriteTransfer& transfer = fTransfers[i];
2246 		struct VMCache* cache = transfer.Cache();
2247 
2248 		// We've acquired a reference for each page
2249 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2250 			// We release the cache references after all pages were made
2251 			// unbusy again - otherwise releasing a vnode could deadlock.
2252 			cache->ReleaseStoreRef();
2253 			cache->ReleaseRef();
2254 		}
2255 	}
2256 
2257 	return failedPages;
2258 }
2259 
2260 
2261 void
2262 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2263 	bool partialTransfer, size_t bytesTransferred)
2264 {
2265 	if (atomic_add(&fPendingTransfers, -1) == 1)
2266 		fAllFinishedCondition.NotifyAll();
2267 }
2268 
2269 
2270 /*!	The page writer continuously takes some pages from the modified
2271 	queue, writes them back, and moves them back to the active queue.
2272 	It runs in its own thread, and is only there to keep the number
2273 	of modified pages low, so that more pages can be reused at little
2274 	cost.
2275 */
2276 status_t
2277 page_writer(void* /*unused*/)
2278 {
2279 	const uint32 kNumPages = 256;
2280 #ifdef TRACE_VM_PAGE
2281 	uint32 writtenPages = 0;
2282 	bigtime_t lastWrittenTime = 0;
2283 	bigtime_t pageCollectionTime = 0;
2284 	bigtime_t pageWritingTime = 0;
2285 #endif
2286 
2287 	PageWriterRun run;
2288 	if (run.Init(kNumPages) != B_OK) {
2289 		panic("page writer: Failed to init PageWriterRun!");
2290 		return B_ERROR;
2291 	}
2292 
2293 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2294 
2295 	while (true) {
2296 // TODO: Maybe wait shorter when memory is low!
2297 		if (sModifiedPageQueue.Count() < kNumPages) {
2298 			sPageWriterCondition.Wait(3000000, true);
2299 				// every 3 seconds when no one triggers us
2300 		}
2301 
2302 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2303 		if (modifiedPages == 0)
2304 			continue;
2305 
2306 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2307 			// We ran through the whole queue without being able to write a
2308 			// single page. Take a break.
2309 			snooze(500000);
2310 			pagesSinceLastSuccessfulWrite = 0;
2311 		}
2312 
2313 #if ENABLE_SWAP_SUPPORT
2314 		page_stats pageStats;
2315 		get_page_stats(pageStats);
2316 		bool activePaging = do_active_paging(pageStats);
2317 #endif
2318 
2319 		// depending on how urgent it becomes to get pages to disk, we adjust
2320 		// our I/O priority
2321 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2322 		int32 ioPriority = B_IDLE_PRIORITY;
2323 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2324 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2325 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2326 		} else {
2327 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2328 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2329 		}
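		// For example, with the threshold of 10000 modified pages, 2500
		// modified pages yield a quarter of MAX_PAGE_WRITER_IO_PRIORITY;
		// above the threshold (or when pages are critically low) the maximum
		// priority is used.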
2330 
2331 		thread_set_io_priority(ioPriority);
2332 
2333 		uint32 numPages = 0;
2334 		run.PrepareNextRun();
2335 
2336 		// TODO: make this laptop friendly, too (ie. only start doing
2337 		// something if someone else did something or there is really
2338 		// enough to do).
2339 
2340 		// collect pages to be written
2341 #ifdef TRACE_VM_PAGE
2342 		pageCollectionTime -= system_time();
2343 #endif
2344 
2345 		page_num_t maxPagesToSee = modifiedPages;
2346 
2347 		while (numPages < kNumPages && maxPagesToSee > 0) {
2348 			vm_page *page = next_modified_page(maxPagesToSee);
2349 			if (page == NULL)
2350 				break;
2351 
2352 			PageCacheLocker cacheLocker(page, false);
2353 			if (!cacheLocker.IsLocked())
2354 				continue;
2355 
2356 			VMCache *cache = page->Cache();
2357 
2358 			// If the page is busy or its state has changed while we were
2359 			// locking the cache, just ignore it.
2360 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2361 				continue;
2362 
2363 			DEBUG_PAGE_ACCESS_START(page);
2364 
2365 			// Don't write back wired (locked) pages.
2366 			if (page->WiredCount() > 0) {
2367 				set_page_state(page, PAGE_STATE_ACTIVE);
2368 				DEBUG_PAGE_ACCESS_END(page);
2369 				continue;
2370 			}
2371 
2372 			// Write back temporary pages only when we're actively paging.
2373 			if (cache->temporary
2374 #if ENABLE_SWAP_SUPPORT
2375 				&& (!activePaging
2376 					|| !cache->CanWritePage(
2377 							(off_t)page->cache_offset << PAGE_SHIFT))
2378 #endif
2379 				) {
2380 				// We can't/don't want to do anything with this page, so move it
2381 				// to one of the other queues.
2382 				if (page->mappings.IsEmpty())
2383 					set_page_state(page, PAGE_STATE_INACTIVE);
2384 				else
2385 					set_page_state(page, PAGE_STATE_ACTIVE);
2386 
2387 				DEBUG_PAGE_ACCESS_END(page);
2388 				continue;
2389 			}
2390 
2391 			// We need our own reference to the store, as it might currently be
2392 			// in the process of being destroyed.
2393 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2394 				DEBUG_PAGE_ACCESS_END(page);
2395 				cacheLocker.Unlock();
2396 				thread_yield();
2397 				continue;
2398 			}
2399 
2400 			run.AddPage(page);
2401 				// TODO: We're possibly adding pages of different caches and
2402 				// thus maybe of different underlying file systems here. This
2403 				// is a potential problem for loop file systems/devices, since
2404 				// we could mark a page busy that would need to be accessed
2405 				// when writing back another page, thus causing a deadlock.
2406 
2407 			DEBUG_PAGE_ACCESS_END(page);
2408 
2409 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2410 			TPW(WritePage(page));
2411 
2412 			cache->AcquireRefLocked();
2413 			numPages++;
2414 		}
2415 
2416 #ifdef TRACE_VM_PAGE
2417 		pageCollectionTime += system_time();
2418 #endif
2419 		if (numPages == 0)
2420 			continue;
2421 
2422 		// write pages to disk and do all the cleanup
2423 #ifdef TRACE_VM_PAGE
2424 		pageWritingTime -= system_time();
2425 #endif
2426 		uint32 failedPages = run.Go();
2427 #ifdef TRACE_VM_PAGE
2428 		pageWritingTime += system_time();
2429 
2430 		// debug output only...
2431 		writtenPages += numPages;
2432 		if (writtenPages >= 1024) {
2433 			bigtime_t now = system_time();
2434 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2435 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2436 				(now - lastWrittenTime) / 1000,
2437 				pageCollectionTime / 1000, pageWritingTime / 1000));
2438 			lastWrittenTime = now;
2439 
2440 			writtenPages -= 1024;
2441 			pageCollectionTime = 0;
2442 			pageWritingTime = 0;
2443 		}
2444 #endif
2445 
2446 		if (failedPages == numPages)
2447 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2448 		else
2449 			pagesSinceLastSuccessfulWrite = 0;
2450 	}
2451 
2452 	return B_OK;
2453 }
2454 
2455 
2456 // #pragma mark -
2457 
2458 
2459 // TODO: This should be done in the page daemon!
2460 #if 0
2461 #if ENABLE_SWAP_SUPPORT
2462 static bool
2463 free_page_swap_space(int32 index)
2464 {
2465 	vm_page *page = vm_page_at_index(index);
2466 	PageCacheLocker locker(page);
2467 	if (!locker.IsLocked())
2468 		return false;
2469 
2470 	DEBUG_PAGE_ACCESS_START(page);
2471 
2472 	VMCache* cache = page->Cache();
2473 	if (cache->temporary && page->WiredCount() == 0
2474 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2475 			&& page->usage_count > 0) {
2476 		// TODO: how to judge a page is highly active?
2477 		if (swap_free_page_swap_space(page)) {
2478 			// We need to mark the page modified, since otherwise it could be
2479 			// stolen and we'd lose its data.
2480 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2481 			TD(FreedPageSwap(page));
2482 			DEBUG_PAGE_ACCESS_END(page);
2483 			return true;
2484 		}
2485 	}
2486 	DEBUG_PAGE_ACCESS_END(page);
2487 	return false;
2488 }
2489 #endif
2490 #endif	// 0
2491 
2492 
2493 static vm_page *
2494 find_cached_page_candidate(struct vm_page &marker)
2495 {
2496 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2497 
2498 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2499 	vm_page *page;
2500 
2501 	if (marker.State() == PAGE_STATE_UNUSED) {
2502 		// Get the first page of the cached queue
2503 		page = sCachedPageQueue.Head();
2504 	} else {
2505 		// Get the next page of the current queue
2506 		if (marker.State() != PAGE_STATE_CACHED) {
2507 			panic("invalid marker %p state", &marker);
2508 			return NULL;
2509 		}
2510 
2511 		page = sCachedPageQueue.Next(&marker);
2512 		sCachedPageQueue.Remove(&marker);
2513 		marker.SetState(PAGE_STATE_UNUSED);
2514 	}
2515 
2516 	while (page != NULL) {
2517 		if (!page->busy) {
2518 			// we found a candidate, insert marker
2519 			marker.SetState(PAGE_STATE_CACHED);
2520 			sCachedPageQueue.InsertAfter(page, &marker);
2521 			return page;
2522 		}
2523 
2524 		page = sCachedPageQueue.Next(page);
2525 	}
2526 
2527 	return NULL;
2528 }
2529 
2530 
2531 static bool
2532 free_cached_page(vm_page *page, bool dontWait)
2533 {
2534 	// try to lock the page's cache
2535 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2536 		return false;
2537 	VMCache* cache = page->Cache();
2538 
2539 	AutoLocker<VMCache> cacheLocker(cache, true);
2540 	MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2541 
2542 	// check again if that page is still a candidate
2543 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2544 		return false;
2545 
2546 	DEBUG_PAGE_ACCESS_START(page);
2547 
2548 	PAGE_ASSERT(page, !page->IsMapped());
2549 	PAGE_ASSERT(page, !page->modified);
2550 
2551 	// we can now steal this page
2552 
2553 	cache->RemovePage(page);
2554 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2555 		// vm_page_allocate_page_run()) can pick it up, since they would be
2556 		// required to lock the cache first, which would fail.
2557 
2558 	sCachedPageQueue.RemoveUnlocked(page);
2559 	return true;
2560 }
2561 
2562 
2563 static uint32
2564 free_cached_pages(uint32 pagesToFree, bool dontWait)
2565 {
2566 	vm_page marker;
2567 	init_page_marker(marker);
2568 
2569 	uint32 pagesFreed = 0;
2570 
2571 	while (pagesFreed < pagesToFree) {
2572 		vm_page *page = find_cached_page_candidate(marker);
2573 		if (page == NULL)
2574 			break;
2575 
2576 		if (free_cached_page(page, dontWait)) {
2577 			ReadLocker locker(sFreePageQueuesLock);
2578 			page->SetState(PAGE_STATE_FREE);
2579 			DEBUG_PAGE_ACCESS_END(page);
2580 			sFreePageQueue.PrependUnlocked(page);
2581 			locker.Unlock();
2582 
2583 			TA(StolenPage());
2584 
2585 			pagesFreed++;
2586 		}
2587 	}
2588 
2589 	remove_page_marker(marker);
2590 
2591 	sFreePageCondition.NotifyAll();
2592 
2593 	return pagesFreed;
2594 }
2595 
2596 
2597 static void
2598 idle_scan_active_pages(page_stats& pageStats)
2599 {
2600 	VMPageQueue& queue = sActivePageQueue;
2601 
2602 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2603 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
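	// For example, with kIdleRunsForFullQueue == 20 and 10000 active pages,
	// up to 501 pages are scanned per idle run.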
2604 
2605 	while (maxToScan > 0) {
2606 		maxToScan--;
2607 
2608 		// Get the next page. Note that we don't bother to lock here. We go with
2609 		// the assumption that on all architectures reading/writing pointers is
2610 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2611 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2612 		vm_page* page = queue.Head();
2613 		if (page == NULL)
2614 			break;
2615 
2616 		// lock the page's cache
2617 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2618 		if (cache == NULL)
2619 			continue;
2620 
2621 		if (page->State() != PAGE_STATE_ACTIVE) {
2622 			// page is no longer in the cache or in this queue
2623 			cache->ReleaseRefAndUnlock();
2624 			continue;
2625 		}
2626 
2627 		if (page->busy) {
2628 			// page is busy -- requeue at the end
2629 			vm_page_requeue(page, true);
2630 			cache->ReleaseRefAndUnlock();
2631 			continue;
2632 		}
2633 
2634 		DEBUG_PAGE_ACCESS_START(page);
2635 
2636 		// Get the page active/modified flags and update the page's usage count.
2637 		// We completely unmap inactive temporary pages. This saves us from
2638 		// having to iterate through the inactive list as well, since we'll be
2639 		// notified via page fault whenever such an inactive page is used again.
2640 		// We don't remove the mappings of non-temporary pages, since we
2641 		// wouldn't notice when they become unused and could thus be moved to
2642 		// the cached list.
2643 		int32 usageCount;
2644 		if (page->WiredCount() > 0 || page->usage_count > 0
2645 			|| !cache->temporary) {
2646 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2647 		} else
2648 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2649 
2650 		if (usageCount > 0) {
2651 			usageCount += page->usage_count + kPageUsageAdvance;
2652 			if (usageCount > kPageUsageMax)
2653 				usageCount = kPageUsageMax;
2654 // TODO: This would probably also be the place to reclaim swap space.
2655 		} else {
2656 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2657 			if (usageCount < 0) {
2658 				usageCount = 0;
2659 				set_page_state(page, PAGE_STATE_INACTIVE);
2660 			}
2661 		}
2662 
2663 		page->usage_count = usageCount;
2664 
2665 		DEBUG_PAGE_ACCESS_END(page);
2666 
2667 		cache->ReleaseRefAndUnlock();
2668 	}
2669 }
2670 
2671 
2672 static void
2673 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2674 {
2675 	int32 pagesToFree = pageStats.unsatisfiedReservations
2676 		+ sFreeOrCachedPagesTarget
2677 		- (pageStats.totalFreePages + pageStats.cachedPages);
2678 	if (pagesToFree <= 0)
2679 		return;
2680 
2681 	bigtime_t time = system_time();
2682 	uint32 pagesScanned = 0;
2683 	uint32 pagesToCached = 0;
2684 	uint32 pagesToModified = 0;
2685 	uint32 pagesToActive = 0;
2686 
2687 	// Determine how many pages at maximum to send to the modified queue. Since
2688 	// it is relatively expensive to page out pages, we do that on a grander
2689 	// scale only when things get desperate.
2690 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
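	// That is, while the despair level is at most 1, at most 32 pages per
	// scan are handed to the page writer; beyond that the limit is
	// effectively lifted.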
2691 
2692 	vm_page marker;
2693 	init_page_marker(marker);
2694 
2695 	VMPageQueue& queue = sInactivePageQueue;
2696 	InterruptsSpinLocker queueLocker(queue.GetLock());
2697 	uint32 maxToScan = queue.Count();
2698 
2699 	vm_page* nextPage = queue.Head();
2700 
2701 	while (pagesToFree > 0 && maxToScan > 0) {
2702 		maxToScan--;
2703 
2704 		// get the next page
2705 		vm_page* page = nextPage;
2706 		if (page == NULL)
2707 			break;
2708 		nextPage = queue.Next(page);
2709 
2710 		if (page->busy)
2711 			continue;
2712 
2713 		// mark the position
2714 		queue.InsertAfter(page, &marker);
2715 		queueLocker.Unlock();
2716 
2717 		// lock the page's cache
2718 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2719 		if (cache == NULL || page->busy
2720 				|| page->State() != PAGE_STATE_INACTIVE) {
2721 			if (cache != NULL)
2722 				cache->ReleaseRefAndUnlock();
2723 			queueLocker.Lock();
2724 			nextPage = queue.Next(&marker);
2725 			queue.Remove(&marker);
2726 			continue;
2727 		}
2728 
2729 		pagesScanned++;
2730 
2731 		DEBUG_PAGE_ACCESS_START(page);
2732 
2733 		// Get the accessed count, clear the accessed/modified flags and
2734 		// unmap the page, if it hasn't been accessed.
2735 		int32 usageCount;
2736 		if (page->WiredCount() > 0)
2737 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2738 		else
2739 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2740 
2741 		// update usage count
2742 		if (usageCount > 0) {
2743 			usageCount += page->usage_count + kPageUsageAdvance;
2744 			if (usageCount > kPageUsageMax)
2745 				usageCount = kPageUsageMax;
2746 		} else {
2747 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2748 			if (usageCount < 0)
2749 				usageCount = 0;
2750 		}
2751 
2752 		page->usage_count = usageCount;
2753 
2754 		// Move to fitting queue or requeue:
2755 		// * Active mapped pages go to the active queue.
2756 		// * Inactive mapped (i.e. wired) pages are requeued.
2757 		// * The remaining pages are cachable. Thus, if unmodified they go to
2758 		//   the cached queue, otherwise to the modified queue (up to a limit).
2759 		//   Note that, unlike in the idle scanning, we don't exempt pages of
2760 		//   temporary caches here. Apparently we really need memory, so we'd
2761 		//   better page out memory as well.
2762 		bool isMapped = page->IsMapped();
2763 		if (usageCount > 0) {
2764 			if (isMapped) {
2765 				set_page_state(page, PAGE_STATE_ACTIVE);
2766 				pagesToActive++;
2767 			} else
2768 				vm_page_requeue(page, true);
2769 		} else if (isMapped) {
2770 			vm_page_requeue(page, true);
2771 		} else if (!page->modified) {
2772 			set_page_state(page, PAGE_STATE_CACHED);
2773 			pagesToFree--;
2774 			pagesToCached++;
2775 		} else if (maxToFlush > 0) {
2776 			set_page_state(page, PAGE_STATE_MODIFIED);
2777 			maxToFlush--;
2778 			pagesToModified++;
2779 		} else
2780 			vm_page_requeue(page, true);
2781 
2782 		DEBUG_PAGE_ACCESS_END(page);
2783 
2784 		cache->ReleaseRefAndUnlock();
2785 
2786 		// remove the marker
2787 		queueLocker.Lock();
2788 		nextPage = queue.Next(&marker);
2789 		queue.Remove(&marker);
2790 	}
2791 
2792 	queueLocker.Unlock();
2793 
2794 	time = system_time() - time;
2795 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2796 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2797 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2798 		pagesToModified, pagesToActive);
2799 
2800 	// wake up the page writer, if we tossed it some pages
2801 	if (pagesToModified > 0)
2802 		sPageWriterCondition.WakeUp();
2803 }
2804 
2805 
2806 static void
2807 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2808 {
2809 	vm_page marker;
2810 	init_page_marker(marker);
2811 
2812 	VMPageQueue& queue = sActivePageQueue;
2813 	InterruptsSpinLocker queueLocker(queue.GetLock());
2814 	uint32 maxToScan = queue.Count();
2815 
2816 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2817 		+ sFreeOrCachedPagesTarget
2818 		- (pageStats.totalFreePages + pageStats.cachedPages)
2819 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2820 	if (pagesToDeactivate <= 0)
2821 		return;
2822 
2823 	bigtime_t time = system_time();
2824 	uint32 pagesAccessed = 0;
2825 	uint32 pagesToInactive = 0;
2826 	uint32 pagesScanned = 0;
2827 
2828 	vm_page* nextPage = queue.Head();
2829 
2830 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2831 		maxToScan--;
2832 
2833 		// get the next page
2834 		vm_page* page = nextPage;
2835 		if (page == NULL)
2836 			break;
2837 		nextPage = queue.Next(page);
2838 
2839 		if (page->busy)
2840 			continue;
2841 
2842 		// mark the position
2843 		queue.InsertAfter(page, &marker);
2844 		queueLocker.Unlock();
2845 
2846 		// lock the page's cache
2847 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2848 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2849 			if (cache != NULL)
2850 				cache->ReleaseRefAndUnlock();
2851 			queueLocker.Lock();
2852 			nextPage = queue.Next(&marker);
2853 			queue.Remove(&marker);
2854 			continue;
2855 		}
2856 
2857 		pagesScanned++;
2858 
2859 		DEBUG_PAGE_ACCESS_START(page);
2860 
2861 		// Get the page active/modified flags and update the page's usage count.
2862 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2863 
2864 		if (usageCount > 0) {
2865 			usageCount += page->usage_count + kPageUsageAdvance;
2866 			if (usageCount > kPageUsageMax)
2867 				usageCount = kPageUsageMax;
2868 			pagesAccessed++;
2869 // TODO: This would probably also be the place to reclaim swap space.
2870 		} else {
2871 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2872 			if (usageCount <= 0) {
2873 				usageCount = 0;
2874 				set_page_state(page, PAGE_STATE_INACTIVE);
2875 				pagesToInactive++;
2876 			}
2877 		}
2878 
2879 		page->usage_count = usageCount;
2880 
2881 		DEBUG_PAGE_ACCESS_END(page);
2882 
2883 		cache->ReleaseRefAndUnlock();
2884 
2885 		// remove the marker
2886 		queueLocker.Lock();
2887 		nextPage = queue.Next(&marker);
2888 		queue.Remove(&marker);
2889 	}
2890 
2891 	time = system_time() - time;
2892 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2893 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2894 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2895 }
2896 
2897 
2898 static void
2899 page_daemon_idle_scan(page_stats& pageStats)
2900 {
2901 	TRACE_DAEMON("page daemon: idle run\n");
2902 
2903 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2904 		// We want more actually free pages, so free some from the cached
2905 		// ones.
2906 		uint32 freed = free_cached_pages(
2907 			sFreePagesTarget - pageStats.totalFreePages, false);
2908 		if (freed > 0)
2909 			unreserve_pages(freed);
2910 		get_page_stats(pageStats);
2911 	}
2912 
2913 	// Walk the active list and move pages to the inactive queue.
2914 	get_page_stats(pageStats);
2915 	idle_scan_active_pages(pageStats);
2916 }
2917 
2918 
2919 static void
2920 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2921 {
2922 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
2923 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
2924 		pageStats.cachedPages, pageStats.unsatisfiedReservations
2925 			+ sFreeOrCachedPagesTarget
2926 			- (pageStats.totalFreePages + pageStats.cachedPages));
2927 
2928 	// Walk the inactive list and transfer pages to the cached and modified
2929 	// queues.
2930 	full_scan_inactive_pages(pageStats, despairLevel);
2931 
2932 	// Free cached pages. Also wake up reservation waiters.
2933 	get_page_stats(pageStats);
2934 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2935 		- (pageStats.totalFreePages);
2936 	if (pagesToFree > 0) {
2937 		uint32 freed = free_cached_pages(pagesToFree, true);
2938 		if (freed > 0)
2939 			unreserve_pages(freed);
2940 	}
2941 
2942 	// Walk the active list and move pages to the inactive queue.
2943 	get_page_stats(pageStats);
2944 	full_scan_active_pages(pageStats, despairLevel);
2945 }
2946 
2947 
2948 static status_t
2949 page_daemon(void* /*unused*/)
2950 {
2951 	int32 despairLevel = 0;
2952 
2953 	while (true) {
2954 		sPageDaemonCondition.ClearActivated();
2955 
2956 		// evaluate the free pages situation
2957 		page_stats pageStats;
2958 		get_page_stats(pageStats);
2959 
2960 		if (!do_active_paging(pageStats)) {
2961 			// Things look good -- just maintain statistics and keep the pool
2962 			// of actually free pages full enough.
2963 			despairLevel = 0;
2964 			page_daemon_idle_scan(pageStats);
2965 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
2966 		} else {
2967 			// Not enough free pages. We need to do some real work.
2968 			despairLevel = std::min(despairLevel + 1, (int32)3);
2969 			page_daemon_full_scan(pageStats, despairLevel);
2970 
2971 			// Don't wait after the first full scan, but rather immediately
2972 			// check whether we were successful in freeing enough pages and
2973 			// re-run with increased despair level. The first scan is
2974 			// conservative with respect to moving inactive modified pages to
2975 			// the modified list to avoid thrashing. The second scan, however,
2976 			// will not hold back.
2977 			if (despairLevel > 1)
2978 				snooze(kBusyScanWaitInterval);
2979 		}
2980 	}
2981 
2982 	return B_OK;
2983 }
2984 
2985 
2986 /*!	Returns how many pages could *not* be reserved.
2987 */
2988 static uint32
2989 reserve_pages(uint32 count, int priority, bool dontWait)
2990 {
2991 	int32 dontTouch = kPageReserveForPriority[priority];
2992 
2993 	while (true) {
2994 		count -= reserve_some_pages(count, dontTouch);
2995 		if (count == 0)
2996 			return 0;
2997 
2998 		if (sUnsatisfiedPageReservations == 0) {
2999 			count -= free_cached_pages(count, dontWait);
3000 			if (count == 0)
3001 				return count;
3002 		}
3003 
3004 		if (dontWait)
3005 			return count;
3006 
3007 		// we need to wait for pages to become available
3008 
3009 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3010 
3011 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3012 		sUnsatisfiedPageReservations += count;
3013 
3014 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3015 			// the situation changed
3016 			sUnsatisfiedPageReservations -= count;
3017 			continue;
3018 		}
3019 
3020 		PageReservationWaiter waiter;
3021 		waiter.dontTouch = dontTouch;
3022 		waiter.missing = count;
3023 		waiter.thread = thread_get_current_thread();
3024 		waiter.threadPriority = waiter.thread->priority;
3025 
3026 		// insert ordered (i.e. after all waiters with higher or equal priority)
3027 		PageReservationWaiter* otherWaiter = NULL;
3028 		for (PageReservationWaiterList::Iterator it
3029 				= sPageReservationWaiters.GetIterator();
3030 			(otherWaiter = it.Next()) != NULL;) {
3031 			if (waiter < *otherWaiter)
3032 				break;
3033 		}
3034 
3035 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3036 
3037 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3038 			"waiting for pages");
3039 
3040 		if (notifyDaemon)
3041 			sPageDaemonCondition.WakeUp();
3042 
3043 		pageDeficitLocker.Unlock();
3044 
3045 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3046 		thread_block();
3047 
3048 		pageDeficitLocker.Lock();
3049 
3050 		return 0;
3051 	}
3052 }
3053 
3054 
3055 //	#pragma mark - private kernel API
3056 
3057 
3058 /*!	Writes a range of modified pages of a cache to disk.
3059 	You need to hold the VMCache lock when calling this function.
3060 	Note that the cache lock will be temporarily released in this function.
3061 	\param cache The cache.
3062 	\param firstPage Offset (in page size units) of the first page in the range.
3063 	\param endPage End offset (in page size units) of the page range. The page
3064 		at this offset is not included.
3065 */
3066 status_t
3067 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3068 	uint32 endPage)
3069 {
3070 	static const int32 kMaxPages = 256;
3071 	int32 maxPages = cache->MaxPagesPerWrite();
3072 	if (maxPages < 0 || maxPages > kMaxPages)
3073 		maxPages = kMaxPages;
3074 
3075 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3076 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3077 
3078 	PageWriteWrapper stackWrappersPool[2];
3079 	PageWriteWrapper* stackWrappers[1];
3080 	PageWriteWrapper* wrapperPool
3081 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3082 	PageWriteWrapper** wrappers
3083 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3084 	if (wrapperPool == NULL || wrappers == NULL) {
3085 		// don't fail, just limit our capabilities
3086 		delete[] wrapperPool;
3087 		delete[] wrappers;
3088 		wrapperPool = stackWrappersPool;
3089 		wrappers = stackWrappers;
3090 		maxPages = 1;
3091 	}
3092 
3093 	int32 nextWrapper = 0;
3094 	int32 usedWrappers = 0;
3095 
3096 	PageWriteTransfer transfer;
3097 	bool transferEmpty = true;
3098 
3099 	VMCachePagesTree::Iterator it
3100 		= cache->pages.GetIterator(firstPage, true, true);
3101 
3102 	while (true) {
3103 		vm_page* page = it.Next();
3104 		if (page == NULL || page->cache_offset >= endPage) {
3105 			if (transferEmpty)
3106 				break;
3107 
3108 			page = NULL;
3109 		}
3110 
3111 		if (page != NULL) {
3112 			if (page->busy
3113 				|| (page->State() != PAGE_STATE_MODIFIED
3114 					&& !vm_test_map_modification(page))) {
3115 				page = NULL;
3116 			}
3117 		}
3118 
3119 		PageWriteWrapper* wrapper = NULL;
3120 		if (page != NULL) {
3121 			wrapper = &wrapperPool[nextWrapper++];
3122 			if (nextWrapper > maxPages)
3123 				nextWrapper = 0;
3124 
3125 			DEBUG_PAGE_ACCESS_START(page);
3126 
3127 			wrapper->SetTo(page);
3128 
3129 			if (transferEmpty || transfer.AddPage(page)) {
3130 				if (transferEmpty) {
3131 					transfer.SetTo(NULL, page, maxPages);
3132 					transferEmpty = false;
3133 				}
3134 
3135 				DEBUG_PAGE_ACCESS_END(page);
3136 
3137 				wrappers[usedWrappers++] = wrapper;
3138 				continue;
3139 			}
3140 
3141 			DEBUG_PAGE_ACCESS_END(page);
3142 		}
3143 
3144 		if (transferEmpty)
3145 			continue;
3146 
3147 		cache->Unlock();
3148 		status_t status = transfer.Schedule(0);
3149 		cache->Lock();
3150 
3151 		for (int32 i = 0; i < usedWrappers; i++)
3152 			wrappers[i]->Done(status);
3153 
3154 		usedWrappers = 0;
3155 
3156 		if (page != NULL) {
3157 			transfer.SetTo(NULL, page, maxPages);
3158 			wrappers[usedWrappers++] = wrapper;
3159 		} else
3160 			transferEmpty = true;
3161 	}
3162 
3163 	if (wrapperPool != stackWrappersPool) {
3164 		delete[] wrapperPool;
3165 		delete[] wrappers;
3166 	}
3167 
3168 	return B_OK;
3169 }
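

#if 0
// Illustrative usage sketch (not compiled in): write back the modified pages
// among the first 16 page slots of some cache. The function name and the
// range are hypothetical; the cache lock must be held on entry and is
// temporarily released while the writes are scheduled.
static void
example_write_back_cache_head(VMCache* cache)
{
	cache->Lock();
	vm_page_write_modified_page_range(cache, 0, 16);
	cache->Unlock();
}
#endif	// 0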
3170 
3171 
3172 /*!	You need to hold the VMCache lock when calling this function.
3173 	Note that the cache lock will be temporarily released in this function.
3174 */
3175 status_t
3176 vm_page_write_modified_pages(VMCache *cache)
3177 {
3178 	return vm_page_write_modified_page_range(cache, 0,
3179 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3180 }
3181 
3182 
3183 /*!	Schedules the page writer to write back the specified \a page.
3184 	Note, however, that it might not do this immediately, and it can well
3185 	take several seconds until the page is actually written out.
3186 */
3187 void
3188 vm_page_schedule_write_page(vm_page *page)
3189 {
3190 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3191 
3192 	vm_page_requeue(page, false);
3193 
3194 	sPageWriterCondition.WakeUp();
3195 }
3196 
3197 
3198 /*!	Cache must be locked.
3199 */
3200 void
3201 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3202 	uint32 endPage)
3203 {
3204 	uint32 modified = 0;
3205 	for (VMCachePagesTree::Iterator it
3206 				= cache->pages.GetIterator(firstPage, true, true);
3207 			vm_page *page = it.Next();) {
3208 		if (page->cache_offset >= endPage)
3209 			break;
3210 
3211 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3212 			DEBUG_PAGE_ACCESS_START(page);
3213 			vm_page_requeue(page, false);
3214 			modified++;
3215 			DEBUG_PAGE_ACCESS_END(page);
3216 		}
3217 	}
3218 
3219 	if (modified > 0)
3220 		sPageWriterCondition.WakeUp();
3221 }
3222 
3223 
3224 void
3225 vm_page_init_num_pages(kernel_args *args)
3226 {
3227 	// calculate the size of memory by looking at the physical_memory_range array
3228 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3229 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3230 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3231 
3232 	sNonExistingPages = 0;
3233 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3234 
3235 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3236 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3237 		if (start > physicalPagesEnd)
3238 			sNonExistingPages += start - physicalPagesEnd;
3239 		physicalPagesEnd = start
3240 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3241 
3242 #ifdef LIMIT_AVAILABLE_MEMORY
3243 		page_num_t available
3244 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3245 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3246 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3247 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3248 			break;
3249 		}
3250 #endif
3251 	}
3252 
3253 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3254 		sPhysicalPageOffset, physicalPagesEnd));
3255 
3256 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3257 }
3258 
3259 
3260 status_t
3261 vm_page_init(kernel_args *args)
3262 {
3263 	TRACE(("vm_page_init: entry\n"));
3264 
3265 	// init page queues
3266 	sModifiedPageQueue.Init("modified pages queue");
3267 	sInactivePageQueue.Init("inactive pages queue");
3268 	sActivePageQueue.Init("active pages queue");
3269 	sCachedPageQueue.Init("cached pages queue");
3270 	sFreePageQueue.Init("free pages queue");
3271 	sClearPageQueue.Init("clear pages queue");
3272 
3273 	new (&sPageReservationWaiters) PageReservationWaiterList;
3274 
3275 	// map in the new free page table
3276 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3277 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3278 
3279 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3280 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3281 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3282 
3283 	// initialize the free page table
3284 	for (uint32 i = 0; i < sNumPages; i++) {
3285 		sPages[i].Init(sPhysicalPageOffset + i);
3286 		sFreePageQueue.Append(&sPages[i]);
3287 
3288 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3289 		sPages[i].allocation_tracking_info.Clear();
3290 #endif
3291 	}
3292 
3293 	sUnreservedFreePages = sNumPages;
3294 
3295 	TRACE(("initialized table\n"));
3296 
3297 	// mark the ranges between usable physical memory unused
3298 	phys_addr_t previousEnd = 0;
3299 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3300 		phys_addr_t base = args->physical_memory_range[i].start;
3301 		phys_size_t size = args->physical_memory_range[i].size;
3302 		if (base > previousEnd) {
3303 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3304 				(base - previousEnd) / B_PAGE_SIZE, false);
3305 		}
3306 		previousEnd = base + size;
3307 	}
3308 
3309 	// mark the allocated physical page ranges wired
3310 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3311 		mark_page_range_in_use(
3312 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3313 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3314 	}
3315 
3316 	// The target of actually free pages. This must be at least the system
3317 	// reserve, but should be a few more pages, so we don't have to extract
3318 	// a cached page with each allocation.
3319 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3320 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
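	// For example, with 4 KiB pages on a machine with 1 GiB of RAM and no
	// memory holes (262144 pages), the dynamic part amounts to 256 pages
	// (1 MiB) on top of the user reserve.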
3321 
3322 	// The target of free + cached and inactive pages. On low-memory machines
3323 	// keep things tight. free + cached is the pool of immediately allocatable
3324 	// pages. We want a few inactive pages, so when we're actually paging, we
3325 	// have a reasonably large set of pages to work with.
3326 	if (sUnreservedFreePages < 16 * 1024) {
3327 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3328 		sInactivePagesTarget = sFreePagesTarget / 3;
3329 	} else {
3330 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3331 		sInactivePagesTarget = sFreePagesTarget / 2;
3332 	}
3333 
3334 	TRACE(("vm_page_init: exit\n"));
3335 
3336 	return B_OK;
3337 }
3338 
3339 
3340 status_t
3341 vm_page_init_post_area(kernel_args *args)
3342 {
3343 	void *dummy;
3344 
3345 	dummy = sPages;
3346 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3347 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3348 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3349 
3350 	add_debugger_command("page_stats", &dump_page_stats,
3351 		"Dump statistics about page usage");
3352 	add_debugger_command_etc("page", &dump_page,
3353 		"Dump page info",
3354 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3355 		"Prints information for the physical page. If neither \"-p\" nor\n"
3356 		"\"-v\" are given, the provided address is interpreted as address of\n"
3357 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3358 		"given, the address is the physical address of the page. If \"-v\" is\n"
3359 		"given, the address is interpreted as virtual address in the current\n"
3360 		"thread's address space, and information for the page it is mapped\n"
3361 		"to (if any) is printed. If \"-m\" is specified, the command will\n"
3362 		"search all known address spaces for mappings to that page and print\n"
3363 		"them.\n", 0);
3364 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3365 	add_debugger_command("find_page", &find_page,
3366 		"Find out which queue a page is actually in");
3367 
3368 #ifdef TRACK_PAGE_USAGE_STATS
3369 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3370 		"Dumps statistics about page usage counts",
3371 		"\n"
3372 		"Dumps statistics about page usage counts.\n",
3373 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3374 #endif
3375 
3376 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3377 	add_debugger_command_etc("page_allocations_per_caller",
3378 		&dump_page_allocations_per_caller,
3379 		"Dump current page allocations summed up per caller",
3380 		"[ -d <caller> ] [ -r ]\n"
3381 		"The current allocations will be summed up by caller (their count)\n"
3382 		"and printed in decreasing order by count.\n"
3383 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3384 		"including the respective stack trace.\n"
3385 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3386 		"the information, so the next command invocation will only show the\n"
3387 		"allocations made after the reset.\n", 0);
3388 	add_debugger_command_etc("page_allocation_infos",
3389 		&dump_page_allocation_infos,
3390 		"Dump current page allocations",
3391 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3392 		"[ --thread <thread ID> ]\n"
3393 		"The current allocations filtered by optional values will be printed.\n"
3394 		"The optional \"-p\" page number filters for a specific page. With\n"
3395 		"\"--team\" and \"--thread\", allocations by specific teams and/or\n"
3396 		"threads can be filtered (these only work if a corresponding\n"
3397 		"tracing entry is still available).\n"
3398 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3399 		"callers are printed, where available.\n", 0);
3400 #endif
3401 
3402 	return B_OK;
3403 }
3404 
3405 
3406 status_t
3407 vm_page_init_post_thread(kernel_args *args)
3408 {
3409 	new (&sFreePageCondition) ConditionVariable;
3410 
3411 	// create a kernel thread to clear out pages
3412 
3413 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3414 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3415 	resume_thread(thread);
3416 
3417 	// start page writer
3418 
3419 	sPageWriterCondition.Init("page writer");
3420 
3421 	thread = spawn_kernel_thread(&page_writer, "page writer",
3422 		B_NORMAL_PRIORITY + 1, NULL);
3423 	resume_thread(thread);
3424 
3425 	// start page daemon
3426 
3427 	sPageDaemonCondition.Init("page daemon");
3428 
3429 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3430 		B_NORMAL_PRIORITY, NULL);
3431 	resume_thread(thread);
3432 
3433 	return B_OK;
3434 }
3435 
3436 
3437 status_t
3438 vm_mark_page_inuse(page_num_t page)
3439 {
3440 	return vm_mark_page_range_inuse(page, 1);
3441 }
3442 
3443 
3444 status_t
3445 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3446 {
3447 	return mark_page_range_in_use(startPage, length, false);
3448 }
3449 
3450 
3451 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3452 */
3453 void
3454 vm_page_unreserve_pages(vm_page_reservation* reservation)
3455 {
3456 	uint32 count = reservation->count;
3457 	reservation->count = 0;
3458 
3459 	if (count == 0)
3460 		return;
3461 
3462 	TA(UnreservePages(count));
3463 
3464 	unreserve_pages(count);
3465 }
3466 
3467 
3468 /*!	With this call, you can reserve a number of free pages in the system.
3469 	They will only be handed out to someone who has actually reserved them.
3470 	This call returns as soon as the number of requested pages has been
3471 	reached.
3472 	The caller must not hold any cache lock or the function might deadlock.
3473 */
3474 void
3475 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3476 	int priority)
3477 {
3478 	reservation->count = count;
3479 
3480 	if (count == 0)
3481 		return;
3482 
3483 	TA(ReservePages(count));
3484 
3485 	reserve_pages(count, priority, false);
3486 }
3487 
3488 
3489 bool
3490 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3491 	int priority)
3492 {
3493 	if (count == 0) {
3494 		reservation->count = count;
3495 		return true;
3496 	}
3497 
3498 	uint32 remaining = reserve_pages(count, priority, true);
3499 	if (remaining == 0) {
3500 		TA(ReservePages(count));
3501 		reservation->count = count;
3502 		return true;
3503 	}
3504 
3505 	unreserve_pages(count - remaining);
3506 
3507 	return false;
3508 }
3509 
3510 
3511 vm_page *
3512 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3513 {
3514 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3515 	ASSERT(pageState != PAGE_STATE_FREE);
3516 	ASSERT(pageState != PAGE_STATE_CLEAR);
3517 
3518 	ASSERT(reservation->count > 0);
3519 	reservation->count--;
3520 
3521 	VMPageQueue* queue;
3522 	VMPageQueue* otherQueue;
3523 
3524 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3525 		queue = &sClearPageQueue;
3526 		otherQueue = &sFreePageQueue;
3527 	} else {
3528 		queue = &sFreePageQueue;
3529 		otherQueue = &sClearPageQueue;
3530 	}
3531 
3532 	ReadLocker locker(sFreePageQueuesLock);
3533 
3534 	vm_page* page = queue->RemoveHeadUnlocked();
3535 	if (page == NULL) {
3536 		// if the primary queue was empty, grab the page from the
3537 		// secondary queue
3538 		page = otherQueue->RemoveHeadUnlocked();
3539 
3540 		if (page == NULL) {
3541 			// Unlikely, but possible: the page we have reserved has moved
3542 			// between the queues after we checked the first queue. Grab the
3543 			// write locker to make sure this doesn't happen again.
3544 			locker.Unlock();
3545 			WriteLocker writeLocker(sFreePageQueuesLock);
3546 
3547 			page = queue->RemoveHead();
3548 			if (page == NULL)
3549 				page = otherQueue->RemoveHead();
3550 
3551 			if (page == NULL) {
3552 				panic("Had reserved page, but there is none!");
3553 				return NULL;
3554 			}
3555 
3556 			// downgrade to read lock
3557 			locker.Lock();
3558 		}
3559 	}
3560 
3561 	if (page->CacheRef() != NULL)
3562 		panic("supposed to be free page %p has cache\n", page);
3563 
3564 	DEBUG_PAGE_ACCESS_START(page);
3565 
3566 	int oldPageState = page->State();
3567 	page->SetState(pageState);
3568 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3569 	page->usage_count = 0;
3570 	page->accessed = false;
3571 	page->modified = false;
3572 
3573 	locker.Unlock();
3574 
3575 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3576 		sPageQueues[pageState].AppendUnlocked(page);
3577 
3578 	// clear the page, if we had to take it from the free queue and a clear
3579 	// page was requested
3580 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3581 		clear_page(page);
3582 
3583 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3584 	page->allocation_tracking_info.Init(
3585 		TA(AllocatePage(page->physical_page_number)));
3586 #else
3587 	TA(AllocatePage(page->physical_page_number));
3588 #endif
3589 
3590 	return page;
3591 }
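

#if 0
// Illustrative usage sketch (not compiled in): the reservation API is meant
// to be used in a reserve -> allocate -> unreserve pattern. The function name,
// the count, the priority, and the requested page state are arbitrary example
// values.
static void
example_allocate_reserved_pages()
{
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 4, VM_PRIORITY_SYSTEM);

	vm_page* pages[4];
	for (uint32 i = 0; i < 4; i++)
		pages[i] = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);

	// ... map and use the pages ...

	// Return any reserved but not yet allocated pages (none are left here).
	vm_page_unreserve_pages(&reservation);
}
#endif	// 0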
3592 
3593 
3594 static void
3595 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3596 	VMPageQueue::PageList& clearPages)
3597 {
3598 	while (vm_page* page = freePages.RemoveHead()) {
3599 		page->busy = false;
3600 		page->SetState(PAGE_STATE_FREE);
3601 		DEBUG_PAGE_ACCESS_END(page);
3602 		sFreePageQueue.PrependUnlocked(page);
3603 	}
3604 
3605 	while (vm_page* page = clearPages.RemoveHead()) {
3606 		page->busy = false;
3607 		page->SetState(PAGE_STATE_CLEAR);
3608 		DEBUG_PAGE_ACCESS_END(page);
3609 		sClearPageQueue.PrependUnlocked(page);
3610 	}
3611 
3612 	sFreePageCondition.NotifyAll();
3613 }
3614 
3615 
3616 /*!	Tries to allocate a contiguous run of \a length pages starting at
3617 	index \a start.
3618 
3619 	The caller must have write-locked the free/clear page queues. The function
3620 	will unlock regardless of whether it succeeds or fails.
3621 
3622 	If the function fails, it cleans up after itself, i.e. it will free all
3623 	pages it managed to allocate.
3624 
3625 	\param start The start index (into \c sPages) of the run.
3626 	\param length The number of pages to allocate.
3627 	\param flags Page allocation flags. Encodes the state the function shall
3628 		set the allocated pages to, whether the pages shall be marked busy
3629 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3630 		(VM_PAGE_ALLOC_CLEAR).
3631 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3632 		in locked state. Will be unlocked by the function.
3633 	\return The index of the first page that could not be allocated. \a length
3634 		is returned when the function was successful.
3635 */
3636 static page_num_t
3637 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3638 	WriteLocker& freeClearQueueLocker)
3639 {
3640 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3641 	ASSERT(pageState != PAGE_STATE_FREE);
3642 	ASSERT(pageState != PAGE_STATE_CLEAR);
3643 	ASSERT(start + length <= sNumPages);
3644 
3645 	// Pull the free/clear pages out of their respective queues. Cached pages
3646 	// are allocated later.
3647 	page_num_t cachedPages = 0;
3648 	VMPageQueue::PageList freePages;
3649 	VMPageQueue::PageList clearPages;
3650 	page_num_t i = 0;
3651 	for (; i < length; i++) {
3652 		bool pageAllocated = true;
3653 		bool noPage = false;
3654 		vm_page& page = sPages[start + i];
3655 		switch (page.State()) {
3656 			case PAGE_STATE_CLEAR:
3657 				DEBUG_PAGE_ACCESS_START(&page);
3658 				sClearPageQueue.Remove(&page);
3659 				clearPages.Add(&page);
3660 				break;
3661 			case PAGE_STATE_FREE:
3662 				DEBUG_PAGE_ACCESS_START(&page);
3663 				sFreePageQueue.Remove(&page);
3664 				freePages.Add(&page);
3665 				break;
3666 			case PAGE_STATE_CACHED:
3667 				// We allocate cached pages later.
3668 				cachedPages++;
3669 				pageAllocated = false;
3670 				break;
3671 
3672 			default:
3673 				// Probably a page was cached when our caller checked. Now it's
3674 				// gone and we have to abort.
3675 				noPage = true;
3676 				break;
3677 		}
3678 
3679 		if (noPage)
3680 			break;
3681 
3682 		if (pageAllocated) {
3683 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3684 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3685 			page.usage_count = 0;
3686 			page.accessed = false;
3687 			page.modified = false;
3688 		}
3689 	}
3690 
3691 	if (i < length) {
3692 		// failed to allocate a page -- free all that we've got
3693 		allocate_page_run_cleanup(freePages, clearPages);
3694 		return i;
3695 	}
3696 
3697 	freeClearQueueLocker.Unlock();
3698 
3699 	if (cachedPages > 0) {
3700 		// allocate the pages that weren't free but cached
3701 		page_num_t freedCachedPages = 0;
3702 		page_num_t nextIndex = start;
3703 		vm_page* freePage = freePages.Head();
3704 		vm_page* clearPage = clearPages.Head();
3705 		while (cachedPages > 0) {
3706 			// skip, if we've already got the page
3707 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3708 				freePage = freePages.GetNext(freePage);
3709 				nextIndex++;
3710 				continue;
3711 			}
3712 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3713 				clearPage = clearPages.GetNext(clearPage);
3714 				nextIndex++;
3715 				continue;
3716 			}
3717 
3718 			// free the page, if it is still cached
3719 			vm_page& page = sPages[nextIndex];
3720 			if (!free_cached_page(&page, false)) {
3721 				// TODO: if the page turns out to have been freed already,
3722 				// there would be no need to fail
3723 				break;
3724 			}
3725 
3726 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3727 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3728 			page.usage_count = 0;
3729 			page.accessed = false;
3730 			page.modified = false;
3731 
3732 			freePages.InsertBefore(freePage, &page);
3733 			freedCachedPages++;
3734 			cachedPages--;
3735 			nextIndex++;
3736 		}
3737 
3738 		// If we have freed cached pages, we need to balance things.
3739 		if (freedCachedPages > 0)
3740 			unreserve_pages(freedCachedPages);
3741 
3742 		if (nextIndex - start < length) {
3743 			// failed to allocate all cached pages -- free all that we've got
3744 			freeClearQueueLocker.Lock();
3745 			allocate_page_run_cleanup(freePages, clearPages);
3746 			freeClearQueueLocker.Unlock();
3747 
3748 			return nextIndex - start;
3749 		}
3750 	}
3751 
3752 	// clear pages, if requested
3753 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3754 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3755 				vm_page* page = it.Next();) {
3756 			clear_page(page);
3757 		}
3758 	}
3759 
3760 	// add pages to target queue
3761 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3762 		freePages.MoveFrom(&clearPages);
3763 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3764 	}
3765 
3766 	// Note: We don't unreserve the pages since we pulled them out of the
3767 	// free/clear queues without adjusting sUnreservedFreePages.
3768 
3769 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3770 	AbstractTraceEntryWithStackTrace* traceEntry
3771 		= TA(AllocatePageRun(start, length));
3772 
3773 	for (page_num_t i = start; i < start + length; i++)
3774 		sPages[i].allocation_tracking_info.Init(traceEntry);
3775 #else
3776 	TA(AllocatePageRun(start, length));
3777 #endif
3778 
3779 	return length;
3780 }
3781 
3782 
3783 /*! Allocate a physically contiguous range of pages.
3784 
3785 	\param flags Page allocation flags. Encodes the state the function shall
3786 		set the allocated pages to, whether the pages shall be marked busy
3787 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3788 		(VM_PAGE_ALLOC_CLEAR).
3789 	\param length The number of contiguous pages to allocate.
3790 	\param restrictions Restrictions to the physical addresses of the page run
3791 		to allocate, including \c low_address, the first acceptable physical
3792 		address where the page run may start, \c high_address, the last
3793 		acceptable physical address where the page run may end (i.e. it must
3794 		hold \code runStartAddress + length * B_PAGE_SIZE <= high_address \endcode),
3795 		\c alignment, the alignment of the page run start address, and
3796 		\c boundary, multiples of which the page run must not cross.
3797 		Values set to \c 0 are ignored.
3798 	\param priority The page reservation priority (as passed to
3799 		vm_page_reserve_pages()).
3800 	\return The first page of the allocated page run on success; \c NULL
3801 		when the allocation failed.
3802 */
3803 vm_page*
3804 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3805 	const physical_address_restrictions* restrictions, int priority)
3806 {
3807 	// compute start and end page index
3808 	page_num_t requestedStart
3809 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3810 			- sPhysicalPageOffset;
3811 	page_num_t start = requestedStart;
3812 	page_num_t end;
3813 	if (restrictions->high_address > 0) {
3814 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3815 				sPhysicalPageOffset)
3816 			- sPhysicalPageOffset;
3817 		end = std::min(end, sNumPages);
3818 	} else
3819 		end = sNumPages;
3820 
3821 	// compute alignment mask
3822 	page_num_t alignmentMask
3823 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3824 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3825 		// alignment must be a power of 2
3826 
3827 	// compute the boundary mask
3828 	uint32 boundaryMask = 0;
3829 	if (restrictions->boundary != 0) {
3830 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3831 		// boundary must be a power of two and not less than alignment and
3832 		// length
3833 		ASSERT(((boundary - 1) & boundary) == 0);
3834 		ASSERT(boundary >= alignmentMask + 1);
3835 		ASSERT(boundary >= length);
3836 
3837 		boundaryMask = -boundary;
3838 	}
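
	// For illustration (hypothetical values): with restrictions->alignment
	// == 4 * B_PAGE_SIZE and restrictions->boundary == 16 * B_PAGE_SIZE,
	// alignmentMask == 0x3 and boundaryMask == -16, i.e. ~0xf. A candidate
	// run of length 12 starting at page 0x1008 would cross a 16-page
	// boundary, since (0x1008 ^ (0x1008 + 12 - 1)) & ~0xf == 0x10 != 0; the
	// loop below therefore advances offsetStart to the next boundary,
	// (0x1008 + 12 - 1) & ~0xf == 0x1010.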
3839 
3840 	vm_page_reservation reservation;
3841 	vm_page_reserve_pages(&reservation, length, priority);
3842 
3843 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3844 
3845 	// First we try to get a run with free pages only. If that fails, we also
3846 	// consider cached pages. If there are only a few free pages and many cached
3847 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3848 	// the first iteration in this case.
3849 	int32 freePages = sUnreservedFreePages;
3850 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3851 
3852 	for (;;) {
3853 		if (alignmentMask != 0 || boundaryMask != 0) {
3854 			page_num_t offsetStart = start + sPhysicalPageOffset;
3855 
3856 			// enforce alignment
3857 			if ((offsetStart & alignmentMask) != 0)
3858 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3859 
3860 			// enforce boundary
3861 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3862 				+ length - 1)) & boundaryMask) != 0) {
3863 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3864 			}
3865 
3866 			start = offsetStart - sPhysicalPageOffset;
3867 		}
3868 
3869 		if (start + length > end) {
3870 			if (useCached == 0) {
3871 				// The first iteration with free pages only was unsuccessful.
3872 				// Try again also considering cached pages.
3873 				useCached = 1;
3874 				start = requestedStart;
3875 				continue;
3876 			}
3877 
3878 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3879 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3880 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3881 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3882 				end, restrictions->alignment, restrictions->boundary);
3883 
3884 			freeClearQueueLocker.Unlock();
3885 			vm_page_unreserve_pages(&reservation);
3886 			return NULL;
3887 		}
3888 
3889 		bool foundRun = true;
3890 		page_num_t i;
3891 		for (i = 0; i < length; i++) {
3892 			uint32 pageState = sPages[start + i].State();
3893 			if (pageState != PAGE_STATE_FREE
3894 				&& pageState != PAGE_STATE_CLEAR
3895 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3896 				foundRun = false;
3897 				break;
3898 			}
3899 		}
3900 
3901 		if (foundRun) {
3902 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3903 			if (i == length)
3904 				return &sPages[start];
3905 
3906 			// apparently a cached page couldn't be allocated -- skip it and
3907 			// continue
3908 			freeClearQueueLocker.Lock();
3909 		}
3910 
3911 		start += i + 1;
3912 	}
3913 }
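

// Illustrative usage sketch (not built, hence the #if 0; the function name and
// the concrete numbers are hypothetical): a caller could request a physically
// contiguous, cleared, wired run of 16 pages that lies below 16 MB and starts
// on a 64 KB boundary like this.
#if 0
static vm_page*
example_allocate_contiguous_run()
{
	physical_address_restrictions restrictions = {};
	restrictions.low_address = 0;
	restrictions.high_address = 16 * 1024 * 1024;
		// the run must lie entirely below 16 MB
	restrictions.alignment = 64 * 1024;
		// the run must start on a 64 KB boundary
	restrictions.boundary = 0;
		// no boundary restriction

	// On success the first page of the run is returned, on failure NULL.
	return vm_page_allocate_page_run(
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR, 16, &restrictions,
		VM_PRIORITY_SYSTEM);
}
#endif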
3914 
3915 
3916 vm_page *
3917 vm_page_at_index(int32 index)
3918 {
3919 	return &sPages[index];
3920 }
3921 
3922 
3923 vm_page *
3924 vm_lookup_page(page_num_t pageNumber)
3925 {
3926 	if (pageNumber < sPhysicalPageOffset)
3927 		return NULL;
3928 
3929 	pageNumber -= sPhysicalPageOffset;
3930 	if (pageNumber >= sNumPages)
3931 		return NULL;
3932 
3933 	return &sPages[pageNumber];
3934 }
3935 
3936 
3937 bool
3938 vm_page_is_dummy(struct vm_page *page)
3939 {
3940 	return page < sPages || page >= sPages + sNumPages;
3941 }
3942 
3943 
3944 /*!	Free the page that belonged to a certain cache.
3945 	You can use vm_page_set_state() manually if you prefer, but only
3946 	if the page's state does not equal PAGE_STATE_MODIFIED.
3947 
3948 	\param cache The cache the page was previously owned by or NULL. The page
3949 		must have been removed from its cache before calling this method in
3950 		either case.
3951 	\param page The page to free.
3952 	\param reservation If not NULL, the page count of the reservation will be
3953 		incremented, thus allowing another page to be allocated in place of the
3954 		freed one at a later time.
3955 */
3956 void
3957 vm_page_free_etc(VMCache* cache, vm_page* page,
3958 	vm_page_reservation* reservation)
3959 {
3960 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3961 		&& page->State() != PAGE_STATE_CLEAR);
3962 
3963 	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
3964 		atomic_add(&sModifiedTemporaryPages, -1);
3965 
3966 	free_page(page, false);
3967 	if (reservation == NULL)
3968 		unreserve_pages(1);
3969 }
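

// Illustrative usage sketch (not built; the function name and parameters are
// hypothetical): the calling pattern described above -- lock the cache, remove
// the page from it, then free the page, optionally crediting a reservation so
// a replacement page can be allocated later.
#if 0
static void
example_discard_cache_page(VMCache* cache, off_t offset,
	vm_page_reservation* reservation)
{
	cache->Lock();

	vm_page* page = cache->LookupPage(offset);
	if (page != NULL && !page->busy) {
		DEBUG_PAGE_ACCESS_START(page);
		cache->RemovePage(page);
			// as documented above, the page no longer belongs to the cache
			// (and is assumed to be unmapped) when it is freed
		vm_page_free_etc(cache, page, reservation);
	}

	cache->Unlock();
}
#endif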
3970 
3971 
3972 void
3973 vm_page_set_state(vm_page *page, int pageState)
3974 {
3975 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3976 		&& page->State() != PAGE_STATE_CLEAR);
3977 
3978 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
3979 		free_page(page, pageState == PAGE_STATE_CLEAR);
3980 		unreserve_pages(1);
3981 	} else
3982 		set_page_state(page, pageState);
3983 }
3984 
3985 
3986 /*!	Moves a page to either the tail or the head of its current queue,
3987 	depending on \a tail.
3988 	The page must have a cache and the cache must be locked!
3989 */
3990 void
3991 vm_page_requeue(struct vm_page *page, bool tail)
3992 {
3993 	PAGE_ASSERT(page, page->Cache() != NULL);
3994 	page->Cache()->AssertLocked();
3995 	// DEBUG_PAGE_ACCESS_CHECK(page);
3996 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
3997 		// when it requeues busy pages. The reason is that vm_soft_fault()
3998 		// (respectively fault_get_page()) and the file cache keep newly
3999 		// allocated pages accessed while they are reading them from disk. It
4000 		// would probably be better to change that code and reenable this
4001 		// check.
4002 
4003 	VMPageQueue *queue = NULL;
4004 
4005 	switch (page->State()) {
4006 		case PAGE_STATE_ACTIVE:
4007 			queue = &sActivePageQueue;
4008 			break;
4009 		case PAGE_STATE_INACTIVE:
4010 			queue = &sInactivePageQueue;
4011 			break;
4012 		case PAGE_STATE_MODIFIED:
4013 			queue = &sModifiedPageQueue;
4014 			break;
4015 		case PAGE_STATE_CACHED:
4016 			queue = &sCachedPageQueue;
4017 			break;
4018 		case PAGE_STATE_FREE:
4019 		case PAGE_STATE_CLEAR:
4020 			panic("vm_page_requeue() called for free/clear page %p", page);
4021 			return;
4022 		case PAGE_STATE_WIRED:
4023 		case PAGE_STATE_UNUSED:
4024 			return;
4025 		default:
4026 			panic("vm_page_requeue(): vm_page %p in invalid state %d\n",
4027 				page, page->State());
4028 			break;
4029 	}
4030 
4031 	queue->RequeueUnlocked(page, tail);
4032 }
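

// Illustrative sketch (not built; "page" is a hypothetical parameter): with the
// page's cache locked, a caller can push a recently used page to the tail of
// its queue so that the page daemon gets to it last.
#if 0
static void
example_touch_page(vm_page* page)
{
	// the caller must hold the page's cache lock, as asserted by
	// vm_page_requeue() itself
	vm_page_requeue(page, true);
		// true: requeue at the tail; false: requeue at the head
}
#endif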
4033 
4034 
4035 page_num_t
4036 vm_page_num_pages(void)
4037 {
4038 	return sNumPages - sNonExistingPages;
4039 }
4040 
4041 
4042 /*! There is a subtle distinction between the page counts returned by
4043 	this function and vm_page_num_free_pages():
4044 	The latter returns the number of pages that are completely uncommitted,
4045 	whereas this one returns the number of pages that are available for use,
4046 	including those that can still be reclaimed (IOW it factors in things
4047 	like cached pages as available).
4048 */
4049 page_num_t
4050 vm_page_num_available_pages(void)
4051 {
4052 	return vm_available_memory() / B_PAGE_SIZE;
4053 }
4054 
4055 
4056 page_num_t
4057 vm_page_num_free_pages(void)
4058 {
4059 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4060 	return count > 0 ? count : 0;
4061 }
4062 
4063 
4064 page_num_t
4065 vm_page_num_unused_pages(void)
4066 {
4067 	int32 count = sUnreservedFreePages;
4068 	return count > 0 ? count : 0;
4069 }
4070 
4071 
4072 void
4073 vm_page_get_stats(system_info *info)
4074 {
4075 	// Note: there's no locking protecting any of the queues or counters here,
4076 	// so we run the risk of getting bogus values when evaluating them
4077 	// throughout this function. As these stats are for informational purposes
4078 	// only, it is not really worth introducing such locking. Therefore we just
4079 	// ensure that we don't under- or overflow any of the values.
4080 
4081 	// The pages used for the block cache buffers. Those should not be counted
4082 	// as used but as cached pages.
4083 	// TODO: We should subtract the blocks that are in use ATM, since those
4084 	// can't really be freed in a low memory situation.
4085 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4086 	info->block_cache_pages = blockCachePages;
4087 
4088 	// Non-temporary modified pages are special as they represent pages that
4089 	// can be written back, so they could be freed if necessary, which for our
4090 	// purposes basically makes them cached pages with a higher overhead. The
4091 	// modified queue count is therefore split into temporary and non-temporary
4092 	// counts that are then added to the corresponding number.
4093 	page_num_t modifiedNonTemporaryPages
4094 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4095 
4096 	info->max_pages = vm_page_num_pages();
4097 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4098 		+ blockCachePages;
4099 
4100 	// max_pages is composed of:
4101 	//	active + inactive + unused + wired + modified + cached + free + clear
4102 	// So taking out the cached (including modified non-temporary), free and
4103 	// clear ones leaves us with all used pages.
4104 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4105 		+ sClearPageQueue.Count();
4106 	info->used_pages = subtractPages > info->max_pages
4107 		? 0 : info->max_pages - subtractPages;
4108 
4109 	if (info->used_pages + info->cached_pages > info->max_pages) {
4110 		// Something was shuffled around while we were summing up the counts.
4111 		// Make the values sane, preferring the worse case of more used pages.
4112 		info->cached_pages = info->max_pages - info->used_pages;
4113 	}
4114 
4115 	info->page_faults = vm_num_page_faults();
4116 	info->ignored_pages = sIgnoredPages;
4117 
4118 	// TODO: We don't consider pages used for page directories/tables yet.
4119 }
4120 
4121 
4122 /*!	Returns the greatest address within the last page of accessible physical
4123 	memory.
4124 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4125 	means the that the last page ends at exactly 4 GB.
4126 	means that the last page ends at exactly 4 GB.
4127 phys_addr_t
4128 vm_page_max_address()
4129 {
4130 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4131 }
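
// For illustration (hypothetical values): with sPhysicalPageOffset == 0,
// sNumPages == 0x100000 and a 4 KB B_PAGE_SIZE, the result is
// 0x100000 * 4096 - 1 == 0xffffffff, i.e. accessible physical memory ends
// exactly at the 4 GB mark mentioned above.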
4132 
4133 
4134 RANGE_MARKER_FUNCTION_END(vm_page)
4135