xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision d17092ceb18bf47a96dbaf8a1acf10e6e3070704)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 16
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
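	// Illustrative sketch of how these two constants interact: the page writer
	// (further below in this file) presumably scales its I/O priority linearly
	// with the number of pages waiting to be written, saturating at
	// MAX_PAGE_WRITER_IO_PRIORITY once the threshold is reached, roughly:
	//
	//   uint32 numPages = sModifiedPageQueue.Count();
	//   int32 ioPriority;
	//   if (numPages >= MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD)
	//       ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
	//   else {
	//       ioPriority = (int32)((uint64)MAX_PAGE_WRITER_IO_PRIORITY * numPages
	//           / MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD);
	//   }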
74 
75 
76 // The page reserve that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
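// Example (mirroring what the page scrubber below does): a reservation made at
// VM_PRIORITY_USER must leave at least kPageReserveForPriority[VM_PRIORITY_USER]
// == VM_PAGE_RESERVE_USER free pages untouched:
//
//   uint32 reserved = reserve_some_pages(count,
//       kPageReserveForPriority[VM_PRIORITY_USER]);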
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // The increase an accessed page's vm_page::usage_count receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // The decrease an unaccessed page's vm_page::usage_count receives in a scan.
101 static const int32 kPageUsageDecline = 1;
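// A rough sketch of the aging arithmetic these constants are meant for when a
// queue is scanned (the precise bookkeeping lives in the page daemon further
// below; "pageWasAccessed" stands in for the accessed bit gathered from the
// page's mappings):
//
//   if (pageWasAccessed) {
//       page->usage_count += kPageUsageAdvance;
//       if (page->usage_count > kPageUsageMax)
//           page->usage_count = kPageUsageMax;
//   } else
//       page->usage_count -= kPageUsageDecline;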
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock; otherwise a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
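// Typical usage: a read lock is sufficient while touching only one of the two
// queues, since each queue additionally has its own spinlock, e.g.:
//
//   ReadLocker locker(sFreePageQueuesLock);
//   sFreePageQueue.PrependUnlocked(page);
//
// A write lock is needed when both queues have to be manipulated consistently,
// as mark_page_range_in_use() further below does:
//
//   WriteLocker locker(sFreePageQueuesLock);
//   // pages may now be removed from both the free and the clear queue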
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRIu32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRIu32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRIu32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static int
764 find_page(int argc, char **argv)
765 {
766 	struct vm_page *page;
767 	addr_t address;
768 	int32 index = 1;
769 	int i;
770 
771 	struct {
772 		const char*	name;
773 		VMPageQueue*	queue;
774 	} pageQueueInfos[] = {
775 		{ "free",		&sFreePageQueue },
776 		{ "clear",		&sClearPageQueue },
777 		{ "modified",	&sModifiedPageQueue },
778 		{ "active",		&sActivePageQueue },
779 		{ "inactive",	&sInactivePageQueue },
780 		{ "cached",		&sCachedPageQueue },
781 		{ NULL, NULL }
782 	};
783 
784 	if (argc < 2
785 		|| strlen(argv[index]) <= 2
786 		|| argv[index][0] != '0'
787 		|| argv[index][1] != 'x') {
788 		kprintf("usage: find_page <address>\n");
789 		return 0;
790 	}
791 
792 	address = strtoul(argv[index], NULL, 0);
793 	page = (vm_page*)address;
794 
795 	for (i = 0; pageQueueInfos[i].name; i++) {
796 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
797 		while (vm_page* p = it.Next()) {
798 			if (p == page) {
799 				kprintf("found page %p in queue %p (%s)\n", page,
800 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
801 				return 0;
802 			}
803 		}
804 	}
805 
806 	kprintf("page %p isn't in any queue\n", page);
807 
808 	return 0;
809 }
810 
811 
812 const char *
813 page_state_to_string(int state)
814 {
815 	switch(state) {
816 		case PAGE_STATE_ACTIVE:
817 			return "active";
818 		case PAGE_STATE_INACTIVE:
819 			return "inactive";
820 		case PAGE_STATE_MODIFIED:
821 			return "modified";
822 		case PAGE_STATE_CACHED:
823 			return "cached";
824 		case PAGE_STATE_FREE:
825 			return "free";
826 		case PAGE_STATE_CLEAR:
827 			return "clear";
828 		case PAGE_STATE_WIRED:
829 			return "wired";
830 		case PAGE_STATE_UNUSED:
831 			return "unused";
832 		default:
833 			return "unknown";
834 	}
835 }
836 
837 
838 static int
839 dump_page(int argc, char **argv)
840 {
841 	bool addressIsPointer = true;
842 	bool physical = false;
843 	bool searchMappings = false;
844 	int32 index = 1;
845 
846 	while (index < argc) {
847 		if (argv[index][0] != '-')
848 			break;
849 
850 		if (!strcmp(argv[index], "-p")) {
851 			addressIsPointer = false;
852 			physical = true;
853 		} else if (!strcmp(argv[index], "-v")) {
854 			addressIsPointer = false;
855 		} else if (!strcmp(argv[index], "-m")) {
856 			searchMappings = true;
857 		} else {
858 			print_debugger_command_usage(argv[0]);
859 			return 0;
860 		}
861 
862 		index++;
863 	}
864 
865 	if (index + 1 != argc) {
866 		print_debugger_command_usage(argv[0]);
867 		return 0;
868 	}
869 
870 	uint64 value;
871 	if (!evaluate_debug_expression(argv[index], &value, false))
872 		return 0;
873 
874 	uint64 pageAddress = value;
875 	struct vm_page* page;
876 
877 	if (addressIsPointer) {
878 		page = (struct vm_page *)(addr_t)pageAddress;
879 	} else {
880 		if (!physical) {
881 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
882 
883 			if (debug_get_debugged_thread()->team->address_space != NULL)
884 				addressSpace = debug_get_debugged_thread()->team->address_space;
885 
886 			uint32 flags = 0;
887 			phys_addr_t physicalAddress;
888 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
889 					&physicalAddress, &flags) != B_OK
890 				|| (flags & PAGE_PRESENT) == 0) {
891 				kprintf("Virtual address not mapped to a physical page in this "
892 					"address space.\n");
893 				return 0;
894 			}
895 			pageAddress = physicalAddress;
896 		}
897 
898 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
899 	}
900 
901 	kprintf("PAGE: %p\n", page);
902 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
903 		page->queue_link.previous);
904 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n",
905 		page->physical_page_number);
906 	kprintf("cache:           %p\n", page->Cache());
907 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
908 	kprintf("cache_next:      %p\n", page->cache_next);
909 	kprintf("state:           %s\n", page_state_to_string(page->State()));
910 	kprintf("wired_count:     %d\n", page->WiredCount());
911 	kprintf("usage_count:     %d\n", page->usage_count);
912 	kprintf("busy:            %d\n", page->busy);
913 	kprintf("busy_writing:    %d\n", page->busy_writing);
914 	kprintf("accessed:        %d\n", page->accessed);
915 	kprintf("modified:        %d\n", page->modified);
916 	#if DEBUG_PAGE_QUEUE
917 		kprintf("queue:           %p\n", page->queue);
918 	#endif
919 	#if DEBUG_PAGE_ACCESS
920 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
921 	#endif
922 	kprintf("area mappings:\n");
923 
924 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
925 	vm_page_mapping *mapping;
926 	while ((mapping = iterator.Next()) != NULL) {
927 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
928 		mapping = mapping->page_link.next;
929 	}
930 
931 	if (searchMappings) {
932 		kprintf("all mappings:\n");
933 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
934 		while (addressSpace != NULL) {
935 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
936 			for (addr_t address = addressSpace->Base(); pageCount != 0;
937 					address += B_PAGE_SIZE, pageCount--) {
938 				phys_addr_t physicalAddress;
939 				uint32 flags = 0;
940 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
941 						&physicalAddress, &flags) == B_OK
942 					&& (flags & PAGE_PRESENT) != 0
943 					&& physicalAddress / B_PAGE_SIZE
944 						== page->physical_page_number) {
945 					VMArea* area = addressSpace->LookupArea(address);
946 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
947 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
948 						area != NULL ? area->id : -1, address,
949 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
950 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
951 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
952 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
953 				}
954 			}
955 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
956 		}
957 	}
958 
959 	set_debug_variable("_cache", (addr_t)page->Cache());
960 	#if DEBUG_PAGE_ACCESS
961 		set_debug_variable("_accessor", page->accessing_thread);
962 	#endif
963 
964 	return 0;
965 }
966 
967 
968 static int
969 dump_page_queue(int argc, char **argv)
970 {
971 	struct VMPageQueue *queue;
972 
973 	if (argc < 2) {
974 		kprintf("usage: page_queue <address/name> [list]\n");
975 		return 0;
976 	}
977 
978 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
979 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
980 	else if (!strcmp(argv[1], "free"))
981 		queue = &sFreePageQueue;
982 	else if (!strcmp(argv[1], "clear"))
983 		queue = &sClearPageQueue;
984 	else if (!strcmp(argv[1], "modified"))
985 		queue = &sModifiedPageQueue;
986 	else if (!strcmp(argv[1], "active"))
987 		queue = &sActivePageQueue;
988 	else if (!strcmp(argv[1], "inactive"))
989 		queue = &sInactivePageQueue;
990 	else if (!strcmp(argv[1], "cached"))
991 		queue = &sCachedPageQueue;
992 	else {
993 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
994 		return 0;
995 	}
996 
997 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
998 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
999 		queue->Count());
1000 
1001 	if (argc == 3) {
1002 		struct vm_page *page = queue->Head();
1003 
1004 		kprintf("page        cache       type       state  wired  usage\n");
1005 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1006 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1007 				vm_cache_type_to_string(page->Cache()->type),
1008 				page_state_to_string(page->State()),
1009 				page->WiredCount(), page->usage_count);
1010 		}
1011 	}
1012 	return 0;
1013 }
1014 
1015 
1016 static int
1017 dump_page_stats(int argc, char **argv)
1018 {
1019 	page_num_t swappableModified = 0;
1020 	page_num_t swappableModifiedInactive = 0;
1021 
1022 	size_t counter[8];
1023 	size_t busyCounter[8];
1024 	memset(counter, 0, sizeof(counter));
1025 	memset(busyCounter, 0, sizeof(busyCounter));
1026 
1027 	struct page_run {
1028 		page_num_t	start;
1029 		page_num_t	end;
1030 
1031 		page_num_t Length() const	{ return end - start; }
1032 	};
1033 
1034 	page_run currentFreeRun = { 0, 0 };
1035 	page_run currentCachedRun = { 0, 0 };
1036 	page_run longestFreeRun = { 0, 0 };
1037 	page_run longestCachedRun = { 0, 0 };
1038 
1039 	for (page_num_t i = 0; i < sNumPages; i++) {
1040 		if (sPages[i].State() > 7) {
1041 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1042 				&sPages[i]);
1043 		}
1044 
1045 		uint32 pageState = sPages[i].State();
1046 
1047 		counter[pageState]++;
1048 		if (sPages[i].busy)
1049 			busyCounter[pageState]++;
1050 
1051 		if (pageState == PAGE_STATE_MODIFIED
1052 			&& sPages[i].Cache() != NULL
1053 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1054 			swappableModified++;
1055 			if (sPages[i].usage_count == 0)
1056 				swappableModifiedInactive++;
1057 		}
1058 
1059 		// track runs of free and cached pages
1060 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1061 			currentFreeRun.end = i + 1;
1062 			currentCachedRun.end = i + 1;
1063 		} else {
1064 			if (currentFreeRun.Length() > longestFreeRun.Length())
1065 				longestFreeRun = currentFreeRun;
1066 			currentFreeRun.start = currentFreeRun.end = i + 1;
1067 
1068 			if (pageState == PAGE_STATE_CACHED) {
1069 				currentCachedRun.end = i + 1;
1070 			} else {
1071 				if (currentCachedRun.Length() > longestCachedRun.Length())
1072 					longestCachedRun = currentCachedRun;
1073 				currentCachedRun.start = currentCachedRun.end = i + 1;
1074 			}
1075 		}
1076 	}
1077 
1078 	kprintf("page stats:\n");
1079 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1080 
1081 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1082 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1083 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1084 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1085 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1086 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1087 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1088 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1089 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1090 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1091 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1092 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1093 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1094 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1095 
1096 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1097 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1098 		sUnsatisfiedPageReservations);
1099 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1100 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1101 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1102 		sPages[longestFreeRun.start].physical_page_number);
1103 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1104 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1105 		sPages[longestCachedRun.start].physical_page_number);
1106 
1107 	kprintf("waiting threads:\n");
1108 	for (PageReservationWaiterList::Iterator it
1109 			= sPageReservationWaiters.GetIterator();
1110 		PageReservationWaiter* waiter = it.Next();) {
1111 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1112 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1113 			waiter->missing, waiter->dontTouch);
1114 	}
1115 
1116 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1117 		sFreePageQueue.Count());
1118 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1119 		sClearPageQueue.Count());
1120 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1121 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1122 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1123 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1124 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1125 		&sActivePageQueue, sActivePageQueue.Count());
1126 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1127 		&sInactivePageQueue, sInactivePageQueue.Count());
1128 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1129 		&sCachedPageQueue, sCachedPageQueue.Count());
1130 	return 0;
1131 }
1132 
1133 
1134 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1135 
1136 static caller_info*
1137 get_caller_info(addr_t caller)
1138 {
1139 	// find the caller info
1140 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1141 		if (caller == sCallerInfoTable[i].caller)
1142 			return &sCallerInfoTable[i];
1143 	}
1144 
1145 	// not found, add a new entry, if there are free slots
1146 	if (sCallerInfoCount >= kCallerInfoTableSize)
1147 		return NULL;
1148 
1149 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1150 	info->caller = caller;
1151 	info->count = 0;
1152 
1153 	return info;
1154 }
1155 
1156 
1157 static int
1158 caller_info_compare_count(const void* _a, const void* _b)
1159 {
1160 	const caller_info* a = (const caller_info*)_a;
1161 	const caller_info* b = (const caller_info*)_b;
1162 	return (int)(b->count - a->count);
1163 }
1164 
1165 
1166 static int
1167 dump_page_allocations_per_caller(int argc, char** argv)
1168 {
1169 	bool resetAllocationInfos = false;
1170 	bool printDetails = false;
1171 	addr_t caller = 0;
1172 
1173 	for (int32 i = 1; i < argc; i++) {
1174 		if (strcmp(argv[i], "-d") == 0) {
1175 			uint64 callerAddress;
1176 			if (++i >= argc
1177 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1178 				print_debugger_command_usage(argv[0]);
1179 				return 0;
1180 			}
1181 
1182 			caller = callerAddress;
1183 			printDetails = true;
1184 		} else if (strcmp(argv[i], "-r") == 0) {
1185 			resetAllocationInfos = true;
1186 		} else {
1187 			print_debugger_command_usage(argv[0]);
1188 			return 0;
1189 		}
1190 	}
1191 
1192 	sCallerInfoCount = 0;
1193 
1194 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1195 	AllocationDetailPrinterCallback detailsCallback(caller);
1196 	AllocationTrackingCallback& callback = printDetails
1197 		? (AllocationTrackingCallback&)detailsCallback
1198 		: (AllocationTrackingCallback&)collectorCallback;
1199 
1200 	for (page_num_t i = 0; i < sNumPages; i++)
1201 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1202 
1203 	if (printDetails)
1204 		return 0;
1205 
1206 	// sort the array
1207 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1208 		&caller_info_compare_count);
1209 
1210 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1211 
1212 	size_t totalAllocationCount = 0;
1213 
1214 	kprintf("     count      caller\n");
1215 	kprintf("----------------------------------\n");
1216 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1217 		caller_info& info = sCallerInfoTable[i];
1218 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1219 
1220 		const char* symbol;
1221 		const char* imageName;
1222 		bool exactMatch;
1223 		addr_t baseAddress;
1224 
1225 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1226 				&imageName, &exactMatch) == B_OK) {
1227 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1228 				info.caller - baseAddress, imageName,
1229 				exactMatch ? "" : " (nearest)");
1230 		} else
1231 			kprintf("\n");
1232 
1233 		totalAllocationCount += info.count;
1234 	}
1235 
1236 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1237 		totalAllocationCount);
1238 
1239 	return 0;
1240 }
1241 
1242 
1243 static int
1244 dump_page_allocation_infos(int argc, char** argv)
1245 {
1246 	page_num_t pageFilter = 0;
1247 	team_id teamFilter = -1;
1248 	thread_id threadFilter = -1;
1249 	bool printStackTraces = false;
1250 
1251 	for (int32 i = 1; i < argc; i++) {
1252 		if (strcmp(argv[i], "--stacktrace") == 0)
1253 			printStackTraces = true;
1254 		else if (strcmp(argv[i], "-p") == 0) {
1255 			uint64 pageNumber;
1256 			if (++i >= argc
1257 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1258 				print_debugger_command_usage(argv[0]);
1259 				return 0;
1260 			}
1261 
1262 			pageFilter = pageNumber;
1263 		} else if (strcmp(argv[i], "--team") == 0) {
1264 			uint64 team;
1265 			if (++i >= argc
1266 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1267 				print_debugger_command_usage(argv[0]);
1268 				return 0;
1269 			}
1270 
1271 			teamFilter = team;
1272 		} else if (strcmp(argv[i], "--thread") == 0) {
1273 			uint64 thread;
1274 			if (++i >= argc
1275 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1276 				print_debugger_command_usage(argv[0]);
1277 				return 0;
1278 			}
1279 
1280 			threadFilter = thread;
1281 		} else {
1282 			print_debugger_command_usage(argv[0]);
1283 			return 0;
1284 		}
1285 	}
1286 
1287 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1288 		teamFilter, threadFilter);
1289 
1290 	for (page_num_t i = 0; i < sNumPages; i++)
1291 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1292 
1293 	return 0;
1294 }
1295 
1296 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1297 
1298 
1299 #ifdef TRACK_PAGE_USAGE_STATS
1300 
1301 static void
1302 track_page_usage(vm_page* page)
1303 {
1304 	if (page->WiredCount() == 0) {
1305 		sNextPageUsage[(int32)page->usage_count + 128]++;
1306 		sNextPageUsagePageCount++;
1307 	}
1308 }
1309 
1310 
1311 static void
1312 update_page_usage_stats()
1313 {
1314 	std::swap(sPageUsage, sNextPageUsage);
1315 	sPageUsagePageCount = sNextPageUsagePageCount;
1316 
1317 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1318 	sNextPageUsagePageCount = 0;
1319 
1320 	// compute average
1321 	if (sPageUsagePageCount > 0) {
1322 		int64 sum = 0;
1323 		for (int32 i = 0; i < 256; i++)
1324 			sum += (int64)sPageUsage[i] * (i - 128);
1325 
1326 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1327 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1328 	}
1329 }
1330 
1331 
1332 static int
1333 dump_page_usage_stats(int argc, char** argv)
1334 {
1335 	kprintf("distribution of page usage counts (%lu pages):",
1336 		sPageUsagePageCount);
1337 
1338 	int64 sum = 0;
1339 	for (int32 i = 0; i < 256; i++) {
1340 		if (i % 8 == 0)
1341 			kprintf("\n%4ld:", i - 128);
1342 
1343 		int64 count = sPageUsage[i];
1344 		sum += count * (i - 128);
1345 
1346 		kprintf("  %9llu", count);
1347 	}
1348 
1349 	kprintf("\n\n");
1350 
1351 	kprintf("average usage count: %f\n",
1352 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1353 
1354 	return 0;
1355 }
1356 
1357 #endif	// TRACK_PAGE_USAGE_STATS
1358 
1359 
1360 // #pragma mark - vm_page
1361 
1362 
1363 inline void
1364 vm_page::InitState(uint8 newState)
1365 {
1366 	state = newState;
1367 }
1368 
1369 
1370 inline void
1371 vm_page::SetState(uint8 newState)
1372 {
1373 	TPS(SetPageState(this, newState));
1374 
1375 	state = newState;
1376 }
1377 
1378 
1379 // #pragma mark -
1380 
1381 
1382 static void
1383 get_page_stats(page_stats& _pageStats)
1384 {
1385 	_pageStats.totalFreePages = sUnreservedFreePages;
1386 	_pageStats.cachedPages = sCachedPageQueue.Count();
1387 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1388 	// TODO: We don't get an actual snapshot here!
1389 }
1390 
1391 
1392 static bool
1393 do_active_paging(const page_stats& pageStats)
1394 {
1395 	return pageStats.totalFreePages + pageStats.cachedPages
1396 		< pageStats.unsatisfiedReservations
1397 			+ (int32)sFreeOrCachedPagesTarget;
1398 }
1399 
1400 
1401 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1402 	\a count. Doesn't touch the last \a dontTouch pages of
1403 	\c sUnreservedFreePages, though.
1404 	\return The number of actually reserved pages.
1405 */
1406 static uint32
1407 reserve_some_pages(uint32 count, uint32 dontTouch)
1408 {
1409 	while (true) {
1410 		int32 freePages = atomic_get(&sUnreservedFreePages);
1411 		if (freePages <= (int32)dontTouch)
1412 			return 0;
1413 
1414 		int32 toReserve = std::min(count, freePages - dontTouch);
1415 		if (atomic_test_and_set(&sUnreservedFreePages,
1416 					freePages - toReserve, freePages)
1417 				== freePages) {
1418 			return toReserve;
1419 		}
1420 
1421 		// the count changed in the meantime -- retry
1422 	}
1423 }
1424 
1425 
1426 static void
1427 wake_up_page_reservation_waiters()
1428 {
1429 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1430 
1431 	// TODO: If this is a low priority thread, we might want to disable
1432 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1433 	// high priority threads will be kept waiting while a medium priority thread
1434 	// prevents us from running.
1435 
1436 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1437 		int32 reserved = reserve_some_pages(waiter->missing,
1438 			waiter->dontTouch);
1439 		if (reserved == 0)
1440 			return;
1441 
1442 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1443 		waiter->missing -= reserved;
1444 
1445 		if (waiter->missing > 0)
1446 			return;
1447 
1448 		sPageReservationWaiters.Remove(waiter);
1449 
1450 		thread_unblock(waiter->thread, B_OK);
1451 	}
1452 }
1453 
1454 
1455 static inline void
1456 unreserve_pages(uint32 count)
1457 {
1458 	atomic_add(&sUnreservedFreePages, count);
1459 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1460 		wake_up_page_reservation_waiters();
1461 }
1462 
1463 
1464 static void
1465 free_page(vm_page* page, bool clear)
1466 {
1467 	DEBUG_PAGE_ACCESS_CHECK(page);
1468 
1469 	PAGE_ASSERT(page, !page->IsMapped());
1470 
1471 	VMPageQueue* fromQueue;
1472 
1473 	switch (page->State()) {
1474 		case PAGE_STATE_ACTIVE:
1475 			fromQueue = &sActivePageQueue;
1476 			break;
1477 		case PAGE_STATE_INACTIVE:
1478 			fromQueue = &sInactivePageQueue;
1479 			break;
1480 		case PAGE_STATE_MODIFIED:
1481 			fromQueue = &sModifiedPageQueue;
1482 			break;
1483 		case PAGE_STATE_CACHED:
1484 			fromQueue = &sCachedPageQueue;
1485 			break;
1486 		case PAGE_STATE_FREE:
1487 		case PAGE_STATE_CLEAR:
1488 			panic("free_page(): page %p already free", page);
1489 			return;
1490 		case PAGE_STATE_WIRED:
1491 		case PAGE_STATE_UNUSED:
1492 			fromQueue = NULL;
1493 			break;
1494 		default:
1495 			panic("free_page(): page %p in invalid state %d",
1496 				page, page->State());
1497 			return;
1498 	}
1499 
1500 	if (page->CacheRef() != NULL)
1501 		panic("to be freed page %p has cache", page);
1502 	if (page->IsMapped())
1503 		panic("to be freed page %p has mappings", page);
1504 
1505 	if (fromQueue != NULL)
1506 		fromQueue->RemoveUnlocked(page);
1507 
1508 	TA(FreePage(page->physical_page_number));
1509 
1510 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1511 	page->allocation_tracking_info.Clear();
1512 #endif
1513 
1514 	ReadLocker locker(sFreePageQueuesLock);
1515 
1516 	DEBUG_PAGE_ACCESS_END(page);
1517 
1518 	if (clear) {
1519 		page->SetState(PAGE_STATE_CLEAR);
1520 		sClearPageQueue.PrependUnlocked(page);
1521 	} else {
1522 		page->SetState(PAGE_STATE_FREE);
1523 		sFreePageQueue.PrependUnlocked(page);
1524 	}
1525 
1526 	locker.Unlock();
1527 }
1528 
1529 
1530 /*!	The caller must make sure that no-one else tries to change the page's state
1531 	while the function is running. If the page has a cache, this can be done by
1532 	locking the cache.
1533 */
1534 static void
1535 set_page_state(vm_page *page, int pageState)
1536 {
1537 	DEBUG_PAGE_ACCESS_CHECK(page);
1538 
1539 	if (pageState == page->State())
1540 		return;
1541 
1542 	VMPageQueue* fromQueue;
1543 
1544 	switch (page->State()) {
1545 		case PAGE_STATE_ACTIVE:
1546 			fromQueue = &sActivePageQueue;
1547 			break;
1548 		case PAGE_STATE_INACTIVE:
1549 			fromQueue = &sInactivePageQueue;
1550 			break;
1551 		case PAGE_STATE_MODIFIED:
1552 			fromQueue = &sModifiedPageQueue;
1553 			break;
1554 		case PAGE_STATE_CACHED:
1555 			fromQueue = &sCachedPageQueue;
1556 			break;
1557 		case PAGE_STATE_FREE:
1558 		case PAGE_STATE_CLEAR:
1559 			panic("set_page_state(): page %p is free/clear", page);
1560 			return;
1561 		case PAGE_STATE_WIRED:
1562 		case PAGE_STATE_UNUSED:
1563 			fromQueue = NULL;
1564 			break;
1565 		default:
1566 			panic("set_page_state(): page %p in invalid state %d",
1567 				page, page->State());
1568 			return;
1569 	}
1570 
1571 	VMPageQueue* toQueue;
1572 
1573 	switch (pageState) {
1574 		case PAGE_STATE_ACTIVE:
1575 			toQueue = &sActivePageQueue;
1576 			break;
1577 		case PAGE_STATE_INACTIVE:
1578 			toQueue = &sInactivePageQueue;
1579 			break;
1580 		case PAGE_STATE_MODIFIED:
1581 			toQueue = &sModifiedPageQueue;
1582 			break;
1583 		case PAGE_STATE_CACHED:
1584 			PAGE_ASSERT(page, !page->IsMapped());
1585 			PAGE_ASSERT(page, !page->modified);
1586 			toQueue = &sCachedPageQueue;
1587 			break;
1588 		case PAGE_STATE_FREE:
1589 		case PAGE_STATE_CLEAR:
1590 			panic("set_page_state(): target state is free/clear");
1591 			return;
1592 		case PAGE_STATE_WIRED:
1593 		case PAGE_STATE_UNUSED:
1594 			toQueue = NULL;
1595 			break;
1596 		default:
1597 			panic("set_page_state(): invalid target state %d", pageState);
1598 			return;
1599 	}
1600 
1601 	VMCache* cache = page->Cache();
1602 	if (cache != NULL && cache->temporary) {
1603 		if (pageState == PAGE_STATE_MODIFIED)
1604 			atomic_add(&sModifiedTemporaryPages, 1);
1605 		else if (page->State() == PAGE_STATE_MODIFIED)
1606 			atomic_add(&sModifiedTemporaryPages, -1);
1607 	}
1608 
1609 	// move the page
1610 	if (toQueue == fromQueue) {
1611 		// Note: Theoretically we are required to lock when changing the page
1612 		// state, even if we don't change the queue. We actually don't have to
1613 		// do this, though, since different page states occur only for the active
1614 		// queue, and active pages have a cache that must be locked at
1615 		// this point. So we rely on the fact that everyone must lock the cache
1616 		// before trying to change/interpret the page state.
1617 		PAGE_ASSERT(page, cache != NULL);
1618 		cache->AssertLocked();
1619 		page->SetState(pageState);
1620 	} else {
1621 		if (fromQueue != NULL)
1622 			fromQueue->RemoveUnlocked(page);
1623 
1624 		page->SetState(pageState);
1625 
1626 		if (toQueue != NULL)
1627 			toQueue->AppendUnlocked(page);
1628 	}
1629 }
1630 
1631 
1632 /*! Moves a previously modified page into the queue that is now appropriate.
1633 	The page queues must not be locked.
1634 */
1635 static void
1636 move_page_to_appropriate_queue(vm_page *page)
1637 {
1638 	DEBUG_PAGE_ACCESS_CHECK(page);
1639 
1640 	// Note, this logic must be in sync with what the page daemon does.
1641 	int32 state;
1642 	if (page->IsMapped())
1643 		state = PAGE_STATE_ACTIVE;
1644 	else if (page->modified)
1645 		state = PAGE_STATE_MODIFIED;
1646 	else
1647 		state = PAGE_STATE_CACHED;
1648 
1649 // TODO: If free + cached pages are low, we might directly want to free the
1650 // page.
1651 	set_page_state(page, state);
1652 }
1653 
1654 
1655 static void
1656 clear_page(struct vm_page *page)
1657 {
1658 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1659 		B_PAGE_SIZE);
1660 }
1661 
1662 
1663 static status_t
1664 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1665 {
1666 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1667 		B_PRIxPHYSADDR "\n", startPage, length));
1668 
1669 	if (sPhysicalPageOffset > startPage) {
1670 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1671 			"): start page is before free list\n", startPage, length);
1672 		if (sPhysicalPageOffset - startPage >= length)
1673 			return B_OK;
1674 		length -= sPhysicalPageOffset - startPage;
1675 		startPage = sPhysicalPageOffset;
1676 	}
1677 
1678 	startPage -= sPhysicalPageOffset;
1679 
1680 	if (startPage + length > sNumPages) {
1681 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1682 			"): range would extend past free list\n", startPage, length);
1683 		if (startPage >= sNumPages)
1684 			return B_OK;
1685 		length = sNumPages - startPage;
1686 	}
1687 
1688 	WriteLocker locker(sFreePageQueuesLock);
1689 
1690 	for (page_num_t i = 0; i < length; i++) {
1691 		vm_page *page = &sPages[startPage + i];
1692 		switch (page->State()) {
1693 			case PAGE_STATE_FREE:
1694 			case PAGE_STATE_CLEAR:
1695 			{
1696 // TODO: This violates the page reservation policy, since we remove pages from
1697 // the free/clear queues without having reserved them before. This should happen
1698 // in the early boot process only, though.
1699 				DEBUG_PAGE_ACCESS_START(page);
1700 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1701 					? sFreePageQueue : sClearPageQueue;
1702 				queue.Remove(page);
1703 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1704 				page->busy = false;
1705 				atomic_add(&sUnreservedFreePages, -1);
1706 				DEBUG_PAGE_ACCESS_END(page);
1707 				break;
1708 			}
1709 			case PAGE_STATE_WIRED:
1710 			case PAGE_STATE_UNUSED:
1711 				break;
1712 			case PAGE_STATE_ACTIVE:
1713 			case PAGE_STATE_INACTIVE:
1714 			case PAGE_STATE_MODIFIED:
1715 			case PAGE_STATE_CACHED:
1716 			default:
1717 				// uh
1718 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1719 					" in non-free state %d!\n", startPage + i, page->State());
1720 				break;
1721 		}
1722 	}
1723 
1724 	return B_OK;
1725 }
1726 
1727 
1728 /*!
1729 	This is a background thread that wakes up periodically (every 100 ms)
1730 	and moves some pages from the free queue over to the clear queue.
1731 	Given enough time, it will clear out all pages from the free queue - we
1732 	could probably slow it down after having reached a certain threshold.
1733 */
1734 static int32
1735 page_scrubber(void *unused)
1736 {
1737 	(void)(unused);
1738 
1739 	TRACE(("page_scrubber starting...\n"));
1740 
1741 	for (;;) {
1742 		snooze(100000); // 100ms
1743 
1744 		if (sFreePageQueue.Count() == 0
1745 				|| atomic_get(&sUnreservedFreePages)
1746 					< (int32)sFreePagesTarget) {
1747 			continue;
1748 		}
1749 
1750 		// Since we temporarily remove pages from the free pages reserve,
1751 		// we must make sure we don't cause a violation of the page
1752 		// reservation guarantee. The following is usually stricter than
1753 		// necessary, because we don't have information on how many of the
1754 		// reserved pages have already been allocated.
1755 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1756 			kPageReserveForPriority[VM_PRIORITY_USER]);
1757 		if (reserved == 0)
1758 			continue;
1759 
1760 		// get some pages from the free queue
1761 		ReadLocker locker(sFreePageQueuesLock);
1762 
1763 		vm_page *page[SCRUB_SIZE];
1764 		int32 scrubCount = 0;
1765 		for (int32 i = 0; i < reserved; i++) {
1766 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1767 			if (page[i] == NULL)
1768 				break;
1769 
1770 			DEBUG_PAGE_ACCESS_START(page[i]);
1771 
1772 			page[i]->SetState(PAGE_STATE_ACTIVE);
1773 			page[i]->busy = true;
1774 			scrubCount++;
1775 		}
1776 
1777 		locker.Unlock();
1778 
1779 		if (scrubCount == 0) {
1780 			unreserve_pages(reserved);
1781 			continue;
1782 		}
1783 
1784 		TA(ScrubbingPages(scrubCount));
1785 
1786 		// clear them
1787 		for (int32 i = 0; i < scrubCount; i++)
1788 			clear_page(page[i]);
1789 
1790 		locker.Lock();
1791 
1792 		// and put them into the clear queue
1793 		for (int32 i = 0; i < scrubCount; i++) {
1794 			page[i]->SetState(PAGE_STATE_CLEAR);
1795 			page[i]->busy = false;
1796 			DEBUG_PAGE_ACCESS_END(page[i]);
1797 			sClearPageQueue.PrependUnlocked(page[i]);
1798 		}
1799 
1800 		locker.Unlock();
1801 
1802 		unreserve_pages(reserved);
1803 
1804 		TA(ScrubbedPages(scrubCount));
1805 	}
1806 
1807 	return 0;
1808 }
1809 
1810 
1811 static void
1812 init_page_marker(vm_page &marker)
1813 {
1814 	marker.SetCacheRef(NULL);
1815 	marker.InitState(PAGE_STATE_UNUSED);
1816 	marker.busy = true;
1817 #if DEBUG_PAGE_QUEUE
1818 	marker.queue = NULL;
1819 #endif
1820 #if DEBUG_PAGE_ACCESS
1821 	marker.accessing_thread = thread_get_current_thread_id();
1822 #endif
1823 }
1824 
1825 
1826 static void
1827 remove_page_marker(struct vm_page &marker)
1828 {
1829 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1830 
1831 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1832 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1833 
1834 	marker.SetState(PAGE_STATE_UNUSED);
1835 }
1836 
1837 
1838 static vm_page*
1839 next_modified_page(page_num_t& maxPagesToSee)
1840 {
1841 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1842 
1843 	while (maxPagesToSee > 0) {
1844 		vm_page* page = sModifiedPageQueue.Head();
1845 		if (page == NULL)
1846 			return NULL;
1847 
1848 		sModifiedPageQueue.Requeue(page, true);
1849 
1850 		maxPagesToSee--;
1851 
1852 		if (!page->busy)
1853 			return page;
1854 	}
1855 
1856 	return NULL;
1857 }
1858 
1859 
1860 // #pragma mark -
1861 
1862 
1863 class PageWriteTransfer;
1864 class PageWriteWrapper;
1865 
1866 
1867 class PageWriterRun {
1868 public:
1869 	status_t Init(uint32 maxPages);
1870 
1871 	void PrepareNextRun();
1872 	void AddPage(vm_page* page);
1873 	uint32 Go();
1874 
1875 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1876 		bool partialTransfer, size_t bytesTransferred);
1877 
1878 private:
1879 	uint32				fMaxPages;
1880 	uint32				fWrapperCount;
1881 	uint32				fTransferCount;
1882 	int32				fPendingTransfers;
1883 	PageWriteWrapper*	fWrappers;
1884 	PageWriteTransfer*	fTransfers;
1885 	ConditionVariable	fAllFinishedCondition;
1886 };
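// Illustrative sketch of how a PageWriterRun is meant to be driven (the actual
// page writer thread lives further below; cache locking, PageWriteWrapper
// bookkeeping and error handling are omitted, and "maxPagesPerRun" as well as
// "pageBudget" are hypothetical values):
//
//   static PageWriterRun run;
//   if (run.Init(maxPagesPerRun) != B_OK)
//       return;
//   while (true) {
//       run.PrepareNextRun();
//       page_num_t pageBudget = 4 * maxPagesPerRun;
//       while (vm_page* page = next_modified_page(pageBudget))
//           run.AddPage(page);
//       run.Go();
//           // schedules the transfers and presumably blocks until all of
//           // them have completed
//   }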
1887 
1888 
1889 class PageWriteTransfer : public AsyncIOCallback {
1890 public:
1891 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1892 	bool AddPage(vm_page* page);
1893 
1894 	status_t Schedule(uint32 flags);
1895 
1896 	void SetStatus(status_t status, size_t transferred);
1897 
1898 	status_t Status() const	{ return fStatus; }
1899 	struct VMCache* Cache() const { return fCache; }
1900 	uint32 PageCount() const { return fPageCount; }
1901 
1902 	virtual void IOFinished(status_t status, bool partialTransfer,
1903 		generic_size_t bytesTransferred);
1904 private:
1905 	PageWriterRun*		fRun;
1906 	struct VMCache*		fCache;
1907 	off_t				fOffset;
1908 	uint32				fPageCount;
1909 	int32				fMaxPages;
1910 	status_t			fStatus;
1911 	uint32				fVecCount;
1912 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
1913 };
1914 
1915 
1916 class PageWriteWrapper {
1917 public:
1918 	PageWriteWrapper();
1919 	~PageWriteWrapper();
1920 	void SetTo(vm_page* page);
1921 	bool Done(status_t result);
1922 
1923 private:
1924 	vm_page*			fPage;
1925 	struct VMCache*		fCache;
1926 	bool				fIsActive;
1927 };
1928 
1929 
1930 PageWriteWrapper::PageWriteWrapper()
1931 	:
1932 	fIsActive(false)
1933 {
1934 }
1935 
1936 
1937 PageWriteWrapper::~PageWriteWrapper()
1938 {
1939 	if (fIsActive)
1940 		panic("page write wrapper going out of scope but isn't completed");
1941 }
1942 
1943 
1944 /*!	The page's cache must be locked.
1945 */
1946 void
1947 PageWriteWrapper::SetTo(vm_page* page)
1948 {
1949 	DEBUG_PAGE_ACCESS_CHECK(page);
1950 
1951 	if (page->busy)
1952 		panic("setting page write wrapper to busy page");
1953 
1954 	if (fIsActive)
1955 		panic("re-setting page write wrapper that isn't completed");
1956 
1957 	fPage = page;
1958 	fCache = page->Cache();
1959 	fIsActive = true;
1960 
1961 	fPage->busy = true;
1962 	fPage->busy_writing = true;
1963 
1964 	// We have a modified page -- however, while we're writing it back,
1965 	// the page might still be mapped. In order not to lose any changes to the
1966 	// page, we mark it clean before actually writing it back; if
1967 	// writing the page fails for some reason, we'll just keep it in the
1968 	// modified page list, but that should happen only rarely.
1969 
1970 	// If the page is changed after we cleared the dirty flag, but before we
1971 	// had the chance to write it back, then we'll write it again later -- that
1972 	// will probably not happen that often, though.
1973 
1974 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
1975 }
1976 
1977 
1978 /*!	The page's cache must be locked.
1979 	The page queues must not be locked.
1980 	\return \c true if the page was written successfully or could otherwise
1981 		be handled, \c false otherwise.
1982 */
1983 bool
1984 PageWriteWrapper::Done(status_t result)
1985 {
1986 	if (!fIsActive)
1987 		panic("completing page write wrapper that is not active");
1988 
1989 	DEBUG_PAGE_ACCESS_START(fPage);
1990 
1991 	fPage->busy = false;
1992 		// Set unbusy and notify later by hand, since we might free the page.
1993 
1994 	bool success = true;
1995 
1996 	if (result == B_OK) {
1997 		// put it into the active/inactive queue
1998 		move_page_to_appropriate_queue(fPage);
1999 		fPage->busy_writing = false;
2000 		DEBUG_PAGE_ACCESS_END(fPage);
2001 	} else {
2002 		// Writing the page failed. One reason would be that the cache has been
2003 		// shrunk and the page no longer belongs to the file. Otherwise the
2004 		// actual I/O failed, in which case we'll simply keep the page modified.
2005 
2006 		if (!fPage->busy_writing) {
2007 			// The busy_writing flag was cleared. That means the cache has been
2008 			// shrunk while we were trying to write the page and we have to free
2009 			// it now.
2010 			vm_remove_all_page_mappings(fPage);
2011 // TODO: Unmapping should already happen when resizing the cache!
2012 			fCache->RemovePage(fPage);
2013 			free_page(fPage, false);
2014 			unreserve_pages(1);
2015 		} else {
2016 			// Writing the page failed -- mark the page modified and move it to
2017 			// an appropriate queue other than the modified queue, so we don't
2018 			// keep trying to write it over and over again. We keep
2019 			// non-temporary pages in the modified queue, though, so they don't
2020 			// get lost in the inactive queue.
2021 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2022 				strerror(result));
2023 
2024 			fPage->modified = true;
2025 			if (!fCache->temporary)
2026 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2027 			else if (fPage->IsMapped())
2028 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2029 			else
2030 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2031 
2032 			fPage->busy_writing = false;
2033 			DEBUG_PAGE_ACCESS_END(fPage);
2034 
2035 			success = false;
2036 		}
2037 	}
2038 
2039 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2040 	fIsActive = false;
2041 
2042 	return success;
2043 }
2044 
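/*	Illustration (not part of the original source): the intended life cycle of
	a PageWriteWrapper, as used by the page writer and
	vm_page_write_modified_page_range() below -- SetTo() with the page's cache
	locked, the actual I/O with the cache unlocked, then Done() with the cache
	locked again. The write step named here is hypothetical; in this file the
	I/O is issued via PageWriteTransfer.

	\code
	PageWriteWrapper wrapper;
	wrapper.SetTo(page);
		// cache locked: marks the page busy and clears the modified flag
		// in its mappings
	cache->Unlock();
	status_t status = WritePageToBackingStore(page);
		// hypothetical I/O step standing in for PageWriteTransfer::Schedule()
	cache->Lock();
	wrapper.Done(status);
		// requeues the page (or frees it if the cache shrank) and clears busy
	\endcode
*/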
2045 
2046 /*!	The page's cache must be locked.
2047 */
2048 void
2049 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2050 {
2051 	fRun = run;
2052 	fCache = page->Cache();
2053 	fOffset = page->cache_offset;
2054 	fPageCount = 1;
2055 	fMaxPages = maxPages;
2056 	fStatus = B_OK;
2057 
2058 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2059 	fVecs[0].length = B_PAGE_SIZE;
2060 	fVecCount = 1;
2061 }
2062 
2063 
2064 /*!	The page's cache must be locked.
2065 */
2066 bool
2067 PageWriteTransfer::AddPage(vm_page* page)
2068 {
2069 	if (page->Cache() != fCache
2070 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2071 		return false;
2072 
2073 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2074 		+ fVecs[fVecCount - 1].length;
2075 
2076 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2077 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2078 		// append to last iovec
2079 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2080 		fPageCount++;
2081 		return true;
2082 	}
2083 
2084 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2085 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2086 		&& (off_t)page->cache_offset == fOffset - 1) {
2087 		// prepend to first iovec and adjust offset
2088 		fVecs[0].base = nextBase;
2089 		fVecs[0].length += B_PAGE_SIZE;
2090 		fOffset = page->cache_offset;
2091 		fPageCount++;
2092 		return true;
2093 	}
2094 
2095 	if (((off_t)page->cache_offset == fOffset + fPageCount
2096 			|| (off_t)page->cache_offset == fOffset - 1)
2097 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2098 		// not physically contiguous or not in the right order
2099 		uint32 vectorIndex;
2100 		if ((off_t)page->cache_offset < fOffset) {
2101 			// we are pre-pending another vector, move the other vecs
2102 			for (uint32 i = fVecCount; i > 0; i--)
2103 				fVecs[i] = fVecs[i - 1];
2104 
2105 			fOffset = page->cache_offset;
2106 			vectorIndex = 0;
2107 		} else
2108 			vectorIndex = fVecCount;
2109 
2110 		fVecs[vectorIndex].base
2111 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2112 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2113 
2114 		fVecCount++;
2115 		fPageCount++;
2116 		return true;
2117 	}
2118 
2119 	return false;
2120 }
2121 
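/*	Illustration (not part of the original source): a sketch of how
	PageWriteTransfer coalesces pages into iovecs, assuming 4 KiB pages and
	made-up cache offsets/physical page numbers (page10, page11, page12 are
	hypothetical vm_page pointers). Pages contiguous both in the cache and in
	physical memory extend an existing iovec; pages contiguous in the cache
	only get an iovec of their own; anything else is rejected and has to go
	into a separate transfer.

	\code
	// assume cache offsets 10, 11, 12 with physical pages 100, 101, 205
	PageWriteTransfer transfer;
	transfer.SetTo(NULL, page10, -1);
		// fVecs[0] = {100 << PAGE_SHIFT, B_PAGE_SIZE}; NULL run -> synchronous
	transfer.AddPage(page11);
		// physically adjacent: fVecs[0].length grows by B_PAGE_SIZE
	transfer.AddPage(page12);
		// contiguous in the cache only: starts a new fVecs[1]
	transfer.Schedule(0);
		// one write request covering cache offsets 10-12, two iovecs
	\endcode
*/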
2122 
2123 status_t
2124 PageWriteTransfer::Schedule(uint32 flags)
2125 {
2126 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2127 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2128 
2129 	if (fRun != NULL) {
2130 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2131 			flags | B_PHYSICAL_IO_REQUEST, this);
2132 	}
2133 
2134 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2135 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2136 
2137 	SetStatus(status, writeLength);
2138 	return fStatus;
2139 }
2140 
2141 
2142 void
2143 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2144 {
2145 	// only succeed if all pages up to the last one have been written fully
2146 	// and the last page has at least been written partially
2147 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2148 		status = B_ERROR;
2149 
2150 	fStatus = status;
2151 }
2152 
2153 
2154 void
2155 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2156 	generic_size_t bytesTransferred)
2157 {
2158 	SetStatus(status, bytesTransferred);
2159 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2160 }
2161 
2162 
2163 status_t
2164 PageWriterRun::Init(uint32 maxPages)
2165 {
2166 	fMaxPages = maxPages;
2167 	fWrapperCount = 0;
2168 	fTransferCount = 0;
2169 	fPendingTransfers = 0;
2170 
2171 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2172 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2173 	if (fWrappers == NULL || fTransfers == NULL)
2174 		return B_NO_MEMORY;
2175 
2176 	return B_OK;
2177 }
2178 
2179 
2180 void
2181 PageWriterRun::PrepareNextRun()
2182 {
2183 	fWrapperCount = 0;
2184 	fTransferCount = 0;
2185 	fPendingTransfers = 0;
2186 }
2187 
2188 
2189 /*!	The page's cache must be locked.
2190 */
2191 void
2192 PageWriterRun::AddPage(vm_page* page)
2193 {
2194 	fWrappers[fWrapperCount++].SetTo(page);
2195 
2196 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2197 		fTransfers[fTransferCount++].SetTo(this, page,
2198 			page->Cache()->MaxPagesPerAsyncWrite());
2199 	}
2200 }
2201 
2202 
2203 /*!	Writes all pages previously added.
2204 	\return The number of pages that could not be written or otherwise handled.
2205 */
2206 uint32
2207 PageWriterRun::Go()
2208 {
2209 	atomic_set(&fPendingTransfers, fTransferCount);
2210 
2211 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2212 	ConditionVariableEntry waitEntry;
2213 	fAllFinishedCondition.Add(&waitEntry);
2214 
2215 	// schedule writes
2216 	for (uint32 i = 0; i < fTransferCount; i++)
2217 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2218 
2219 	// wait until all pages have been written
2220 	waitEntry.Wait();
2221 
2222 	// mark pages depending on whether they could be written or not
2223 
2224 	uint32 failedPages = 0;
2225 	uint32 wrapperIndex = 0;
2226 	for (uint32 i = 0; i < fTransferCount; i++) {
2227 		PageWriteTransfer& transfer = fTransfers[i];
2228 		transfer.Cache()->Lock();
2229 
2230 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2231 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2232 				failedPages++;
2233 		}
2234 
2235 		transfer.Cache()->Unlock();
2236 	}
2237 
2238 	ASSERT(wrapperIndex == fWrapperCount);
2239 
2240 	for (uint32 i = 0; i < fTransferCount; i++) {
2241 		PageWriteTransfer& transfer = fTransfers[i];
2242 		struct VMCache* cache = transfer.Cache();
2243 
2244 		// We've acquired a reference for each page
2245 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2246 			// We release the cache references after all pages were made
2247 			// unbusy again - otherwise releasing a vnode could deadlock.
2248 			cache->ReleaseStoreRef();
2249 			cache->ReleaseRef();
2250 		}
2251 	}
2252 
2253 	return failedPages;
2254 }
2255 
2256 
2257 void
2258 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2259 	bool partialTransfer, size_t bytesTransferred)
2260 {
2261 	if (atomic_add(&fPendingTransfers, -1) == 1)
2262 		fAllFinishedCondition.NotifyAll();
2263 }
2264 
2265 
2266 /*!	The page writer continuously takes some pages from the modified
2267 	queue, writes them back, and moves them back to the active queue.
2268 	It runs in its own thread, and is only there to keep the number
2269 	of modified pages low, so that more pages can be reused at a
2270 	lower cost.
2271 */
2272 status_t
2273 page_writer(void* /*unused*/)
2274 {
2275 	const uint32 kNumPages = 256;
2276 #ifdef TRACE_VM_PAGE
2277 	uint32 writtenPages = 0;
2278 	bigtime_t lastWrittenTime = 0;
2279 	bigtime_t pageCollectionTime = 0;
2280 	bigtime_t pageWritingTime = 0;
2281 #endif
2282 
2283 	PageWriterRun run;
2284 	if (run.Init(kNumPages) != B_OK) {
2285 		panic("page writer: Failed to init PageWriterRun!");
2286 		return B_ERROR;
2287 	}
2288 
2289 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2290 
2291 	while (true) {
2292 // TODO: Maybe wait shorter when memory is low!
2293 		if (sModifiedPageQueue.Count() < kNumPages) {
2294 			sPageWriterCondition.Wait(3000000, true);
2295 				// every 3 seconds when no one triggers us
2296 		}
2297 
2298 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2299 		if (modifiedPages == 0)
2300 			continue;
2301 
2302 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2303 			// We ran through the whole queue without being able to write a
2304 			// single page. Take a break.
2305 			snooze(500000);
2306 			pagesSinceLastSuccessfulWrite = 0;
2307 		}
2308 
2309 #if ENABLE_SWAP_SUPPORT
2310 		page_stats pageStats;
2311 		get_page_stats(pageStats);
2312 		bool activePaging = do_active_paging(pageStats);
2313 #endif
2314 
2315 		// depending on how urgent it becomes to get pages to disk, we adjust
2316 		// our I/O priority
2317 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2318 		int32 ioPriority = B_IDLE_PRIORITY;
2319 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2320 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2321 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2322 		} else {
2323 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2324 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2325 		}
2326 
2327 		thread_set_io_priority(ioPriority);
2328 
2329 		uint32 numPages = 0;
2330 		run.PrepareNextRun();
2331 
2332 		// TODO: make this laptop friendly, too (ie. only start doing
2333 		// something if someone else did something or there is really
2334 		// enough to do).
2335 
2336 		// collect pages to be written
2337 #ifdef TRACE_VM_PAGE
2338 		pageCollectionTime -= system_time();
2339 #endif
2340 
2341 		page_num_t maxPagesToSee = modifiedPages;
2342 
2343 		while (numPages < kNumPages && maxPagesToSee > 0) {
2344 			vm_page *page = next_modified_page(maxPagesToSee);
2345 			if (page == NULL)
2346 				break;
2347 
2348 			PageCacheLocker cacheLocker(page, false);
2349 			if (!cacheLocker.IsLocked())
2350 				continue;
2351 
2352 			VMCache *cache = page->Cache();
2353 
2354 			// If the page is busy or its state has changed while we were
2355 			// locking the cache, just ignore it.
2356 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2357 				continue;
2358 
2359 			DEBUG_PAGE_ACCESS_START(page);
2360 
2361 			// Don't write back wired (locked) pages.
2362 			if (page->WiredCount() > 0) {
2363 				set_page_state(page, PAGE_STATE_ACTIVE);
2364 				DEBUG_PAGE_ACCESS_END(page);
2365 				continue;
2366 			}
2367 
2368 			// Write back temporary pages only when we're actively paging.
2369 			if (cache->temporary
2370 #if ENABLE_SWAP_SUPPORT
2371 				&& (!activePaging
2372 					|| !cache->CanWritePage(
2373 							(off_t)page->cache_offset << PAGE_SHIFT))
2374 #endif
2375 				) {
2376 				// We can't/don't want to do anything with this page, so move it
2377 				// to one of the other queues.
2378 				if (page->mappings.IsEmpty())
2379 					set_page_state(page, PAGE_STATE_INACTIVE);
2380 				else
2381 					set_page_state(page, PAGE_STATE_ACTIVE);
2382 
2383 				DEBUG_PAGE_ACCESS_END(page);
2384 				continue;
2385 			}
2386 
2387 			// We need our own reference to the store, as it might currently be
2388 			// destroyed.
2389 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2390 				DEBUG_PAGE_ACCESS_END(page);
2391 				cacheLocker.Unlock();
2392 				thread_yield();
2393 				continue;
2394 			}
2395 
2396 			run.AddPage(page);
2397 				// TODO: We're possibly adding pages of different caches and
2398 				// thus maybe of different underlying file systems here. This
2399 				// is a potential problem for loop file systems/devices, since
2400 				// we could mark a page busy that would need to be accessed
2401 				// when writing back another page, thus causing a deadlock.
2402 
2403 			DEBUG_PAGE_ACCESS_END(page);
2404 
2405 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2406 			TPW(WritePage(page));
2407 
2408 			cache->AcquireRefLocked();
2409 			numPages++;
2410 		}
2411 
2412 #ifdef TRACE_VM_PAGE
2413 		pageCollectionTime += system_time();
2414 #endif
2415 		if (numPages == 0)
2416 			continue;
2417 
2418 		// write pages to disk and do all the cleanup
2419 #ifdef TRACE_VM_PAGE
2420 		pageWritingTime -= system_time();
2421 #endif
2422 		uint32 failedPages = run.Go();
2423 #ifdef TRACE_VM_PAGE
2424 		pageWritingTime += system_time();
2425 
2426 		// debug output only...
2427 		writtenPages += numPages;
2428 		if (writtenPages >= 1024) {
2429 			bigtime_t now = system_time();
2430 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2431 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2432 				(now - lastWrittenTime) / 1000,
2433 				pageCollectionTime / 1000, pageWritingTime / 1000));
2434 			lastWrittenTime = now;
2435 
2436 			writtenPages -= 1024;
2437 			pageCollectionTime = 0;
2438 			pageWritingTime = 0;
2439 		}
2440 #endif
2441 
2442 		if (failedPages == numPages)
2443 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2444 		else
2445 			pagesSinceLastSuccessfulWrite = 0;
2446 	}
2447 
2448 	return B_OK;
2449 }
2450 
2451 
2452 // #pragma mark -
2453 
2454 
2455 // TODO: This should be done in the page daemon!
2456 #if 0
2457 #if ENABLE_SWAP_SUPPORT
2458 static bool
2459 free_page_swap_space(int32 index)
2460 {
2461 	vm_page *page = vm_page_at_index(index);
2462 	PageCacheLocker locker(page);
2463 	if (!locker.IsLocked())
2464 		return false;
2465 
2466 	DEBUG_PAGE_ACCESS_START(page);
2467 
2468 	VMCache* cache = page->Cache();
2469 	if (cache->temporary && page->WiredCount() == 0
2470 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2471 			&& page->usage_count > 0) {
2472 		// TODO: how to judge a page is highly active?
2473 		if (swap_free_page_swap_space(page)) {
2474 			// We need to mark the page modified, since otherwise it could be
2475 			// stolen and we'd lose its data.
2476 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2477 			TD(FreedPageSwap(page));
2478 			DEBUG_PAGE_ACCESS_END(page);
2479 			return true;
2480 		}
2481 	}
2482 	DEBUG_PAGE_ACCESS_END(page);
2483 	return false;
2484 }
2485 #endif
2486 #endif	// 0
2487 
2488 
2489 static vm_page *
2490 find_cached_page_candidate(struct vm_page &marker)
2491 {
2492 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2493 
2494 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2495 	vm_page *page;
2496 
2497 	if (marker.State() == PAGE_STATE_UNUSED) {
2498 		// Get the first page of the cached queue
2499 		page = sCachedPageQueue.Head();
2500 	} else {
2501 		// Get the next page of the current queue
2502 		if (marker.State() != PAGE_STATE_CACHED) {
2503 			panic("invalid marker %p state", &marker);
2504 			return NULL;
2505 		}
2506 
2507 		page = sCachedPageQueue.Next(&marker);
2508 		sCachedPageQueue.Remove(&marker);
2509 		marker.SetState(PAGE_STATE_UNUSED);
2510 	}
2511 
2512 	while (page != NULL) {
2513 		if (!page->busy) {
2514 			// we found a candidate, insert marker
2515 			marker.SetState(PAGE_STATE_CACHED);
2516 			sCachedPageQueue.InsertAfter(page, &marker);
2517 			return page;
2518 		}
2519 
2520 		page = sCachedPageQueue.Next(page);
2521 	}
2522 
2523 	return NULL;
2524 }
2525 
2526 
2527 static bool
2528 free_cached_page(vm_page *page, bool dontWait)
2529 {
2530 	// try to lock the page's cache
2531 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2532 		return false;
2533 	VMCache* cache = page->Cache();
2534 
2535 	AutoLocker<VMCache> cacheLocker(cache, true);
2536 	MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked);
2537 
2538 	// check again if that page is still a candidate
2539 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2540 		return false;
2541 
2542 	DEBUG_PAGE_ACCESS_START(page);
2543 
2544 	PAGE_ASSERT(page, !page->IsMapped());
2545 	PAGE_ASSERT(page, !page->modified);
2546 
2547 	// we can now steal this page
2548 
2549 	cache->RemovePage(page);
2550 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2551 		// vm_page_allocate_page_run()) can pick it up, since they would be
2552 		// required to lock the cache first, which would fail.
2553 
2554 	sCachedPageQueue.RemoveUnlocked(page);
2555 	return true;
2556 }
2557 
2558 
2559 static uint32
2560 free_cached_pages(uint32 pagesToFree, bool dontWait)
2561 {
2562 	vm_page marker;
2563 	init_page_marker(marker);
2564 
2565 	uint32 pagesFreed = 0;
2566 
2567 	while (pagesFreed < pagesToFree) {
2568 		vm_page *page = find_cached_page_candidate(marker);
2569 		if (page == NULL)
2570 			break;
2571 
2572 		if (free_cached_page(page, dontWait)) {
2573 			ReadLocker locker(sFreePageQueuesLock);
2574 			page->SetState(PAGE_STATE_FREE);
2575 			DEBUG_PAGE_ACCESS_END(page);
2576 			sFreePageQueue.PrependUnlocked(page);
2577 			locker.Unlock();
2578 
2579 			TA(StolenPage());
2580 
2581 			pagesFreed++;
2582 		}
2583 	}
2584 
2585 	remove_page_marker(marker);
2586 
2587 	return pagesFreed;
2588 }
2589 
2590 
2591 static void
2592 idle_scan_active_pages(page_stats& pageStats)
2593 {
2594 	VMPageQueue& queue = sActivePageQueue;
2595 
2596 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2597 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2598 
2599 	while (maxToScan > 0) {
2600 		maxToScan--;
2601 
2602 		// Get the next page. Note that we don't bother to lock here. We go with
2603 		// the assumption that on all architectures reading/writing pointers is
2604 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2605 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2606 		vm_page* page = queue.Head();
2607 		if (page == NULL)
2608 			break;
2609 
2610 		// lock the page's cache
2611 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2612 		if (cache == NULL)
2613 			continue;
2614 
2615 		if (page->State() != PAGE_STATE_ACTIVE) {
2616 			// page is no longer in the cache or in this queue
2617 			cache->ReleaseRefAndUnlock();
2618 			continue;
2619 		}
2620 
2621 		if (page->busy) {
2622 			// page is busy -- requeue at the end
2623 			vm_page_requeue(page, true);
2624 			cache->ReleaseRefAndUnlock();
2625 			continue;
2626 		}
2627 
2628 		DEBUG_PAGE_ACCESS_START(page);
2629 
2630 		// Get the page active/modified flags and update the page's usage count.
2631 		// We completely unmap inactive temporary pages. This saves us from
2632 		// iterating through the inactive list as well, since we'll be notified
2633 		// via page fault whenever such an inactive page is used again.
2634 		// We don't remove the mappings of non-temporary pages, since we
2635 		// wouldn't notice when those would become unused and could thus be
2636 		// moved to the cached list.
2637 		int32 usageCount;
2638 		if (page->WiredCount() > 0 || page->usage_count > 0
2639 			|| !cache->temporary) {
2640 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2641 		} else
2642 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2643 
2644 		if (usageCount > 0) {
2645 			usageCount += page->usage_count + kPageUsageAdvance;
2646 			if (usageCount > kPageUsageMax)
2647 				usageCount = kPageUsageMax;
2648 // TODO: This would probably also be the place to reclaim swap space.
2649 		} else {
2650 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2651 			if (usageCount < 0) {
2652 				usageCount = 0;
2653 				set_page_state(page, PAGE_STATE_INACTIVE);
2654 			}
2655 		}
2656 
2657 		page->usage_count = usageCount;
2658 
2659 		DEBUG_PAGE_ACCESS_END(page);
2660 
2661 		cache->ReleaseRefAndUnlock();
2662 	}
2663 }
2664 
2665 
2666 static void
2667 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2668 {
2669 	int32 pagesToFree = pageStats.unsatisfiedReservations
2670 		+ sFreeOrCachedPagesTarget
2671 		- (pageStats.totalFreePages + pageStats.cachedPages);
2672 	if (pagesToFree <= 0)
2673 		return;
2674 
2675 	bigtime_t time = system_time();
2676 	uint32 pagesScanned = 0;
2677 	uint32 pagesToCached = 0;
2678 	uint32 pagesToModified = 0;
2679 	uint32 pagesToActive = 0;
2680 
2681 	// Determine how many pages at maximum to send to the modified queue. Since
2682 	// it is relatively expensive to page out pages, we do that on a grander
2683 	// scale only when things get desperate.
2684 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2685 
2686 	vm_page marker;
2687 	init_page_marker(marker);
2688 
2689 	VMPageQueue& queue = sInactivePageQueue;
2690 	InterruptsSpinLocker queueLocker(queue.GetLock());
2691 	uint32 maxToScan = queue.Count();
2692 
2693 	vm_page* nextPage = queue.Head();
2694 
2695 	while (pagesToFree > 0 && maxToScan > 0) {
2696 		maxToScan--;
2697 
2698 		// get the next page
2699 		vm_page* page = nextPage;
2700 		if (page == NULL)
2701 			break;
2702 		nextPage = queue.Next(page);
2703 
2704 		if (page->busy)
2705 			continue;
2706 
2707 		// mark the position
2708 		queue.InsertAfter(page, &marker);
2709 		queueLocker.Unlock();
2710 
2711 		// lock the page's cache
2712 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2713 		if (cache == NULL || page->busy
2714 				|| page->State() != PAGE_STATE_INACTIVE) {
2715 			if (cache != NULL)
2716 				cache->ReleaseRefAndUnlock();
2717 			queueLocker.Lock();
2718 			nextPage = queue.Next(&marker);
2719 			queue.Remove(&marker);
2720 			continue;
2721 		}
2722 
2723 		pagesScanned++;
2724 
2725 		DEBUG_PAGE_ACCESS_START(page);
2726 
2727 		// Get the accessed count, clear the accessed/modified flags and
2728 		// unmap the page, if it hasn't been accessed.
2729 		int32 usageCount;
2730 		if (page->WiredCount() > 0)
2731 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2732 		else
2733 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2734 
2735 		// update usage count
2736 		if (usageCount > 0) {
2737 			usageCount += page->usage_count + kPageUsageAdvance;
2738 			if (usageCount > kPageUsageMax)
2739 				usageCount = kPageUsageMax;
2740 		} else {
2741 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2742 			if (usageCount < 0)
2743 				usageCount = 0;
2744 		}
2745 
2746 		page->usage_count = usageCount;
2747 
2748 		// Move to fitting queue or requeue:
2749 		// * Active mapped pages go to the active queue.
2750 		// * Inactive mapped (i.e. wired) pages are requeued.
2751 		// * The remaining pages are cachable. Thus, if unmodified they go to
2752 		//   the cached queue, otherwise to the modified queue (up to a limit).
2753 		//   Note that unlike in the idle scanning we don't exempt pages of
2754 		//   temporary caches. Apparently we really need memory, so we better
2755 		//   page out memory as well.
2756 		bool isMapped = page->IsMapped();
2757 		if (usageCount > 0) {
2758 			if (isMapped) {
2759 				set_page_state(page, PAGE_STATE_ACTIVE);
2760 				pagesToActive++;
2761 			} else
2762 				vm_page_requeue(page, true);
2763 		} else if (isMapped) {
2764 			vm_page_requeue(page, true);
2765 		} else if (!page->modified) {
2766 			set_page_state(page, PAGE_STATE_CACHED);
2767 			pagesToFree--;
2768 			pagesToCached++;
2769 		} else if (maxToFlush > 0) {
2770 			set_page_state(page, PAGE_STATE_MODIFIED);
2771 			maxToFlush--;
2772 			pagesToModified++;
2773 		} else
2774 			vm_page_requeue(page, true);
2775 
2776 		DEBUG_PAGE_ACCESS_END(page);
2777 
2778 		cache->ReleaseRefAndUnlock();
2779 
2780 		// remove the marker
2781 		queueLocker.Lock();
2782 		nextPage = queue.Next(&marker);
2783 		queue.Remove(&marker);
2784 	}
2785 
2786 	queueLocker.Unlock();
2787 
2788 	time = system_time() - time;
2789 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2790 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2791 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2792 		pagesToModified, pagesToActive);
2793 
2794 	// wake up the page writer, if we tossed it some pages
2795 	if (pagesToModified > 0)
2796 		sPageWriterCondition.WakeUp();
2797 }
2798 
2799 
2800 static void
2801 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2802 {
2803 	vm_page marker;
2804 	init_page_marker(marker);
2805 
2806 	VMPageQueue& queue = sActivePageQueue;
2807 	InterruptsSpinLocker queueLocker(queue.GetLock());
2808 	uint32 maxToScan = queue.Count();
2809 
2810 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2811 		+ sFreeOrCachedPagesTarget
2812 		- (pageStats.totalFreePages + pageStats.cachedPages)
2813 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2814 	if (pagesToDeactivate <= 0)
2815 		return;
2816 
2817 	bigtime_t time = system_time();
2818 	uint32 pagesAccessed = 0;
2819 	uint32 pagesToInactive = 0;
2820 	uint32 pagesScanned = 0;
2821 
2822 	vm_page* nextPage = queue.Head();
2823 
2824 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2825 		maxToScan--;
2826 
2827 		// get the next page
2828 		vm_page* page = nextPage;
2829 		if (page == NULL)
2830 			break;
2831 		nextPage = queue.Next(page);
2832 
2833 		if (page->busy)
2834 			continue;
2835 
2836 		// mark the position
2837 		queue.InsertAfter(page, &marker);
2838 		queueLocker.Unlock();
2839 
2840 		// lock the page's cache
2841 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2842 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2843 			if (cache != NULL)
2844 				cache->ReleaseRefAndUnlock();
2845 			queueLocker.Lock();
2846 			nextPage = queue.Next(&marker);
2847 			queue.Remove(&marker);
2848 			continue;
2849 		}
2850 
2851 		pagesScanned++;
2852 
2853 		DEBUG_PAGE_ACCESS_START(page);
2854 
2855 		// Get the page active/modified flags and update the page's usage count.
2856 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2857 
2858 		if (usageCount > 0) {
2859 			usageCount += page->usage_count + kPageUsageAdvance;
2860 			if (usageCount > kPageUsageMax)
2861 				usageCount = kPageUsageMax;
2862 			pagesAccessed++;
2863 // TODO: This would probably also be the place to reclaim swap space.
2864 		} else {
2865 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2866 			if (usageCount <= 0) {
2867 				usageCount = 0;
2868 				set_page_state(page, PAGE_STATE_INACTIVE);
2869 				pagesToInactive++;
2870 			}
2871 		}
2872 
2873 		page->usage_count = usageCount;
2874 
2875 		DEBUG_PAGE_ACCESS_END(page);
2876 
2877 		cache->ReleaseRefAndUnlock();
2878 
2879 		// remove the marker
2880 		queueLocker.Lock();
2881 		nextPage = queue.Next(&marker);
2882 		queue.Remove(&marker);
2883 	}
2884 
2885 	time = system_time() - time;
2886 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2887 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2888 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2889 }
2890 
2891 
2892 static void
2893 page_daemon_idle_scan(page_stats& pageStats)
2894 {
2895 	TRACE_DAEMON("page daemon: idle run\n");
2896 
2897 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2898 		// We want more actually free pages, so free some from the cached
2899 		// ones.
2900 		uint32 freed = free_cached_pages(
2901 			sFreePagesTarget - pageStats.totalFreePages, false);
2902 		if (freed > 0)
2903 			unreserve_pages(freed);
2904 		get_page_stats(pageStats);
2905 	}
2906 
2907 	// Walk the active list and move pages to the inactive queue.
2908 	get_page_stats(pageStats);
2909 	idle_scan_active_pages(pageStats);
2910 }
2911 
2912 
2913 static void
2914 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2915 {
2916 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
2917 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
2918 		pageStats.cachedPages, pageStats.unsatisfiedReservations
2919 			+ sFreeOrCachedPagesTarget
2920 			- (pageStats.totalFreePages + pageStats.cachedPages));
2921 
2922 	// Walk the inactive list and transfer pages to the cached and modified
2923 	// queues.
2924 	full_scan_inactive_pages(pageStats, despairLevel);
2925 
2926 	// Free cached pages. Also wake up reservation waiters.
2927 	get_page_stats(pageStats);
2928 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2929 		- (pageStats.totalFreePages);
2930 	if (pagesToFree > 0) {
2931 		uint32 freed = free_cached_pages(pagesToFree, true);
2932 		if (freed > 0)
2933 			unreserve_pages(freed);
2934 	}
2935 
2936 	// Walk the active list and move pages to the inactive queue.
2937 	get_page_stats(pageStats);
2938 	full_scan_active_pages(pageStats, despairLevel);
2939 }
2940 
2941 
2942 static status_t
2943 page_daemon(void* /*unused*/)
2944 {
2945 	int32 despairLevel = 0;
2946 
2947 	while (true) {
2948 		sPageDaemonCondition.ClearActivated();
2949 
2950 		// evaluate the free pages situation
2951 		page_stats pageStats;
2952 		get_page_stats(pageStats);
2953 
2954 		if (!do_active_paging(pageStats)) {
2955 			// Things look good -- just maintain statistics and keep the pool
2956 			// of actually free pages full enough.
2957 			despairLevel = 0;
2958 			page_daemon_idle_scan(pageStats);
2959 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
2960 		} else {
2961 			// Not enough free pages. We need to do some real work.
2962 			despairLevel = std::min(despairLevel + 1, (int32)3);
2963 			page_daemon_full_scan(pageStats, despairLevel);
2964 
2965 			// Don't wait after the first full scan, but rather immediately
2966 			// check whether we were successful in freeing enough pages and
2967 			// re-run with increased despair level. The first scan is
2968 			// conservative with respect to moving inactive modified pages to
2969 			// the modified list to avoid thrashing. The second scan, however,
2970 			// will not hold back.
2971 			if (despairLevel > 1)
2972 				snooze(kBusyScanWaitInterval);
2973 		}
2974 	}
2975 
2976 	return B_OK;
2977 }
2978 
2979 
2980 /*!	Returns how many pages could *not* be reserved.
2981 */
2982 static uint32
2983 reserve_pages(uint32 count, int priority, bool dontWait)
2984 {
2985 	int32 dontTouch = kPageReserveForPriority[priority];
2986 
2987 	while (true) {
2988 		count -= reserve_some_pages(count, dontTouch);
2989 		if (count == 0)
2990 			return 0;
2991 
2992 		if (sUnsatisfiedPageReservations == 0) {
2993 			count -= free_cached_pages(count, dontWait);
2994 			if (count == 0)
2995 				return count;
2996 		}
2997 
2998 		if (dontWait)
2999 			return count;
3000 
3001 		// we need to wait for pages to become available
3002 
3003 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3004 
3005 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3006 		sUnsatisfiedPageReservations += count;
3007 
3008 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3009 			// the situation changed
3010 			sUnsatisfiedPageReservations -= count;
3011 			continue;
3012 		}
3013 
3014 		PageReservationWaiter waiter;
3015 		waiter.dontTouch = dontTouch;
3016 		waiter.missing = count;
3017 		waiter.thread = thread_get_current_thread();
3018 		waiter.threadPriority = waiter.thread->priority;
3019 
3020 		// insert ordered (i.e. after all waiters with higher or equal priority)
3021 		PageReservationWaiter* otherWaiter = NULL;
3022 		for (PageReservationWaiterList::Iterator it
3023 				= sPageReservationWaiters.GetIterator();
3024 			(otherWaiter = it.Next()) != NULL;) {
3025 			if (waiter < *otherWaiter)
3026 				break;
3027 		}
3028 
3029 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3030 
3031 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3032 			"waiting for pages");
3033 
3034 		if (notifyDaemon)
3035 			sPageDaemonCondition.WakeUp();
3036 
3037 		pageDeficitLocker.Unlock();
3038 
3039 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3040 		thread_block();
3041 
3042 		pageDeficitLocker.Lock();
3043 
3044 		return 0;
3045 	}
3046 }
3047 
3048 
3049 //	#pragma mark - private kernel API
3050 
3051 
3052 /*!	Writes a range of modified pages of a cache to disk.
3053 	You need to hold the VMCache lock when calling this function.
3054 	Note that the cache lock is released in this function.
3055 	\param cache The cache.
3056 	\param firstPage Offset (in page size units) of the first page in the range.
3057 	\param endPage End offset (in page size units) of the page range. The page
3058 		at this offset is not included.
3059 */
3060 status_t
3061 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3062 	uint32 endPage)
3063 {
3064 	static const int32 kMaxPages = 256;
3065 	int32 maxPages = cache->MaxPagesPerWrite();
3066 	if (maxPages < 0 || maxPages > kMaxPages)
3067 		maxPages = kMaxPages;
3068 
3069 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3070 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3071 
3072 	PageWriteWrapper stackWrappersPool[2];
3073 	PageWriteWrapper* stackWrappers[1];
3074 	PageWriteWrapper* wrapperPool
3075 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3076 	PageWriteWrapper** wrappers
3077 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3078 	if (wrapperPool == NULL || wrappers == NULL) {
3079 		// don't fail, just limit our capabilities
3080 		free(wrapperPool);
3081 		free(wrappers);
3082 		wrapperPool = stackWrappersPool;
3083 		wrappers = stackWrappers;
3084 		maxPages = 1;
3085 	}
3086 
3087 	int32 nextWrapper = 0;
3088 	int32 usedWrappers = 0;
3089 
3090 	PageWriteTransfer transfer;
3091 	bool transferEmpty = true;
3092 
3093 	VMCachePagesTree::Iterator it
3094 		= cache->pages.GetIterator(firstPage, true, true);
3095 
3096 	while (true) {
3097 		vm_page* page = it.Next();
3098 		if (page == NULL || page->cache_offset >= endPage) {
3099 			if (transferEmpty)
3100 				break;
3101 
3102 			page = NULL;
3103 		}
3104 
3105 		if (page != NULL) {
3106 			if (page->busy
3107 				|| (page->State() != PAGE_STATE_MODIFIED
3108 					&& !vm_test_map_modification(page))) {
3109 				page = NULL;
3110 			}
3111 		}
3112 
3113 		PageWriteWrapper* wrapper = NULL;
3114 		if (page != NULL) {
3115 			wrapper = &wrapperPool[nextWrapper++];
3116 			if (nextWrapper > maxPages)
3117 				nextWrapper = 0;
3118 
3119 			DEBUG_PAGE_ACCESS_START(page);
3120 
3121 			wrapper->SetTo(page);
3122 
3123 			if (transferEmpty || transfer.AddPage(page)) {
3124 				if (transferEmpty) {
3125 					transfer.SetTo(NULL, page, maxPages);
3126 					transferEmpty = false;
3127 				}
3128 
3129 				DEBUG_PAGE_ACCESS_END(page);
3130 
3131 				wrappers[usedWrappers++] = wrapper;
3132 				continue;
3133 			}
3134 
3135 			DEBUG_PAGE_ACCESS_END(page);
3136 		}
3137 
3138 		if (transferEmpty)
3139 			continue;
3140 
3141 		cache->Unlock();
3142 		status_t status = transfer.Schedule(0);
3143 		cache->Lock();
3144 
3145 		for (int32 i = 0; i < usedWrappers; i++)
3146 			wrappers[i]->Done(status);
3147 
3148 		usedWrappers = 0;
3149 
3150 		if (page != NULL) {
3151 			transfer.SetTo(NULL, page, maxPages);
3152 			wrappers[usedWrappers++] = wrapper;
3153 		} else
3154 			transferEmpty = true;
3155 	}
3156 
3157 	if (wrapperPool != stackWrappersPool) {
3158 		delete[] wrapperPool;
3159 		delete[] wrappers;
3160 	}
3161 
3162 	return B_OK;
3163 }
3164 
3165 
3166 /*!	You need to hold the VMCache lock when calling this function.
3167 	Note that the cache lock is released in this function.
3168 */
3169 status_t
3170 vm_page_write_modified_pages(VMCache *cache)
3171 {
3172 	return vm_page_write_modified_page_range(cache, 0,
3173 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3174 }
3175 
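/*	Illustration (not part of the original source): a hedged usage sketch for
	the two functions above. The caller must hold the cache lock; the functions
	temporarily release it while the writes are scheduled. The cache variable
	is assumed to be a valid, referenced VMCache.

	\code
	AutoLocker<VMCache> locker(cache);

	// write back just the first 16 pages of the cache ...
	vm_page_write_modified_page_range(cache, 0, 16);

	// ... or everything that is currently modified
	vm_page_write_modified_pages(cache);
	\endcode
*/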
3176 
3177 /*!	Schedules the page writer to write back the specified \a page.
3178 	Note, however, that it might not do this immediately, and it can well
3179 	take several seconds until the page is actually written out.
3180 */
3181 void
3182 vm_page_schedule_write_page(vm_page *page)
3183 {
3184 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3185 
3186 	vm_page_requeue(page, false);
3187 
3188 	sPageWriterCondition.WakeUp();
3189 }
3190 
3191 
3192 /*!	Cache must be locked.
3193 */
3194 void
3195 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3196 	uint32 endPage)
3197 {
3198 	uint32 modified = 0;
3199 	for (VMCachePagesTree::Iterator it
3200 				= cache->pages.GetIterator(firstPage, true, true);
3201 			vm_page *page = it.Next();) {
3202 		if (page->cache_offset >= endPage)
3203 			break;
3204 
3205 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3206 			DEBUG_PAGE_ACCESS_START(page);
3207 			vm_page_requeue(page, false);
3208 			modified++;
3209 			DEBUG_PAGE_ACCESS_END(page);
3210 		}
3211 	}
3212 
3213 	if (modified > 0)
3214 		sPageWriterCondition.WakeUp();
3215 }
3216 
3217 
3218 void
3219 vm_page_init_num_pages(kernel_args *args)
3220 {
3221 	// calculate the size of memory by looking at the physical_memory_range array
3222 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3223 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3224 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3225 
3226 	sNonExistingPages = 0;
3227 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3228 
3229 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3230 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3231 		if (start > physicalPagesEnd)
3232 			sNonExistingPages += start - physicalPagesEnd;
3233 		physicalPagesEnd = start
3234 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3235 
3236 #ifdef LIMIT_AVAILABLE_MEMORY
3237 		page_num_t available
3238 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3239 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3240 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3241 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3242 			break;
3243 		}
3244 #endif
3245 	}
3246 
3247 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3248 		sPhysicalPageOffset, physicalPagesEnd));
3249 
3250 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3251 }
3252 
3253 
3254 status_t
3255 vm_page_init(kernel_args *args)
3256 {
3257 	TRACE(("vm_page_init: entry\n"));
3258 
3259 	// init page queues
3260 	sModifiedPageQueue.Init("modified pages queue");
3261 	sInactivePageQueue.Init("inactive pages queue");
3262 	sActivePageQueue.Init("active pages queue");
3263 	sCachedPageQueue.Init("cached pages queue");
3264 	sFreePageQueue.Init("free pages queue");
3265 	sClearPageQueue.Init("clear pages queue");
3266 
3267 	new (&sPageReservationWaiters) PageReservationWaiterList;
3268 
3269 	// map in the new free page table
3270 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3271 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3272 
3273 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3274 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3275 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3276 
3277 	// initialize the free page table
3278 	for (uint32 i = 0; i < sNumPages; i++) {
3279 		sPages[i].Init(sPhysicalPageOffset + i);
3280 		sFreePageQueue.Append(&sPages[i]);
3281 
3282 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3283 		sPages[i].allocation_tracking_info.Clear();
3284 #endif
3285 	}
3286 
3287 	sUnreservedFreePages = sNumPages;
3288 
3289 	TRACE(("initialized table\n"));
3290 
3291 	// mark the ranges between usable physical memory unused
3292 	phys_addr_t previousEnd = 0;
3293 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3294 		phys_addr_t base = args->physical_memory_range[i].start;
3295 		phys_size_t size = args->physical_memory_range[i].size;
3296 		if (base > previousEnd) {
3297 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3298 				(base - previousEnd) / B_PAGE_SIZE, false);
3299 		}
3300 		previousEnd = base + size;
3301 	}
3302 
3303 	// mark the allocated physical page ranges wired
3304 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3305 		mark_page_range_in_use(
3306 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3307 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3308 	}
3309 
3310 	// The target of actually free pages. This must be at least the system
3311 	// reserve, but should be a few more pages, so we don't have to extract
3312 	// a cached page with each allocation.
3313 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3314 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3315 
3316 	// The target of free + cached and inactive pages. On low-memory machines
3317 	// keep things tight. free + cached is the pool of immediately allocatable
3318 	// pages. We want a few inactive pages, so when we're actually paging, we
3319 	// have a reasonably large set of pages to work with.
3320 	if (sUnreservedFreePages < 16 * 1024) {
3321 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3322 		sInactivePagesTarget = sFreePagesTarget / 3;
3323 	} else {
3324 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3325 		sInactivePagesTarget = sFreePagesTarget / 2;
3326 	}
3327 
3328 	TRACE(("vm_page_init: exit\n"));
3329 
3330 	return B_OK;
3331 }
3332 
3333 
3334 status_t
3335 vm_page_init_post_area(kernel_args *args)
3336 {
3337 	void *dummy;
3338 
3339 	dummy = sPages;
3340 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3341 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3342 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3343 
3344 	add_debugger_command("page_stats", &dump_page_stats,
3345 		"Dump statistics about page usage");
3346 	add_debugger_command_etc("page", &dump_page,
3347 		"Dump page info",
3348 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3349 		"Prints information for the physical page. If neither \"-p\" nor\n"
3350 		"\"-v\" are given, the provided address is interpreted as address of\n"
3351 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3352 		"given, the address is the physical address of the page. If \"-v\" is\n"
3353 		"given, the address is interpreted as virtual address in the current\n"
3354 		"thread's address space, and information for the page it is mapped\n"
3355 		"to (if any) is printed. If \"-m\" is specified, the command will\n"
3356 		"search all known address spaces for mappings to that page and print\n"
3357 		"them.\n", 0);
3358 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3359 	add_debugger_command("find_page", &find_page,
3360 		"Find out which queue a page is actually in");
3361 
3362 #ifdef TRACK_PAGE_USAGE_STATS
3363 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3364 		"Dumps statistics about page usage counts",
3365 		"\n"
3366 		"Dumps statistics about page usage counts.\n",
3367 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3368 #endif
3369 
3370 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3371 	add_debugger_command_etc("page_allocations_per_caller",
3372 		&dump_page_allocations_per_caller,
3373 		"Dump current page allocations summed up per caller",
3374 		"[ -d <caller> ] [ -r ]\n"
3375 		"The current allocations will be summed up by caller (their count)\n"
3376 		"and printed in decreasing order by count.\n"
3377 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3378 		"including the respective stack trace.\n"
3379 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3380 		"the information, so the next command invocation will only show the\n"
3381 		"allocations made after the reset.\n", 0);
3382 	add_debugger_command_etc("page_allocation_infos",
3383 		&dump_page_allocation_infos,
3384 		"Dump current page allocations",
3385 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3386 		"[ --thread <thread ID> ]\n"
3387 		"The current allocations filtered by optional values will be printed.\n"
3388 		"The optional \"-p\" page number filters for a specific page,\n"
3389 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3390 		"and/or threads can be filtered (these only work if a corresponding\n"
3391 		"tracing entry is still available).\n"
3392 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3393 		"callers are printed, where available.\n", 0);
3394 #endif
3395 
3396 	return B_OK;
3397 }
3398 
3399 
3400 status_t
3401 vm_page_init_post_thread(kernel_args *args)
3402 {
3403 	new (&sFreePageCondition) ConditionVariable;
3404 	sFreePageCondition.Publish(&sFreePageQueue, "free page");
3405 
3406 	// create a kernel thread to clear out pages
3407 
3408 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3409 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3410 	resume_thread(thread);
3411 
3412 	// start page writer
3413 
3414 	sPageWriterCondition.Init("page writer");
3415 
3416 	thread = spawn_kernel_thread(&page_writer, "page writer",
3417 		B_NORMAL_PRIORITY + 1, NULL);
3418 	resume_thread(thread);
3419 
3420 	// start page daemon
3421 
3422 	sPageDaemonCondition.Init("page daemon");
3423 
3424 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3425 		B_NORMAL_PRIORITY, NULL);
3426 	resume_thread(thread);
3427 
3428 	return B_OK;
3429 }
3430 
3431 
3432 status_t
3433 vm_mark_page_inuse(page_num_t page)
3434 {
3435 	return vm_mark_page_range_inuse(page, 1);
3436 }
3437 
3438 
3439 status_t
3440 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3441 {
3442 	return mark_page_range_in_use(startPage, length, false);
3443 }
3444 
3445 
3446 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3447 */
3448 void
3449 vm_page_unreserve_pages(vm_page_reservation* reservation)
3450 {
3451 	uint32 count = reservation->count;
3452 	reservation->count = 0;
3453 
3454 	if (count == 0)
3455 		return;
3456 
3457 	TA(UnreservePages(count));
3458 
3459 	unreserve_pages(count);
3460 }
3461 
3462 
3463 /*!	With this call, you can reserve a number of free pages in the system.
3464 	They will only be handed out to someone who has actually reserved them.
3465 	This call returns as soon as the number of requested pages has been
3466 	reached.
3467 	The caller must not hold any cache lock or the function might deadlock.
3468 */
3469 void
3470 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3471 	int priority)
3472 {
3473 	reservation->count = count;
3474 
3475 	if (count == 0)
3476 		return;
3477 
3478 	TA(ReservePages(count));
3479 
3480 	reserve_pages(count, priority, false);
3481 }
3482 
3483 
3484 bool
3485 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3486 	int priority)
3487 {
3488 	if (count == 0) {
3489 		reservation->count = count;
3490 		return true;
3491 	}
3492 
3493 	uint32 remaining = reserve_pages(count, priority, true);
3494 	if (remaining == 0) {
3495 		TA(ReservePages(count));
3496 		reservation->count = count;
3497 		return true;
3498 	}
3499 
3500 	unreserve_pages(count - remaining);
3501 
3502 	return false;
3503 }
3504 
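/*	Illustration (not part of the original source): how the reservation
	functions above are typically combined with vm_page_allocate_page()
	(defined below). The priority and page state used here are just examples.

	\code
	vm_page_reservation reservation;
	if (!vm_page_try_reserve_pages(&reservation, 4, VM_PRIORITY_USER)) {
		// fall back to the blocking variant (or bail out)
		vm_page_reserve_pages(&reservation, 4, VM_PRIORITY_USER);
	}

	for (int i = 0; i < 4; i++) {
		vm_page* page = vm_page_allocate_page(&reservation,
			PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		// ... map and use the page ...
	}

	vm_page_unreserve_pages(&reservation);
		// returns any pages that were reserved but not allocated
	\endcode
*/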
3505 
3506 vm_page *
3507 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3508 {
3509 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3510 	ASSERT(pageState != PAGE_STATE_FREE);
3511 	ASSERT(pageState != PAGE_STATE_CLEAR);
3512 
3513 	ASSERT(reservation->count > 0);
3514 	reservation->count--;
3515 
3516 	VMPageQueue* queue;
3517 	VMPageQueue* otherQueue;
3518 
3519 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3520 		queue = &sClearPageQueue;
3521 		otherQueue = &sFreePageQueue;
3522 	} else {
3523 		queue = &sFreePageQueue;
3524 		otherQueue = &sClearPageQueue;
3525 	}
3526 
3527 	ReadLocker locker(sFreePageQueuesLock);
3528 
3529 	vm_page* page = queue->RemoveHeadUnlocked();
3530 	if (page == NULL) {
3531 		// if the primary queue was empty, grab the page from the
3532 		// secondary queue
3533 		page = otherQueue->RemoveHeadUnlocked();
3534 
3535 		if (page == NULL) {
3536 			// Unlikely, but possible: the page we have reserved has moved
3537 			// between the queues after we checked the first queue. Grab the
3538 			// write locker to make sure this doesn't happen again.
3539 			locker.Unlock();
3540 			WriteLocker writeLocker(sFreePageQueuesLock);
3541 
3542 			page = queue->RemoveHead();
3543 			if (page == NULL)
3544 				page = otherQueue->RemoveHead();
3545 
3546 			if (page == NULL) {
3547 				panic("Had reserved page, but there is none!");
3548 				return NULL;
3549 			}
3550 
3551 			// downgrade to read lock
3552 			locker.Lock();
3553 		}
3554 	}
3555 
3556 	if (page->CacheRef() != NULL)
3557 		panic("supposed to be free page %p has cache\n", page);
3558 
3559 	DEBUG_PAGE_ACCESS_START(page);
3560 
3561 	int oldPageState = page->State();
3562 	page->SetState(pageState);
3563 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3564 	page->usage_count = 0;
3565 	page->accessed = false;
3566 	page->modified = false;
3567 
3568 	locker.Unlock();
3569 
3570 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3571 		sPageQueues[pageState].AppendUnlocked(page);
3572 
3573 	// clear the page, if we had to take it from the free queue and a clear
3574 	// page was requested
3575 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3576 		clear_page(page);
3577 
3578 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3579 	page->allocation_tracking_info.Init(
3580 		TA(AllocatePage(page->physical_page_number)));
3581 #else
3582 	TA(AllocatePage(page->physical_page_number));
3583 #endif
3584 
3585 	return page;
3586 }
3587 
3588 
3589 static void
3590 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3591 	VMPageQueue::PageList& clearPages)
3592 {
3593 	while (vm_page* page = freePages.RemoveHead()) {
3594 		page->busy = false;
3595 		page->SetState(PAGE_STATE_FREE);
3596 		DEBUG_PAGE_ACCESS_END(page);
3597 		sFreePageQueue.PrependUnlocked(page);
3598 	}
3599 
3600 	while (vm_page* page = clearPages.RemoveHead()) {
3601 		page->busy = false;
3602 		page->SetState(PAGE_STATE_CLEAR);
3603 		DEBUG_PAGE_ACCESS_END(page);
3604 		sClearPageQueue.PrependUnlocked(page);
3605 	}
3606 }
3607 
3608 
3609 /*!	Tries to allocate a contiguous run of \a length pages starting at
3610 	index \a start.
3611 
3612 	The caller must have write-locked the free/clear page queues. The function
3613 	will unlock regardless of whether it succeeds or fails.
3614 
3615 	If the function fails, it cleans up after itself, i.e. it will free all
3616 	pages it managed to allocate.
3617 
3618 	\param start The start index (into \c sPages) of the run.
3619 	\param length The number of pages to allocate.
3620 	\param flags Page allocation flags. Encodes the state the function shall
3621 		set the allocated pages to, whether the pages shall be marked busy
3622 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3623 		(VM_PAGE_ALLOC_CLEAR).
3624 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3625 		in locked state. Will be unlocked by the function.
3626 	\return The index of the first page that could not be allocated. \a length
3627 		is returned when the function was successful.
3628 */
3629 static page_num_t
3630 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3631 	WriteLocker& freeClearQueueLocker)
3632 {
3633 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3634 	ASSERT(pageState != PAGE_STATE_FREE);
3635 	ASSERT(pageState != PAGE_STATE_CLEAR);
3636 	ASSERT(start + length <= sNumPages);
3637 
3638 	// Pull the free/clear pages out of their respective queues. Cached pages
3639 	// are allocated later.
3640 	page_num_t cachedPages = 0;
3641 	VMPageQueue::PageList freePages;
3642 	VMPageQueue::PageList clearPages;
3643 	page_num_t i = 0;
3644 	for (; i < length; i++) {
3645 		bool pageAllocated = true;
3646 		bool noPage = false;
3647 		vm_page& page = sPages[start + i];
3648 		switch (page.State()) {
3649 			case PAGE_STATE_CLEAR:
3650 				DEBUG_PAGE_ACCESS_START(&page);
3651 				sClearPageQueue.Remove(&page);
3652 				clearPages.Add(&page);
3653 				break;
3654 			case PAGE_STATE_FREE:
3655 				DEBUG_PAGE_ACCESS_START(&page);
3656 				sFreePageQueue.Remove(&page);
3657 				freePages.Add(&page);
3658 				break;
3659 			case PAGE_STATE_CACHED:
3660 				// We allocate cached pages later.
3661 				cachedPages++;
3662 				pageAllocated = false;
3663 				break;
3664 
3665 			default:
3666 				// Probably a page was cached when our caller checked. Now it's
3667 				// gone and we have to abort.
3668 				noPage = true;
3669 				break;
3670 		}
3671 
3672 		if (noPage)
3673 			break;
3674 
3675 		if (pageAllocated) {
3676 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3677 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3678 			page.usage_count = 0;
3679 			page.accessed = false;
3680 			page.modified = false;
3681 		}
3682 	}
3683 
3684 	if (i < length) {
3685 		// failed to allocate a page -- free all that we've got
3686 		allocate_page_run_cleanup(freePages, clearPages);
3687 		return i;
3688 	}
3689 
3690 	freeClearQueueLocker.Unlock();
3691 
3692 	if (cachedPages > 0) {
3693 		// allocate the pages that weren't free but cached
3694 		page_num_t freedCachedPages = 0;
3695 		page_num_t nextIndex = start;
3696 		vm_page* freePage = freePages.Head();
3697 		vm_page* clearPage = clearPages.Head();
3698 		while (cachedPages > 0) {
3699 			// skip, if we've already got the page
3700 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3701 				freePage = freePages.GetNext(freePage);
3702 				nextIndex++;
3703 				continue;
3704 			}
3705 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3706 				clearPage = clearPages.GetNext(clearPage);
3707 				nextIndex++;
3708 				continue;
3709 			}
3710 
3711 			// free the page, if it is still cached
3712 			vm_page& page = sPages[nextIndex];
3713 			if (!free_cached_page(&page, false)) {
3714 				// TODO: if the page turns out to have been freed already,
3715 				// there would be no need to fail
3716 				break;
3717 			}
3718 
3719 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3720 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3721 			page.usage_count = 0;
3722 			page.accessed = false;
3723 			page.modified = false;
3724 
3725 			freePages.InsertBefore(freePage, &page);
3726 			freedCachedPages++;
3727 			cachedPages--;
3728 			nextIndex++;
3729 		}
3730 
3731 		// If we have freed cached pages, we need to balance things.
3732 		if (freedCachedPages > 0)
3733 			unreserve_pages(freedCachedPages);
3734 
3735 		if (nextIndex - start < length) {
3736 			// failed to allocate all cached pages -- free all that we've got
3737 			freeClearQueueLocker.Lock();
3738 			allocate_page_run_cleanup(freePages, clearPages);
3739 			freeClearQueueLocker.Unlock();
3740 
3741 			return nextIndex - start;
3742 		}
3743 	}
3744 
3745 	// clear pages, if requested
3746 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3747 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3748 				vm_page* page = it.Next();) {
3749 			clear_page(page);
3750 		}
3751 	}
3752 
3753 	// add pages to target queue
3754 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3755 		freePages.MoveFrom(&clearPages);
3756 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3757 	}
3758 
3759 	// Note: We don't unreserve the pages since we pulled them out of the
3760 	// free/clear queues without adjusting sUnreservedFreePages.
3761 
3762 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3763 	AbstractTraceEntryWithStackTrace* traceEntry
3764 		= TA(AllocatePageRun(start, length));
3765 
3766 	for (page_num_t i = start; i < start + length; i++)
3767 		sPages[i].allocation_tracking_info.Init(traceEntry);
3768 #else
3769 	TA(AllocatePageRun(start, length));
3770 #endif
3771 
3772 	return length;
3773 }
3774 
3775 
3776 /*! Allocate a physically contiguous range of pages.
3777 
3778 	\param flags Page allocation flags. Encodes the state the function shall
3779 		set the allocated pages to, whether the pages shall be marked busy
3780 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3781 		(VM_PAGE_ALLOC_CLEAR).
3782 	\param length The number of contiguous pages to allocate.
3783 	\param restrictions Restrictions to the physical addresses of the page run
3784 		to allocate, including \c low_address, the first acceptable physical
3785 		address where the page run may start, \c high_address, the last
3786 		acceptable physical address where the page run may end (i.e. it must
3787 		hold \code runStartAddress + length * B_PAGE_SIZE <= high_address \endcode),
3788 		\c alignment, the alignment of the page run start address, and
3789 		\c boundary, multiples of which the page run must not cross.
3790 		Values set to \c 0 are ignored.
3791 	\param priority The page reservation priority (as passed to
3792 		vm_page_reserve_pages()).
3793 	\return The first page of the allocated page run on success; \c NULL
3794 		when the allocation failed.
3795 */
3796 vm_page*
3797 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3798 	const physical_address_restrictions* restrictions, int priority)
3799 {
3800 	// compute start and end page index
3801 	page_num_t requestedStart
3802 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3803 			- sPhysicalPageOffset;
3804 	page_num_t start = requestedStart;
3805 	page_num_t end;
3806 	if (restrictions->high_address > 0) {
3807 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3808 				sPhysicalPageOffset)
3809 			- sPhysicalPageOffset;
3810 		end = std::min(end, sNumPages);
3811 	} else
3812 		end = sNumPages;
3813 
3814 	// compute alignment mask
3815 	page_num_t alignmentMask
3816 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3817 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3818 		// alignment must be a power of 2
3819 
3820 	// compute the boundary mask
3821 	uint32 boundaryMask = 0;
3822 	if (restrictions->boundary != 0) {
3823 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3824 		// boundary must be a power of two and not less than alignment and
3825 		// length
3826 		ASSERT(((boundary - 1) & boundary) == 0);
3827 		ASSERT(boundary >= alignmentMask + 1);
3828 		ASSERT(boundary >= length);
3829 
3830 		boundaryMask = -boundary;
3831 	}
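	// Worked example (editorial comment): with B_PAGE_SIZE == 4096, an
	// alignment restriction of 64 KB yields alignmentMask == 15 (the run must
	// start at a multiple of 16 pages), and a boundary restriction of 64 KB
	// yields boundaryMask == (uint32)-16, i.e. ~15, so the run must not
	// straddle two different 16 page blocks.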
3832 
3833 	vm_page_reservation reservation;
3834 	vm_page_reserve_pages(&reservation, length, priority);
3835 
3836 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3837 
3838 	// First we try to get a run with free pages only. If that fails, we also
3839 	// consider cached pages. If there are only a few free pages and many cached
3840 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3841 	// the first iteration in this case.
3842 	int32 freePages = sUnreservedFreePages;
3843 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3844 
3845 	for (;;) {
3846 		if (alignmentMask != 0 || boundaryMask != 0) {
3847 			page_num_t offsetStart = start + sPhysicalPageOffset;
3848 
3849 			// enforce alignment
3850 			if ((offsetStart & alignmentMask) != 0)
3851 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3852 
3853 			// enforce boundary
3854 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3855 				+ length - 1)) & boundaryMask) != 0) {
3856 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3857 			}
3858 
3859 			start = offsetStart - sPhysicalPageOffset;
3860 		}
3861 
3862 		if (start + length > end) {
3863 			if (useCached == 0) {
3864 				// The first iteration with free pages only was unsuccessful.
3865 				// Try again also considering cached pages.
3866 				useCached = 1;
3867 				start = requestedStart;
3868 				continue;
3869 			}
3870 
3871 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3872 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3873 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3874 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3875 				end, restrictions->alignment, restrictions->boundary);
3876 
3877 			freeClearQueueLocker.Unlock();
3878 			vm_page_unreserve_pages(&reservation);
3879 			return NULL;
3880 		}
3881 
3882 		bool foundRun = true;
3883 		page_num_t i;
3884 		for (i = 0; i < length; i++) {
3885 			uint32 pageState = sPages[start + i].State();
3886 			if (pageState != PAGE_STATE_FREE
3887 				&& pageState != PAGE_STATE_CLEAR
3888 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3889 				foundRun = false;
3890 				break;
3891 			}
3892 		}
3893 
3894 		if (foundRun) {
3895 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3896 			if (i == length)
3897 				return &sPages[start];
3898 
3899 			// apparently a cached page couldn't be allocated -- skip it and
3900 			// continue
3901 			freeClearQueueLocker.Lock();
3902 		}
3903 
3904 		start += i + 1;
3905 	}
3906 }
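
// Usage sketch (editorial addition, not part of the original source): a
// typical call allocating a small, cleared, wired run that must lie below
// 16 MB. The concrete state flag and priority are assumptions modeled on
// other callers of this function.
//
//	physical_address_restrictions restrictions = {};
//	restrictions.high_address = 16 * 1024 * 1024;
//		// low_address, alignment and boundary stay 0, i.e. unrestricted
//	vm_page* firstPage = vm_page_allocate_page_run(
//		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR, 4, &restrictions,
//		VM_PRIORITY_SYSTEM);
//	if (firstPage == NULL)
//		; // no suitable contiguous run was available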
3907 
3908 
3909 vm_page *
3910 vm_page_at_index(int32 index)
3911 {
3912 	return &sPages[index];
3913 }
3914 
3915 
3916 vm_page *
3917 vm_lookup_page(page_num_t pageNumber)
3918 {
3919 	if (pageNumber < sPhysicalPageOffset)
3920 		return NULL;
3921 
3922 	pageNumber -= sPhysicalPageOffset;
3923 	if (pageNumber >= sNumPages)
3924 		return NULL;
3925 
3926 	return &sPages[pageNumber];
3927 }
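
// For example (editorial addition): to get the vm_page for a physical
// address, convert the address to a page number first:
//
//	vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
//	if (page == NULL)
//		; // the address lies outside the range covered by sPages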
3928 
3929 
3930 bool
3931 vm_page_is_dummy(struct vm_page *page)
3932 {
3933 	return page < sPages || page >= sPages + sNumPages;
3934 }
3935 
3936 
3937 /*!	Free the page that belonged to a certain cache.
3938 	You can use vm_page_set_state() manually if you prefer, but only
3939 	if the page's state is not PAGE_STATE_MODIFIED.
3940 
3941 	\param cache The cache the page was previously owned by or NULL. The page
3942 		must have been removed from its cache before calling this method in
3943 		either case.
3944 	\param page The page to free.
3945 	\param reservation If not NULL, the page count of the reservation will be
3946 		incremented, thus allowing another page to be allocated in place of the
3947 		freed one at a later time.
3948 */
3949 void
3950 vm_page_free_etc(VMCache* cache, vm_page* page,
3951 	vm_page_reservation* reservation)
3952 {
3953 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3954 		&& page->State() != PAGE_STATE_CLEAR);
3955 
3956 	if (page->State() == PAGE_STATE_MODIFIED && cache != NULL && cache->temporary)
3957 		atomic_add(&sModifiedTemporaryPages, -1);
3958 
3959 	free_page(page, false);
3960 	if (reservation == NULL)
3961 		unreserve_pages(1);
3962 }
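
// Usage sketch (editorial addition): freeing a page that was just removed
// from its (locked) cache while keeping a reservation balanced. The
// RemovePage() call and the surrounding locking are assumptions about the
// caller's context.
//
//	cache->RemovePage(page);
//	vm_page_free_etc(cache, page, &reservation);
//		// the freed page is credited back to "reservation" instead of the
//		// global unreserved count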
3963 
3964 
3965 void
3966 vm_page_set_state(vm_page *page, int pageState)
3967 {
3968 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3969 		&& page->State() != PAGE_STATE_CLEAR);
3970 
3971 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
3972 		free_page(page, pageState == PAGE_STATE_CLEAR);
3973 		unreserve_pages(1);
3974 	} else
3975 		set_page_state(page, pageState);
3976 }
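
// For example (editorial addition), a caller owning a page that is not in
// the modified state can release it directly to the free pool:
//
//	vm_page_set_state(page, PAGE_STATE_FREE);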
3977 
3978 
3979 /*!	Moves a page to either the tail or the head of its current queue,
3980 	depending on \a tail.
3981 	The page must have a cache and the cache must be locked!
3982 */
3983 void
3984 vm_page_requeue(struct vm_page *page, bool tail)
3985 {
3986 	PAGE_ASSERT(page, page->Cache() != NULL);
3987 	page->Cache()->AssertLocked();
3988 	// DEBUG_PAGE_ACCESS_CHECK(page);
3989 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
3990 		// when it requeues busy pages. The reason is that vm_soft_fault()
3991 		// (respectively fault_get_page()) and the file cache keep newly
3992 		// allocated pages accessed while they are reading them from disk. It
3993 		// would probably be better to change that code and reenable this
3994 		// check.
3995 
3996 	VMPageQueue *queue = NULL;
3997 
3998 	switch (page->State()) {
3999 		case PAGE_STATE_ACTIVE:
4000 			queue = &sActivePageQueue;
4001 			break;
4002 		case PAGE_STATE_INACTIVE:
4003 			queue = &sInactivePageQueue;
4004 			break;
4005 		case PAGE_STATE_MODIFIED:
4006 			queue = &sModifiedPageQueue;
4007 			break;
4008 		case PAGE_STATE_CACHED:
4009 			queue = &sCachedPageQueue;
4010 			break;
4011 		case PAGE_STATE_FREE:
4012 		case PAGE_STATE_CLEAR:
4013 			panic("vm_page_requeue() called for free/clear page %p", page);
4014 			return;
4015 		case PAGE_STATE_WIRED:
4016 		case PAGE_STATE_UNUSED:
4017 			return;
4018 		default:
4019 			panic("vm_page_requeue(): vm_page %p in invalid state %d\n",
4020 				page, page->State());
4021 			break;
4022 	}
4023 
4024 	queue->RequeueUnlocked(page, tail);
4025 }
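
// For example (editorial addition), a scanner that notices a page was
// accessed again may push it to the back of its current queue (the page's
// cache must be locked by the caller):
//
//	vm_page_requeue(page, true);	// true == move to the tail of the queue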
4026 
4027 
4028 page_num_t
4029 vm_page_num_pages(void)
4030 {
4031 	return sNumPages - sNonExistingPages;
4032 }
4033 
4034 
4035 /*! There is a subtle distinction between the page counts returned by
4036 	this function and vm_page_num_free_pages():
4037 	The latter returns the number of pages that are completely uncommitted,
4038 	whereas this one also counts pages that can be made available by
4039 	reclaiming them (in other words, it factors in things like cached pages
4040 	as well).
4041 */
4042 page_num_t
4043 vm_page_num_available_pages(void)
4044 {
4045 	return vm_available_memory() / B_PAGE_SIZE;
4046 }
4047 
4048 
4049 page_num_t
4050 vm_page_num_free_pages(void)
4051 {
4052 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4053 	return count > 0 ? count : 0;
4054 }
4055 
4056 
4057 page_num_t
4058 vm_page_num_unused_pages(void)
4059 {
4060 	int32 count = sUnreservedFreePages;
4061 	return count > 0 ? count : 0;
4062 }
4063 
4064 
4065 void
4066 vm_page_get_stats(system_info *info)
4067 {
4068 	// Note: there's no locking protecting any of the queues or counters here,
4069 	// so we run the risk of getting bogus values when evaluating them
4070 	// throughout this function. As these stats are for informational purposes
4071 	// only, it is not really worth introducing such locking. Therefore we just
4072 	// ensure that we don't under- or overflow any of the values.
4073 
4074 	// The pages used for the block cache buffers. Those should not be counted
4075 	// as used but as cached pages.
4076 	// TODO: We should subtract the blocks that are in use ATM, since those
4077 	// can't really be freed in a low memory situation.
4078 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4079 	info->block_cache_pages = blockCachePages;
4080 
4081 	// Non-temporary modified pages are special as they represent pages that
4082 	// can be written back and thus could be freed if necessary, which for our
4083 	// purposes basically makes them cached pages with a higher overhead. The
4084 	// modified queue count is therefore split into temporary and non-temporary
4085 	// counts that are then added to the corresponding number.
4086 	page_num_t modifiedNonTemporaryPages
4087 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4088 
4089 	info->max_pages = vm_page_num_pages();
4090 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4091 		+ blockCachePages;
4092 
4093 	// max_pages is composed of:
4094 	//	active + inactive + unused + wired + modified + cached + free + clear
4095 	// So taking out the cached (including modified non-temporary), free and
4096 	// clear ones leaves us with all used pages.
4097 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4098 		+ sClearPageQueue.Count();
4099 	info->used_pages = subtractPages > info->max_pages
4100 		? 0 : info->max_pages - subtractPages;
4101 
4102 	if (info->used_pages + info->cached_pages > info->max_pages) {
4103 		// Something was shuffled around while we were summing up the counts.
4104 		// Make the values sane, preferring the worse case of more used pages.
4105 		info->cached_pages = info->max_pages - info->used_pages;
4106 	}
4107 
4108 	info->page_faults = vm_num_page_faults();
4109 	info->ignored_pages = sIgnoredPages;
4110 
4111 	// TODO: We don't consider pages used for page directories/tables yet.
4112 }
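
// Editorial note: the fields filled in above surface to userland through
// get_system_info(). A minimal sketch of how an application might read them
// (assuming the public system_info fields mirror the ones set here):
//
//	system_info info;
//	if (get_system_info(&info) == B_OK) {
//		uint64 usedBytes = (uint64)info.used_pages * B_PAGE_SIZE;
//		uint64 cachedBytes = (uint64)info.cached_pages * B_PAGE_SIZE;
//		printf("used: %" B_PRIu64 " B, cached: %" B_PRIu64 " B\n",
//			usedBytes, cachedBytes);
//	}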
4113 
4114 
4115 /*!	Returns the greatest address within the last page of accessible physical
4116 	memory.
4117 	The value is inclusive, i.e. with a 32 bit phys_addr_t a value of
4118 	0xffffffff means that the last page ends at exactly 4 GB.
4119 */
4120 phys_addr_t
4121 vm_page_max_address()
4122 {
4123 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4124 }
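
// For example (editorial addition), a driver limited to 32 bit DMA addresses
// could use this to decide whether bounce buffers may be needed:
//
//	bool needsBounceBuffers = vm_page_max_address() > (phys_addr_t)0xffffffff;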
4125 
4126 
4127 RANGE_MARKER_FUNCTION_END(vm_page)
4128