xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
1 /*
2  * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <elf.h>
27 #include <heap.h>
28 #include <kernel.h>
29 #include <low_resource_manager.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <vfs.h>
34 #include <vm/vm.h>
35 #include <vm/vm_priv.h>
36 #include <vm/vm_page.h>
37 #include <vm/VMAddressSpace.h>
38 #include <vm/VMArea.h>
39 #include <vm/VMCache.h>
40 
41 #include "IORequest.h"
42 #include "PageCacheLocker.h"
43 #include "VMAnonymousCache.h"
44 #include "VMPageQueue.h"
45 
46 
47 //#define TRACE_VM_PAGE
48 #ifdef TRACE_VM_PAGE
49 #	define TRACE(x) dprintf x
50 #else
51 #	define TRACE(x) ;
52 #endif
53 
54 //#define TRACE_VM_DAEMONS
55 #ifdef TRACE_VM_DAEMONS
56 #define TRACE_DAEMON(x...) dprintf(x)
57 #else
58 #define TRACE_DAEMON(x...) do {} while (false)
59 #endif
60 
61 //#define TRACK_PAGE_USAGE_STATS	1
62 
63 #define PAGE_ASSERT(page, condition)	\
64 	ASSERT_PRINT((condition), "page: %p", (page))
65 
66 #define SCRUB_SIZE 32
67 	// this many pages will be cleared at once in the page scrubber thread
68 
69 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
70 	// maximum I/O priority of the page writer
71 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
72 	// the maximum I/O priority shall be reached when this many pages need to
73 	// be written
74 
75 
// The page reserve that an allocation of a given priority must not touch.
77 static const size_t kPageReserveForPriority[] = {
78 	VM_PAGE_RESERVE_USER,		// user
79 	VM_PAGE_RESERVE_SYSTEM,		// system
80 	0							// VIP
81 };
82 
83 // Minimum number of free pages the page daemon will try to achieve.
84 static uint32 sFreePagesTarget;
85 static uint32 sFreeOrCachedPagesTarget;
86 static uint32 sInactivePagesTarget;
87 
88 // Wait interval between page daemon runs.
89 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
90 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
91 
92 // Number of idle runs after which we want to have processed the full active
93 // queue.
94 static const uint32 kIdleRunsForFullQueue = 20;
95 
96 // Maximum limit for the vm_page::usage_count.
97 static const int32 kPageUsageMax = 64;
98 // vm_page::usage_count buff an accessed page receives in a scan.
99 static const int32 kPageUsageAdvance = 3;
100 // vm_page::usage_count debuff an unaccessed page receives in a scan.
101 static const int32 kPageUsageDecline = 1;
102 
103 int32 gMappedPagesCount;
104 
105 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106 
107 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
113 
114 static vm_page *sPages;
115 static page_num_t sPhysicalPageOffset;
116 static page_num_t sNumPages;
117 static page_num_t sNonExistingPages;
118 	// pages in the sPages array that aren't backed by physical memory
119 static uint64 sIgnoredPages;
120 	// pages of physical memory ignored by the boot loader (and thus not
121 	// available here)
122 static int32 sUnreservedFreePages;
123 static int32 sUnsatisfiedPageReservations;
124 static int32 sModifiedTemporaryPages;
125 
126 static ConditionVariable sFreePageCondition;
127 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
128 
129 // This lock must be used whenever the free or clear page queues are changed.
130 // If you need to work on both queues at the same time, you need to hold a write
131 // lock, otherwise, a read lock suffices (each queue still has a spinlock to
132 // guard against concurrent changes).
133 static rw_lock sFreePageQueuesLock
134 	= RW_LOCK_INITIALIZER("free/clear page queues");
135 
136 #ifdef TRACK_PAGE_USAGE_STATS
137 static page_num_t sPageUsageArrays[512];
138 static page_num_t* sPageUsage = sPageUsageArrays;
139 static page_num_t sPageUsagePageCount;
140 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
141 static page_num_t sNextPageUsagePageCount;
142 #endif
143 
144 
145 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
146 
147 struct caller_info {
148 	addr_t		caller;
149 	size_t		count;
150 };
151 
152 static const int32 kCallerInfoTableSize = 1024;
153 static caller_info sCallerInfoTable[kCallerInfoTableSize];
154 static int32 sCallerInfoCount = 0;
155 
156 static caller_info* get_caller_info(addr_t caller);
157 
158 
159 RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)
160 
161 static const addr_t kVMPageCodeAddressRange[] = {
162 	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
163 };
164 
165 #endif
166 
167 
168 RANGE_MARKER_FUNCTION_BEGIN(vm_page)
169 
170 
171 struct page_stats {
172 	int32	totalFreePages;
173 	int32	unsatisfiedReservations;
174 	int32	cachedPages;
175 };
176 
177 
178 struct PageReservationWaiter
179 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
180 	Thread*	thread;
181 	uint32	dontTouch;		// reserve not to touch
182 	uint32	missing;		// pages missing for the reservation
183 	int32	threadPriority;
184 
185 	bool operator<(const PageReservationWaiter& other) const
186 	{
187 		// Implies an order by descending VM priority (ascending dontTouch)
188 		// and (secondarily) descending thread priority.
189 		if (dontTouch != other.dontTouch)
190 			return dontTouch < other.dontTouch;
191 		return threadPriority > other.threadPriority;
192 	}
193 };
194 
195 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
196 static PageReservationWaiterList sPageReservationWaiters;
197 
198 
199 struct DaemonCondition {
200 	void Init(const char* name)
201 	{
202 		mutex_init(&fLock, "daemon condition");
203 		fCondition.Init(this, name);
204 		fActivated = false;
205 	}
206 
207 	bool Lock()
208 	{
209 		return mutex_lock(&fLock) == B_OK;
210 	}
211 
212 	void Unlock()
213 	{
214 		mutex_unlock(&fLock);
215 	}
216 
217 	bool Wait(bigtime_t timeout, bool clearActivated)
218 	{
219 		MutexLocker locker(fLock);
220 		if (clearActivated)
221 			fActivated = false;
222 		else if (fActivated)
223 			return true;
224 
225 		ConditionVariableEntry entry;
226 		fCondition.Add(&entry);
227 
228 		locker.Unlock();
229 
230 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
231 	}
232 
233 	void WakeUp()
234 	{
235 		if (fActivated)
236 			return;
237 
238 		MutexLocker locker(fLock);
239 		fActivated = true;
240 		fCondition.NotifyOne();
241 	}
242 
243 	void ClearActivated()
244 	{
245 		MutexLocker locker(fLock);
246 		fActivated = false;
247 	}
248 
249 private:
250 	mutex				fLock;
251 	ConditionVariable	fCondition;
252 	bool				fActivated;
253 };
254 
255 
256 static DaemonCondition sPageWriterCondition;
257 static DaemonCondition sPageDaemonCondition;
258 
259 
260 #if PAGE_ALLOCATION_TRACING
261 
262 namespace PageAllocationTracing {
263 
264 class ReservePages : public AbstractTraceEntry {
265 public:
266 	ReservePages(uint32 count)
267 		:
268 		fCount(count)
269 	{
270 		Initialized();
271 	}
272 
273 	virtual void AddDump(TraceOutput& out)
274 	{
275 		out.Print("page reserve:   %" B_PRIu32, fCount);
276 	}
277 
278 private:
279 	uint32		fCount;
280 };
281 
282 
283 class UnreservePages : public AbstractTraceEntry {
284 public:
285 	UnreservePages(uint32 count)
286 		:
287 		fCount(count)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page unreserve: %" B_PRId32, fCount);
295 	}
296 
297 private:
298 	uint32		fCount;
299 };
300 
301 
302 class AllocatePage
303 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
304 public:
305 	AllocatePage(page_num_t pageNumber)
306 		:
307 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
308 		fPageNumber(pageNumber)
309 	{
310 		Initialized();
311 	}
312 
313 	virtual void AddDump(TraceOutput& out)
314 	{
315 		out.Print("page alloc: %#" B_PRIxPHYSADDR, fPageNumber);
316 	}
317 
318 private:
319 	page_num_t	fPageNumber;
320 };
321 
322 
323 class AllocatePageRun
324 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
325 public:
326 	AllocatePageRun(page_num_t startPage, uint32 length)
327 		:
328 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
329 		fStartPage(startPage),
330 		fLength(length)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		out.Print("page alloc run: start %#" B_PRIxPHYSADDR " length: %"
338 			B_PRIu32, fStartPage, fLength);
339 	}
340 
341 private:
342 	page_num_t	fStartPage;
343 	uint32		fLength;
344 };
345 
346 
347 class FreePage
348 	: public TRACE_ENTRY_SELECTOR(PAGE_ALLOCATION_TRACING_STACK_TRACE) {
349 public:
350 	FreePage(page_num_t pageNumber)
351 		:
352 		TraceEntryBase(PAGE_ALLOCATION_TRACING_STACK_TRACE, 0, true),
353 		fPageNumber(pageNumber)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual void AddDump(TraceOutput& out)
359 	{
360 		out.Print("page free: %#" B_PRIxPHYSADDR, fPageNumber);
361 	}
362 
363 private:
364 	page_num_t	fPageNumber;
365 };
366 
367 
368 class ScrubbingPages : public AbstractTraceEntry {
369 public:
370 	ScrubbingPages(uint32 count)
371 		:
372 		fCount(count)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page scrubbing: %" B_PRId32, fCount);
380 	}
381 
382 private:
383 	uint32		fCount;
384 };
385 
386 
387 class ScrubbedPages : public AbstractTraceEntry {
388 public:
389 	ScrubbedPages(uint32 count)
390 		:
391 		fCount(count)
392 	{
393 		Initialized();
394 	}
395 
396 	virtual void AddDump(TraceOutput& out)
397 	{
398 		out.Print("page scrubbed:  %" B_PRId32, fCount);
399 	}
400 
401 private:
402 	uint32		fCount;
403 };
404 
405 
406 class StolenPage : public AbstractTraceEntry {
407 public:
408 	StolenPage()
409 	{
410 		Initialized();
411 	}
412 
413 	virtual void AddDump(TraceOutput& out)
414 	{
415 		out.Print("page stolen");
416 	}
417 };
418 
419 }	// namespace PageAllocationTracing
420 
421 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
422 
423 #else
424 #	define TA(x)
425 #endif	// PAGE_ALLOCATION_TRACING
426 
427 
428 #if PAGE_DAEMON_TRACING
429 
430 namespace PageDaemonTracing {
431 
432 class ActivatePage : public AbstractTraceEntry {
433 	public:
434 		ActivatePage(vm_page* page)
435 			:
436 			fCache(page->cache),
437 			fPage(page)
438 		{
439 			Initialized();
440 		}
441 
442 		virtual void AddDump(TraceOutput& out)
443 		{
444 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
445 		}
446 
447 	private:
448 		VMCache*	fCache;
449 		vm_page*	fPage;
450 };
451 
452 
453 class DeactivatePage : public AbstractTraceEntry {
454 	public:
455 		DeactivatePage(vm_page* page)
456 			:
457 			fCache(page->cache),
458 			fPage(page)
459 		{
460 			Initialized();
461 		}
462 
463 		virtual void AddDump(TraceOutput& out)
464 		{
465 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
466 		}
467 
468 	private:
469 		VMCache*	fCache;
470 		vm_page*	fPage;
471 };
472 
473 
474 class FreedPageSwap : public AbstractTraceEntry {
475 	public:
476 		FreedPageSwap(vm_page* page)
477 			:
478 			fCache(page->cache),
479 			fPage(page)
480 		{
481 			Initialized();
482 		}
483 
484 		virtual void AddDump(TraceOutput& out)
485 		{
486 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
487 		}
488 
489 	private:
490 		VMCache*	fCache;
491 		vm_page*	fPage;
492 };
493 
494 }	// namespace PageDaemonTracing
495 
496 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
497 
498 #else
499 #	define TD(x)
500 #endif	// PAGE_DAEMON_TRACING
501 
502 
503 #if PAGE_WRITER_TRACING
504 
505 namespace PageWriterTracing {
506 
507 class WritePage : public AbstractTraceEntry {
508 	public:
509 		WritePage(vm_page* page)
510 			:
511 			fCache(page->Cache()),
512 			fPage(page)
513 		{
514 			Initialized();
515 		}
516 
517 		virtual void AddDump(TraceOutput& out)
518 		{
519 			out.Print("page write: %p, cache: %p", fPage, fCache);
520 		}
521 
522 	private:
523 		VMCache*	fCache;
524 		vm_page*	fPage;
525 };
526 
527 }	// namespace PageWriterTracing
528 
529 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
530 
531 #else
532 #	define TPW(x)
533 #endif	// PAGE_WRITER_TRACING
534 
535 
536 #if PAGE_STATE_TRACING
537 
538 namespace PageStateTracing {
539 
540 class SetPageState : public AbstractTraceEntry {
541 	public:
542 		SetPageState(vm_page* page, uint8 newState)
543 			:
544 			fPage(page),
545 			fOldState(page->State()),
546 			fNewState(newState),
547 			fBusy(page->busy),
548 			fWired(page->WiredCount() > 0),
549 			fMapped(!page->mappings.IsEmpty()),
550 			fAccessed(page->accessed),
551 			fModified(page->modified)
552 		{
553 #if PAGE_STATE_TRACING_STACK_TRACE
554 			fStackTrace = capture_tracing_stack_trace(
555 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
556 				// Don't capture userland stack trace to avoid potential
557 				// deadlocks.
558 #endif
559 			Initialized();
560 		}
561 
562 #if PAGE_STATE_TRACING_STACK_TRACE
563 		virtual void DumpStackTrace(TraceOutput& out)
564 		{
565 			out.PrintStackTrace(fStackTrace);
566 		}
567 #endif
568 
569 		virtual void AddDump(TraceOutput& out)
570 		{
571 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
572 				fBusy ? 'b' : '-',
573 				fWired ? 'w' : '-',
574 				fMapped ? 'm' : '-',
575 				fAccessed ? 'a' : '-',
576 				fModified ? 'm' : '-',
577 				page_state_to_string(fOldState),
578 				page_state_to_string(fNewState));
579 		}
580 
581 	private:
582 		vm_page*	fPage;
583 #if PAGE_STATE_TRACING_STACK_TRACE
584 		tracing_stack_trace* fStackTrace;
585 #endif
586 		uint8		fOldState;
587 		uint8		fNewState;
588 		bool		fBusy : 1;
589 		bool		fWired : 1;
590 		bool		fMapped : 1;
591 		bool		fAccessed : 1;
592 		bool		fModified : 1;
593 };
594 
595 }	// namespace PageStateTracing
596 
597 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
598 
599 #else
600 #	define TPS(x)
601 #endif	// PAGE_STATE_TRACING
602 
603 
604 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
605 
606 namespace BKernel {
607 
608 class AllocationTrackingCallback {
609 public:
610 	virtual						~AllocationTrackingCallback();
611 
612 	virtual	bool				ProcessTrackingInfo(
613 									AllocationTrackingInfo* info,
614 									page_num_t pageNumber) = 0;
615 };
616 
617 }
618 
619 using BKernel::AllocationTrackingCallback;
620 
621 
622 class AllocationCollectorCallback : public AllocationTrackingCallback {
623 public:
624 	AllocationCollectorCallback(bool resetInfos)
625 		:
626 		fResetInfos(resetInfos)
627 	{
628 	}
629 
630 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
631 		page_num_t pageNumber)
632 	{
633 		if (!info->IsInitialized())
634 			return true;
635 
636 		addr_t caller = 0;
637 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
638 
639 		if (traceEntry != NULL && info->IsTraceEntryValid()) {
640 			caller = tracing_find_caller_in_stack_trace(
641 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
642 		}
643 
644 		caller_info* callerInfo = get_caller_info(caller);
645 		if (callerInfo == NULL) {
646 			kprintf("out of space for caller infos\n");
647 			return false;
648 		}
649 
650 		callerInfo->count++;
651 
652 		if (fResetInfos)
653 			info->Clear();
654 
655 		return true;
656 	}
657 
658 private:
659 	bool	fResetInfos;
660 };
661 
662 
663 class AllocationInfoPrinterCallback : public AllocationTrackingCallback {
664 public:
665 	AllocationInfoPrinterCallback(bool printStackTrace, page_num_t pageFilter,
666 		team_id teamFilter, thread_id threadFilter)
667 		:
668 		fPrintStackTrace(printStackTrace),
669 		fPageFilter(pageFilter),
670 		fTeamFilter(teamFilter),
671 		fThreadFilter(threadFilter)
672 	{
673 	}
674 
675 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
676 		page_num_t pageNumber)
677 	{
678 		if (!info->IsInitialized())
679 			return true;
680 
681 		if (fPageFilter != 0 && pageNumber != fPageFilter)
682 			return true;
683 
684 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
685 		if (traceEntry != NULL && !info->IsTraceEntryValid())
686 			traceEntry = NULL;
687 
688 		if (traceEntry != NULL) {
689 			if (fTeamFilter != -1 && traceEntry->TeamID() != fTeamFilter)
690 				return true;
691 			if (fThreadFilter != -1 && traceEntry->ThreadID() != fThreadFilter)
692 				return true;
693 		} else {
694 			// we need the info if we have filters set
695 			if (fTeamFilter != -1 || fThreadFilter != -1)
696 				return true;
697 		}
698 
699 		kprintf("page number %#" B_PRIxPHYSADDR, pageNumber);
700 
701 		if (traceEntry != NULL) {
702 			kprintf(", team: %" B_PRId32 ", thread %" B_PRId32
703 				", time %" B_PRId64 "\n", traceEntry->TeamID(),
704 				traceEntry->ThreadID(), traceEntry->Time());
705 
706 			if (fPrintStackTrace)
707 				tracing_print_stack_trace(traceEntry->StackTrace());
708 		} else
709 			kprintf("\n");
710 
711 		return true;
712 	}
713 
714 private:
715 	bool		fPrintStackTrace;
716 	page_num_t	fPageFilter;
717 	team_id		fTeamFilter;
718 	thread_id	fThreadFilter;
719 };
720 
721 
722 class AllocationDetailPrinterCallback : public AllocationTrackingCallback {
723 public:
724 	AllocationDetailPrinterCallback(addr_t caller)
725 		:
726 		fCaller(caller)
727 	{
728 	}
729 
730 	virtual bool ProcessTrackingInfo(AllocationTrackingInfo* info,
731 		page_num_t pageNumber)
732 	{
733 		if (!info->IsInitialized())
734 			return true;
735 
736 		addr_t caller = 0;
737 		AbstractTraceEntryWithStackTrace* traceEntry = info->TraceEntry();
738 		if (traceEntry != NULL && !info->IsTraceEntryValid())
739 			traceEntry = NULL;
740 
741 		if (traceEntry != NULL) {
742 			caller = tracing_find_caller_in_stack_trace(
743 				traceEntry->StackTrace(), kVMPageCodeAddressRange, 1);
744 		}
745 
746 		if (caller != fCaller)
747 			return true;
748 
749 		kprintf("page %#" B_PRIxPHYSADDR "\n", pageNumber);
750 		if (traceEntry != NULL)
751 			tracing_print_stack_trace(traceEntry->StackTrace());
752 
753 		return true;
754 	}
755 
756 private:
757 	addr_t	fCaller;
758 };
759 
760 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
761 
762 
763 static void
764 list_page(vm_page* page)
765 {
766 	kprintf("0x%08" B_PRIxADDR " ",
767 		(addr_t)(page->physical_page_number * B_PAGE_SIZE));
768 	switch (page->State()) {
769 		case PAGE_STATE_ACTIVE:   kprintf("A"); break;
770 		case PAGE_STATE_INACTIVE: kprintf("I"); break;
771 		case PAGE_STATE_MODIFIED: kprintf("M"); break;
772 		case PAGE_STATE_CACHED:   kprintf("C"); break;
773 		case PAGE_STATE_FREE:     kprintf("F"); break;
774 		case PAGE_STATE_CLEAR:    kprintf("L"); break;
775 		case PAGE_STATE_WIRED:    kprintf("W"); break;
776 		case PAGE_STATE_UNUSED:   kprintf("-"); break;
777 	}
778 	kprintf(" ");
779 	if (page->busy)         kprintf("B"); else kprintf("-");
780 	if (page->busy_writing) kprintf("W"); else kprintf("-");
781 	if (page->accessed)     kprintf("A"); else kprintf("-");
782 	if (page->modified)     kprintf("M"); else kprintf("-");
783 	kprintf("-");
784 
785 	kprintf(" usage:%3u", page->usage_count);
786 	kprintf(" wired:%5u", page->WiredCount());
787 
788 	bool first = true;
789 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
790 	vm_page_mapping* mapping;
791 	while ((mapping = iterator.Next()) != NULL) {
792 		if (first) {
793 			kprintf(": ");
794 			first = false;
795 		} else
796 			kprintf(", ");
797 
798 		kprintf("%" B_PRId32 " (%s)", mapping->area->id, mapping->area->name);
799 		mapping = mapping->page_link.next;
800 	}
801 }
802 
803 
804 static int
805 dump_page_list(int argc, char **argv)
806 {
807 	kprintf("page table:\n");
808 	for (page_num_t i = 0; i < sNumPages; i++) {
809 		if (sPages[i].State() != PAGE_STATE_UNUSED) {
810 			list_page(&sPages[i]);
811 			kprintf("\n");
812 		}
813 	}
814 	kprintf("end of page table\n");
815 
816 	return 0;
817 }
818 
819 
820 static int
821 find_page(int argc, char **argv)
822 {
823 	struct vm_page *page;
824 	addr_t address;
825 	int32 index = 1;
826 	int i;
827 
828 	struct {
829 		const char*	name;
830 		VMPageQueue*	queue;
831 	} pageQueueInfos[] = {
832 		{ "free",		&sFreePageQueue },
833 		{ "clear",		&sClearPageQueue },
834 		{ "modified",	&sModifiedPageQueue },
835 		{ "active",		&sActivePageQueue },
836 		{ "inactive",	&sInactivePageQueue },
837 		{ "cached",		&sCachedPageQueue },
838 		{ NULL, NULL }
839 	};
840 
841 	if (argc < 2
842 		|| strlen(argv[index]) <= 2
843 		|| argv[index][0] != '0'
844 		|| argv[index][1] != 'x') {
845 		kprintf("usage: find_page <address>\n");
846 		return 0;
847 	}
848 
849 	address = strtoul(argv[index], NULL, 0);
850 	page = (vm_page*)address;
851 
852 	for (i = 0; pageQueueInfos[i].name; i++) {
853 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
854 		while (vm_page* p = it.Next()) {
855 			if (p == page) {
856 				kprintf("found page %p in queue %p (%s)\n", page,
857 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
858 				return 0;
859 			}
860 		}
861 	}
862 
863 	kprintf("page %p isn't in any queue\n", page);
864 
865 	return 0;
866 }
867 
868 
869 const char *
870 page_state_to_string(int state)
871 {
872 	switch(state) {
873 		case PAGE_STATE_ACTIVE:
874 			return "active";
875 		case PAGE_STATE_INACTIVE:
876 			return "inactive";
877 		case PAGE_STATE_MODIFIED:
878 			return "modified";
879 		case PAGE_STATE_CACHED:
880 			return "cached";
881 		case PAGE_STATE_FREE:
882 			return "free";
883 		case PAGE_STATE_CLEAR:
884 			return "clear";
885 		case PAGE_STATE_WIRED:
886 			return "wired";
887 		case PAGE_STATE_UNUSED:
888 			return "unused";
889 		default:
890 			return "unknown";
891 	}
892 }
893 
894 
895 static int
896 dump_page_long(int argc, char **argv)
897 {
898 	bool addressIsPointer = true;
899 	bool physical = false;
900 	bool searchMappings = false;
901 	int32 index = 1;
902 
903 	while (index < argc) {
904 		if (argv[index][0] != '-')
905 			break;
906 
907 		if (!strcmp(argv[index], "-p")) {
908 			addressIsPointer = false;
909 			physical = true;
910 		} else if (!strcmp(argv[index], "-v")) {
911 			addressIsPointer = false;
912 		} else if (!strcmp(argv[index], "-m")) {
913 			searchMappings = true;
914 		} else {
915 			print_debugger_command_usage(argv[0]);
916 			return 0;
917 		}
918 
919 		index++;
920 	}
921 
922 	if (index + 1 != argc) {
923 		print_debugger_command_usage(argv[0]);
924 		return 0;
925 	}
926 
927 	uint64 value;
928 	if (!evaluate_debug_expression(argv[index], &value, false))
929 		return 0;
930 
931 	uint64 pageAddress = value;
932 	struct vm_page* page;
933 
934 	if (addressIsPointer) {
935 		page = (struct vm_page *)(addr_t)pageAddress;
936 	} else {
937 		if (!physical) {
938 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
939 
940 			if (debug_get_debugged_thread()->team->address_space != NULL)
941 				addressSpace = debug_get_debugged_thread()->team->address_space;
942 
943 			uint32 flags = 0;
944 			phys_addr_t physicalAddress;
945 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
946 					&physicalAddress, &flags) != B_OK
947 				|| (flags & PAGE_PRESENT) == 0) {
948 				kprintf("Virtual address not mapped to a physical page in this "
949 					"address space.\n");
950 				return 0;
951 			}
952 			pageAddress = physicalAddress;
953 		}
954 
955 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
956 	}
957 
958 	if (page == NULL) {
959 		kprintf("Page not found.\n");
960 		return 0;
961 	}
962 
963 	kprintf("PAGE: %p\n", page);
964 
965 	const off_t pageOffset = (addr_t)page - (addr_t)sPages;
966 	const off_t pageIndex = pageOffset / (off_t)sizeof(vm_page);
967 	if (pageIndex < 0) {
968 		kprintf("\taddress is before start of page array!"
969 			" (offset %" B_PRIdOFF ")\n", pageOffset);
970 	} else if ((page_num_t)pageIndex >= sNumPages) {
971 		kprintf("\taddress is after end of page array!"
972 			" (offset %" B_PRIdOFF ")\n", pageOffset);
973 	} else if ((pageIndex * (off_t)sizeof(vm_page)) != pageOffset) {
974 		kprintf("\taddress isn't a multiple of page structure size!"
975 			" (offset %" B_PRIdOFF ", expected align %" B_PRIuSIZE ")\n",
976 			pageOffset, sizeof(vm_page));
977 	}
978 
979 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
980 		page->queue_link.previous);
981 	kprintf("physical_number: %#" B_PRIxPHYSADDR "\n", page->physical_page_number);
982 	kprintf("cache:           %p\n", page->Cache());
983 	kprintf("cache_offset:    %" B_PRIuPHYSADDR "\n", page->cache_offset);
984 	kprintf("cache_next:      %p\n", page->cache_next);
985 	kprintf("state:           %s\n", page_state_to_string(page->State()));
986 	kprintf("wired_count:     %d\n", page->WiredCount());
987 	kprintf("usage_count:     %d\n", page->usage_count);
988 	kprintf("busy:            %d\n", page->busy);
989 	kprintf("busy_writing:    %d\n", page->busy_writing);
990 	kprintf("accessed:        %d\n", page->accessed);
991 	kprintf("modified:        %d\n", page->modified);
992 #if DEBUG_PAGE_QUEUE
993 	kprintf("queue:           %p\n", page->queue);
994 #endif
995 #if DEBUG_PAGE_ACCESS
996 	kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
997 #endif
998 
999 	if (pageIndex < 0 || (page_num_t)pageIndex >= sNumPages) {
1000 		// Don't try to read the mappings.
1001 		return 0;
1002 	}
1003 
1004 	kprintf("area mappings:\n");
1005 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
1006 	vm_page_mapping *mapping;
1007 	while ((mapping = iterator.Next()) != NULL) {
1008 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
1009 		mapping = mapping->page_link.next;
1010 	}
1011 
1012 	if (searchMappings) {
1013 		kprintf("all mappings:\n");
1014 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
1015 		while (addressSpace != NULL) {
1016 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
1017 			for (addr_t address = addressSpace->Base(); pageCount != 0;
1018 					address += B_PAGE_SIZE, pageCount--) {
1019 				phys_addr_t physicalAddress;
1020 				uint32 flags = 0;
1021 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
1022 						&physicalAddress, &flags) == B_OK
1023 					&& (flags & PAGE_PRESENT) != 0
1024 					&& physicalAddress / B_PAGE_SIZE
1025 						== page->physical_page_number) {
1026 					VMArea* area = addressSpace->LookupArea(address);
1027 					kprintf("  aspace %" B_PRId32 ", area %" B_PRId32 ": %#"
1028 						B_PRIxADDR " (%c%c%s%s)\n", addressSpace->ID(),
1029 						area != NULL ? area->id : -1, address,
1030 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
1031 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
1032 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
1033 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
1034 				}
1035 			}
1036 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
1037 		}
1038 	}
1039 
1040 	set_debug_variable("_cache", (addr_t)page->Cache());
1041 #if DEBUG_PAGE_ACCESS
1042 	set_debug_variable("_accessor", page->accessing_thread);
1043 #endif
1044 
1045 	return 0;
1046 }
1047 
1048 
1049 static int
1050 dump_page_queue(int argc, char **argv)
1051 {
1052 	struct VMPageQueue *queue;
1053 
1054 	if (argc < 2) {
1055 		kprintf("usage: page_queue <address/name> [list]\n");
1056 		return 0;
1057 	}
1058 
1059 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
1060 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
1061 	else if (!strcmp(argv[1], "free"))
1062 		queue = &sFreePageQueue;
1063 	else if (!strcmp(argv[1], "clear"))
1064 		queue = &sClearPageQueue;
1065 	else if (!strcmp(argv[1], "modified"))
1066 		queue = &sModifiedPageQueue;
1067 	else if (!strcmp(argv[1], "active"))
1068 		queue = &sActivePageQueue;
1069 	else if (!strcmp(argv[1], "inactive"))
1070 		queue = &sInactivePageQueue;
1071 	else if (!strcmp(argv[1], "cached"))
1072 		queue = &sCachedPageQueue;
1073 	else {
1074 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
1075 		return 0;
1076 	}
1077 
1078 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %"
1079 		B_PRIuPHYSADDR "\n", queue, queue->Head(), queue->Tail(),
1080 		queue->Count());
1081 
1082 	if (argc == 3) {
1083 		struct vm_page *page = queue->Head();
1084 
1085 		kprintf("page        cache       type       state  wired  usage\n");
1086 		for (page_num_t i = 0; page; i++, page = queue->Next(page)) {
1087 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
1088 				vm_cache_type_to_string(page->Cache()->type),
1089 				page_state_to_string(page->State()),
1090 				page->WiredCount(), page->usage_count);
1091 		}
1092 	}
1093 	return 0;
1094 }
1095 
1096 
1097 static int
1098 dump_page_stats(int argc, char **argv)
1099 {
1100 	page_num_t swappableModified = 0;
1101 	page_num_t swappableModifiedInactive = 0;
1102 
1103 	size_t counter[8];
1104 	size_t busyCounter[8];
1105 	memset(counter, 0, sizeof(counter));
1106 	memset(busyCounter, 0, sizeof(busyCounter));
1107 
1108 	struct page_run {
1109 		page_num_t	start;
1110 		page_num_t	end;
1111 
1112 		page_num_t Length() const	{ return end - start; }
1113 	};
1114 
1115 	page_run currentFreeRun = { 0, 0 };
1116 	page_run currentCachedRun = { 0, 0 };
1117 	page_run longestFreeRun = { 0, 0 };
1118 	page_run longestCachedRun = { 0, 0 };
1119 
1120 	for (page_num_t i = 0; i < sNumPages; i++) {
1121 		if (sPages[i].State() > 7) {
1122 			panic("page %" B_PRIuPHYSADDR " at %p has invalid state!\n", i,
1123 				&sPages[i]);
1124 		}
1125 
1126 		uint32 pageState = sPages[i].State();
1127 
1128 		counter[pageState]++;
1129 		if (sPages[i].busy)
1130 			busyCounter[pageState]++;
1131 
1132 		if (pageState == PAGE_STATE_MODIFIED
1133 			&& sPages[i].Cache() != NULL
1134 			&& sPages[i].Cache()->temporary && sPages[i].WiredCount() == 0) {
1135 			swappableModified++;
1136 			if (sPages[i].usage_count == 0)
1137 				swappableModifiedInactive++;
1138 		}
1139 
1140 		// track free and cached pages runs
1141 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
1142 			currentFreeRun.end = i + 1;
1143 			currentCachedRun.end = i + 1;
1144 		} else {
1145 			if (currentFreeRun.Length() > longestFreeRun.Length())
1146 				longestFreeRun = currentFreeRun;
1147 			currentFreeRun.start = currentFreeRun.end = i + 1;
1148 
1149 			if (pageState == PAGE_STATE_CACHED) {
1150 				currentCachedRun.end = i + 1;
1151 			} else {
1152 				if (currentCachedRun.Length() > longestCachedRun.Length())
1153 					longestCachedRun = currentCachedRun;
1154 				currentCachedRun.start = currentCachedRun.end = i + 1;
1155 			}
1156 		}
1157 	}
1158 
1159 	kprintf("page stats:\n");
1160 	kprintf("total: %" B_PRIuPHYSADDR "\n", sNumPages);
1161 
1162 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1163 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
1164 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1165 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
1166 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1167 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
1168 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1169 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
1170 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1171 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
1172 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
1173 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
1174 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
1175 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
1176 
1177 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
1178 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
1179 		sUnsatisfiedPageReservations);
1180 	kprintf("mapped pages: %" B_PRId32 "\n", gMappedPagesCount);
1181 	kprintf("longest free pages run: %" B_PRIuPHYSADDR " pages (at %"
1182 		B_PRIuPHYSADDR ")\n", longestFreeRun.Length(),
1183 		sPages[longestFreeRun.start].physical_page_number);
1184 	kprintf("longest free/cached pages run: %" B_PRIuPHYSADDR " pages (at %"
1185 		B_PRIuPHYSADDR ")\n", longestCachedRun.Length(),
1186 		sPages[longestCachedRun.start].physical_page_number);
1187 
1188 	kprintf("waiting threads:\n");
1189 	for (PageReservationWaiterList::Iterator it
1190 			= sPageReservationWaiters.GetIterator();
1191 		PageReservationWaiter* waiter = it.Next();) {
1192 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
1193 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
1194 			waiter->missing, waiter->dontTouch);
1195 	}
1196 
1197 	kprintf("\nfree queue: %p, count = %" B_PRIuPHYSADDR "\n", &sFreePageQueue,
1198 		sFreePageQueue.Count());
1199 	kprintf("clear queue: %p, count = %" B_PRIuPHYSADDR "\n", &sClearPageQueue,
1200 		sClearPageQueue.Count());
1201 	kprintf("modified queue: %p, count = %" B_PRIuPHYSADDR " (%" B_PRId32
1202 		" temporary, %" B_PRIuPHYSADDR " swappable, " "inactive: %"
1203 		B_PRIuPHYSADDR ")\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
1204 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
1205 	kprintf("active queue: %p, count = %" B_PRIuPHYSADDR "\n",
1206 		&sActivePageQueue, sActivePageQueue.Count());
1207 	kprintf("inactive queue: %p, count = %" B_PRIuPHYSADDR "\n",
1208 		&sInactivePageQueue, sInactivePageQueue.Count());
1209 	kprintf("cached queue: %p, count = %" B_PRIuPHYSADDR "\n",
1210 		&sCachedPageQueue, sCachedPageQueue.Count());
1211 	return 0;
1212 }
1213 
1214 
1215 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1216 
1217 static caller_info*
1218 get_caller_info(addr_t caller)
1219 {
1220 	// find the caller info
1221 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1222 		if (caller == sCallerInfoTable[i].caller)
1223 			return &sCallerInfoTable[i];
1224 	}
1225 
1226 	// not found, add a new entry, if there are free slots
1227 	if (sCallerInfoCount >= kCallerInfoTableSize)
1228 		return NULL;
1229 
1230 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
1231 	info->caller = caller;
1232 	info->count = 0;
1233 
1234 	return info;
1235 }
1236 
1237 
1238 static int
1239 caller_info_compare_count(const void* _a, const void* _b)
1240 {
1241 	const caller_info* a = (const caller_info*)_a;
1242 	const caller_info* b = (const caller_info*)_b;
1243 	return (int)(b->count - a->count);
1244 }
1245 
1246 
1247 static int
1248 dump_page_allocations_per_caller(int argc, char** argv)
1249 {
1250 	bool resetAllocationInfos = false;
1251 	bool printDetails = false;
1252 	addr_t caller = 0;
1253 
1254 	for (int32 i = 1; i < argc; i++) {
1255 		if (strcmp(argv[i], "-d") == 0) {
1256 			uint64 callerAddress;
1257 			if (++i >= argc
1258 				|| !evaluate_debug_expression(argv[i], &callerAddress, true)) {
1259 				print_debugger_command_usage(argv[0]);
1260 				return 0;
1261 			}
1262 
1263 			caller = callerAddress;
1264 			printDetails = true;
1265 		} else if (strcmp(argv[i], "-r") == 0) {
1266 			resetAllocationInfos = true;
1267 		} else {
1268 			print_debugger_command_usage(argv[0]);
1269 			return 0;
1270 		}
1271 	}
1272 
1273 	sCallerInfoCount = 0;
1274 
1275 	AllocationCollectorCallback collectorCallback(resetAllocationInfos);
1276 	AllocationDetailPrinterCallback detailsCallback(caller);
1277 	AllocationTrackingCallback& callback = printDetails
1278 		? (AllocationTrackingCallback&)detailsCallback
1279 		: (AllocationTrackingCallback&)collectorCallback;
1280 
1281 	for (page_num_t i = 0; i < sNumPages; i++)
1282 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1283 
1284 	if (printDetails)
1285 		return 0;
1286 
1287 	// sort the array
1288 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
1289 		&caller_info_compare_count);
1290 
1291 	kprintf("%" B_PRId32 " different callers\n\n", sCallerInfoCount);
1292 
1293 	size_t totalAllocationCount = 0;
1294 
1295 	kprintf("     count      caller\n");
1296 	kprintf("----------------------------------\n");
1297 	for (int32 i = 0; i < sCallerInfoCount; i++) {
1298 		caller_info& info = sCallerInfoTable[i];
1299 		kprintf("%10" B_PRIuSIZE "  %p", info.count, (void*)info.caller);
1300 
1301 		const char* symbol;
1302 		const char* imageName;
1303 		bool exactMatch;
1304 		addr_t baseAddress;
1305 
1306 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
1307 				&imageName, &exactMatch) == B_OK) {
1308 			kprintf("  %s + %#" B_PRIxADDR " (%s)%s\n", symbol,
1309 				info.caller - baseAddress, imageName,
1310 				exactMatch ? "" : " (nearest)");
1311 		} else
1312 			kprintf("\n");
1313 
1314 		totalAllocationCount += info.count;
1315 	}
1316 
1317 	kprintf("\ntotal page allocations: %" B_PRIuSIZE "\n",
1318 		totalAllocationCount);
1319 
1320 	return 0;
1321 }
1322 
1323 
1324 static int
1325 dump_page_allocation_infos(int argc, char** argv)
1326 {
1327 	page_num_t pageFilter = 0;
1328 	team_id teamFilter = -1;
1329 	thread_id threadFilter = -1;
1330 	bool printStackTraces = false;
1331 
1332 	for (int32 i = 1; i < argc; i++) {
1333 		if (strcmp(argv[i], "--stacktrace") == 0)
1334 			printStackTraces = true;
1335 		else if (strcmp(argv[i], "-p") == 0) {
1336 			uint64 pageNumber;
1337 			if (++i >= argc
1338 				|| !evaluate_debug_expression(argv[i], &pageNumber, true)) {
1339 				print_debugger_command_usage(argv[0]);
1340 				return 0;
1341 			}
1342 
1343 			pageFilter = pageNumber;
1344 		} else if (strcmp(argv[i], "--team") == 0) {
1345 			uint64 team;
1346 			if (++i >= argc
1347 				|| !evaluate_debug_expression(argv[i], &team, true)) {
1348 				print_debugger_command_usage(argv[0]);
1349 				return 0;
1350 			}
1351 
1352 			teamFilter = team;
1353 		} else if (strcmp(argv[i], "--thread") == 0) {
1354 			uint64 thread;
1355 			if (++i >= argc
1356 				|| !evaluate_debug_expression(argv[i], &thread, true)) {
1357 				print_debugger_command_usage(argv[0]);
1358 				return 0;
1359 			}
1360 
1361 			threadFilter = thread;
1362 		} else {
1363 			print_debugger_command_usage(argv[0]);
1364 			return 0;
1365 		}
1366 	}
1367 
1368 	AllocationInfoPrinterCallback callback(printStackTraces, pageFilter,
1369 		teamFilter, threadFilter);
1370 
1371 	for (page_num_t i = 0; i < sNumPages; i++)
1372 		callback.ProcessTrackingInfo(&sPages[i].allocation_tracking_info, i);
1373 
1374 	return 0;
1375 }
1376 
1377 #endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1378 
1379 
1380 #ifdef TRACK_PAGE_USAGE_STATS
1381 
1382 static void
1383 track_page_usage(vm_page* page)
1384 {
1385 	if (page->WiredCount() == 0) {
1386 		sNextPageUsage[(int32)page->usage_count + 128]++;
1387 		sNextPageUsagePageCount++;
1388 	}
1389 }
1390 
1391 
1392 static void
1393 update_page_usage_stats()
1394 {
1395 	std::swap(sPageUsage, sNextPageUsage);
1396 	sPageUsagePageCount = sNextPageUsagePageCount;
1397 
1398 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
1399 	sNextPageUsagePageCount = 0;
1400 
1401 	// compute average
1402 	if (sPageUsagePageCount > 0) {
1403 		int64 sum = 0;
1404 		for (int32 i = 0; i < 256; i++)
1405 			sum += (int64)sPageUsage[i] * (i - 128);
1406 
1407 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
1408 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
1409 	}
1410 }
1411 
1412 
1413 static int
1414 dump_page_usage_stats(int argc, char** argv)
1415 {
1416 	kprintf("distribution of page usage counts (%lu pages):",
1417 		sPageUsagePageCount);
1418 
1419 	int64 sum = 0;
1420 	for (int32 i = 0; i < 256; i++) {
1421 		if (i % 8 == 0)
1422 			kprintf("\n%4ld:", i - 128);
1423 
1424 		int64 count = sPageUsage[i];
1425 		sum += count * (i - 128);
1426 
1427 		kprintf("  %9llu", count);
1428 	}
1429 
1430 	kprintf("\n\n");
1431 
1432 	kprintf("average usage count: %f\n",
1433 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
1434 
1435 	return 0;
1436 }
1437 
1438 #endif	// TRACK_PAGE_USAGE_STATS
1439 
1440 
1441 // #pragma mark - vm_page
1442 
1443 
1444 inline void
1445 vm_page::InitState(uint8 newState)
1446 {
1447 	state = newState;
1448 }
1449 
1450 
1451 inline void
1452 vm_page::SetState(uint8 newState)
1453 {
1454 	TPS(SetPageState(this, newState));
1455 
1456 	state = newState;
1457 }
1458 
1459 
1460 // #pragma mark -
1461 
1462 
1463 static void
1464 get_page_stats(page_stats& _pageStats)
1465 {
1466 	_pageStats.totalFreePages = sUnreservedFreePages;
1467 	_pageStats.cachedPages = sCachedPageQueue.Count();
1468 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1469 	// TODO: We don't get an actual snapshot here!
1470 }
1471 
1472 
1473 static bool
1474 do_active_paging(const page_stats& pageStats)
1475 {
1476 	return pageStats.totalFreePages + pageStats.cachedPages
1477 		< pageStats.unsatisfiedReservations
1478 			+ (int32)sFreeOrCachedPagesTarget;
1479 }
1480 
1481 
1482 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1483 	\a count. Doesn't touch the last \a dontTouch pages of
1484 	\c sUnreservedFreePages, though.
1485 	\return The number of actually reserved pages.
1486 */
1487 static uint32
1488 reserve_some_pages(uint32 count, uint32 dontTouch)
1489 {
1490 	while (true) {
1491 		int32 freePages = atomic_get(&sUnreservedFreePages);
1492 		if (freePages <= (int32)dontTouch)
1493 			return 0;
1494 
1495 		int32 toReserve = std::min(count, freePages - dontTouch);
1496 		if (atomic_test_and_set(&sUnreservedFreePages,
1497 					freePages - toReserve, freePages)
1498 				== freePages) {
1499 			return toReserve;
1500 		}
1501 
1502 		// the count changed in the meantime -- retry
1503 	}
1504 }
1505 
1506 
1507 static void
1508 wake_up_page_reservation_waiters()
1509 {
1510 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1511 
1512 	// TODO: If this is a low priority thread, we might want to disable
1513 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1514 	// high priority threads wait be kept waiting while a medium priority thread
1515 	// prevents us from running.
1516 
1517 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1518 		int32 reserved = reserve_some_pages(waiter->missing,
1519 			waiter->dontTouch);
1520 		if (reserved == 0)
1521 			return;
1522 
1523 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1524 		waiter->missing -= reserved;
1525 
1526 		if (waiter->missing > 0)
1527 			return;
1528 
1529 		sPageReservationWaiters.Remove(waiter);
1530 
1531 		thread_unblock(waiter->thread, B_OK);
1532 	}
1533 }
1534 
1535 
1536 static inline void
1537 unreserve_pages(uint32 count)
1538 {
1539 	atomic_add(&sUnreservedFreePages, count);
1540 	if (atomic_get(&sUnsatisfiedPageReservations) != 0)
1541 		wake_up_page_reservation_waiters();
1542 }
1543 
1544 
1545 static void
1546 free_page(vm_page* page, bool clear)
1547 {
1548 	DEBUG_PAGE_ACCESS_CHECK(page);
1549 
1550 	PAGE_ASSERT(page, !page->IsMapped());
1551 
1552 	VMPageQueue* fromQueue;
1553 
1554 	switch (page->State()) {
1555 		case PAGE_STATE_ACTIVE:
1556 			fromQueue = &sActivePageQueue;
1557 			break;
1558 		case PAGE_STATE_INACTIVE:
1559 			fromQueue = &sInactivePageQueue;
1560 			break;
1561 		case PAGE_STATE_MODIFIED:
1562 			fromQueue = &sModifiedPageQueue;
1563 			break;
1564 		case PAGE_STATE_CACHED:
1565 			fromQueue = &sCachedPageQueue;
1566 			break;
1567 		case PAGE_STATE_FREE:
1568 		case PAGE_STATE_CLEAR:
1569 			panic("free_page(): page %p already free", page);
1570 			return;
1571 		case PAGE_STATE_WIRED:
1572 		case PAGE_STATE_UNUSED:
1573 			fromQueue = NULL;
1574 			break;
1575 		default:
1576 			panic("free_page(): page %p in invalid state %d",
1577 				page, page->State());
1578 			return;
1579 	}
1580 
1581 	if (page->CacheRef() != NULL)
1582 		panic("to be freed page %p has cache", page);
1583 	if (page->IsMapped())
1584 		panic("to be freed page %p has mappings", page);
1585 
1586 	if (fromQueue != NULL)
1587 		fromQueue->RemoveUnlocked(page);
1588 
1589 	TA(FreePage(page->physical_page_number));
1590 
1591 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
1592 	page->allocation_tracking_info.Clear();
1593 #endif
1594 
1595 	ReadLocker locker(sFreePageQueuesLock);
1596 
1597 	DEBUG_PAGE_ACCESS_END(page);
1598 
1599 	if (clear) {
1600 		page->SetState(PAGE_STATE_CLEAR);
1601 		sClearPageQueue.PrependUnlocked(page);
1602 	} else {
1603 		page->SetState(PAGE_STATE_FREE);
1604 		sFreePageQueue.PrependUnlocked(page);
1605 		sFreePageCondition.NotifyAll();
1606 	}
1607 
1608 	locker.Unlock();
1609 }
1610 
1611 
1612 /*!	The caller must make sure that no-one else tries to change the page's state
1613 	while the function is called. If the page has a cache, this can be done by
1614 	locking the cache.
1615 */
1616 static void
1617 set_page_state(vm_page *page, int pageState)
1618 {
1619 	DEBUG_PAGE_ACCESS_CHECK(page);
1620 
1621 	if (pageState == page->State())
1622 		return;
1623 
1624 	VMPageQueue* fromQueue;
1625 
1626 	switch (page->State()) {
1627 		case PAGE_STATE_ACTIVE:
1628 			fromQueue = &sActivePageQueue;
1629 			break;
1630 		case PAGE_STATE_INACTIVE:
1631 			fromQueue = &sInactivePageQueue;
1632 			break;
1633 		case PAGE_STATE_MODIFIED:
1634 			fromQueue = &sModifiedPageQueue;
1635 			break;
1636 		case PAGE_STATE_CACHED:
1637 			fromQueue = &sCachedPageQueue;
1638 			break;
1639 		case PAGE_STATE_FREE:
1640 		case PAGE_STATE_CLEAR:
1641 			panic("set_page_state(): page %p is free/clear", page);
1642 			return;
1643 		case PAGE_STATE_WIRED:
1644 		case PAGE_STATE_UNUSED:
1645 			fromQueue = NULL;
1646 			break;
1647 		default:
1648 			panic("set_page_state(): page %p in invalid state %d",
1649 				page, page->State());
1650 			return;
1651 	}
1652 
1653 	VMPageQueue* toQueue;
1654 
1655 	switch (pageState) {
1656 		case PAGE_STATE_ACTIVE:
1657 			toQueue = &sActivePageQueue;
1658 			break;
1659 		case PAGE_STATE_INACTIVE:
1660 			toQueue = &sInactivePageQueue;
1661 			break;
1662 		case PAGE_STATE_MODIFIED:
1663 			toQueue = &sModifiedPageQueue;
1664 			break;
1665 		case PAGE_STATE_CACHED:
1666 			PAGE_ASSERT(page, !page->IsMapped());
1667 			PAGE_ASSERT(page, !page->modified);
1668 			toQueue = &sCachedPageQueue;
1669 			break;
1670 		case PAGE_STATE_FREE:
1671 		case PAGE_STATE_CLEAR:
1672 			panic("set_page_state(): target state is free/clear");
1673 			return;
1674 		case PAGE_STATE_WIRED:
1675 		case PAGE_STATE_UNUSED:
1676 			toQueue = NULL;
1677 			break;
1678 		default:
1679 			panic("set_page_state(): invalid target state %d", pageState);
1680 			return;
1681 	}
1682 
1683 	VMCache* cache = page->Cache();
1684 	if (cache != NULL && cache->temporary) {
1685 		if (pageState == PAGE_STATE_MODIFIED)
1686 			atomic_add(&sModifiedTemporaryPages, 1);
1687 		else if (page->State() == PAGE_STATE_MODIFIED)
1688 			atomic_add(&sModifiedTemporaryPages, -1);
1689 	}
1690 
1691 	// move the page
1692 	if (toQueue == fromQueue) {
1693 		// Note: Theoretically we are required to lock when changing the page
1694 		// state, even if we don't change the queue. We actually don't have to
1695 		// do this, though, since only for the active queue there are different
1696 		// page states and active pages have a cache that must be locked at
1697 		// this point. So we rely on the fact that everyone must lock the cache
1698 		// before trying to change/interpret the page state.
1699 		PAGE_ASSERT(page, cache != NULL);
1700 		cache->AssertLocked();
1701 		page->SetState(pageState);
1702 	} else {
1703 		if (fromQueue != NULL)
1704 			fromQueue->RemoveUnlocked(page);
1705 
1706 		page->SetState(pageState);
1707 
1708 		if (toQueue != NULL)
1709 			toQueue->AppendUnlocked(page);
1710 	}
1711 }
1712 
1713 
1714 /*! Moves a previously modified page into a now appropriate queue.
1715 	The page queues must not be locked.
1716 */
1717 static void
1718 move_page_to_appropriate_queue(vm_page *page)
1719 {
1720 	DEBUG_PAGE_ACCESS_CHECK(page);
1721 
1722 	// Note, this logic must be in sync with what the page daemon does.
1723 	int32 state;
1724 	if (page->IsMapped())
1725 		state = PAGE_STATE_ACTIVE;
1726 	else if (page->modified)
1727 		state = PAGE_STATE_MODIFIED;
1728 	else
1729 		state = PAGE_STATE_CACHED;
1730 
1731 // TODO: If free + cached pages are low, we might directly want to free the
1732 // page.
1733 	set_page_state(page, state);
1734 }
1735 
1736 
1737 static void
1738 clear_page(struct vm_page *page)
1739 {
1740 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1741 		B_PAGE_SIZE);
1742 }
1743 
1744 
1745 static status_t
1746 mark_page_range_in_use(page_num_t startPage, page_num_t length, bool wired)
1747 {
1748 	TRACE(("mark_page_range_in_use: start %#" B_PRIxPHYSADDR ", len %#"
1749 		B_PRIxPHYSADDR "\n", startPage, length));
1750 
1751 	if (sPhysicalPageOffset > startPage) {
1752 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1753 			"): start page is before free list\n", startPage, length);
1754 		if (sPhysicalPageOffset - startPage >= length)
1755 			return B_OK;
1756 		length -= sPhysicalPageOffset - startPage;
1757 		startPage = sPhysicalPageOffset;
1758 	}
1759 
1760 	startPage -= sPhysicalPageOffset;
1761 
1762 	if (startPage + length > sNumPages) {
1763 		dprintf("mark_page_range_in_use(%#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
1764 			"): range would extend past free list\n", startPage, length);
1765 		if (startPage >= sNumPages)
1766 			return B_OK;
1767 		length = sNumPages - startPage;
1768 	}
1769 
1770 	WriteLocker locker(sFreePageQueuesLock);
1771 
1772 	for (page_num_t i = 0; i < length; i++) {
1773 		vm_page *page = &sPages[startPage + i];
1774 		switch (page->State()) {
1775 			case PAGE_STATE_FREE:
1776 			case PAGE_STATE_CLEAR:
1777 			{
1778 // TODO: This violates the page reservation policy, since we remove pages from
1779 // the free/clear queues without having reserved them before. This should happen
1780 // in the early boot process only, though.
1781 				DEBUG_PAGE_ACCESS_START(page);
1782 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1783 					? sFreePageQueue : sClearPageQueue;
1784 				queue.Remove(page);
1785 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1786 				page->busy = false;
1787 				atomic_add(&sUnreservedFreePages, -1);
1788 				DEBUG_PAGE_ACCESS_END(page);
1789 				break;
1790 			}
1791 			case PAGE_STATE_WIRED:
1792 			case PAGE_STATE_UNUSED:
1793 				break;
1794 			case PAGE_STATE_ACTIVE:
1795 			case PAGE_STATE_INACTIVE:
1796 			case PAGE_STATE_MODIFIED:
1797 			case PAGE_STATE_CACHED:
1798 			default:
1799 				// uh
1800 				dprintf("mark_page_range_in_use: page %#" B_PRIxPHYSADDR
1801 					" in non-free state %d!\n", startPage + i, page->State());
1802 				break;
1803 		}
1804 	}
1805 
1806 	return B_OK;
1807 }
1808 
1809 
1810 /*!
1811 	This is a background thread that wakes up when its condition is notified
1812 	and moves some pages from the free queue over to the clear queue.
1813 	Given enough time, it will clear out all pages from the free queue - we
1814 	could probably slow it down after having reached a certain threshold.
1815 */
1816 static int32
1817 page_scrubber(void *unused)
1818 {
1819 	(void)(unused);
1820 
1821 	TRACE(("page_scrubber starting...\n"));
1822 
1823 	ConditionVariableEntry entry;
1824 	for (;;) {
1825 		while (sFreePageQueue.Count() == 0
1826 				|| atomic_get(&sUnreservedFreePages)
1827 					< (int32)sFreePagesTarget) {
1828 			sFreePageCondition.Add(&entry);
1829 			entry.Wait();
1830 		}
1831 
1832 		// Since we temporarily remove pages from the free pages reserve,
1833 		// we must make sure we don't cause a violation of the page
1834 		// reservation warranty. The following is usually stricter than
1835 		// necessary, because we don't have information on how many of the
1836 		// reserved pages have already been allocated.
1837 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1838 			kPageReserveForPriority[VM_PRIORITY_USER]);
1839 		if (reserved == 0)
1840 			continue;
1841 
1842 		// get some pages from the free queue, mostly sorted
1843 		ReadLocker locker(sFreePageQueuesLock);
1844 
1845 		vm_page *page[SCRUB_SIZE];
1846 		int32 scrubCount = 0;
1847 		for (int32 i = 0; i < reserved; i++) {
1848 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1849 			if (page[i] == NULL)
1850 				break;
1851 
1852 			DEBUG_PAGE_ACCESS_START(page[i]);
1853 
1854 			page[i]->SetState(PAGE_STATE_ACTIVE);
1855 			page[i]->busy = true;
1856 			scrubCount++;
1857 		}
1858 
1859 		locker.Unlock();
1860 
1861 		if (scrubCount == 0) {
1862 			unreserve_pages(reserved);
1863 			continue;
1864 		}
1865 
1866 		TA(ScrubbingPages(scrubCount));
1867 
1868 		// clear them
1869 		for (int32 i = 0; i < scrubCount; i++)
1870 			clear_page(page[i]);
1871 
1872 		locker.Lock();
1873 
1874 		// and put them into the clear queue
1875 		// process the array reversed when prepending to preserve sequential order
1876 		for (int32 i = scrubCount - 1; i >= 0; i--) {
1877 			page[i]->SetState(PAGE_STATE_CLEAR);
1878 			page[i]->busy = false;
1879 			DEBUG_PAGE_ACCESS_END(page[i]);
1880 			sClearPageQueue.PrependUnlocked(page[i]);
1881 		}
1882 
1883 		locker.Unlock();
1884 
1885 		unreserve_pages(reserved);
1886 
1887 		TA(ScrubbedPages(scrubCount));
1888 
1889 		// wait at least 100ms between runs
1890 		snooze(100 * 1000);
1891 	}
1892 
1893 	return 0;
1894 }
1895 
1896 
1897 static void
1898 init_page_marker(vm_page &marker)
1899 {
1900 	marker.SetCacheRef(NULL);
1901 	marker.InitState(PAGE_STATE_UNUSED);
1902 	marker.busy = true;
1903 #if DEBUG_PAGE_QUEUE
1904 	marker.queue = NULL;
1905 #endif
1906 #if DEBUG_PAGE_ACCESS
1907 	marker.accessing_thread = thread_get_current_thread_id();
1908 #endif
1909 }
1910 
1911 
1912 static void
1913 remove_page_marker(struct vm_page &marker)
1914 {
1915 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1916 
1917 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1918 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1919 
1920 	marker.SetState(PAGE_STATE_UNUSED);
1921 }
1922 
1923 
1924 static vm_page*
1925 next_modified_page(page_num_t& maxPagesToSee)
1926 {
1927 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1928 
1929 	while (maxPagesToSee > 0) {
1930 		vm_page* page = sModifiedPageQueue.Head();
1931 		if (page == NULL)
1932 			return NULL;
1933 
1934 		sModifiedPageQueue.Requeue(page, true);
1935 
1936 		maxPagesToSee--;
1937 
1938 		if (!page->busy)
1939 			return page;
1940 	}
1941 
1942 	return NULL;
1943 }
1944 
1945 
1946 // #pragma mark -
1947 
1948 
1949 class PageWriteTransfer;
1950 class PageWriteWrapper;
1951 
1952 
1953 class PageWriterRun {
1954 public:
1955 	status_t Init(uint32 maxPages);
1956 
1957 	void PrepareNextRun();
1958 	void AddPage(vm_page* page);
1959 	uint32 Go();
1960 
1961 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1962 		bool partialTransfer, size_t bytesTransferred);
1963 
1964 private:
1965 	uint32				fMaxPages;
1966 	uint32				fWrapperCount;
1967 	uint32				fTransferCount;
1968 	int32				fPendingTransfers;
1969 	PageWriteWrapper*	fWrappers;
1970 	PageWriteTransfer*	fTransfers;
1971 	ConditionVariable	fAllFinishedCondition;
1972 };
1973 
1974 
1975 class PageWriteTransfer : public AsyncIOCallback {
1976 public:
1977 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1978 	bool AddPage(vm_page* page);
1979 
1980 	status_t Schedule(uint32 flags);
1981 
1982 	void SetStatus(status_t status, size_t transferred);
1983 
1984 	status_t Status() const	{ return fStatus; }
1985 	struct VMCache* Cache() const { return fCache; }
1986 	uint32 PageCount() const { return fPageCount; }
1987 
1988 	virtual void IOFinished(status_t status, bool partialTransfer,
1989 		generic_size_t bytesTransferred);
1990 
1991 private:
1992 	PageWriterRun*		fRun;
1993 	struct VMCache*		fCache;
1994 	off_t				fOffset;
1995 	uint32				fPageCount;
1996 	int32				fMaxPages;
1997 	status_t			fStatus;
1998 	uint32				fVecCount;
1999 	generic_io_vec		fVecs[32]; // TODO: make dynamic/configurable
2000 };
2001 
2002 
2003 class PageWriteWrapper {
2004 public:
2005 	PageWriteWrapper();
2006 	~PageWriteWrapper();
2007 	void SetTo(vm_page* page);
2008 	bool Done(status_t result);
2009 
2010 private:
2011 	vm_page*			fPage;
2012 	struct VMCache*		fCache;
2013 	bool				fIsActive;
2014 };
2015 
2016 
2017 PageWriteWrapper::PageWriteWrapper()
2018 	:
2019 	fIsActive(false)
2020 {
2021 }
2022 
2023 
2024 PageWriteWrapper::~PageWriteWrapper()
2025 {
2026 	if (fIsActive)
2027 		panic("page write wrapper going out of scope but isn't completed");
2028 }
2029 
2030 
2031 /*!	The page's cache must be locked.
2032 */
2033 void
2034 PageWriteWrapper::SetTo(vm_page* page)
2035 {
2036 	DEBUG_PAGE_ACCESS_CHECK(page);
2037 
2038 	if (page->busy)
2039 		panic("setting page write wrapper to busy page");
2040 
2041 	if (fIsActive)
2042 		panic("re-setting page write wrapper that isn't completed");
2043 
2044 	fPage = page;
2045 	fCache = page->Cache();
2046 	fIsActive = true;
2047 
2048 	fPage->busy = true;
2049 	fPage->busy_writing = true;
2050 
2051 	// We have a modified page -- however, while we're writing it back,
2052 	// the page might still be mapped. In order not to lose any changes to the
2053 	// page, we mark it clean before actually writing it back; if
2054 	// writing the page fails for some reason, we'll just keep it in the
2055 	// modified page list, but that should happen only rarely.
2056 
2057 	// If the page is changed after we cleared the dirty flag, but before we
2058 	// had the chance to write it back, then we'll write it again later -- that
2059 	// will probably not happen that often, though.
2060 
2061 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
2062 }
2063 
2064 
2065 /*!	The page's cache must be locked.
2066 	The page queues must not be locked.
2067 	\return \c true if the page was written successfully respectively could be
2068 		handled somehow, \c false otherwise.
2069 */
2070 bool
2071 PageWriteWrapper::Done(status_t result)
2072 {
2073 	if (!fIsActive)
2074 		panic("completing page write wrapper that is not active");
2075 
2076 	DEBUG_PAGE_ACCESS_START(fPage);
2077 
2078 	fPage->busy = false;
2079 		// Set unbusy and notify later by hand, since we might free the page.
2080 
2081 	bool success = true;
2082 
2083 	if (result == B_OK) {
2084 		// put it into the active/inactive queue
2085 		move_page_to_appropriate_queue(fPage);
2086 		fPage->busy_writing = false;
2087 		DEBUG_PAGE_ACCESS_END(fPage);
2088 	} else {
2089 		// Writing the page failed. One reason would be that the cache has been
2090 		// shrunk and the page does no longer belong to the file. Otherwise the
2091 		// actual I/O failed, in which case we'll simply keep the page modified.
2092 
2093 		if (!fPage->busy_writing) {
2094 			// The busy_writing flag was cleared. That means the cache has been
2095 			// shrunk while we were trying to write the page and we have to free
2096 			// it now.
2097 			vm_remove_all_page_mappings(fPage);
2098 // TODO: Unmapping should already happen when resizing the cache!
2099 			fCache->RemovePage(fPage);
2100 			free_page(fPage, false);
2101 			unreserve_pages(1);
2102 		} else {
2103 			// Writing the page failed -- mark the page modified and move it to
2104 			// an appropriate queue other than the modified queue, so we don't
2105 			// keep trying to write it over and over again. We keep
2106 			// non-temporary pages in the modified queue, though, so they don't
2107 			// get lost in the inactive queue.
2108 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
2109 				strerror(result));
2110 
2111 			fPage->modified = true;
2112 			if (!fCache->temporary)
2113 				set_page_state(fPage, PAGE_STATE_MODIFIED);
2114 			else if (fPage->IsMapped())
2115 				set_page_state(fPage, PAGE_STATE_ACTIVE);
2116 			else
2117 				set_page_state(fPage, PAGE_STATE_INACTIVE);
2118 
2119 			fPage->busy_writing = false;
2120 			DEBUG_PAGE_ACCESS_END(fPage);
2121 
2122 			success = false;
2123 		}
2124 	}
2125 
2126 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
2127 	fIsActive = false;
2128 
2129 	return success;
2130 }
2131 
2132 
2133 /*!	The page's cache must be locked.
2134 */
2135 void
2136 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
2137 {
2138 	fRun = run;
2139 	fCache = page->Cache();
2140 	fOffset = page->cache_offset;
2141 	fPageCount = 1;
2142 	fMaxPages = maxPages;
2143 	fStatus = B_OK;
2144 
2145 	fVecs[0].base = (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2146 	fVecs[0].length = B_PAGE_SIZE;
2147 	fVecCount = 1;
2148 }
2149 
2150 
2151 /*!	The page's cache must be locked.
2152 */
2153 bool
2154 PageWriteTransfer::AddPage(vm_page* page)
2155 {
2156 	if (page->Cache() != fCache
2157 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
2158 		return false;
2159 
2160 	phys_addr_t nextBase = fVecs[fVecCount - 1].base
2161 		+ fVecs[fVecCount - 1].length;
2162 
2163 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2164 		&& (off_t)page->cache_offset == fOffset + fPageCount) {
2165 		// append to last iovec
2166 		fVecs[fVecCount - 1].length += B_PAGE_SIZE;
2167 		fPageCount++;
2168 		return true;
2169 	}
2170 
2171 	nextBase = fVecs[0].base - B_PAGE_SIZE;
2172 	if ((phys_addr_t)page->physical_page_number << PAGE_SHIFT == nextBase
2173 		&& (off_t)page->cache_offset == fOffset - 1) {
2174 		// prepend to first iovec and adjust offset
2175 		fVecs[0].base = nextBase;
2176 		fVecs[0].length += B_PAGE_SIZE;
2177 		fOffset = page->cache_offset;
2178 		fPageCount++;
2179 		return true;
2180 	}
2181 
2182 	if (((off_t)page->cache_offset == fOffset + fPageCount
2183 			|| (off_t)page->cache_offset == fOffset - 1)
2184 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
2185 		// not physically contiguous or not in the right order
2186 		uint32 vectorIndex;
2187 		if ((off_t)page->cache_offset < fOffset) {
2188 			// we are pre-pending another vector, move the other vecs
2189 			for (uint32 i = fVecCount; i > 0; i--)
2190 				fVecs[i] = fVecs[i - 1];
2191 
2192 			fOffset = page->cache_offset;
2193 			vectorIndex = 0;
2194 		} else
2195 			vectorIndex = fVecCount;
2196 
2197 		fVecs[vectorIndex].base
2198 			= (phys_addr_t)page->physical_page_number << PAGE_SHIFT;
2199 		fVecs[vectorIndex].length = B_PAGE_SIZE;
2200 
2201 		fVecCount++;
2202 		fPageCount++;
2203 		return true;
2204 	}
2205 
2206 	return false;
2207 }
2208 
2209 
2210 status_t
2211 PageWriteTransfer::Schedule(uint32 flags)
2212 {
2213 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
2214 	generic_size_t writeLength = (phys_size_t)fPageCount << PAGE_SHIFT;
2215 
2216 	if (fRun != NULL) {
2217 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
2218 			flags | B_PHYSICAL_IO_REQUEST, this);
2219 	}
2220 
2221 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
2222 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
2223 
2224 	SetStatus(status, writeLength);
2225 	return fStatus;
2226 }
2227 
2228 
2229 void
2230 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
2231 {
2232 	// only succeed if all pages up to the last one have been written fully
2233 	// and the last page has at least been written partially
2234 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
2235 		status = B_ERROR;
2236 
2237 	fStatus = status;
2238 }
2239 
2240 
2241 void
2242 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
2243 	generic_size_t bytesTransferred)
2244 {
2245 	SetStatus(status, bytesTransferred);
2246 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
2247 }
2248 
2249 
2250 status_t
2251 PageWriterRun::Init(uint32 maxPages)
2252 {
2253 	fMaxPages = maxPages;
2254 	fWrapperCount = 0;
2255 	fTransferCount = 0;
2256 	fPendingTransfers = 0;
2257 
2258 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
2259 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
2260 	if (fWrappers == NULL || fTransfers == NULL)
2261 		return B_NO_MEMORY;
2262 
2263 	return B_OK;
2264 }
2265 
2266 
2267 void
2268 PageWriterRun::PrepareNextRun()
2269 {
2270 	fWrapperCount = 0;
2271 	fTransferCount = 0;
2272 	fPendingTransfers = 0;
2273 }
2274 
2275 
2276 /*!	The page's cache must be locked.
2277 */
2278 void
2279 PageWriterRun::AddPage(vm_page* page)
2280 {
2281 	fWrappers[fWrapperCount++].SetTo(page);
2282 
2283 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
2284 		fTransfers[fTransferCount++].SetTo(this, page,
2285 			page->Cache()->MaxPagesPerAsyncWrite());
2286 	}
2287 }
2288 
2289 
2290 /*!	Writes all pages previously added.
2291 	\return The number of pages that could not be written or otherwise handled.
2292 */
2293 uint32
2294 PageWriterRun::Go()
2295 {
2296 	atomic_set(&fPendingTransfers, fTransferCount);
2297 
2298 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
2299 	ConditionVariableEntry waitEntry;
2300 	fAllFinishedCondition.Add(&waitEntry);
2301 
2302 	// schedule writes
2303 	for (uint32 i = 0; i < fTransferCount; i++)
2304 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
2305 
2306 	// wait until all pages have been written
2307 	waitEntry.Wait();
2308 
2309 	// mark pages depending on whether they could be written or not
2310 
2311 	uint32 failedPages = 0;
2312 	uint32 wrapperIndex = 0;
2313 	for (uint32 i = 0; i < fTransferCount; i++) {
2314 		PageWriteTransfer& transfer = fTransfers[i];
2315 		transfer.Cache()->Lock();
2316 
2317 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2318 			if (!fWrappers[wrapperIndex++].Done(transfer.Status()))
2319 				failedPages++;
2320 		}
2321 
2322 		transfer.Cache()->Unlock();
2323 	}
2324 
2325 	ASSERT(wrapperIndex == fWrapperCount);
2326 
2327 	for (uint32 i = 0; i < fTransferCount; i++) {
2328 		PageWriteTransfer& transfer = fTransfers[i];
2329 		struct VMCache* cache = transfer.Cache();
2330 
2331 		// We've acquired a references for each page
2332 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
2333 			// We release the cache references after all pages were made
2334 			// unbusy again - otherwise releasing a vnode could deadlock.
2335 			cache->ReleaseStoreRef();
2336 			cache->ReleaseRef();
2337 		}
2338 	}
2339 
2340 	return failedPages;
2341 }
2342 
2343 
2344 void
2345 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
2346 	bool partialTransfer, size_t bytesTransferred)
2347 {
2348 	if (atomic_add(&fPendingTransfers, -1) == 1)
2349 		fAllFinishedCondition.NotifyAll();
2350 }
2351 
2352 
2353 /*!	The page writer continuously takes some pages from the modified
2354 	queue, writes them back, and moves them back to the active queue.
2355 	It runs in its own thread, and is only there to keep the number
2356 	of modified pages low, so that more pages can be reused with
2357 	fewer costs.
2358 */
2359 status_t
2360 page_writer(void* /*unused*/)
2361 {
2362 	const uint32 kNumPages = 256;
2363 #ifdef TRACE_VM_PAGE
2364 	uint32 writtenPages = 0;
2365 	bigtime_t lastWrittenTime = 0;
2366 	bigtime_t pageCollectionTime = 0;
2367 	bigtime_t pageWritingTime = 0;
2368 #endif
2369 
2370 	PageWriterRun run;
2371 	if (run.Init(kNumPages) != B_OK) {
2372 		panic("page writer: Failed to init PageWriterRun!");
2373 		return B_ERROR;
2374 	}
2375 
2376 	page_num_t pagesSinceLastSuccessfulWrite = 0;
2377 
2378 	while (true) {
2379 // TODO: Maybe wait shorter when memory is low!
2380 		if (sModifiedPageQueue.Count() < kNumPages) {
2381 			sPageWriterCondition.Wait(3000000, true);
2382 				// all 3 seconds when no one triggers us
2383 		}
2384 
2385 		page_num_t modifiedPages = sModifiedPageQueue.Count();
2386 		if (modifiedPages == 0)
2387 			continue;
2388 
2389 		if (modifiedPages <= pagesSinceLastSuccessfulWrite) {
2390 			// We ran through the whole queue without being able to write a
2391 			// single page. Take a break.
2392 			snooze(500000);
2393 			pagesSinceLastSuccessfulWrite = 0;
2394 		}
2395 
2396 #if ENABLE_SWAP_SUPPORT
2397 		page_stats pageStats;
2398 		get_page_stats(pageStats);
2399 		bool activePaging = do_active_paging(pageStats);
2400 #endif
2401 
2402 		// depending on how urgent it becomes to get pages to disk, we adjust
2403 		// our I/O priority
2404 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
2405 		int32 ioPriority = B_IDLE_PRIORITY;
2406 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
2407 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
2408 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
2409 		} else {
2410 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
2411 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
2412 		}
2413 
2414 		thread_set_io_priority(ioPriority);
2415 
2416 		uint32 numPages = 0;
2417 		run.PrepareNextRun();
2418 
2419 		// TODO: make this laptop friendly, too (ie. only start doing
2420 		// something if someone else did something or there is really
2421 		// enough to do).
2422 
2423 		// collect pages to be written
2424 #ifdef TRACE_VM_PAGE
2425 		pageCollectionTime -= system_time();
2426 #endif
2427 
2428 		page_num_t maxPagesToSee = modifiedPages;
2429 
2430 		while (numPages < kNumPages && maxPagesToSee > 0) {
2431 			vm_page *page = next_modified_page(maxPagesToSee);
2432 			if (page == NULL)
2433 				break;
2434 
2435 			PageCacheLocker cacheLocker(page, false);
2436 			if (!cacheLocker.IsLocked())
2437 				continue;
2438 
2439 			VMCache *cache = page->Cache();
2440 
2441 			// If the page is busy or its state has changed while we were
2442 			// locking the cache, just ignore it.
2443 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
2444 				continue;
2445 
2446 			DEBUG_PAGE_ACCESS_START(page);
2447 
2448 			// Don't write back wired (locked) pages.
2449 			if (page->WiredCount() > 0) {
2450 				set_page_state(page, PAGE_STATE_ACTIVE);
2451 				DEBUG_PAGE_ACCESS_END(page);
2452 				continue;
2453 			}
2454 
2455 			// Write back temporary pages only when we're actively paging.
2456 			if (cache->temporary
2457 #if ENABLE_SWAP_SUPPORT
2458 				&& (!activePaging
2459 					|| !cache->CanWritePage(
2460 							(off_t)page->cache_offset << PAGE_SHIFT))
2461 #endif
2462 				) {
2463 				// We can't/don't want to do anything with this page, so move it
2464 				// to one of the other queues.
2465 				if (page->mappings.IsEmpty())
2466 					set_page_state(page, PAGE_STATE_INACTIVE);
2467 				else
2468 					set_page_state(page, PAGE_STATE_ACTIVE);
2469 
2470 				DEBUG_PAGE_ACCESS_END(page);
2471 				continue;
2472 			}
2473 
2474 			// We need our own reference to the store, as it might currently be
2475 			// destroyed.
2476 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2477 				DEBUG_PAGE_ACCESS_END(page);
2478 				cacheLocker.Unlock();
2479 				thread_yield();
2480 				continue;
2481 			}
2482 
2483 			run.AddPage(page);
2484 				// TODO: We're possibly adding pages of different caches and
2485 				// thus maybe of different underlying file systems here. This
2486 				// is a potential problem for loop file systems/devices, since
2487 				// we could mark a page busy that would need to be accessed
2488 				// when writing back another page, thus causing a deadlock.
2489 
2490 			DEBUG_PAGE_ACCESS_END(page);
2491 
2492 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2493 			TPW(WritePage(page));
2494 
2495 			cache->AcquireRefLocked();
2496 			numPages++;
2497 		}
2498 
2499 #ifdef TRACE_VM_PAGE
2500 		pageCollectionTime += system_time();
2501 #endif
2502 		if (numPages == 0)
2503 			continue;
2504 
2505 		// write pages to disk and do all the cleanup
2506 #ifdef TRACE_VM_PAGE
2507 		pageWritingTime -= system_time();
2508 #endif
2509 		uint32 failedPages = run.Go();
2510 #ifdef TRACE_VM_PAGE
2511 		pageWritingTime += system_time();
2512 
2513 		// debug output only...
2514 		writtenPages += numPages;
2515 		if (writtenPages >= 1024) {
2516 			bigtime_t now = system_time();
2517 			TRACE(("page writer: wrote 1024 pages (total: %" B_PRIu64 " ms, "
2518 				"collect: %" B_PRIu64 " ms, write: %" B_PRIu64 " ms)\n",
2519 				(now - lastWrittenTime) / 1000,
2520 				pageCollectionTime / 1000, pageWritingTime / 1000));
2521 			lastWrittenTime = now;
2522 
2523 			writtenPages -= 1024;
2524 			pageCollectionTime = 0;
2525 			pageWritingTime = 0;
2526 		}
2527 #endif
2528 
2529 		if (failedPages == numPages)
2530 			pagesSinceLastSuccessfulWrite += modifiedPages - maxPagesToSee;
2531 		else
2532 			pagesSinceLastSuccessfulWrite = 0;
2533 	}
2534 
2535 	return B_OK;
2536 }
2537 
2538 
2539 // #pragma mark -
2540 
2541 
2542 // TODO: This should be done in the page daemon!
2543 #if 0
2544 #if ENABLE_SWAP_SUPPORT
2545 static bool
2546 free_page_swap_space(int32 index)
2547 {
2548 	vm_page *page = vm_page_at_index(index);
2549 	PageCacheLocker locker(page);
2550 	if (!locker.IsLocked())
2551 		return false;
2552 
2553 	DEBUG_PAGE_ACCESS_START(page);
2554 
2555 	VMCache* cache = page->Cache();
2556 	if (cache->temporary && page->WiredCount() == 0
2557 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2558 			&& page->usage_count > 0) {
2559 		// TODO: how to judge a page is highly active?
2560 		if (swap_free_page_swap_space(page)) {
2561 			// We need to mark the page modified, since otherwise it could be
2562 			// stolen and we'd lose its data.
2563 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2564 			TD(FreedPageSwap(page));
2565 			DEBUG_PAGE_ACCESS_END(page);
2566 			return true;
2567 		}
2568 	}
2569 	DEBUG_PAGE_ACCESS_END(page);
2570 	return false;
2571 }
2572 #endif
2573 #endif	// 0
2574 
2575 
2576 static vm_page *
2577 find_cached_page_candidate(struct vm_page &marker)
2578 {
2579 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2580 
2581 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2582 	vm_page *page;
2583 
2584 	if (marker.State() == PAGE_STATE_UNUSED) {
2585 		// Get the first free pages of the (in)active queue
2586 		page = sCachedPageQueue.Head();
2587 	} else {
2588 		// Get the next page of the current queue
2589 		if (marker.State() != PAGE_STATE_CACHED) {
2590 			panic("invalid marker %p state", &marker);
2591 			return NULL;
2592 		}
2593 
2594 		page = sCachedPageQueue.Next(&marker);
2595 		sCachedPageQueue.Remove(&marker);
2596 		marker.SetState(PAGE_STATE_UNUSED);
2597 	}
2598 
2599 	while (page != NULL) {
2600 		if (!page->busy) {
2601 			// we found a candidate, insert marker
2602 			marker.SetState(PAGE_STATE_CACHED);
2603 			sCachedPageQueue.InsertAfter(page, &marker);
2604 			return page;
2605 		}
2606 
2607 		page = sCachedPageQueue.Next(page);
2608 	}
2609 
2610 	return NULL;
2611 }
2612 
2613 
2614 static bool
2615 free_cached_page(vm_page *page, bool dontWait)
2616 {
2617 	// try to lock the page's cache
2618 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2619 		return false;
2620 	VMCache* cache = page->Cache();
2621 
2622 	AutoLocker<VMCache> cacheLocker(cache, true);
2623 	MethodDeleter<VMCache, void, &VMCache::ReleaseRefLocked> _2(cache);
2624 
2625 	// check again if that page is still a candidate
2626 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2627 		return false;
2628 
2629 	DEBUG_PAGE_ACCESS_START(page);
2630 
2631 	PAGE_ASSERT(page, !page->IsMapped());
2632 	PAGE_ASSERT(page, !page->modified);
2633 
2634 	// we can now steal this page
2635 
2636 	cache->RemovePage(page);
2637 		// Now the page doesn't have cache anymore, so no one else (e.g.
2638 		// vm_page_allocate_page_run() can pick it up), since they would be
2639 		// required to lock the cache first, which would fail.
2640 
2641 	sCachedPageQueue.RemoveUnlocked(page);
2642 	return true;
2643 }
2644 
2645 
2646 static uint32
2647 free_cached_pages(uint32 pagesToFree, bool dontWait)
2648 {
2649 	vm_page marker;
2650 	init_page_marker(marker);
2651 
2652 	uint32 pagesFreed = 0;
2653 
2654 	while (pagesFreed < pagesToFree) {
2655 		vm_page *page = find_cached_page_candidate(marker);
2656 		if (page == NULL)
2657 			break;
2658 
2659 		if (free_cached_page(page, dontWait)) {
2660 			ReadLocker locker(sFreePageQueuesLock);
2661 			page->SetState(PAGE_STATE_FREE);
2662 			DEBUG_PAGE_ACCESS_END(page);
2663 			sFreePageQueue.PrependUnlocked(page);
2664 			locker.Unlock();
2665 
2666 			TA(StolenPage());
2667 
2668 			pagesFreed++;
2669 		}
2670 	}
2671 
2672 	remove_page_marker(marker);
2673 
2674 	sFreePageCondition.NotifyAll();
2675 
2676 	return pagesFreed;
2677 }
2678 
2679 
2680 static void
2681 idle_scan_active_pages(page_stats& pageStats)
2682 {
2683 	VMPageQueue& queue = sActivePageQueue;
2684 
2685 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2686 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2687 
2688 	while (maxToScan > 0) {
2689 		maxToScan--;
2690 
2691 		// Get the next page. Note that we don't bother to lock here. We go with
2692 		// the assumption that on all architectures reading/writing pointers is
2693 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2694 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2695 		vm_page* page = queue.Head();
2696 		if (page == NULL)
2697 			break;
2698 
2699 		// lock the page's cache
2700 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2701 		if (cache == NULL)
2702 			continue;
2703 
2704 		if (page->State() != PAGE_STATE_ACTIVE) {
2705 			// page is no longer in the cache or in this queue
2706 			cache->ReleaseRefAndUnlock();
2707 			continue;
2708 		}
2709 
2710 		if (page->busy) {
2711 			// page is busy -- requeue at the end
2712 			vm_page_requeue(page, true);
2713 			cache->ReleaseRefAndUnlock();
2714 			continue;
2715 		}
2716 
2717 		DEBUG_PAGE_ACCESS_START(page);
2718 
2719 		// Get the page active/modified flags and update the page's usage count.
2720 		// We completely unmap inactive temporary pages. This saves us to
2721 		// iterate through the inactive list as well, since we'll be notified
2722 		// via page fault whenever such an inactive page is used again.
2723 		// We don't remove the mappings of non-temporary pages, since we
2724 		// wouldn't notice when those would become unused and could thus be
2725 		// moved to the cached list.
2726 		int32 usageCount;
2727 		if (page->WiredCount() > 0 || page->usage_count > 0
2728 			|| !cache->temporary) {
2729 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2730 		} else
2731 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2732 
2733 		if (usageCount > 0) {
2734 			usageCount += page->usage_count + kPageUsageAdvance;
2735 			if (usageCount > kPageUsageMax)
2736 				usageCount = kPageUsageMax;
2737 // TODO: This would probably also be the place to reclaim swap space.
2738 		} else {
2739 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2740 			if (usageCount < 0) {
2741 				usageCount = 0;
2742 				set_page_state(page, PAGE_STATE_INACTIVE);
2743 			}
2744 		}
2745 
2746 		page->usage_count = usageCount;
2747 
2748 		DEBUG_PAGE_ACCESS_END(page);
2749 
2750 		cache->ReleaseRefAndUnlock();
2751 	}
2752 }
2753 
2754 
2755 static void
2756 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2757 {
2758 	int32 pagesToFree = pageStats.unsatisfiedReservations
2759 		+ sFreeOrCachedPagesTarget
2760 		- (pageStats.totalFreePages + pageStats.cachedPages);
2761 	if (pagesToFree <= 0)
2762 		return;
2763 
2764 	bigtime_t time = system_time();
2765 	uint32 pagesScanned = 0;
2766 	uint32 pagesToCached = 0;
2767 	uint32 pagesToModified = 0;
2768 	uint32 pagesToActive = 0;
2769 
2770 	// Determine how many pages at maximum to send to the modified queue. Since
2771 	// it is relatively expensive to page out pages, we do that on a grander
2772 	// scale only when things get desperate.
2773 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2774 
2775 	vm_page marker;
2776 	init_page_marker(marker);
2777 
2778 	VMPageQueue& queue = sInactivePageQueue;
2779 	InterruptsSpinLocker queueLocker(queue.GetLock());
2780 	uint32 maxToScan = queue.Count();
2781 
2782 	vm_page* nextPage = queue.Head();
2783 
2784 	while (pagesToFree > 0 && maxToScan > 0) {
2785 		maxToScan--;
2786 
2787 		// get the next page
2788 		vm_page* page = nextPage;
2789 		if (page == NULL)
2790 			break;
2791 		nextPage = queue.Next(page);
2792 
2793 		if (page->busy)
2794 			continue;
2795 
2796 		// mark the position
2797 		queue.InsertAfter(page, &marker);
2798 		queueLocker.Unlock();
2799 
2800 		// lock the page's cache
2801 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2802 		if (cache == NULL || page->busy
2803 				|| page->State() != PAGE_STATE_INACTIVE) {
2804 			if (cache != NULL)
2805 				cache->ReleaseRefAndUnlock();
2806 			queueLocker.Lock();
2807 			nextPage = queue.Next(&marker);
2808 			queue.Remove(&marker);
2809 			continue;
2810 		}
2811 
2812 		pagesScanned++;
2813 
2814 		DEBUG_PAGE_ACCESS_START(page);
2815 
2816 		// Get the accessed count, clear the accessed/modified flags and
2817 		// unmap the page, if it hasn't been accessed.
2818 		int32 usageCount;
2819 		if (page->WiredCount() > 0)
2820 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2821 		else
2822 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2823 
2824 		// update usage count
2825 		if (usageCount > 0) {
2826 			usageCount += page->usage_count + kPageUsageAdvance;
2827 			if (usageCount > kPageUsageMax)
2828 				usageCount = kPageUsageMax;
2829 		} else {
2830 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2831 			if (usageCount < 0)
2832 				usageCount = 0;
2833 		}
2834 
2835 		page->usage_count = usageCount;
2836 
2837 		// Move to fitting queue or requeue:
2838 		// * Active mapped pages go to the active queue.
2839 		// * Inactive mapped (i.e. wired) pages are requeued.
2840 		// * The remaining pages are cachable. Thus, if unmodified they go to
2841 		//   the cached queue, otherwise to the modified queue (up to a limit).
2842 		//   Note that until in the idle scanning we don't exempt pages of
2843 		//   temporary caches. Apparently we really need memory, so we better
2844 		//   page out memory as well.
2845 		bool isMapped = page->IsMapped();
2846 		if (usageCount > 0) {
2847 			if (isMapped) {
2848 				set_page_state(page, PAGE_STATE_ACTIVE);
2849 				pagesToActive++;
2850 			} else
2851 				vm_page_requeue(page, true);
2852 		} else if (isMapped) {
2853 			vm_page_requeue(page, true);
2854 		} else if (!page->modified) {
2855 			set_page_state(page, PAGE_STATE_CACHED);
2856 			pagesToFree--;
2857 			pagesToCached++;
2858 		} else if (maxToFlush > 0) {
2859 			set_page_state(page, PAGE_STATE_MODIFIED);
2860 			maxToFlush--;
2861 			pagesToModified++;
2862 		} else
2863 			vm_page_requeue(page, true);
2864 
2865 		DEBUG_PAGE_ACCESS_END(page);
2866 
2867 		cache->ReleaseRefAndUnlock();
2868 
2869 		// remove the marker
2870 		queueLocker.Lock();
2871 		nextPage = queue.Next(&marker);
2872 		queue.Remove(&marker);
2873 	}
2874 
2875 	queueLocker.Unlock();
2876 
2877 	time = system_time() - time;
2878 	TRACE_DAEMON("  -> inactive scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2879 		", moved: %" B_PRIu32 " -> cached, %" B_PRIu32 " -> modified, %"
2880 		B_PRIu32 " -> active\n", time, pagesScanned, pagesToCached,
2881 		pagesToModified, pagesToActive);
2882 
2883 	// wake up the page writer, if we tossed it some pages
2884 	if (pagesToModified > 0)
2885 		sPageWriterCondition.WakeUp();
2886 }
2887 
2888 
2889 static void
2890 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2891 {
2892 	vm_page marker;
2893 	init_page_marker(marker);
2894 
2895 	VMPageQueue& queue = sActivePageQueue;
2896 	InterruptsSpinLocker queueLocker(queue.GetLock());
2897 	uint32 maxToScan = queue.Count();
2898 
2899 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2900 		+ sFreeOrCachedPagesTarget
2901 		- (pageStats.totalFreePages + pageStats.cachedPages)
2902 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2903 	if (pagesToDeactivate <= 0)
2904 		return;
2905 
2906 	bigtime_t time = system_time();
2907 	uint32 pagesAccessed = 0;
2908 	uint32 pagesToInactive = 0;
2909 	uint32 pagesScanned = 0;
2910 
2911 	vm_page* nextPage = queue.Head();
2912 
2913 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2914 		maxToScan--;
2915 
2916 		// get the next page
2917 		vm_page* page = nextPage;
2918 		if (page == NULL)
2919 			break;
2920 		nextPage = queue.Next(page);
2921 
2922 		if (page->busy)
2923 			continue;
2924 
2925 		// mark the position
2926 		queue.InsertAfter(page, &marker);
2927 		queueLocker.Unlock();
2928 
2929 		// lock the page's cache
2930 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2931 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2932 			if (cache != NULL)
2933 				cache->ReleaseRefAndUnlock();
2934 			queueLocker.Lock();
2935 			nextPage = queue.Next(&marker);
2936 			queue.Remove(&marker);
2937 			continue;
2938 		}
2939 
2940 		pagesScanned++;
2941 
2942 		DEBUG_PAGE_ACCESS_START(page);
2943 
2944 		// Get the page active/modified flags and update the page's usage count.
2945 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2946 
2947 		if (usageCount > 0) {
2948 			usageCount += page->usage_count + kPageUsageAdvance;
2949 			if (usageCount > kPageUsageMax)
2950 				usageCount = kPageUsageMax;
2951 			pagesAccessed++;
2952 // TODO: This would probably also be the place to reclaim swap space.
2953 		} else {
2954 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2955 			if (usageCount <= 0) {
2956 				usageCount = 0;
2957 				set_page_state(page, PAGE_STATE_INACTIVE);
2958 				pagesToInactive++;
2959 			}
2960 		}
2961 
2962 		page->usage_count = usageCount;
2963 
2964 		DEBUG_PAGE_ACCESS_END(page);
2965 
2966 		cache->ReleaseRefAndUnlock();
2967 
2968 		// remove the marker
2969 		queueLocker.Lock();
2970 		nextPage = queue.Next(&marker);
2971 		queue.Remove(&marker);
2972 	}
2973 
2974 	time = system_time() - time;
2975 	TRACE_DAEMON("  ->   active scan (%7" B_PRId64 " us): scanned: %7" B_PRIu32
2976 		", moved: %" B_PRIu32 " -> inactive, encountered %" B_PRIu32 " accessed"
2977 		" ones\n", time, pagesScanned, pagesToInactive, pagesAccessed);
2978 }
2979 
2980 
2981 static void
2982 page_daemon_idle_scan(page_stats& pageStats)
2983 {
2984 	TRACE_DAEMON("page daemon: idle run\n");
2985 
2986 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2987 		// We want more actually free pages, so free some from the cached
2988 		// ones.
2989 		uint32 freed = free_cached_pages(
2990 			sFreePagesTarget - pageStats.totalFreePages, false);
2991 		if (freed > 0)
2992 			unreserve_pages(freed);
2993 		get_page_stats(pageStats);
2994 	}
2995 
2996 	// Walk the active list and move pages to the inactive queue.
2997 	get_page_stats(pageStats);
2998 	idle_scan_active_pages(pageStats);
2999 }
3000 
3001 
3002 static void
3003 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
3004 {
3005 	TRACE_DAEMON("page daemon: full run: free: %" B_PRIu32 ", cached: %"
3006 		B_PRIu32 ", to free: %" B_PRIu32 "\n", pageStats.totalFreePages,
3007 		pageStats.cachedPages, pageStats.unsatisfiedReservations
3008 			+ sFreeOrCachedPagesTarget
3009 			- (pageStats.totalFreePages + pageStats.cachedPages));
3010 
3011 	// Walk the inactive list and transfer pages to the cached and modified
3012 	// queues.
3013 	full_scan_inactive_pages(pageStats, despairLevel);
3014 
3015 	// Free cached pages. Also wake up reservation waiters.
3016 	get_page_stats(pageStats);
3017 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
3018 		- (pageStats.totalFreePages);
3019 	if (pagesToFree > 0) {
3020 		uint32 freed = free_cached_pages(pagesToFree, true);
3021 		if (freed > 0)
3022 			unreserve_pages(freed);
3023 	}
3024 
3025 	// Walk the active list and move pages to the inactive queue.
3026 	get_page_stats(pageStats);
3027 	full_scan_active_pages(pageStats, despairLevel);
3028 }
3029 
3030 
3031 static status_t
3032 page_daemon(void* /*unused*/)
3033 {
3034 	int32 despairLevel = 0;
3035 
3036 	while (true) {
3037 		sPageDaemonCondition.ClearActivated();
3038 
3039 		// evaluate the free pages situation
3040 		page_stats pageStats;
3041 		get_page_stats(pageStats);
3042 
3043 		if (!do_active_paging(pageStats)) {
3044 			// Things look good -- just maintain statistics and keep the pool
3045 			// of actually free pages full enough.
3046 			despairLevel = 0;
3047 			page_daemon_idle_scan(pageStats);
3048 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
3049 		} else {
3050 			// Not enough free pages. We need to do some real work.
3051 			despairLevel = std::max(despairLevel + 1, (int32)3);
3052 			page_daemon_full_scan(pageStats, despairLevel);
3053 
3054 			// Don't wait after the first full scan, but rather immediately
3055 			// check whether we were successful in freeing enough pages and
3056 			// re-run with increased despair level. The first scan is
3057 			// conservative with respect to moving inactive modified pages to
3058 			// the modified list to avoid thrashing. The second scan, however,
3059 			// will not hold back.
3060 			if (despairLevel > 1)
3061 				snooze(kBusyScanWaitInterval);
3062 		}
3063 	}
3064 
3065 	return B_OK;
3066 }
3067 
3068 
3069 /*!	Returns how many pages could *not* be reserved.
3070 */
3071 static uint32
3072 reserve_pages(uint32 count, int priority, bool dontWait)
3073 {
3074 	int32 dontTouch = kPageReserveForPriority[priority];
3075 
3076 	while (true) {
3077 		count -= reserve_some_pages(count, dontTouch);
3078 		if (count == 0)
3079 			return 0;
3080 
3081 		if (sUnsatisfiedPageReservations == 0) {
3082 			count -= free_cached_pages(count, dontWait);
3083 			if (count == 0)
3084 				return count;
3085 		}
3086 
3087 		if (dontWait)
3088 			return count;
3089 
3090 		// we need to wait for pages to become available
3091 
3092 		MutexLocker pageDeficitLocker(sPageDeficitLock);
3093 
3094 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
3095 		sUnsatisfiedPageReservations += count;
3096 
3097 		if (atomic_get(&sUnreservedFreePages) > dontTouch) {
3098 			// the situation changed
3099 			sUnsatisfiedPageReservations -= count;
3100 			continue;
3101 		}
3102 
3103 		PageReservationWaiter waiter;
3104 		waiter.dontTouch = dontTouch;
3105 		waiter.missing = count;
3106 		waiter.thread = thread_get_current_thread();
3107 		waiter.threadPriority = waiter.thread->priority;
3108 
3109 		// insert ordered (i.e. after all waiters with higher or equal priority)
3110 		PageReservationWaiter* otherWaiter = NULL;
3111 		for (PageReservationWaiterList::Iterator it
3112 				= sPageReservationWaiters.GetIterator();
3113 			(otherWaiter = it.Next()) != NULL;) {
3114 			if (waiter < *otherWaiter)
3115 				break;
3116 		}
3117 
3118 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
3119 
3120 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
3121 			"waiting for pages");
3122 
3123 		if (notifyDaemon)
3124 			sPageDaemonCondition.WakeUp();
3125 
3126 		pageDeficitLocker.Unlock();
3127 
3128 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
3129 		thread_block();
3130 
3131 		pageDeficitLocker.Lock();
3132 
3133 		return 0;
3134 	}
3135 }
3136 
3137 
3138 //	#pragma mark - private kernel API
3139 
3140 
3141 /*!	Writes a range of modified pages of a cache to disk.
3142 	You need to hold the VMCache lock when calling this function.
3143 	Note that the cache lock is released in this function.
3144 	\param cache The cache.
3145 	\param firstPage Offset (in page size units) of the first page in the range.
3146 	\param endPage End offset (in page size units) of the page range. The page
3147 		at this offset is not included.
3148 */
3149 status_t
3150 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
3151 	uint32 endPage)
3152 {
3153 	static const int32 kMaxPages = 256;
3154 	int32 maxPages = cache->MaxPagesPerWrite();
3155 	if (maxPages < 0 || maxPages > kMaxPages)
3156 		maxPages = kMaxPages;
3157 
3158 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
3159 		| HEAP_DONT_LOCK_KERNEL_SPACE;
3160 
3161 	PageWriteWrapper stackWrappersPool[2];
3162 	PageWriteWrapper* stackWrappers[1];
3163 	PageWriteWrapper* wrapperPool
3164 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
3165 	PageWriteWrapper** wrappers
3166 		= new(malloc_flags(allocationFlags)) PageWriteWrapper*[maxPages];
3167 	if (wrapperPool == NULL || wrappers == NULL) {
3168 		// don't fail, just limit our capabilities
3169 		delete[] wrapperPool;
3170 		delete[] wrappers;
3171 		wrapperPool = stackWrappersPool;
3172 		wrappers = stackWrappers;
3173 		maxPages = 1;
3174 	}
3175 
3176 	int32 nextWrapper = 0;
3177 	int32 usedWrappers = 0;
3178 
3179 	PageWriteTransfer transfer;
3180 	bool transferEmpty = true;
3181 
3182 	VMCachePagesTree::Iterator it
3183 		= cache->pages.GetIterator(firstPage, true, true);
3184 
3185 	while (true) {
3186 		vm_page* page = it.Next();
3187 		if (page == NULL || page->cache_offset >= endPage) {
3188 			if (transferEmpty)
3189 				break;
3190 
3191 			page = NULL;
3192 		}
3193 
3194 		if (page != NULL) {
3195 			if (page->busy
3196 				|| (page->State() != PAGE_STATE_MODIFIED
3197 					&& !vm_test_map_modification(page))) {
3198 				page = NULL;
3199 			}
3200 		}
3201 
3202 		PageWriteWrapper* wrapper = NULL;
3203 		if (page != NULL) {
3204 			wrapper = &wrapperPool[nextWrapper++];
3205 			if (nextWrapper > maxPages)
3206 				nextWrapper = 0;
3207 
3208 			DEBUG_PAGE_ACCESS_START(page);
3209 
3210 			wrapper->SetTo(page);
3211 
3212 			if (transferEmpty || transfer.AddPage(page)) {
3213 				if (transferEmpty) {
3214 					transfer.SetTo(NULL, page, maxPages);
3215 					transferEmpty = false;
3216 				}
3217 
3218 				DEBUG_PAGE_ACCESS_END(page);
3219 
3220 				wrappers[usedWrappers++] = wrapper;
3221 				continue;
3222 			}
3223 
3224 			DEBUG_PAGE_ACCESS_END(page);
3225 		}
3226 
3227 		if (transferEmpty)
3228 			continue;
3229 
3230 		cache->Unlock();
3231 		status_t status = transfer.Schedule(0);
3232 		cache->Lock();
3233 
3234 		for (int32 i = 0; i < usedWrappers; i++)
3235 			wrappers[i]->Done(status);
3236 
3237 		usedWrappers = 0;
3238 
3239 		if (page != NULL) {
3240 			transfer.SetTo(NULL, page, maxPages);
3241 			wrappers[usedWrappers++] = wrapper;
3242 		} else
3243 			transferEmpty = true;
3244 	}
3245 
3246 	if (wrapperPool != stackWrappersPool) {
3247 		delete[] wrapperPool;
3248 		delete[] wrappers;
3249 	}
3250 
3251 	return B_OK;
3252 }
3253 
3254 
3255 /*!	You need to hold the VMCache lock when calling this function.
3256 	Note that the cache lock is released in this function.
3257 */
3258 status_t
3259 vm_page_write_modified_pages(VMCache *cache)
3260 {
3261 	return vm_page_write_modified_page_range(cache, 0,
3262 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
3263 }
3264 
3265 
3266 /*!	Schedules the page writer to write back the specified \a page.
3267 	Note, however, that it might not do this immediately, and it can well
3268 	take several seconds until the page is actually written out.
3269 */
3270 void
3271 vm_page_schedule_write_page(vm_page *page)
3272 {
3273 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
3274 
3275 	vm_page_requeue(page, false);
3276 
3277 	sPageWriterCondition.WakeUp();
3278 }
3279 
3280 
3281 /*!	Cache must be locked.
3282 */
3283 void
3284 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
3285 	uint32 endPage)
3286 {
3287 	uint32 modified = 0;
3288 	for (VMCachePagesTree::Iterator it
3289 				= cache->pages.GetIterator(firstPage, true, true);
3290 			vm_page *page = it.Next();) {
3291 		if (page->cache_offset >= endPage)
3292 			break;
3293 
3294 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
3295 			DEBUG_PAGE_ACCESS_START(page);
3296 			vm_page_requeue(page, false);
3297 			modified++;
3298 			DEBUG_PAGE_ACCESS_END(page);
3299 		}
3300 	}
3301 
3302 	if (modified > 0)
3303 		sPageWriterCondition.WakeUp();
3304 }
3305 
3306 
3307 void
3308 vm_page_init_num_pages(kernel_args *args)
3309 {
3310 	// calculate the size of memory by looking at the physical_memory_range array
3311 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
3312 	page_num_t physicalPagesEnd = sPhysicalPageOffset
3313 		+ args->physical_memory_range[0].size / B_PAGE_SIZE;
3314 
3315 	sNonExistingPages = 0;
3316 	sIgnoredPages = args->ignored_physical_memory / B_PAGE_SIZE;
3317 
3318 	for (uint32 i = 1; i < args->num_physical_memory_ranges; i++) {
3319 		page_num_t start = args->physical_memory_range[i].start / B_PAGE_SIZE;
3320 		if (start > physicalPagesEnd)
3321 			sNonExistingPages += start - physicalPagesEnd;
3322 		physicalPagesEnd = start
3323 			+ args->physical_memory_range[i].size / B_PAGE_SIZE;
3324 
3325 #ifdef LIMIT_AVAILABLE_MEMORY
3326 		page_num_t available
3327 			= physicalPagesEnd - sPhysicalPageOffset - sNonExistingPages;
3328 		if (available > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE)) {
3329 			physicalPagesEnd = sPhysicalPageOffset + sNonExistingPages
3330 				+ LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
3331 			break;
3332 		}
3333 #endif
3334 	}
3335 
3336 	TRACE(("first phys page = %#" B_PRIxPHYSADDR ", end %#" B_PRIxPHYSADDR "\n",
3337 		sPhysicalPageOffset, physicalPagesEnd));
3338 
3339 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
3340 }
3341 
3342 
3343 status_t
3344 vm_page_init(kernel_args *args)
3345 {
3346 	TRACE(("vm_page_init: entry\n"));
3347 
3348 	// init page queues
3349 	sModifiedPageQueue.Init("modified pages queue");
3350 	sInactivePageQueue.Init("inactive pages queue");
3351 	sActivePageQueue.Init("active pages queue");
3352 	sCachedPageQueue.Init("cached pages queue");
3353 	sFreePageQueue.Init("free pages queue");
3354 	sClearPageQueue.Init("clear pages queue");
3355 
3356 	new (&sPageReservationWaiters) PageReservationWaiterList;
3357 
3358 	// map in the new free page table
3359 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
3360 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3361 
3362 	TRACE(("vm_init: putting free_page_table @ %p, # ents %" B_PRIuPHYSADDR
3363 		" (size %#" B_PRIxPHYSADDR ")\n", sPages, sNumPages,
3364 		(phys_addr_t)(sNumPages * sizeof(vm_page))));
3365 
3366 	// initialize the free page table
3367 	for (uint32 i = 0; i < sNumPages; i++) {
3368 		sPages[i].Init(sPhysicalPageOffset + i);
3369 		sFreePageQueue.Append(&sPages[i]);
3370 
3371 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3372 		sPages[i].allocation_tracking_info.Clear();
3373 #endif
3374 	}
3375 
3376 	sUnreservedFreePages = sNumPages;
3377 
3378 	TRACE(("initialized table\n"));
3379 
3380 	// mark the ranges between usable physical memory unused
3381 	phys_addr_t previousEnd = 0;
3382 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3383 		phys_addr_t base = args->physical_memory_range[i].start;
3384 		phys_size_t size = args->physical_memory_range[i].size;
3385 		if (base > previousEnd) {
3386 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
3387 				(base - previousEnd) / B_PAGE_SIZE, false);
3388 		}
3389 		previousEnd = base + size;
3390 	}
3391 
3392 	// mark the allocated physical page ranges wired
3393 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3394 		mark_page_range_in_use(
3395 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
3396 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
3397 	}
3398 
3399 	// prevent future allocations from the kernel args ranges
3400 	args->num_physical_allocated_ranges = 0;
3401 
3402 	// The target of actually free pages. This must be at least the system
3403 	// reserve, but should be a few more pages, so we don't have to extract
3404 	// a cached page with each allocation.
3405 	sFreePagesTarget = VM_PAGE_RESERVE_USER
3406 		+ std::max((page_num_t)32, (sNumPages - sNonExistingPages) / 1024);
3407 
3408 	// The target of free + cached and inactive pages. On low-memory machines
3409 	// keep things tight. free + cached is the pool of immediately allocatable
3410 	// pages. We want a few inactive pages, so when we're actually paging, we
3411 	// have a reasonably large set of pages to work with.
3412 	if (sUnreservedFreePages < 16 * 1024) {
3413 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
3414 		sInactivePagesTarget = sFreePagesTarget / 3;
3415 	} else {
3416 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
3417 		sInactivePagesTarget = sFreePagesTarget / 2;
3418 	}
3419 
3420 	TRACE(("vm_page_init: exit\n"));
3421 
3422 	return B_OK;
3423 }
3424 
3425 
3426 status_t
3427 vm_page_init_post_area(kernel_args *args)
3428 {
3429 	void *dummy;
3430 
3431 	dummy = sPages;
3432 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
3433 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
3434 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3435 
3436 	add_debugger_command("list_pages", &dump_page_list,
3437 		"List physical pages");
3438 	add_debugger_command("page_stats", &dump_page_stats,
3439 		"Dump statistics about page usage");
3440 	add_debugger_command_etc("page", &dump_page_long,
3441 		"Dump page info",
3442 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
3443 		"Prints information for the physical page. If neither \"-p\" nor\n"
3444 		"\"-v\" are given, the provided address is interpreted as address of\n"
3445 		"the vm_page data structure for the page in question. If \"-p\" is\n"
3446 		"given, the address is the physical address of the page. If \"-v\" is\n"
3447 		"given, the address is interpreted as virtual address in the current\n"
3448 		"thread's address space and for the page it is mapped to (if any)\n"
3449 		"information are printed. If \"-m\" is specified, the command will\n"
3450 		"search all known address spaces for mappings to that page and print\n"
3451 		"them.\n", 0);
3452 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
3453 	add_debugger_command("find_page", &find_page,
3454 		"Find out which queue a page is actually in");
3455 
3456 #ifdef TRACK_PAGE_USAGE_STATS
3457 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
3458 		"Dumps statistics about page usage counts",
3459 		"\n"
3460 		"Dumps statistics about page usage counts.\n",
3461 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
3462 #endif
3463 
3464 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3465 	add_debugger_command_etc("page_allocations_per_caller",
3466 		&dump_page_allocations_per_caller,
3467 		"Dump current page allocations summed up per caller",
3468 		"[ -d <caller> ] [ -r ]\n"
3469 		"The current allocations will by summed up by caller (their count)\n"
3470 		"printed in decreasing order by count.\n"
3471 		"If \"-d\" is given, each allocation for caller <caller> is printed\n"
3472 		"including the respective stack trace.\n"
3473 		"If \"-r\" is given, the allocation infos are reset after gathering\n"
3474 		"the information, so the next command invocation will only show the\n"
3475 		"allocations made after the reset.\n", 0);
3476 	add_debugger_command_etc("page_allocation_infos",
3477 		&dump_page_allocation_infos,
3478 		"Dump current page allocations",
3479 		"[ --stacktrace ] [ -p <page number> ] [ --team <team ID> ] "
3480 		"[ --thread <thread ID> ]\n"
3481 		"The current allocations filtered by optional values will be printed.\n"
3482 		"The optional \"-p\" page number filters for a specific page,\n"
3483 		"with \"--team\" and \"--thread\" allocations by specific teams\n"
3484 		"and/or threads can be filtered (these only work if a corresponding\n"
3485 		"tracing entry is still available).\n"
3486 		"If \"--stacktrace\" is given, then stack traces of the allocation\n"
3487 		"callers are printed, where available\n", 0);
3488 #endif
3489 
3490 	return B_OK;
3491 }
3492 
3493 
3494 status_t
3495 vm_page_init_post_thread(kernel_args *args)
3496 {
3497 	new (&sFreePageCondition) ConditionVariable;
3498 
3499 	// create a kernel thread to clear out pages
3500 
3501 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
3502 		B_LOWEST_ACTIVE_PRIORITY, NULL);
3503 	resume_thread(thread);
3504 
3505 	// start page writer
3506 
3507 	sPageWriterCondition.Init("page writer");
3508 
3509 	thread = spawn_kernel_thread(&page_writer, "page writer",
3510 		B_NORMAL_PRIORITY + 1, NULL);
3511 	resume_thread(thread);
3512 
3513 	// start page daemon
3514 
3515 	sPageDaemonCondition.Init("page daemon");
3516 
3517 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
3518 		B_NORMAL_PRIORITY, NULL);
3519 	resume_thread(thread);
3520 
3521 	return B_OK;
3522 }
3523 
3524 
3525 status_t
3526 vm_mark_page_inuse(page_num_t page)
3527 {
3528 	return vm_mark_page_range_inuse(page, 1);
3529 }
3530 
3531 
3532 status_t
3533 vm_mark_page_range_inuse(page_num_t startPage, page_num_t length)
3534 {
3535 	return mark_page_range_in_use(startPage, length, false);
3536 }
3537 
3538 
3539 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3540 */
3541 void
3542 vm_page_unreserve_pages(vm_page_reservation* reservation)
3543 {
3544 	uint32 count = reservation->count;
3545 	reservation->count = 0;
3546 
3547 	if (count == 0)
3548 		return;
3549 
3550 	TA(UnreservePages(count));
3551 
3552 	unreserve_pages(count);
3553 }
3554 
3555 
3556 /*!	With this call, you can reserve a number of free pages in the system.
3557 	They will only be handed out to someone who has actually reserved them.
3558 	This call returns as soon as the number of requested pages has been
3559 	reached.
3560 	The caller must not hold any cache lock or the function might deadlock.
3561 */
3562 void
3563 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3564 	int priority)
3565 {
3566 	reservation->count = count;
3567 
3568 	if (count == 0)
3569 		return;
3570 
3571 	TA(ReservePages(count));
3572 
3573 	reserve_pages(count, priority, false);
3574 }
3575 
3576 
3577 bool
3578 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3579 	int priority)
3580 {
3581 	if (count == 0) {
3582 		reservation->count = count;
3583 		return true;
3584 	}
3585 
3586 	uint32 remaining = reserve_pages(count, priority, true);
3587 	if (remaining == 0) {
3588 		TA(ReservePages(count));
3589 		reservation->count = count;
3590 		return true;
3591 	}
3592 
3593 	unreserve_pages(count - remaining);
3594 
3595 	return false;
3596 }
3597 
3598 
3599 vm_page *
3600 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3601 {
3602 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3603 	ASSERT(pageState != PAGE_STATE_FREE);
3604 	ASSERT(pageState != PAGE_STATE_CLEAR);
3605 
3606 	ASSERT(reservation->count > 0);
3607 	reservation->count--;
3608 
3609 	VMPageQueue* queue;
3610 	VMPageQueue* otherQueue;
3611 
3612 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3613 		queue = &sClearPageQueue;
3614 		otherQueue = &sFreePageQueue;
3615 	} else {
3616 		queue = &sFreePageQueue;
3617 		otherQueue = &sClearPageQueue;
3618 	}
3619 
3620 	ReadLocker locker(sFreePageQueuesLock);
3621 
3622 	vm_page* page = queue->RemoveHeadUnlocked();
3623 	if (page == NULL) {
3624 		// if the primary queue was empty, grab the page from the
3625 		// secondary queue
3626 		page = otherQueue->RemoveHeadUnlocked();
3627 
3628 		if (page == NULL) {
3629 			// Unlikely, but possible: the page we have reserved has moved
3630 			// between the queues after we checked the first queue. Grab the
3631 			// write locker to make sure this doesn't happen again.
3632 			locker.Unlock();
3633 			WriteLocker writeLocker(sFreePageQueuesLock);
3634 
3635 			page = queue->RemoveHead();
3636 			if (page == NULL)
3637 				otherQueue->RemoveHead();
3638 
3639 			if (page == NULL) {
3640 				panic("Had reserved page, but there is none!");
3641 				return NULL;
3642 			}
3643 
3644 			// downgrade to read lock
3645 			locker.Lock();
3646 		}
3647 	}
3648 
3649 	if (page->CacheRef() != NULL)
3650 		panic("supposed to be free page %p has cache\n", page);
3651 
3652 	DEBUG_PAGE_ACCESS_START(page);
3653 
3654 	int oldPageState = page->State();
3655 	page->SetState(pageState);
3656 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3657 	page->usage_count = 0;
3658 	page->accessed = false;
3659 	page->modified = false;
3660 
3661 	locker.Unlock();
3662 
3663 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3664 		sPageQueues[pageState].AppendUnlocked(page);
3665 
3666 	// clear the page, if we had to take it from the free queue and a clear
3667 	// page was requested
3668 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3669 		clear_page(page);
3670 
3671 #if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
3672 	page->allocation_tracking_info.Init(
3673 		TA(AllocatePage(page->physical_page_number)));
3674 #else
3675 	TA(AllocatePage(page->physical_page_number));
3676 #endif
3677 
3678 	return page;
3679 }
3680 
3681 
3682 static void
3683 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3684 	VMPageQueue::PageList& clearPages)
3685 {
3686 	// Page lists are sorted, so remove tails before prepending to the respective queue.
3687 
3688 	while (vm_page* page = freePages.RemoveTail()) {
3689 		page->busy = false;
3690 		page->SetState(PAGE_STATE_FREE);
3691 		DEBUG_PAGE_ACCESS_END(page);
3692 		sFreePageQueue.PrependUnlocked(page);
3693 	}
3694 
3695 	while (vm_page* page = clearPages.RemoveTail()) {
3696 		page->busy = false;
3697 		page->SetState(PAGE_STATE_CLEAR);
3698 		DEBUG_PAGE_ACCESS_END(page);
3699 		sClearPageQueue.PrependUnlocked(page);
3700 	}
3701 
3702 	sFreePageCondition.NotifyAll();
3703 }
3704 
3705 
/*!	Tries to allocate a contiguous run of \a length pages starting at
	index \a start.

	The caller must have write-locked the free/clear page queues. On success
	and when allocating one of the cached pages fails, the function returns
	with the locker unlocked. When the run is aborted in the first pass
	(a page is neither free, clear, nor cached), the function returns without
	unlocking, i.e. the locker is still locked.

	If the function fails, it cleans up after itself, i.e. it will free all
	pages it managed to allocate.

	\param start The start index (into \c sPages) of the run.
	\param length The number of pages to allocate.
	\param flags Page allocation flags. Encodes the state the function shall
		set the allocated pages to, whether the pages shall be marked busy
		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
		(VM_PAGE_ALLOC_CLEAR).
	\param freeClearQueueLocker Locked WriteLocker for the free/clear page
		queues in locked state. See above for its state on return.
	\return The index (relative to \a start) of the first page that could not
		be allocated. \a length is returned when the function was successful.
*/
static page_num_t
allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
	WriteLocker& freeClearQueueLocker)
{
	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
	ASSERT(pageState != PAGE_STATE_FREE);
	ASSERT(pageState != PAGE_STATE_CLEAR);
	ASSERT(start + length <= sNumPages);

	// Pull the free/clear pages out of their respective queues. Cached pages
	// are allocated later.
	page_num_t cachedPages = 0;
	VMPageQueue::PageList freePages;
	VMPageQueue::PageList clearPages;
	page_num_t i = 0;
	for (; i < length; i++) {
		bool pageAllocated = true;
		bool noPage = false;
		vm_page& page = sPages[start + i];
		switch (page.State()) {
			case PAGE_STATE_CLEAR:
				DEBUG_PAGE_ACCESS_START(&page);
				sClearPageQueue.Remove(&page);
				clearPages.Add(&page);
				break;
			case PAGE_STATE_FREE:
				DEBUG_PAGE_ACCESS_START(&page);
				sFreePageQueue.Remove(&page);
				freePages.Add(&page);
				break;
			case PAGE_STATE_CACHED:
				// We allocate cached pages later.
				cachedPages++;
				pageAllocated = false;
				break;

			default:
				// Probably a page was cached when our caller checked. Now it's
				// gone and we have to abort.
				noPage = true;
				break;
		}

		if (noPage)
			break;

		// Free/clear pages are initialized for their new owner right away;
		// cached pages are left untouched for now.
		if (pageAllocated) {
			page.SetState(flags & VM_PAGE_ALLOC_STATE);
			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
			page.usage_count = 0;
			page.accessed = false;
			page.modified = false;
		}
	}

	if (i < length) {
		// failed to allocate a page -- free all that we've got
		// Note: The locker is not unlocked on this path.
		allocate_page_run_cleanup(freePages, clearPages);
		return i;
	}

	freeClearQueueLocker.Unlock();

	if (cachedPages > 0) {
		// allocate the pages that weren't free but cached
		page_num_t freedCachedPages = 0;
		page_num_t nextIndex = start;
		vm_page* freePage = freePages.Head();
		vm_page* clearPage = clearPages.Head();
			// freePage/clearPage walk the two lists in parallel to the page
			// index, so pages we already own can be skipped
		while (cachedPages > 0) {
			// skip, if we've already got the page
			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
				freePage = freePages.GetNext(freePage);
				nextIndex++;
				continue;
			}
			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
				clearPage = clearPages.GetNext(clearPage);
				nextIndex++;
				continue;
			}

			// free the page, if it is still cached
			vm_page& page = sPages[nextIndex];
			if (!free_cached_page(&page, false)) {
				// TODO: if the page turns out to have been freed already,
				// there would be no need to fail
				break;
			}

			page.SetState(flags & VM_PAGE_ALLOC_STATE);
			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
			page.usage_count = 0;
			page.accessed = false;
			page.modified = false;

			// Insert the page at its sorted position in the free list, so
			// it is cleared below (if requested) like any other free page.
			freePages.InsertBefore(freePage, &page);
			freedCachedPages++;
			cachedPages--;
			nextIndex++;
		}

		// If we have freed cached pages, we need to balance things.
		if (freedCachedPages > 0)
			unreserve_pages(freedCachedPages);

		if (nextIndex - start < length) {
			// failed to allocate all cached pages -- free all that we've got
			freeClearQueueLocker.Lock();
			allocate_page_run_cleanup(freePages, clearPages);
			freeClearQueueLocker.Unlock();

			return nextIndex - start;
		}
	}

	// clear pages, if requested
	// (only the pages in the free list; those in the clear list came from
	// the clear queue)
	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
				vm_page* page = it.Next();) {
			clear_page(page);
		}
	}

	// add pages to target queue
	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
		freePages.MoveFrom(&clearPages);
		sPageQueues[pageState].AppendUnlocked(freePages, length);
	}

	// Note: We don't unreserve the pages since we pulled them out of the
	// free/clear queues without adjusting sUnreservedFreePages.

#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
	AbstractTraceEntryWithStackTrace* traceEntry
		= TA(AllocatePageRun(start, length));

	for (page_num_t i = start; i < start + length; i++)
		sPages[i].allocation_tracking_info.Init(traceEntry);
#else
	TA(AllocatePageRun(start, length));
#endif

	return length;
}
3871 
3872 
3873 /*! Allocate a physically contiguous range of pages.
3874 
3875 	\param flags Page allocation flags. Encodes the state the function shall
3876 		set the allocated pages to, whether the pages shall be marked busy
3877 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3878 		(VM_PAGE_ALLOC_CLEAR).
3879 	\param length The number of contiguous pages to allocate.
3880 	\param restrictions Restrictions to the physical addresses of the page run
3881 		to allocate, including \c low_address, the first acceptable physical
3882 		address where the page run may start, \c high_address, the last
3883 		acceptable physical address where the page run may end (i.e. it must
3884 		hold \code runStartAddress + length <= high_address \endcode),
3885 		\c alignment, the alignment of the page run start address, and
3886 		\c boundary, multiples of which the page run must not cross.
3887 		Values set to \c 0 are ignored.
3888 	\param priority The page reservation priority (as passed to
3889 		vm_page_reserve_pages()).
3890 	\return The first page of the allocated page run on success; \c NULL
3891 		when the allocation failed.
3892 */
3893 vm_page*
3894 vm_page_allocate_page_run(uint32 flags, page_num_t length,
3895 	const physical_address_restrictions* restrictions, int priority)
3896 {
3897 	// compute start and end page index
3898 	page_num_t requestedStart
3899 		= std::max(restrictions->low_address / B_PAGE_SIZE, sPhysicalPageOffset)
3900 			- sPhysicalPageOffset;
3901 	page_num_t start = requestedStart;
3902 	page_num_t end;
3903 	if (restrictions->high_address > 0) {
3904 		end = std::max(restrictions->high_address / B_PAGE_SIZE,
3905 				sPhysicalPageOffset)
3906 			- sPhysicalPageOffset;
3907 		end = std::min(end, sNumPages);
3908 	} else
3909 		end = sNumPages;
3910 
3911 	// compute alignment mask
3912 	page_num_t alignmentMask
3913 		= std::max(restrictions->alignment / B_PAGE_SIZE, (phys_addr_t)1) - 1;
3914 	ASSERT(((alignmentMask + 1) & alignmentMask) == 0);
3915 		// alignment must be a power of 2
3916 
3917 	// compute the boundary mask
3918 	uint32 boundaryMask = 0;
3919 	if (restrictions->boundary != 0) {
3920 		page_num_t boundary = restrictions->boundary / B_PAGE_SIZE;
3921 		// boundary must be a power of two and not less than alignment and
3922 		// length
3923 		ASSERT(((boundary - 1) & boundary) == 0);
3924 		ASSERT(boundary >= alignmentMask + 1);
3925 		ASSERT(boundary >= length);
3926 
3927 		boundaryMask = -boundary;
3928 	}
3929 
3930 	vm_page_reservation reservation;
3931 	vm_page_reserve_pages(&reservation, length, priority);
3932 
3933 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3934 
3935 	// First we try to get a run with free pages only. If that fails, we also
3936 	// consider cached pages. If there are only few free pages and many cached
3937 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3938 	// the first iteration in this case.
3939 	int32 freePages = sUnreservedFreePages;
3940 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3941 
3942 	for (;;) {
3943 		if (alignmentMask != 0 || boundaryMask != 0) {
3944 			page_num_t offsetStart = start + sPhysicalPageOffset;
3945 
3946 			// enforce alignment
3947 			if ((offsetStart & alignmentMask) != 0)
3948 				offsetStart = (offsetStart + alignmentMask) & ~alignmentMask;
3949 
3950 			// enforce boundary
3951 			if (boundaryMask != 0 && ((offsetStart ^ (offsetStart
3952 				+ length - 1)) & boundaryMask) != 0) {
3953 				offsetStart = (offsetStart + length - 1) & boundaryMask;
3954 			}
3955 
3956 			start = offsetStart - sPhysicalPageOffset;
3957 		}
3958 
3959 		if (start + length > end) {
3960 			if (useCached == 0) {
3961 				// The first iteration with free pages only was unsuccessful.
3962 				// Try again also considering cached pages.
3963 				useCached = 1;
3964 				start = requestedStart;
3965 				continue;
3966 			}
3967 
3968 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3969 				"length %" B_PRIuPHYSADDR " (%" B_PRIuPHYSADDR " %"
3970 				B_PRIuPHYSADDR ") in second iteration (align: %" B_PRIuPHYSADDR
3971 				" boundary: %" B_PRIuPHYSADDR ")!\n", length, requestedStart,
3972 				end, restrictions->alignment, restrictions->boundary);
3973 
3974 			freeClearQueueLocker.Unlock();
3975 			vm_page_unreserve_pages(&reservation);
3976 			return NULL;
3977 		}
3978 
3979 		bool foundRun = true;
3980 		page_num_t i;
3981 		for (i = 0; i < length; i++) {
3982 			uint32 pageState = sPages[start + i].State();
3983 			if (pageState != PAGE_STATE_FREE
3984 				&& pageState != PAGE_STATE_CLEAR
3985 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3986 				foundRun = false;
3987 				break;
3988 			}
3989 		}
3990 
3991 		if (foundRun) {
3992 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3993 			if (i == length) {
3994 				reservation.count = 0;
3995 				return &sPages[start];
3996 			}
3997 
3998 			// apparently a cached page couldn't be allocated -- skip it and
3999 			// continue
4000 			freeClearQueueLocker.Lock();
4001 		}
4002 
4003 		start += i + 1;
4004 	}
4005 }
4006 
4007 
4008 vm_page *
4009 vm_page_at_index(int32 index)
4010 {
4011 	return &sPages[index];
4012 }
4013 
4014 
4015 vm_page *
4016 vm_lookup_page(page_num_t pageNumber)
4017 {
4018 	if (pageNumber < sPhysicalPageOffset)
4019 		return NULL;
4020 
4021 	pageNumber -= sPhysicalPageOffset;
4022 	if (pageNumber >= sNumPages)
4023 		return NULL;
4024 
4025 	return &sPages[pageNumber];
4026 }
4027 
4028 
4029 bool
4030 vm_page_is_dummy(struct vm_page *page)
4031 {
4032 	return page < sPages || page >= sPages + sNumPages;
4033 }
4034 
4035 
4036 /*!	Free the page that belonged to a certain cache.
4037 	You can use vm_page_set_state() manually if you prefer, but only
4038 	if the page does not equal PAGE_STATE_MODIFIED.
4039 
4040 	\param cache The cache the page was previously owned by or NULL. The page
4041 		must have been removed from its cache before calling this method in
4042 		either case.
4043 	\param page The page to free.
4044 	\param reservation If not NULL, the page count of the reservation will be
4045 		incremented, thus allowing to allocate another page for the freed one at
4046 		a later time.
4047 */
4048 void
4049 vm_page_free_etc(VMCache* cache, vm_page* page,
4050 	vm_page_reservation* reservation)
4051 {
4052 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4053 		&& page->State() != PAGE_STATE_CLEAR);
4054 
4055 	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
4056 		atomic_add(&sModifiedTemporaryPages, -1);
4057 
4058 	free_page(page, false);
4059 	if (reservation == NULL)
4060 		unreserve_pages(1);
4061 }
4062 
4063 
4064 void
4065 vm_page_set_state(vm_page *page, int pageState)
4066 {
4067 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
4068 		&& page->State() != PAGE_STATE_CLEAR);
4069 
4070 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
4071 		free_page(page, pageState == PAGE_STATE_CLEAR);
4072 		unreserve_pages(1);
4073 	} else
4074 		set_page_state(page, pageState);
4075 }
4076 
4077 
4078 /*!	Moves a page to either the tail of the head of its current queue,
4079 	depending on \a tail.
4080 	The page must have a cache and the cache must be locked!
4081 */
4082 void
4083 vm_page_requeue(struct vm_page *page, bool tail)
4084 {
4085 	PAGE_ASSERT(page, page->Cache() != NULL);
4086 	page->Cache()->AssertLocked();
4087 	// DEBUG_PAGE_ACCESS_CHECK(page);
4088 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
4089 		// when it requeues busy pages. The reason is that vm_soft_fault()
4090 		// (respectively fault_get_page()) and the file cache keep newly
4091 		// allocated pages accessed while they are reading them from disk. It
4092 		// would probably be better to change that code and reenable this
4093 		// check.
4094 
4095 	VMPageQueue *queue = NULL;
4096 
4097 	switch (page->State()) {
4098 		case PAGE_STATE_ACTIVE:
4099 			queue = &sActivePageQueue;
4100 			break;
4101 		case PAGE_STATE_INACTIVE:
4102 			queue = &sInactivePageQueue;
4103 			break;
4104 		case PAGE_STATE_MODIFIED:
4105 			queue = &sModifiedPageQueue;
4106 			break;
4107 		case PAGE_STATE_CACHED:
4108 			queue = &sCachedPageQueue;
4109 			break;
4110 		case PAGE_STATE_FREE:
4111 		case PAGE_STATE_CLEAR:
4112 			panic("vm_page_requeue() called for free/clear page %p", page);
4113 			return;
4114 		case PAGE_STATE_WIRED:
4115 		case PAGE_STATE_UNUSED:
4116 			return;
4117 		default:
4118 			panic("vm_page_touch: vm_page %p in invalid state %d\n",
4119 				page, page->State());
4120 			break;
4121 	}
4122 
4123 	queue->RequeueUnlocked(page, tail);
4124 }
4125 
4126 
4127 page_num_t
4128 vm_page_num_pages(void)
4129 {
4130 	return sNumPages - sNonExistingPages;
4131 }
4132 
4133 
4134 /*! There is a subtle distinction between the page counts returned by
4135 	this function and vm_page_num_free_pages():
4136 	The latter returns the number of pages that are completely uncommitted,
4137 	whereas this one returns the number of pages that are available for
4138 	use by being reclaimed as well (IOW it factors in things like cache pages
4139 	as available).
4140 */
4141 page_num_t
4142 vm_page_num_available_pages(void)
4143 {
4144 	return vm_available_memory() / B_PAGE_SIZE;
4145 }
4146 
4147 
4148 page_num_t
4149 vm_page_num_free_pages(void)
4150 {
4151 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
4152 	return count > 0 ? count : 0;
4153 }
4154 
4155 
4156 page_num_t
4157 vm_page_num_unused_pages(void)
4158 {
4159 	int32 count = sUnreservedFreePages;
4160 	return count > 0 ? count : 0;
4161 }
4162 
4163 
4164 void
4165 vm_page_get_stats(system_info *info)
4166 {
4167 	// Note: there's no locking protecting any of the queues or counters here,
4168 	// so we run the risk of getting bogus values when evaluating them
4169 	// throughout this function. As these stats are for informational purposes
4170 	// only, it is not really worth introducing such locking. Therefore we just
4171 	// ensure that we don't under- or overflow any of the values.
4172 
4173 	// The pages used for the block cache buffers. Those should not be counted
4174 	// as used but as cached pages.
4175 	// TODO: We should subtract the blocks that are in use ATM, since those
4176 	// can't really be freed in a low memory situation.
4177 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
4178 	info->block_cache_pages = blockCachePages;
4179 
4180 	// Non-temporary modified pages are special as they represent pages that
4181 	// can be written back, so they could be freed if necessary, for us
4182 	// basically making them into cached pages with a higher overhead. The
4183 	// modified queue count is therefore split into temporary and non-temporary
4184 	// counts that are then added to the corresponding number.
4185 	page_num_t modifiedNonTemporaryPages
4186 		= (sModifiedPageQueue.Count() - sModifiedTemporaryPages);
4187 
4188 	info->max_pages = vm_page_num_pages();
4189 	info->cached_pages = sCachedPageQueue.Count() + modifiedNonTemporaryPages
4190 		+ blockCachePages;
4191 
4192 	// max_pages is composed of:
4193 	//	active + inactive + unused + wired + modified + cached + free + clear
4194 	// So taking out the cached (including modified non-temporary), free and
4195 	// clear ones leaves us with all used pages.
4196 	uint32 subtractPages = info->cached_pages + sFreePageQueue.Count()
4197 		+ sClearPageQueue.Count();
4198 	info->used_pages = subtractPages > info->max_pages
4199 		? 0 : info->max_pages - subtractPages;
4200 
4201 	if (info->used_pages + info->cached_pages > info->max_pages) {
4202 		// Something was shuffled around while we were summing up the counts.
4203 		// Make the values sane, preferring the worse case of more used pages.
4204 		info->cached_pages = info->max_pages - info->used_pages;
4205 	}
4206 
4207 	info->page_faults = vm_num_page_faults();
4208 	info->ignored_pages = sIgnoredPages;
4209 
4210 	// TODO: We don't consider pages used for page directories/tables yet.
4211 }
4212 
4213 
4214 /*!	Returns the greatest address within the last page of accessible physical
4215 	memory.
4216 	The value is inclusive, i.e. in case of a 32 bit phys_addr_t 0xffffffff
4217 	means the that the last page ends at exactly 4 GB.
4218 */
4219 phys_addr_t
4220 vm_page_max_address()
4221 {
4222 	return ((phys_addr_t)sPhysicalPageOffset + sNumPages) * B_PAGE_SIZE - 1;
4223 }
4224 
4225 
4226 RANGE_MARKER_FUNCTION_END(vm_page)
4227