xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 97901ec593ec4dd50ac115c1c35a6d72f6e489a5)
1 /*
2  * Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <string.h>
12 #include <stdlib.h>
13 
14 #include <algorithm>
15 
16 #include <KernelExport.h>
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/vm_translation_map.h>
23 #include <block_cache.h>
24 #include <boot/kernel_args.h>
25 #include <condition_variable.h>
26 #include <heap.h>
27 #include <kernel.h>
28 #include <low_resource_manager.h>
29 #include <thread.h>
30 #include <tracing.h>
31 #include <util/AutoLock.h>
32 #include <vfs.h>
33 #include <vm/vm.h>
34 #include <vm/vm_priv.h>
35 #include <vm/vm_page.h>
36 #include <vm/VMAddressSpace.h>
37 #include <vm/VMArea.h>
38 #include <vm/VMCache.h>
39 
40 #include "IORequest.h"
41 #include "PageCacheLocker.h"
42 #include "VMAnonymousCache.h"
43 #include "VMPageQueue.h"
44 
45 
46 //#define TRACE_VM_PAGE
47 #ifdef TRACE_VM_PAGE
48 #	define TRACE(x) dprintf x
49 #else
50 #	define TRACE(x) ;
51 #endif
52 
53 //#define TRACE_VM_DAEMONS
54 #ifdef TRACE_VM_DAEMONS
55 #define TRACE_DAEMON(x...) dprintf(x)
56 #else
57 #define TRACE_DAEMON(x...) do {} while (false)
58 #endif
59 
60 //#define TRACK_PAGE_USAGE_STATS	1
61 
62 #define PAGE_ASSERT(page, condition)	\
63 	ASSERT_PRINT((condition), "page: %p", (page))
64 
65 #define SCRUB_SIZE 16
66 	// this many pages will be cleared at once in the page scrubber thread
67 
68 #define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY
69 	// maximum I/O priority of the page writer
70 #define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
71 	// the maximum I/O priority shall be reached when this many pages need to
72 	// be written
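	// Illustration: page_writer() below scales its I/O priority linearly with
	// the modified page count, so e.g. 5000 modified pages yield roughly half
	// of MAX_PAGE_WRITER_IO_PRIORITY, while 10000 or more pages (or critical
	// low-resource pressure) select the maximum priority.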
73 
74 
75 // The page reserve that an allocation of a given priority must not touch.
76 static const size_t kPageReserveForPriority[] = {
77 	VM_PAGE_RESERVE_USER,		// user
78 	VM_PAGE_RESERVE_SYSTEM,		// system
79 	0							// VIP
80 };
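// Indexed by VM priority (user, system, VIP): e.g. an allocation made with
// VM_PRIORITY_USER must leave VM_PAGE_RESERVE_USER unreserved pages untouched,
// whereas a VIP allocation may dig into the entire reserve (dontTouch == 0).
// The page scrubber below, for instance, reserves with
// kPageReserveForPriority[VM_PRIORITY_USER].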
81 
82 // Minimum number of free pages the page daemon will try to achieve.
83 static uint32 sFreePagesTarget;
84 static uint32 sFreeOrCachedPagesTarget;
85 static uint32 sInactivePagesTarget;
86 
87 // Wait interval between page daemon runs.
88 static const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
89 static const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
90 
91 // Number of idle runs after which we want to have processed the full active
92 // queue.
93 static const uint32 kIdleRunsForFullQueue = 20;
94 
95 // Maximum limit for the vm_page::usage_count.
96 static const int32 kPageUsageMax = 64;
97 // The vm_page::usage_count boost an accessed page receives in a scan.
98 static const int32 kPageUsageAdvance = 3;
99 // The vm_page::usage_count penalty an unaccessed page receives in a scan.
100 static const int32 kPageUsageDecline = 1;
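// For example, a page found accessed in three consecutive scans gains
// 3 * kPageUsageAdvance = 9 (never exceeding kPageUsageMax), while a page that
// stays untouched loses kPageUsageDecline per scan.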
101 
102 int32 gMappedPagesCount;
103 
104 static VMPageQueue sPageQueues[PAGE_STATE_COUNT];
105 
106 static VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
107 static VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
108 static VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
109 static VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
110 static VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
111 static VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
112 
113 static vm_page *sPages;
114 static addr_t sPhysicalPageOffset;
115 static size_t sNumPages;
116 static vint32 sUnreservedFreePages;
117 static vint32 sUnsatisfiedPageReservations;
118 static vint32 sModifiedTemporaryPages;
119 
120 static ConditionVariable sFreePageCondition;
121 static mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
122 
123 static rw_lock sFreePageQueuesLock
124 	= RW_LOCK_INITIALIZER("free/clear page queues");
125 
126 #ifdef TRACK_PAGE_USAGE_STATS
127 static page_num_t sPageUsageArrays[512];
128 static page_num_t* sPageUsage = sPageUsageArrays;
129 static page_num_t sPageUsagePageCount;
130 static page_num_t* sNextPageUsage = sPageUsageArrays + 256;
131 static page_num_t sNextPageUsagePageCount;
132 #endif
133 
134 
135 struct page_stats {
136 	int32	totalFreePages;
137 	int32	unsatisfiedReservations;
138 	int32	cachedPages;
139 };
140 
141 
142 struct PageReservationWaiter
143 		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
144 	struct thread*	thread;
145 	uint32			dontTouch;		// reserve not to touch
146 	uint32			missing;		// pages missing for the reservation
147 	int32			threadPriority;
148 
149 	bool operator<(const PageReservationWaiter& other) const
150 	{
151 		// Implies an order by descending VM priority (ascending dontTouch)
152 		// and (secondarily) descending thread priority.
153 		if (dontTouch != other.dontTouch)
154 			return dontTouch < other.dontTouch;
155 		return threadPriority > other.threadPriority;
156 	}
157 };
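// With the waiter list kept sorted by this operator, a VIP waiter
// (dontTouch == 0) is served before a system waiter
// (dontTouch == VM_PAGE_RESERVE_SYSTEM), which in turn precedes a user waiter
// (dontTouch == VM_PAGE_RESERVE_USER); among waiters with equal dontTouch the
// higher thread priority comes first.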
158 
159 typedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
160 static PageReservationWaiterList sPageReservationWaiters;
161 
162 
163 struct DaemonCondition {
164 	void Init(const char* name)
165 	{
166 		mutex_init(&fLock, "daemon condition");
167 		fCondition.Init(this, name);
168 		fActivated = false;
169 	}
170 
171 	bool Lock()
172 	{
173 		return mutex_lock(&fLock) == B_OK;
174 	}
175 
176 	void Unlock()
177 	{
178 		mutex_unlock(&fLock);
179 	}
180 
181 	bool Wait(bigtime_t timeout, bool clearActivated)
182 	{
183 		MutexLocker locker(fLock);
184 		if (clearActivated)
185 			fActivated = false;
186 		else if (fActivated)
187 			return true;
188 
189 		ConditionVariableEntry entry;
190 		fCondition.Add(&entry);
191 
192 		locker.Unlock();
193 
194 		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
195 	}
196 
197 	void WakeUp()
198 	{
199 		if (fActivated)
200 			return;
201 
202 		MutexLocker locker(fLock);
203 		fActivated = true;
204 		fCondition.NotifyOne();
205 	}
206 
207 	void ClearActivated()
208 	{
209 		MutexLocker locker(fLock);
210 		fActivated = false;
211 	}
212 
213 private:
214 	mutex				fLock;
215 	ConditionVariable	fCondition;
216 	bool				fActivated;
217 };
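// Usage sketch (cf. page_writer() below): the daemon sleeps via
//	sPageWriterCondition.Wait(3000000, true);
// and any thread that has queued work can cut the wait short with
//	sPageWriterCondition.WakeUp();
// Since WakeUp() returns early when the condition is already activated,
// repeated triggers while the daemon is still busy stay cheap.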
218 
219 
220 static DaemonCondition sPageWriterCondition;
221 static DaemonCondition sPageDaemonCondition;
222 
223 
224 #if PAGE_ALLOCATION_TRACING
225 
226 namespace PageAllocationTracing {
227 
228 class ReservePages : public AbstractTraceEntry {
229 	public:
230 		ReservePages(uint32 count)
231 			:
232 			fCount(count)
233 		{
234 			Initialized();
235 		}
236 
237 		virtual void AddDump(TraceOutput& out)
238 		{
239 			out.Print("page reserve:   %lu", fCount);
240 		}
241 
242 	private:
243 		uint32		fCount;
244 };
245 
246 
247 class UnreservePages : public AbstractTraceEntry {
248 	public:
249 		UnreservePages(uint32 count)
250 			:
251 			fCount(count)
252 		{
253 			Initialized();
254 		}
255 
256 		virtual void AddDump(TraceOutput& out)
257 		{
258 			out.Print("page unreserve: %lu", fCount);
259 		}
260 
261 	private:
262 		uint32		fCount;
263 };
264 
265 
266 class AllocatePage : public AbstractTraceEntry {
267 	public:
268 		AllocatePage()
269 		{
270 			Initialized();
271 		}
272 
273 		virtual void AddDump(TraceOutput& out)
274 		{
275 			out.Print("page alloc");
276 		}
277 };
278 
279 
280 class AllocatePageRun : public AbstractTraceEntry {
281 	public:
282 		AllocatePageRun(uint32 length)
283 			:
284 			fLength(length)
285 		{
286 			Initialized();
287 		}
288 
289 		virtual void AddDump(TraceOutput& out)
290 		{
291 			out.Print("page alloc run: length: %ld", fLength);
292 		}
293 
294 	private:
295 		uint32		fLength;
296 };
297 
298 
299 class FreePage : public AbstractTraceEntry {
300 	public:
301 		FreePage()
302 		{
303 			Initialized();
304 		}
305 
306 		virtual void AddDump(TraceOutput& out)
307 		{
308 			out.Print("page free");
309 		}
310 };
311 
312 
313 class ScrubbingPages : public AbstractTraceEntry {
314 	public:
315 		ScrubbingPages(uint32 count)
316 			:
317 			fCount(count)
318 		{
319 			Initialized();
320 		}
321 
322 		virtual void AddDump(TraceOutput& out)
323 		{
324 			out.Print("page scrubbing: %lu", fCount);
325 		}
326 
327 	private:
328 		uint32		fCount;
329 };
330 
331 
332 class ScrubbedPages : public AbstractTraceEntry {
333 	public:
334 		ScrubbedPages(uint32 count)
335 			:
336 			fCount(count)
337 		{
338 			Initialized();
339 		}
340 
341 		virtual void AddDump(TraceOutput& out)
342 		{
343 			out.Print("page scrubbed:  %lu", fCount);
344 		}
345 
346 	private:
347 		uint32		fCount;
348 };
349 
350 
351 class StolenPage : public AbstractTraceEntry {
352 	public:
353 		StolenPage()
354 		{
355 			Initialized();
356 		}
357 
358 		virtual void AddDump(TraceOutput& out)
359 		{
360 			out.Print("page stolen");
361 		}
362 };
363 
364 }	// namespace PageAllocationTracing
365 
366 #	define TA(x)	new(std::nothrow) PageAllocationTracing::x
367 
368 #else
369 #	define TA(x)
370 #endif	// PAGE_ALLOCATION_TRACING
371 
372 
373 #if PAGE_DAEMON_TRACING
374 
375 namespace PageDaemonTracing {
376 
377 class ActivatePage : public AbstractTraceEntry {
378 	public:
379 		ActivatePage(vm_page* page)
380 			:
381 			fCache(page->cache),
382 			fPage(page)
383 		{
384 			Initialized();
385 		}
386 
387 		virtual void AddDump(TraceOutput& out)
388 		{
389 			out.Print("page activated:   %p, cache: %p", fPage, fCache);
390 		}
391 
392 	private:
393 		VMCache*	fCache;
394 		vm_page*	fPage;
395 };
396 
397 
398 class DeactivatePage : public AbstractTraceEntry {
399 	public:
400 		DeactivatePage(vm_page* page)
401 			:
402 			fCache(page->cache),
403 			fPage(page)
404 		{
405 			Initialized();
406 		}
407 
408 		virtual void AddDump(TraceOutput& out)
409 		{
410 			out.Print("page deactivated: %p, cache: %p", fPage, fCache);
411 		}
412 
413 	private:
414 		VMCache*	fCache;
415 		vm_page*	fPage;
416 };
417 
418 
419 class FreedPageSwap : public AbstractTraceEntry {
420 	public:
421 		FreedPageSwap(vm_page* page)
422 			:
423 			fCache(page->cache),
424 			fPage(page)
425 		{
426 			Initialized();
427 		}
428 
429 		virtual void AddDump(TraceOutput& out)
430 		{
431 			out.Print("page swap freed:  %p, cache: %p", fPage, fCache);
432 		}
433 
434 	private:
435 		VMCache*	fCache;
436 		vm_page*	fPage;
437 };
438 
439 }	// namespace PageDaemonTracing
440 
441 #	define TD(x)	new(std::nothrow) PageDaemonTracing::x
442 
443 #else
444 #	define TD(x)
445 #endif	// PAGE_DAEMON_TRACING
446 
447 
448 #if PAGE_WRITER_TRACING
449 
450 namespace PageWriterTracing {
451 
452 class WritePage : public AbstractTraceEntry {
453 	public:
454 		WritePage(vm_page* page)
455 			:
456 			fCache(page->Cache()),
457 			fPage(page)
458 		{
459 			Initialized();
460 		}
461 
462 		virtual void AddDump(TraceOutput& out)
463 		{
464 			out.Print("page write: %p, cache: %p", fPage, fCache);
465 		}
466 
467 	private:
468 		VMCache*	fCache;
469 		vm_page*	fPage;
470 };
471 
472 }	// namespace PageWriterTracing
473 
474 #	define TPW(x)	new(std::nothrow) PageWriterTracing::x
475 
476 #else
477 #	define TPW(x)
478 #endif	// PAGE_WRITER_TRACING
479 
480 
481 #if PAGE_STATE_TRACING
482 
483 namespace PageStateTracing {
484 
485 class SetPageState : public AbstractTraceEntry {
486 	public:
487 		SetPageState(vm_page* page, uint8 newState)
488 			:
489 			fPage(page),
490 			fOldState(page->State()),
491 			fNewState(newState),
492 			fBusy(page->busy),
493 			fWired(page->wired_count > 0),
494 			fMapped(!page->mappings.IsEmpty()),
495 			fAccessed(page->accessed),
496 			fModified(page->modified)
497 		{
498 #if PAGE_STATE_TRACING_STACK_TRACE
499 			fStackTrace = capture_tracing_stack_trace(
500 				PAGE_STATE_TRACING_STACK_TRACE, 0, true);
501 				// Don't capture userland stack trace to avoid potential
502 				// deadlocks.
503 #endif
504 			Initialized();
505 		}
506 
507 #if PAGE_STATE_TRACING_STACK_TRACE
508 		virtual void DumpStackTrace(TraceOutput& out)
509 		{
510 			out.PrintStackTrace(fStackTrace);
511 		}
512 #endif
513 
514 		virtual void AddDump(TraceOutput& out)
515 		{
516 			out.Print("page set state: %p (%c%c%c%c%c): %s -> %s", fPage,
517 				fBusy ? 'b' : '-',
518 				fWired ? 'w' : '-',
519 				fMapped ? 'm' : '-',
520 				fAccessed ? 'a' : '-',
521 				fModified ? 'm' : '-',
522 				page_state_to_string(fOldState),
523 				page_state_to_string(fNewState));
524 		}
525 
526 	private:
527 		vm_page*	fPage;
528 #if PAGE_STATE_TRACING_STACK_TRACE
529 		tracing_stack_trace* fStackTrace;
530 #endif
531 		uint8		fOldState;
532 		uint8		fNewState;
533 		bool		fBusy : 1;
534 		bool		fWired : 1;
535 		bool		fMapped : 1;
536 		bool		fAccessed : 1;
537 		bool		fModified : 1;
538 };
539 
540 }	// namespace PageStateTracing
541 
542 #	define TPS(x)	new(std::nothrow) PageStateTracing::x
543 
544 #else
545 #	define TPS(x)
546 #endif	// PAGE_STATE_TRACING
547 
548 
549 static int
550 find_page(int argc, char **argv)
551 {
552 	struct vm_page *page;
553 	addr_t address;
554 	int32 index = 1;
555 	int i;
556 
557 	struct {
558 		const char*	name;
559 		VMPageQueue*	queue;
560 	} pageQueueInfos[] = {
561 		{ "free",		&sFreePageQueue },
562 		{ "clear",		&sClearPageQueue },
563 		{ "modified",	&sModifiedPageQueue },
564 		{ "active",		&sActivePageQueue },
565 		{ "inactive",	&sInactivePageQueue },
566 		{ "cached",		&sCachedPageQueue },
567 		{ NULL, NULL }
568 	};
569 
570 	if (argc < 2
571 		|| strlen(argv[index]) <= 2
572 		|| argv[index][0] != '0'
573 		|| argv[index][1] != 'x') {
574 		kprintf("usage: find_page <address>\n");
575 		return 0;
576 	}
577 
578 	address = strtoul(argv[index], NULL, 0);
579 	page = (vm_page*)address;
580 
581 	for (i = 0; pageQueueInfos[i].name; i++) {
582 		VMPageQueue::Iterator it = pageQueueInfos[i].queue->GetIterator();
583 		while (vm_page* p = it.Next()) {
584 			if (p == page) {
585 				kprintf("found page %p in queue %p (%s)\n", page,
586 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
587 				return 0;
588 			}
589 		}
590 	}
591 
592 	kprintf("page %p isn't in any queue\n", page);
593 
594 	return 0;
595 }
596 
597 
598 const char *
599 page_state_to_string(int state)
600 {
601 	switch(state) {
602 		case PAGE_STATE_ACTIVE:
603 			return "active";
604 		case PAGE_STATE_INACTIVE:
605 			return "inactive";
606 		case PAGE_STATE_MODIFIED:
607 			return "modified";
608 		case PAGE_STATE_CACHED:
609 			return "cached";
610 		case PAGE_STATE_FREE:
611 			return "free";
612 		case PAGE_STATE_CLEAR:
613 			return "clear";
614 		case PAGE_STATE_WIRED:
615 			return "wired";
616 		case PAGE_STATE_UNUSED:
617 			return "unused";
618 		default:
619 			return "unknown";
620 	}
621 }
622 
623 
624 static int
625 dump_page(int argc, char **argv)
626 {
627 	bool addressIsPointer = true;
628 	bool physical = false;
629 	bool searchMappings = false;
630 	int32 index = 1;
631 
632 	while (index < argc) {
633 		if (argv[index][0] != '-')
634 			break;
635 
636 		if (!strcmp(argv[index], "-p")) {
637 			addressIsPointer = false;
638 			physical = true;
639 		} else if (!strcmp(argv[index], "-v")) {
640 			addressIsPointer = false;
641 		} else if (!strcmp(argv[index], "-m")) {
642 			searchMappings = true;
643 		} else {
644 			print_debugger_command_usage(argv[0]);
645 			return 0;
646 		}
647 
648 		index++;
649 	}
650 
651 	if (index + 1 != argc) {
652 		print_debugger_command_usage(argv[0]);
653 		return 0;
654 	}
655 
656 	uint64 value;
657 	if (!evaluate_debug_expression(argv[index], &value, false))
658 		return 0;
659 
660 	addr_t pageAddress = (addr_t)value;
661 	struct vm_page* page;
662 
663 	if (addressIsPointer) {
664 		page = (struct vm_page *)pageAddress;
665 	} else {
666 		if (!physical) {
667 			VMAddressSpace *addressSpace = VMAddressSpace::Kernel();
668 
669 			if (debug_get_debugged_thread()->team->address_space != NULL)
670 				addressSpace = debug_get_debugged_thread()->team->address_space;
671 
672 			uint32 flags = 0;
673 			if (addressSpace->TranslationMap()->QueryInterrupt(pageAddress,
674 					&pageAddress, &flags) != B_OK
675 				|| (flags & PAGE_PRESENT) == 0) {
676 				kprintf("Virtual address not mapped to a physical page in this "
677 					"address space.\n");
678 				return 0;
679 			}
680 		}
681 
682 		page = vm_lookup_page(pageAddress / B_PAGE_SIZE);
683 	}
684 
685 	kprintf("PAGE: %p\n", page);
686 	kprintf("queue_next,prev: %p, %p\n", page->queue_link.next,
687 		page->queue_link.previous);
688 	kprintf("physical_number: %#lx\n", page->physical_page_number);
689 	kprintf("cache:           %p\n", page->Cache());
690 	kprintf("cache_offset:    %ld\n", page->cache_offset);
691 	kprintf("cache_next:      %p\n", page->cache_next);
692 	kprintf("state:           %s\n", page_state_to_string(page->State()));
693 	kprintf("wired_count:     %d\n", page->wired_count);
694 	kprintf("usage_count:     %d\n", page->usage_count);
695 	kprintf("busy:            %d\n", page->busy);
696 	kprintf("busy_writing:    %d\n", page->busy_writing);
697 	kprintf("accessed:        %d\n", page->accessed);
698 	kprintf("modified:        %d\n", page->modified);
699 	#if DEBUG_PAGE_QUEUE
700 		kprintf("queue:           %p\n", page->queue);
701 	#endif
702 	#if DEBUG_PAGE_ACCESS
703 		kprintf("accessor:        %" B_PRId32 "\n", page->accessing_thread);
704 	#endif
705 	kprintf("area mappings:\n");
706 
707 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
708 	vm_page_mapping *mapping;
709 	while ((mapping = iterator.Next()) != NULL) {
710 		kprintf("  %p (%" B_PRId32 ")\n", mapping->area, mapping->area->id);
712 	}
713 
714 	if (searchMappings) {
715 		kprintf("all mappings:\n");
716 		VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
717 		while (addressSpace != NULL) {
718 			size_t pageCount = addressSpace->Size() / B_PAGE_SIZE;
719 			for (addr_t address = addressSpace->Base(); pageCount != 0;
720 					address += B_PAGE_SIZE, pageCount--) {
721 				addr_t physicalAddress;
722 				uint32 flags = 0;
723 				if (addressSpace->TranslationMap()->QueryInterrupt(address,
724 						&physicalAddress, &flags) == B_OK
725 					&& (flags & PAGE_PRESENT) != 0
726 					&& physicalAddress / B_PAGE_SIZE
727 						== page->physical_page_number) {
728 					VMArea* area = addressSpace->LookupArea(address);
729 					kprintf("  aspace %ld, area %ld: %#" B_PRIxADDR
730 						" (%c%c%s%s)\n", addressSpace->ID(),
731 						area != NULL ? area->id : -1, address,
732 						(flags & B_KERNEL_READ_AREA) != 0 ? 'r' : '-',
733 						(flags & B_KERNEL_WRITE_AREA) != 0 ? 'w' : '-',
734 						(flags & PAGE_MODIFIED) != 0 ? " modified" : "",
735 						(flags & PAGE_ACCESSED) != 0 ? " accessed" : "");
736 				}
737 			}
738 			addressSpace = VMAddressSpace::DebugNext(addressSpace);
739 		}
740 	}
741 
742 	set_debug_variable("_cache", (addr_t)page->Cache());
743 	#if DEBUG_PAGE_ACCESS
744 		set_debug_variable("_accessor", page->accessing_thread);
745 	#endif
746 
747 	return 0;
748 }
749 
750 
751 static int
752 dump_page_queue(int argc, char **argv)
753 {
754 	struct VMPageQueue *queue;
755 
756 	if (argc < 2) {
757 		kprintf("usage: page_queue <address/name> [list]\n");
758 		return 0;
759 	}
760 
761 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
762 		queue = (VMPageQueue*)strtoul(argv[1], NULL, 16);
763 	if (!strcmp(argv[1], "free"))
764 		queue = &sFreePageQueue;
765 	else if (!strcmp(argv[1], "clear"))
766 		queue = &sClearPageQueue;
767 	else if (!strcmp(argv[1], "modified"))
768 		queue = &sModifiedPageQueue;
769 	else if (!strcmp(argv[1], "active"))
770 		queue = &sActivePageQueue;
771 	else if (!strcmp(argv[1], "inactive"))
772 		queue = &sInactivePageQueue;
773 	else if (!strcmp(argv[1], "cached"))
774 		queue = &sCachedPageQueue;
775 	else {
776 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
777 		return 0;
778 	}
779 
780 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %ld\n",
781 		queue, queue->Head(), queue->Tail(), queue->Count());
782 
783 	if (argc == 3) {
784 		struct vm_page *page = queue->Head();
785 		const char *type = "none";
786 		int i;
787 
788 		if (page->Cache() != NULL) {
789 			switch (page->Cache()->type) {
790 				case CACHE_TYPE_RAM:
791 					type = "RAM";
792 					break;
793 				case CACHE_TYPE_DEVICE:
794 					type = "device";
795 					break;
796 				case CACHE_TYPE_VNODE:
797 					type = "vnode";
798 					break;
799 				case CACHE_TYPE_NULL:
800 					type = "null";
801 					break;
802 				default:
803 					type = "???";
804 					break;
805 			}
806 		}
807 
808 		kprintf("page        cache       type       state  wired  usage\n");
809 		for (i = 0; page; i++, page = queue->Next(page)) {
810 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->Cache(),
811 				type, page_state_to_string(page->State()),
812 				page->wired_count, page->usage_count);
813 		}
814 	}
815 	return 0;
816 }
817 
818 
819 static int
820 dump_page_stats(int argc, char **argv)
821 {
822 	page_num_t swappableModified = 0;
823 	page_num_t swappableModifiedInactive = 0;
824 
825 	size_t counter[8];
826 	size_t busyCounter[8];
827 	memset(counter, 0, sizeof(counter));
828 	memset(busyCounter, 0, sizeof(busyCounter));
829 
830 	struct page_run {
831 		page_num_t	start;
832 		page_num_t	end;
833 
834 		page_num_t Length() const	{ return end - start; }
835 	};
836 
837 	page_run currentFreeRun = { 0, 0 };
838 	page_run currentCachedRun = { 0, 0 };
839 	page_run longestFreeRun = { 0, 0 };
840 	page_run longestCachedRun = { 0, 0 };
841 
842 	for (addr_t i = 0; i < sNumPages; i++) {
843 		if (sPages[i].State() > 7)
844 			panic("page %li at %p has invalid state!\n", i, &sPages[i]);
845 
846 		uint32 pageState = sPages[i].State();
847 
848 		counter[pageState]++;
849 		if (sPages[i].busy)
850 			busyCounter[pageState]++;
851 
852 		if (pageState == PAGE_STATE_MODIFIED
853 			&& sPages[i].Cache() != NULL
854 			&& sPages[i].Cache()->temporary && sPages[i].wired_count == 0) {
855 			swappableModified++;
856 			if (sPages[i].usage_count == 0)
857 				swappableModifiedInactive++;
858 		}
859 
860 		// track free and cached pages runs
861 		if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR) {
862 			currentFreeRun.end = i + 1;
863 			currentCachedRun.end = i + 1;
864 		} else {
865 			if (currentFreeRun.Length() > longestFreeRun.Length())
866 				longestFreeRun = currentFreeRun;
867 			currentFreeRun.start = currentFreeRun.end = i + 1;
868 
869 			if (pageState == PAGE_STATE_CACHED) {
870 				currentCachedRun.end = i + 1;
871 			} else {
872 				if (currentCachedRun.Length() > longestCachedRun.Length())
873 					longestCachedRun = currentCachedRun;
874 				currentCachedRun.start = currentCachedRun.end = i + 1;
875 			}
876 		}
877 	}
878 
879 	kprintf("page stats:\n");
880 	kprintf("total: %lu\n", sNumPages);
881 
882 	kprintf("active: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
883 		counter[PAGE_STATE_ACTIVE], busyCounter[PAGE_STATE_ACTIVE]);
884 	kprintf("inactive: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
885 		counter[PAGE_STATE_INACTIVE], busyCounter[PAGE_STATE_INACTIVE]);
886 	kprintf("cached: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
887 		counter[PAGE_STATE_CACHED], busyCounter[PAGE_STATE_CACHED]);
888 	kprintf("unused: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
889 		counter[PAGE_STATE_UNUSED], busyCounter[PAGE_STATE_UNUSED]);
890 	kprintf("wired: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
891 		counter[PAGE_STATE_WIRED], busyCounter[PAGE_STATE_WIRED]);
892 	kprintf("modified: %" B_PRIuSIZE " (busy: %" B_PRIuSIZE ")\n",
893 		counter[PAGE_STATE_MODIFIED], busyCounter[PAGE_STATE_MODIFIED]);
894 	kprintf("free: %" B_PRIuSIZE "\n", counter[PAGE_STATE_FREE]);
895 	kprintf("clear: %" B_PRIuSIZE "\n", counter[PAGE_STATE_CLEAR]);
896 
897 	kprintf("unreserved free pages: %" B_PRId32 "\n", sUnreservedFreePages);
898 	kprintf("unsatisfied page reservations: %" B_PRId32 "\n",
899 		sUnsatisfiedPageReservations);
900 	kprintf("mapped pages: %lu\n", gMappedPagesCount);
901 	kprintf("longest free pages run: %" B_PRIuSIZE " pages (at %" B_PRIuSIZE
902 		")\n", longestFreeRun.Length(),
903 		sPages[longestFreeRun.start].physical_page_number);
904 	kprintf("longest free/cached pages run: %" B_PRIuSIZE " pages (at %"
905 		B_PRIuSIZE ")\n", longestCachedRun.Length(),
906 		sPages[longestCachedRun.start].physical_page_number);
907 
908 	kprintf("waiting threads:\n");
909 	for (PageReservationWaiterList::Iterator it
910 			= sPageReservationWaiters.GetIterator();
911 		PageReservationWaiter* waiter = it.Next();) {
912 		kprintf("  %6" B_PRId32 ": missing: %6" B_PRIu32
913 			", don't touch: %6" B_PRIu32 "\n", waiter->thread->id,
914 			waiter->missing, waiter->dontTouch);
915 	}
916 
917 	kprintf("\nfree queue: %p, count = %ld\n", &sFreePageQueue,
918 		sFreePageQueue.Count());
919 	kprintf("clear queue: %p, count = %ld\n", &sClearPageQueue,
920 		sClearPageQueue.Count());
921 	kprintf("modified queue: %p, count = %ld (%ld temporary, %lu swappable, "
922 		"inactive: %lu)\n", &sModifiedPageQueue, sModifiedPageQueue.Count(),
923 		sModifiedTemporaryPages, swappableModified, swappableModifiedInactive);
924 	kprintf("active queue: %p, count = %ld\n", &sActivePageQueue,
925 		sActivePageQueue.Count());
926 	kprintf("inactive queue: %p, count = %ld\n", &sInactivePageQueue,
927 		sInactivePageQueue.Count());
928 	kprintf("cached queue: %p, count = %ld\n", &sCachedPageQueue,
929 		sCachedPageQueue.Count());
930 	return 0;
931 }
932 
933 
934 #ifdef TRACK_PAGE_USAGE_STATS
935 
936 static void
937 track_page_usage(vm_page* page)
938 {
939 	if (page->wired_count == 0) {
940 		sNextPageUsage[(int32)page->usage_count + 128]++;
941 		sNextPageUsagePageCount++;
942 	}
943 }
944 
945 
946 static void
947 update_page_usage_stats()
948 {
949 	std::swap(sPageUsage, sNextPageUsage);
950 	sPageUsagePageCount = sNextPageUsagePageCount;
951 
952 	memset(sNextPageUsage, 0, sizeof(page_num_t) * 256);
953 	sNextPageUsagePageCount = 0;
954 
955 	// compute average
956 	if (sPageUsagePageCount > 0) {
957 		int64 sum = 0;
958 		for (int32 i = 0; i < 256; i++)
959 			sum += (int64)sPageUsage[i] * (i - 128);
960 
961 		TRACE_DAEMON("average page usage: %f (%lu pages)\n",
962 			(float)sum / sPageUsagePageCount, sPageUsagePageCount);
963 	}
964 }
965 
966 
967 static int
968 dump_page_usage_stats(int argc, char** argv)
969 {
970 	kprintf("distribution of page usage counts (%lu pages):",
971 		sPageUsagePageCount);
972 
973 	int64 sum = 0;
974 	for (int32 i = 0; i < 256; i++) {
975 		if (i % 8 == 0)
976 			kprintf("\n%4ld:", i - 128);
977 
978 		int64 count = sPageUsage[i];
979 		sum += count * (i - 128);
980 
981 		kprintf("  %9llu", count);
982 	}
983 
984 	kprintf("\n\n");
985 
986 	kprintf("average usage count: %f\n",
987 		sPageUsagePageCount > 0 ? (float)sum / sPageUsagePageCount : 0);
988 
989 	return 0;
990 }
991 
992 #endif	// TRACK_PAGE_USAGE_STATS
993 
994 
995 // #pragma mark - vm_page
996 
997 
998 inline void
999 vm_page::InitState(uint8 newState)
1000 {
1001 	state = newState;
1002 }
1003 
1004 
1005 inline void
1006 vm_page::SetState(uint8 newState)
1007 {
1008 	TPS(SetPageState(this, newState));
1009 
1010 	state = newState;
1011 }
1012 
1013 
1014 // #pragma mark -
1015 
1016 
1017 static void
1018 get_page_stats(page_stats& _pageStats)
1019 {
1020 	_pageStats.totalFreePages = sUnreservedFreePages;
1021 	_pageStats.cachedPages = sCachedPageQueue.Count();
1022 	_pageStats.unsatisfiedReservations = sUnsatisfiedPageReservations;
1023 	// TODO: We don't get an actual snapshot here!
1024 }
1025 
1026 
1027 static bool
1028 do_active_paging(const page_stats& pageStats)
1029 {
1030 	return pageStats.totalFreePages + pageStats.cachedPages
1031 		< pageStats.unsatisfiedReservations
1032 			+ (int32)sFreeOrCachedPagesTarget;
1033 }
1034 
1035 
1036 /*!	Reserves as many pages as possible from \c sUnreservedFreePages up to
1037 	\a count. Doesn't touch the last \a dontTouch pages of
1038 	\c sUnreservedFreePages, though.
1039 	\return The number of actually reserved pages.
1040 */
1041 static uint32
1042 reserve_some_pages(uint32 count, uint32 dontTouch)
1043 {
1044 	while (true) {
1045 		int32 freePages = sUnreservedFreePages;
1046 		if (freePages <= (int32)dontTouch)
1047 			return 0;
1048 
1049 		int32 toReserve = std::min(count, freePages - dontTouch);
1050 		if (atomic_test_and_set(&sUnreservedFreePages,
1051 					freePages - toReserve, freePages)
1052 				== freePages) {
1053 			return toReserve;
1054 		}
1055 
1056 		// the count changed in the meantime -- retry
1057 	}
1058 }
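// The loop above is a lock-free compare-and-swap: atomic_test_and_set() only
// stores the decremented value if sUnreservedFreePages still equals the
// freePages snapshot (it returns the previous value), otherwise we retry.
// Example: with sUnreservedFreePages == 50, reserve_some_pages(32, 30)
// reserves min(32, 50 - 30) == 20 pages and leaves the last 30 untouched.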
1059 
1060 
1061 static void
1062 wake_up_page_reservation_waiters()
1063 {
1064 	MutexLocker pageDeficitLocker(sPageDeficitLock);
1065 
1066 	// TODO: If this is a low priority thread, we might want to disable
1067 	// interrupts or otherwise ensure that we aren't unscheduled. Otherwise
1068 	// high priority threads might be kept waiting while a medium priority thread
1069 	// prevents us from running.
1070 
1071 	while (PageReservationWaiter* waiter = sPageReservationWaiters.Head()) {
1072 		int32 reserved = reserve_some_pages(waiter->missing,
1073 			waiter->dontTouch);
1074 		if (reserved == 0)
1075 			return;
1076 
1077 		atomic_add(&sUnsatisfiedPageReservations, -reserved);
1078 		waiter->missing -= reserved;
1079 
1080 		if (waiter->missing > 0)
1081 			return;
1082 
1083 		sPageReservationWaiters.Remove(waiter);
1084 
1085 		InterruptsSpinLocker threadLocker(gThreadSpinlock);
1086 		thread_unblock_locked(waiter->thread, B_OK);
1087 	}
1088 }
1089 
1090 
1091 static inline void
1092 unreserve_pages(uint32 count)
1093 {
1094 	atomic_add(&sUnreservedFreePages, count);
1095 	if (sUnsatisfiedPageReservations != 0)
1096 		wake_up_page_reservation_waiters();
1097 }
1098 
1099 
1100 static void
1101 free_page(vm_page* page, bool clear)
1102 {
1103 	DEBUG_PAGE_ACCESS_CHECK(page);
1104 
1105 	PAGE_ASSERT(page, !page->IsMapped());
1106 
1107 	VMPageQueue* fromQueue;
1108 
1109 	switch (page->State()) {
1110 		case PAGE_STATE_ACTIVE:
1111 			fromQueue = &sActivePageQueue;
1112 			break;
1113 		case PAGE_STATE_INACTIVE:
1114 			fromQueue = &sInactivePageQueue;
1115 			break;
1116 		case PAGE_STATE_MODIFIED:
1117 			fromQueue = &sModifiedPageQueue;
1118 			break;
1119 		case PAGE_STATE_CACHED:
1120 			fromQueue = &sCachedPageQueue;
1121 			break;
1122 		case PAGE_STATE_FREE:
1123 		case PAGE_STATE_CLEAR:
1124 			panic("free_page(): page %p already free", page);
1125 			return;
1126 		case PAGE_STATE_WIRED:
1127 		case PAGE_STATE_UNUSED:
1128 			fromQueue = NULL;
1129 			break;
1130 		default:
1131 			panic("free_page(): page %p in invalid state %d",
1132 				page, page->State());
1133 			return;
1134 	}
1135 
1136 	if (page->CacheRef() != NULL)
1137 		panic("to be freed page %p has cache", page);
1138 	if (page->IsMapped())
1139 		panic("to be freed page %p has mappings", page);
1140 
1141 	if (fromQueue != NULL)
1142 		fromQueue->RemoveUnlocked(page);
1143 
1144 	TA(FreePage());
1145 
1146 	ReadLocker locker(sFreePageQueuesLock);
1147 
1148 	DEBUG_PAGE_ACCESS_END(page);
1149 
1150 	if (clear) {
1151 		page->SetState(PAGE_STATE_CLEAR);
1152 		sClearPageQueue.PrependUnlocked(page);
1153 	} else {
1154 		page->SetState(PAGE_STATE_FREE);
1155 		sFreePageQueue.PrependUnlocked(page);
1156 	}
1157 
1158 	locker.Unlock();
1159 
1160 	unreserve_pages(1);
1161 }
1162 
1163 
1164 /*!	The caller must make sure that no-one else tries to change the page's state
1165 	while the function is called. If the page has a cache, this can be done by
1166 	locking the cache.
1167 */
1168 static void
1169 set_page_state(vm_page *page, int pageState)
1170 {
1171 	DEBUG_PAGE_ACCESS_CHECK(page);
1172 
1173 	if (pageState == page->State())
1174 		return;
1175 
1176 	VMPageQueue* fromQueue;
1177 
1178 	switch (page->State()) {
1179 		case PAGE_STATE_ACTIVE:
1180 			fromQueue = &sActivePageQueue;
1181 			break;
1182 		case PAGE_STATE_INACTIVE:
1183 			fromQueue = &sInactivePageQueue;
1184 			break;
1185 		case PAGE_STATE_MODIFIED:
1186 			fromQueue = &sModifiedPageQueue;
1187 			break;
1188 		case PAGE_STATE_CACHED:
1189 			fromQueue = &sCachedPageQueue;
1190 			break;
1191 		case PAGE_STATE_FREE:
1192 		case PAGE_STATE_CLEAR:
1193 			panic("set_page_state(): page %p is free/clear", page);
1194 			return;
1195 		case PAGE_STATE_WIRED:
1196 		case PAGE_STATE_UNUSED:
1197 			fromQueue = NULL;
1198 			break;
1199 		default:
1200 			panic("set_page_state(): page %p in invalid state %d",
1201 				page, page->State());
1202 			return;
1203 	}
1204 
1205 	VMPageQueue* toQueue;
1206 
1207 	switch (pageState) {
1208 		case PAGE_STATE_ACTIVE:
1209 			toQueue = &sActivePageQueue;
1210 			break;
1211 		case PAGE_STATE_INACTIVE:
1212 			toQueue = &sInactivePageQueue;
1213 			break;
1214 		case PAGE_STATE_MODIFIED:
1215 			toQueue = &sModifiedPageQueue;
1216 			break;
1217 		case PAGE_STATE_CACHED:
1218 			PAGE_ASSERT(page, !page->IsMapped());
1219 			PAGE_ASSERT(page, !page->modified);
1220 			toQueue = &sCachedPageQueue;
1221 			break;
1222 		case PAGE_STATE_FREE:
1223 		case PAGE_STATE_CLEAR:
1224 			panic("set_page_state(): target state is free/clear");
1225 			return;
1226 		case PAGE_STATE_WIRED:
1227 		case PAGE_STATE_UNUSED:
1228 			toQueue = NULL;
1229 			break;
1230 		default:
1231 			panic("set_page_state(): invalid target state %d", pageState);
1232 			return;
1233 	}
1234 
1235 	VMCache* cache = page->Cache();
1236 	if (cache != NULL && cache->temporary) {
1237 		if (pageState == PAGE_STATE_MODIFIED)
1238 			atomic_add(&sModifiedTemporaryPages, 1);
1239 		else if (page->State() == PAGE_STATE_MODIFIED)
1240 			atomic_add(&sModifiedTemporaryPages, -1);
1241 	}
1242 
1243 	// move the page
1244 	if (toQueue == fromQueue) {
1245 		// Note: Theoretically we are required to lock when changing the page
1246 		// state, even if we don't change the queue. We actually don't have to
1247 		// do this, though, since only for the active queue there are different
1248 		// page states and active pages have a cache that must be locked at
1249 		// this point. So we rely on the fact that everyone must lock the cache
1250 		// before trying to change/interpret the page state.
1251 		PAGE_ASSERT(page, cache != NULL);
1252 		cache->AssertLocked();
1253 		page->SetState(pageState);
1254 	} else {
1255 		if (fromQueue != NULL)
1256 			fromQueue->RemoveUnlocked(page);
1257 
1258 		page->SetState(pageState);
1259 
1260 		if (toQueue != NULL)
1261 			toQueue->AppendUnlocked(page);
1262 	}
1263 }
1264 
1265 
1266 /*! Moves a previously modified page into a now appropriate queue.
1267 	The page queues must not be locked.
1268 */
1269 static void
1270 move_page_to_appropriate_queue(vm_page *page)
1271 {
1272 	DEBUG_PAGE_ACCESS_CHECK(page);
1273 
1274 	// Note, this logic must be in sync with what the page daemon does.
1275 	int32 state;
1276 	if (page->IsMapped())
1277 		state = PAGE_STATE_ACTIVE;
1278 	else if (page->modified)
1279 		state = PAGE_STATE_MODIFIED;
1280 	else
1281 		state = PAGE_STATE_CACHED;
1282 
1283 // TODO: If free + cached pages are low, we might directly want to free the
1284 // page.
1285 	set_page_state(page, state);
1286 }
1287 
1288 
1289 static void
1290 clear_page(struct vm_page *page)
1291 {
1292 	vm_memset_physical(page->physical_page_number << PAGE_SHIFT, 0,
1293 		B_PAGE_SIZE);
1294 }
1295 
1296 
1297 static status_t
1298 mark_page_range_in_use(addr_t startPage, size_t length, bool wired)
1299 {
1300 	TRACE(("mark_page_range_in_use: start 0x%lx, len 0x%lx\n",
1301 		startPage, length));
1302 
1303 	if (sPhysicalPageOffset > startPage) {
1304 		dprintf("mark_page_range_in_use(%#" B_PRIxADDR ", %#" B_PRIxSIZE "): "
1305 			"start page is before free list\n", startPage, length);
1306 		if (sPhysicalPageOffset - startPage >= length)
1307 			return B_OK;
1308 		length -= sPhysicalPageOffset - startPage;
1309 		startPage = sPhysicalPageOffset;
1310 	}
1311 
1312 	startPage -= sPhysicalPageOffset;
1313 
1314 	if (startPage + length > sNumPages) {
1315 		dprintf("mark_page_range_in_use(%#" B_PRIxADDR ", %#" B_PRIxSIZE "): "
1316 			"range would extend past free list\n", startPage, length);
1317 		if (startPage >= sNumPages)
1318 			return B_OK;
1319 		length = sNumPages - startPage;
1320 	}
1321 
1322 	WriteLocker locker(sFreePageQueuesLock);
1323 
1324 	for (size_t i = 0; i < length; i++) {
1325 		vm_page *page = &sPages[startPage + i];
1326 		switch (page->State()) {
1327 			case PAGE_STATE_FREE:
1328 			case PAGE_STATE_CLEAR:
1329 			{
1330 // TODO: This violates the page reservation policy, since we remove pages from
1331 // the free/clear queues without having reserved them before. This should happen
1332 // in the early boot process only, though.
1333 				DEBUG_PAGE_ACCESS_START(page);
1334 				VMPageQueue& queue = page->State() == PAGE_STATE_FREE
1335 					? sFreePageQueue : sClearPageQueue;
1336 				queue.Remove(page);
1337 				page->SetState(wired ? PAGE_STATE_WIRED : PAGE_STATE_UNUSED);
1338 				page->busy = false;
1339 				atomic_add(&sUnreservedFreePages, -1);
1340 				DEBUG_PAGE_ACCESS_END(page);
1341 				break;
1342 			}
1343 			case PAGE_STATE_WIRED:
1344 			case PAGE_STATE_UNUSED:
1345 				break;
1346 			case PAGE_STATE_ACTIVE:
1347 			case PAGE_STATE_INACTIVE:
1348 			case PAGE_STATE_MODIFIED:
1349 			case PAGE_STATE_CACHED:
1350 			default:
1351 				// uh
1352 				dprintf("mark_page_range_in_use: page 0x%lx in non-free state %d!\n",
1353 					startPage + i, page->State());
1354 				break;
1355 		}
1356 	}
1357 
1358 	return B_OK;
1359 }
1360 
1361 
1362 /*!
1363 	This is a background thread that wakes up periodically (every 100 ms)
1364 	and moves some pages from the free queue over to the clear queue.
1365 	Given enough time, it will clear out all pages from the free queue - we
1366 	could probably slow it down after having reached a certain threshold.
1367 */
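// Rough throughput estimate: at most one batch of SCRUB_SIZE (16) pages per
// 100 ms, i.e. no more than about 160 pages (640 KiB with 4 KiB pages) are
// scrubbed per second, so the clear queue is replenished gradually.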
1368 static int32
1369 page_scrubber(void *unused)
1370 {
1371 	(void)(unused);
1372 
1373 	TRACE(("page_scrubber starting...\n"));
1374 
1375 	for (;;) {
1376 		snooze(100000); // 100ms
1377 
1378 		if (sFreePageQueue.Count() == 0
1379 				|| sUnreservedFreePages < (int32)sFreePagesTarget) {
1380 			continue;
1381 		}
1382 
1383 		// Since we temporarily remove pages from the free pages reserve,
1384 		// we must make sure we don't cause a violation of the page
1385 		// reservation guarantee. The following is usually stricter than
1386 		// necessary, because we don't have information on how many of the
1387 		// reserved pages have already been allocated.
1388 		int32 reserved = reserve_some_pages(SCRUB_SIZE,
1389 			kPageReserveForPriority[VM_PRIORITY_USER]);
1390 		if (reserved == 0)
1391 			continue;
1392 
1393 		// get some pages from the free queue
1394 		ReadLocker locker(sFreePageQueuesLock);
1395 
1396 		vm_page *page[SCRUB_SIZE];
1397 		int32 scrubCount = 0;
1398 		for (int32 i = 0; i < reserved; i++) {
1399 			page[i] = sFreePageQueue.RemoveHeadUnlocked();
1400 			if (page[i] == NULL)
1401 				break;
1402 
1403 			DEBUG_PAGE_ACCESS_START(page[i]);
1404 
1405 			page[i]->SetState(PAGE_STATE_ACTIVE);
1406 			page[i]->busy = true;
1407 			scrubCount++;
1408 		}
1409 
1410 		locker.Unlock();
1411 
1412 		if (scrubCount == 0) {
1413 			unreserve_pages(reserved);
1414 			continue;
1415 		}
1416 
1417 		TA(ScrubbingPages(scrubCount));
1418 
1419 		// clear them
1420 		for (int32 i = 0; i < scrubCount; i++)
1421 			clear_page(page[i]);
1422 
1423 		locker.Lock();
1424 
1425 		// and put them into the clear queue
1426 		for (int32 i = 0; i < scrubCount; i++) {
1427 			page[i]->SetState(PAGE_STATE_CLEAR);
1428 			page[i]->busy = false;
1429 			DEBUG_PAGE_ACCESS_END(page[i]);
1430 			sClearPageQueue.PrependUnlocked(page[i]);
1431 		}
1432 
1433 		locker.Unlock();
1434 
1435 		unreserve_pages(reserved);
1436 
1437 		TA(ScrubbedPages(scrubCount));
1438 	}
1439 
1440 	return 0;
1441 }
1442 
1443 
1444 static void
1445 init_page_marker(vm_page &marker)
1446 {
1447 	marker.SetCacheRef(NULL);
1448 	marker.InitState(PAGE_STATE_UNUSED);
1449 	marker.busy = true;
1450 #if DEBUG_PAGE_QUEUE
1451 	marker.queue = NULL;
1452 #endif
1453 #if DEBUG_PAGE_ACCESS
1454 	marker.accessing_thread = thread_get_current_thread_id();
1455 #endif
1456 }
1457 
1458 
1459 static void
1460 remove_page_marker(struct vm_page &marker)
1461 {
1462 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1463 
1464 	if (marker.State() < PAGE_STATE_FIRST_UNQUEUED)
1465 		sPageQueues[marker.State()].RemoveUnlocked(&marker);
1466 
1467 	marker.SetState(PAGE_STATE_UNUSED);
1468 }
1469 
1470 
1471 static vm_page *
1472 next_modified_page(struct vm_page &marker)
1473 {
1474 	InterruptsSpinLocker locker(sModifiedPageQueue.GetLock());
1475 	vm_page *page;
1476 
1477 	DEBUG_PAGE_ACCESS_CHECK(&marker);
1478 
1479 	if (marker.State() == PAGE_STATE_MODIFIED) {
1480 		page = sModifiedPageQueue.Next(&marker);
1481 		sModifiedPageQueue.Remove(&marker);
1482 		marker.SetState(PAGE_STATE_UNUSED);
1483 	} else
1484 		page = sModifiedPageQueue.Head();
1485 
1486 	for (; page != NULL; page = sModifiedPageQueue.Next(page)) {
1487 		if (!page->busy) {
1488 			// insert marker
1489 			marker.SetState(PAGE_STATE_MODIFIED);
1490 			sModifiedPageQueue.InsertAfter(page, &marker);
1491 			return page;
1492 		}
1493 	}
1494 
1495 	return NULL;
1496 }
1497 
1498 
1499 // #pragma mark -
1500 
1501 
1502 class PageWriteTransfer;
1503 class PageWriteWrapper;
1504 
1505 
1506 class PageWriterRun {
1507 public:
1508 	status_t Init(uint32 maxPages);
1509 
1510 	void PrepareNextRun();
1511 	void AddPage(vm_page* page);
1512 	void Go();
1513 
1514 	void PageWritten(PageWriteTransfer* transfer, status_t status,
1515 		bool partialTransfer, size_t bytesTransferred);
1516 
1517 private:
1518 	uint32				fMaxPages;
1519 	uint32				fWrapperCount;
1520 	uint32				fTransferCount;
1521 	vint32				fPendingTransfers;
1522 	PageWriteWrapper*	fWrappers;
1523 	PageWriteTransfer*	fTransfers;
1524 	ConditionVariable	fAllFinishedCondition;
1525 };
1526 
1527 
1528 class PageWriteTransfer : public AsyncIOCallback {
1529 public:
1530 	void SetTo(PageWriterRun* run, vm_page* page, int32 maxPages);
1531 	bool AddPage(vm_page* page);
1532 
1533 	status_t Schedule(uint32 flags);
1534 
1535 	void SetStatus(status_t status, size_t transferred);
1536 
1537 	status_t Status() const	{ return fStatus; }
1538 	struct VMCache* Cache() const { return fCache; }
1539 	uint32 PageCount() const { return fPageCount; }
1540 
1541 	virtual void IOFinished(status_t status, bool partialTransfer,
1542 		size_t bytesTransferred);
1543 private:
1544 	PageWriterRun*		fRun;
1545 	struct VMCache*		fCache;
1546 	off_t				fOffset;
1547 	uint32				fPageCount;
1548 	int32				fMaxPages;
1549 	status_t			fStatus;
1550 	uint32				fVecCount;
1551 	iovec				fVecs[32]; // TODO: make dynamic/configurable
1552 };
1553 
1554 
1555 class PageWriteWrapper {
1556 public:
1557 	PageWriteWrapper();
1558 	~PageWriteWrapper();
1559 	void SetTo(vm_page* page);
1560 	void Done(status_t result);
1561 
1562 private:
1563 	vm_page*			fPage;
1564 	struct VMCache*		fCache;
1565 	bool				fIsActive;
1566 };
1567 
1568 
1569 PageWriteWrapper::PageWriteWrapper()
1570 	:
1571 	fIsActive(false)
1572 {
1573 }
1574 
1575 
1576 PageWriteWrapper::~PageWriteWrapper()
1577 {
1578 	if (fIsActive)
1579 		panic("page write wrapper going out of scope but isn't completed");
1580 }
1581 
1582 
1583 /*!	The page's cache must be locked.
1584 */
1585 void
1586 PageWriteWrapper::SetTo(vm_page* page)
1587 {
1588 	DEBUG_PAGE_ACCESS_CHECK(page);
1589 
1590 	if (page->busy)
1591 		panic("setting page write wrapper to busy page");
1592 
1593 	if (fIsActive)
1594 		panic("re-setting page write wrapper that isn't completed");
1595 
1596 	fPage = page;
1597 	fCache = page->Cache();
1598 	fIsActive = true;
1599 
1600 	fPage->busy = true;
1601 	fPage->busy_writing = true;
1602 
1603 	// We have a modified page -- however, while we're writing it back,
1604 	// the page might still be mapped. In order not to lose any changes to the
1605 	// page, we mark it clean before actually writing it back; if
1606 	// writing the page fails for some reason, we'll just keep it in the
1607 	// modified page list, but that should happen only rarely.
1608 
1609 	// If the page is changed after we cleared the dirty flag, but before we
1610 	// had the chance to write it back, then we'll write it again later -- that
1611 	// will probably not happen that often, though.
1612 
1613 	vm_clear_map_flags(fPage, PAGE_MODIFIED);
1614 }
1615 
1616 
1617 /*!	The page's cache must be locked.
1618 	The page queues must not be locked.
1619 */
1620 void
1621 PageWriteWrapper::Done(status_t result)
1622 {
1623 	if (!fIsActive)
1624 		panic("completing page write wrapper that is not active");
1625 
1626 	DEBUG_PAGE_ACCESS_START(fPage);
1627 
1628 	fPage->busy = false;
1629 		// Set unbusy and notify later by hand, since we might free the page.
1630 
1631 	if (result == B_OK) {
1632 		// put it into the active/inactive queue
1633 		move_page_to_appropriate_queue(fPage);
1634 		fPage->busy_writing = false;
1635 		DEBUG_PAGE_ACCESS_END(fPage);
1636 	} else {
1637 		// Writing the page failed. One reason would be that the cache has been
1638 		// shrunk and the page no longer belongs to the file. Otherwise the
1639 		// actual I/O failed, in which case we'll simply keep the page modified.
1640 
1641 		if (!fPage->busy_writing) {
1642 			// The busy_writing flag was cleared. That means the cache has been
1643 			// shrunk while we were trying to write the page and we have to free
1644 			// it now.
1645 			vm_remove_all_page_mappings(fPage);
1646 // TODO: Unmapping should already happen when resizing the cache!
1647 			fCache->RemovePage(fPage);
1648 			free_page(fPage, false);
1649 		} else {
1650 			// Writing the page failed -- mark the page modified and move it to
1651 			// an appropriate queue other than the modified queue, so we don't
1652 			// keep trying to write it over and over again. We keep
1653 			// non-temporary pages in the modified queue, though, so they don't
1654 			// get lost in the inactive queue.
1655 			dprintf("PageWriteWrapper: Failed to write page %p: %s\n", fPage,
1656 				strerror(result));
1657 
1658 			fPage->modified = true;
1659 			if (!fCache->temporary)
1660 				set_page_state(fPage, PAGE_STATE_MODIFIED);
1661 			else if (fPage->IsMapped())
1662 				set_page_state(fPage, PAGE_STATE_ACTIVE);
1663 			else
1664 				set_page_state(fPage, PAGE_STATE_INACTIVE);
1665 
1666 			fPage->busy_writing = false;
1667 			DEBUG_PAGE_ACCESS_END(fPage);
1668 		}
1669 	}
1670 
1671 	fCache->NotifyPageEvents(fPage, PAGE_EVENT_NOT_BUSY);
1672 	fIsActive = false;
1673 }
1674 
1675 
1676 /*!	The page's cache must be locked.
1677 */
1678 void
1679 PageWriteTransfer::SetTo(PageWriterRun* run, vm_page* page, int32 maxPages)
1680 {
1681 	fRun = run;
1682 	fCache = page->Cache();
1683 	fOffset = page->cache_offset;
1684 	fPageCount = 1;
1685 	fMaxPages = maxPages;
1686 	fStatus = B_OK;
1687 
1688 	fVecs[0].iov_base = (void*)(page->physical_page_number << PAGE_SHIFT);
1689 	fVecs[0].iov_len = B_PAGE_SIZE;
1690 	fVecCount = 1;
1691 }
1692 
1693 
1694 /*!	The page's cache must be locked.
1695 */
1696 bool
1697 PageWriteTransfer::AddPage(vm_page* page)
1698 {
1699 	if (page->Cache() != fCache
1700 		|| (fMaxPages >= 0 && fPageCount >= (uint32)fMaxPages))
1701 		return false;
1702 
1703 	addr_t nextBase
1704 		= (addr_t)fVecs[fVecCount - 1].iov_base + fVecs[fVecCount - 1].iov_len;
1705 
1706 	if (page->physical_page_number << PAGE_SHIFT == nextBase
1707 		&& page->cache_offset == fOffset + fPageCount) {
1708 		// append to last iovec
1709 		fVecs[fVecCount - 1].iov_len += B_PAGE_SIZE;
1710 		fPageCount++;
1711 		return true;
1712 	}
1713 
1714 	nextBase = (addr_t)fVecs[0].iov_base - B_PAGE_SIZE;
1715 	if (page->physical_page_number << PAGE_SHIFT == nextBase
1716 		&& page->cache_offset == fOffset - 1) {
1717 		// prepend to first iovec and adjust offset
1718 		fVecs[0].iov_base = (void*)nextBase;
1719 		fVecs[0].iov_len += B_PAGE_SIZE;
1720 		fOffset = page->cache_offset;
1721 		fPageCount++;
1722 		return true;
1723 	}
1724 
1725 	if ((page->cache_offset == fOffset + fPageCount
1726 			|| page->cache_offset == fOffset - 1)
1727 		&& fVecCount < sizeof(fVecs) / sizeof(fVecs[0])) {
1728 		// not physically contiguous or not in the right order
1729 		uint32 vectorIndex;
1730 		if (page->cache_offset < fOffset) {
1731 			// we are pre-pending another vector, move the other vecs
1732 			for (uint32 i = fVecCount; i > 0; i--)
1733 				fVecs[i] = fVecs[i - 1];
1734 
1735 			fOffset = page->cache_offset;
1736 			vectorIndex = 0;
1737 		} else
1738 			vectorIndex = fVecCount;
1739 
1740 		fVecs[vectorIndex].iov_base
1741 			= (void*)(page->physical_page_number << PAGE_SHIFT);
1742 		fVecs[vectorIndex].iov_len = B_PAGE_SIZE;
1743 
1744 		fVecCount++;
1745 		fPageCount++;
1746 		return true;
1747 	}
1748 
1749 	return false;
1750 }
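// Example of the coalescing above: pages at cache offsets 5, 6 and 7 whose
// physical frames are also consecutive collapse into a single iovec of
// 3 * B_PAGE_SIZE; a logically adjacent but physically discontiguous page gets
// an iovec of its own, and any other page is rejected so that the caller
// starts a new PageWriteTransfer.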
1751 
1752 
1753 status_t
1754 PageWriteTransfer::Schedule(uint32 flags)
1755 {
1756 	off_t writeOffset = (off_t)fOffset << PAGE_SHIFT;
1757 	size_t writeLength = fPageCount << PAGE_SHIFT;
1758 
1759 	if (fRun != NULL) {
1760 		return fCache->WriteAsync(writeOffset, fVecs, fVecCount, writeLength,
1761 			flags | B_PHYSICAL_IO_REQUEST, this);
1762 	}
1763 
1764 	status_t status = fCache->Write(writeOffset, fVecs, fVecCount,
1765 		flags | B_PHYSICAL_IO_REQUEST, &writeLength);
1766 
1767 	SetStatus(status, writeLength);
1768 	return fStatus;
1769 }
1770 
1771 
1772 void
1773 PageWriteTransfer::SetStatus(status_t status, size_t transferred)
1774 {
1775 	// only succeed if all pages up to the last one have been written fully
1776 	// and the last page has at least been written partially
1777 	if (status == B_OK && transferred <= (fPageCount - 1) * B_PAGE_SIZE)
1778 		status = B_ERROR;
1779 
1780 	fStatus = status;
1781 }
1782 
1783 
1784 void
1785 PageWriteTransfer::IOFinished(status_t status, bool partialTransfer,
1786 	size_t bytesTransferred)
1787 {
1788 	SetStatus(status, bytesTransferred);
1789 	fRun->PageWritten(this, fStatus, partialTransfer, bytesTransferred);
1790 }
1791 
1792 
1793 status_t
1794 PageWriterRun::Init(uint32 maxPages)
1795 {
1796 	fMaxPages = maxPages;
1797 	fWrapperCount = 0;
1798 	fTransferCount = 0;
1799 	fPendingTransfers = 0;
1800 
1801 	fWrappers = new(std::nothrow) PageWriteWrapper[maxPages];
1802 	fTransfers = new(std::nothrow) PageWriteTransfer[maxPages];
1803 	if (fWrappers == NULL || fTransfers == NULL)
1804 		return B_NO_MEMORY;
1805 
1806 	return B_OK;
1807 }
1808 
1809 
1810 void
1811 PageWriterRun::PrepareNextRun()
1812 {
1813 	fWrapperCount = 0;
1814 	fTransferCount = 0;
1815 	fPendingTransfers = 0;
1816 }
1817 
1818 
1819 /*!	The page's cache must be locked.
1820 */
1821 void
1822 PageWriterRun::AddPage(vm_page* page)
1823 {
1824 	fWrappers[fWrapperCount++].SetTo(page);
1825 
1826 	if (fTransferCount == 0 || !fTransfers[fTransferCount - 1].AddPage(page)) {
1827 		fTransfers[fTransferCount++].SetTo(this, page,
1828 			page->Cache()->MaxPagesPerAsyncWrite());
1829 	}
1830 }
1831 
1832 
1833 void
1834 PageWriterRun::Go()
1835 {
1836 	fPendingTransfers = fTransferCount;
1837 
1838 	fAllFinishedCondition.Init(this, "page writer wait for I/O");
1839 	ConditionVariableEntry waitEntry;
1840 	fAllFinishedCondition.Add(&waitEntry);
1841 
1842 	// schedule writes
1843 	for (uint32 i = 0; i < fTransferCount; i++)
1844 		fTransfers[i].Schedule(B_VIP_IO_REQUEST);
1845 
1846 	// wait until all pages have been written
1847 	waitEntry.Wait();
1848 
1849 	// mark pages depending on whether they could be written or not
1850 
1851 	uint32 wrapperIndex = 0;
1852 	for (uint32 i = 0; i < fTransferCount; i++) {
1853 		PageWriteTransfer& transfer = fTransfers[i];
1854 		transfer.Cache()->Lock();
1855 
1856 		for (uint32 j = 0; j < transfer.PageCount(); j++)
1857 			fWrappers[wrapperIndex++].Done(transfer.Status());
1858 
1859 		transfer.Cache()->Unlock();
1860 	}
1861 
1862 	ASSERT(wrapperIndex == fWrapperCount);
1863 
1864 	for (uint32 i = 0; i < fTransferCount; i++) {
1865 		PageWriteTransfer& transfer = fTransfers[i];
1866 		struct VMCache* cache = transfer.Cache();
1867 
1868 		// We've acquired a reference for each page
1869 		for (uint32 j = 0; j < transfer.PageCount(); j++) {
1870 			// We release the cache references after all pages were made
1871 			// unbusy again - otherwise releasing a vnode could deadlock.
1872 			cache->ReleaseStoreRef();
1873 			cache->ReleaseRef();
1874 		}
1875 	}
1876 }
1877 
1878 
1879 void
1880 PageWriterRun::PageWritten(PageWriteTransfer* transfer, status_t status,
1881 	bool partialTransfer, size_t bytesTransferred)
1882 {
1883 	if (atomic_add(&fPendingTransfers, -1) == 1)
1884 		fAllFinishedCondition.NotifyAll();
1885 }
1886 
1887 
1888 /*!	The page writer continuously takes some pages from the modified
1889 	queue, writes them back, and moves them back to the active queue.
1890 	It runs in its own thread, and is only there to keep the number
1891 	of modified pages low, so that more pages can be reused at a
1892 	lower cost.
1893 */
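// Concretely: the loop below waits up to 3 seconds (or not at all when at
// least kNumPages pages are already queued), collects up to kNumPages (256)
// modified pages into a PageWriterRun, and schedules the writes with
// B_VIP_IO_REQUEST at an I/O priority derived from the modified page count.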
1894 status_t
1895 page_writer(void* /*unused*/)
1896 {
1897 	const uint32 kNumPages = 256;
1898 	uint32 writtenPages = 0;
1899 	bigtime_t lastWrittenTime = 0;
1900 	bigtime_t pageCollectionTime = 0;
1901 	bigtime_t pageWritingTime = 0;
1902 
1903 	PageWriterRun run;
1904 	if (run.Init(kNumPages) != B_OK) {
1905 		panic("page writer: Failed to init PageWriterRun!");
1906 		return B_ERROR;
1907 	}
1908 
1909 	vm_page marker;
1910 	init_page_marker(marker);
1911 
1912 	while (true) {
1913 // TODO: Maybe wait shorter when memory is low!
1914 		if (sModifiedPageQueue.Count() < kNumPages) {
1915 			sPageWriterCondition.Wait(3000000, true);
1916 			// every 3 seconds when no one triggers us
1917 		}
1918 
1919 		int32 modifiedPages = sModifiedPageQueue.Count();
1920 		if (modifiedPages == 0)
1921 			continue;
1922 
1923 #if ENABLE_SWAP_SUPPORT
1924 		page_stats pageStats;
1925 		get_page_stats(pageStats);
1926 		bool activePaging = do_active_paging(pageStats);
1927 #endif
1928 
1929 		// depending on how urgent it becomes to get pages to disk, we adjust
1930 		// our I/O priority
1931 		uint32 lowPagesState = low_resource_state(B_KERNEL_RESOURCE_PAGES);
1932 		int32 ioPriority = B_IDLE_PRIORITY;
1933 		if (lowPagesState >= B_LOW_RESOURCE_CRITICAL
1934 			|| modifiedPages > MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD) {
1935 			ioPriority = MAX_PAGE_WRITER_IO_PRIORITY;
1936 		} else {
1937 			ioPriority = (uint64)MAX_PAGE_WRITER_IO_PRIORITY * modifiedPages
1938 				/ MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD;
1939 		}
1940 
1941 		thread_set_io_priority(ioPriority);
1942 
1943 		uint32 numPages = 0;
1944 		run.PrepareNextRun();
1945 
1946 		// TODO: make this laptop friendly, too (ie. only start doing
1947 		// something if someone else did something or there is really
1948 		// enough to do).
1949 
1950 		// collect pages to be written
1951 		pageCollectionTime -= system_time();
1952 
1953 		while (numPages < kNumPages) {
1954 			vm_page *page = next_modified_page(marker);
1955 			if (page == NULL)
1956 				break;
1957 
1958 			PageCacheLocker cacheLocker(page, false);
1959 			if (!cacheLocker.IsLocked())
1960 				continue;
1961 
1962 			VMCache *cache = page->Cache();
1963 
1964 			// If the page is busy or its state has changed while we were
1965 			// locking the cache, just ignore it.
1966 			if (page->busy || page->State() != PAGE_STATE_MODIFIED)
1967 				continue;
1968 
1969 			DEBUG_PAGE_ACCESS_START(page);
1970 
1971 			// Don't write back wired (locked) pages.
1972 			if (page->wired_count > 0) {
1973 				set_page_state(page, PAGE_STATE_ACTIVE);
1974 				DEBUG_PAGE_ACCESS_END(page);
1975 				continue;
1976 			}
1977 
1978 			// Write back temporary pages only when we're actively paging.
1979 			if (cache->temporary
1980 #if ENABLE_SWAP_SUPPORT
1981 				&& (!activePaging
1982 					|| !cache->CanWritePage(
1983 							(off_t)page->cache_offset << PAGE_SHIFT))
1984 #endif
1985 				) {
1986 				// We can't/don't want to do anything with this page, so move it
1987 				// to one of the other queues.
1988 				if (page->mappings.IsEmpty())
1989 					set_page_state(page, PAGE_STATE_INACTIVE);
1990 				else
1991 					set_page_state(page, PAGE_STATE_ACTIVE);
1992 
1993 				DEBUG_PAGE_ACCESS_END(page);
1994 				continue;
1995 			}
1996 
1997 			// We need our own reference to the store, as it might currently be
1998 			// destroyed.
1999 			if (cache->AcquireUnreferencedStoreRef() != B_OK) {
2000 				DEBUG_PAGE_ACCESS_END(page);
2001 				cacheLocker.Unlock();
2002 				thread_yield(true);
2003 				continue;
2004 			}
2005 
2006 			run.AddPage(page);
2007 
2008 			DEBUG_PAGE_ACCESS_END(page);
2009 
2010 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
2011 			TPW(WritePage(page));
2012 
2013 			cache->AcquireRefLocked();
2014 			numPages++;
2015 		}
2016 
2017 		pageCollectionTime += system_time();
2018 
2019 		if (numPages == 0)
2020 			continue;
2021 
2022 		// write pages to disk and do all the cleanup
2023 		pageWritingTime -= system_time();
2024 		run.Go();
2025 		pageWritingTime += system_time();
2026 
2027 		// debug output only...
2028 		writtenPages += numPages;
2029 		if (writtenPages >= 1024) {
2030 			bigtime_t now = system_time();
2031 			TRACE(("page writer: wrote 1024 pages (total: %llu ms, "
2032 				"collect: %llu ms, write: %llu ms)\n",
2033 				(now - lastWrittenTime) / 1000,
2034 				pageCollectionTime / 1000, pageWritingTime / 1000));
2035 			writtenPages -= 1024;
2036 			lastWrittenTime = now;
2037 			pageCollectionTime = 0;
2038 			pageWritingTime = 0;
2039 		}
2040 	}
2041 
2042 	remove_page_marker(marker);
2043 	return B_OK;
2044 }
2045 
2046 
2047 // #pragma mark -
2048 
2049 
2050 // TODO: This should be done in the page daemon!
2051 #if 0
2052 #if ENABLE_SWAP_SUPPORT
2053 static bool
2054 free_page_swap_space(int32 index)
2055 {
2056 	vm_page *page = vm_page_at_index(index);
2057 	PageCacheLocker locker(page);
2058 	if (!locker.IsLocked())
2059 		return false;
2060 
2061 	DEBUG_PAGE_ACCESS_START(page);
2062 
2063 	VMCache* cache = page->Cache();
2064 	if (cache->temporary && page->wired_count == 0
2065 			&& cache->HasPage(page->cache_offset << PAGE_SHIFT)
2066 			&& page->usage_count > 0) {
2067 		// TODO: how to judge a page is highly active?
2068 		if (swap_free_page_swap_space(page)) {
2069 			// We need to mark the page modified, since otherwise it could be
2070 			// stolen and we'd lose its data.
2071 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
2072 			TD(FreedPageSwap(page));
2073 			DEBUG_PAGE_ACCESS_END(page);
2074 			return true;
2075 		}
2076 	}
2077 	DEBUG_PAGE_ACCESS_END(page);
2078 	return false;
2079 }
2080 #endif
2081 #endif	// 0
2082 
2083 
2084 static vm_page *
2085 find_cached_page_candidate(struct vm_page &marker)
2086 {
2087 	DEBUG_PAGE_ACCESS_CHECK(&marker);
2088 
2089 	InterruptsSpinLocker locker(sCachedPageQueue.GetLock());
2090 	vm_page *page;
2091 
2092 	if (marker.State() == PAGE_STATE_UNUSED) {
2093 		// Get the first page of the cached queue
2094 		page = sCachedPageQueue.Head();
2095 	} else {
2096 		// Get the next page of the current queue
2097 		if (marker.State() != PAGE_STATE_CACHED) {
2098 			panic("invalid marker %p state", &marker);
2099 			return NULL;
2100 		}
2101 
2102 		page = sCachedPageQueue.Next(&marker);
2103 		sCachedPageQueue.Remove(&marker);
2104 		marker.SetState(PAGE_STATE_UNUSED);
2105 	}
2106 
2107 	while (page != NULL) {
2108 		if (!page->busy) {
2109 			// we found a candidate, insert marker
2110 			marker.SetState(PAGE_STATE_CACHED);
2111 			sCachedPageQueue.InsertAfter(page, &marker);
2112 			return page;
2113 		}
2114 
2115 		page = sCachedPageQueue.Next(page);
2116 	}
2117 
2118 	return NULL;
2119 }
2120 
2121 
2122 static bool
2123 free_cached_page(vm_page *page, bool dontWait)
2124 {
2125 	// try to lock the page's cache
2126 	if (vm_cache_acquire_locked_page_cache(page, dontWait) == NULL)
2127 		return false;
2128 	VMCache* cache = page->Cache();
2129 
2130 	AutoLocker<VMCache> cacheLocker(cache, true);
2131 	MethodDeleter<VMCache> _2(cache, &VMCache::ReleaseRefLocked);
2132 
2133 	// check again if that page is still a candidate
2134 	if (page->busy || page->State() != PAGE_STATE_CACHED)
2135 		return false;
2136 
2137 	DEBUG_PAGE_ACCESS_START(page);
2138 
2139 	PAGE_ASSERT(page, !page->IsMapped());
2140 	PAGE_ASSERT(page, !page->modified);
2141 
2142 	// we can now steal this page
2143 
2144 	cache->RemovePage(page);
2145 		// Now the page doesn't have a cache anymore, so no one else (e.g.
2146 		// vm_page_allocate_page_run()) can pick it up, since they would be
2147 		// required to lock the cache first, which would fail.
2148 
2149 	sCachedPageQueue.RemoveUnlocked(page);
2150 	return true;
2151 }
2152 
2153 
2154 static uint32
2155 free_cached_pages(uint32 pagesToFree, bool dontWait)
2156 {
2157 	vm_page marker;
2158 	init_page_marker(marker);
2159 
2160 	uint32 pagesFreed = 0;
2161 
2162 	while (pagesFreed < pagesToFree) {
2163 		vm_page *page = find_cached_page_candidate(marker);
2164 		if (page == NULL)
2165 			break;
2166 
2167 		if (free_cached_page(page, dontWait)) {
2168 			ReadLocker locker(sFreePageQueuesLock);
2169 			page->SetState(PAGE_STATE_FREE);
2170 			DEBUG_PAGE_ACCESS_END(page);
2171 			sFreePageQueue.PrependUnlocked(page);
2172 			locker.Unlock();
2173 
2174 			TA(StolenPage());
2175 
2176 			pagesFreed++;
2177 		}
2178 	}
2179 
2180 	remove_page_marker(marker);
2181 
2182 	return pagesFreed;
2183 }
2184 
2185 
2186 static void
2187 idle_scan_active_pages(page_stats& pageStats)
2188 {
2189 	VMPageQueue& queue = sActivePageQueue;
2190 
2191 	// We want to scan the whole queue in roughly kIdleRunsForFullQueue runs.
2192 	uint32 maxToScan = queue.Count() / kIdleRunsForFullQueue + 1;
2193 
2194 	while (maxToScan > 0) {
2195 		maxToScan--;
2196 
2197 		// Get the next page. Note that we don't bother to lock here. We go with
2198 		// the assumption that on all architectures reading/writing pointers is
2199 		// atomic. Beyond that it doesn't really matter. We have to unlock the
2200 		// queue anyway to lock the page's cache, and we'll recheck afterwards.
2201 		vm_page* page = queue.Head();
2202 		if (page == NULL)
2203 			break;
2204 
2205 		// lock the page's cache
2206 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2207 		if (cache == NULL)
2208 			continue;
2209 
2210 		if (page->State() != PAGE_STATE_ACTIVE) {
2211 			// page is no longer in the cache or in this queue
2212 			cache->ReleaseRefAndUnlock();
2213 			continue;
2214 		}
2215 
2216 		if (page->busy) {
2217 			// page is busy -- requeue at the end
2218 			vm_page_requeue(page, true);
2219 			cache->ReleaseRefAndUnlock();
2220 			continue;
2221 		}
2222 
2223 		DEBUG_PAGE_ACCESS_START(page);
2224 
2225 		// Get the page active/modified flags and update the page's usage count.
2226 		// We completely unmap inactive temporary pages. This saves us from
2227 		// having to iterate through the inactive list as well, since we'll be
2228 		// notified via page fault whenever such an inactive page is used again.
2229 		// We don't remove the mappings of non-temporary pages, since we
2230 		// wouldn't notice when those would become unused and could thus be
2231 		// moved to the cached list.
2232 		int32 usageCount;
2233 		if (page->wired_count > 0 || page->usage_count > 0 || !cache->temporary)
2234 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2235 		else
2236 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2237 
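		// A worked example of the bookkeeping below (illustrative numbers
		// only): a page with usage_count 5 whose mappings report 2 accesses
		// ends up at 2 + 5 + kPageUsageAdvance = 10 (capped at kPageUsageMax);
		// an unaccessed one drops to 5 - kPageUsageDecline = 4 and is moved to
		// the inactive queue once the count would fall below zero.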
2238 		if (usageCount > 0) {
2239 			usageCount += page->usage_count + kPageUsageAdvance;
2240 			if (usageCount > kPageUsageMax)
2241 				usageCount = kPageUsageMax;
2242 // TODO: This would probably also be the place to reclaim swap space.
2243 		} else {
2244 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2245 			if (usageCount < 0) {
2246 				usageCount = 0;
2247 				set_page_state(page, PAGE_STATE_INACTIVE);
2248 			}
2249 		}
2250 
2251 		page->usage_count = usageCount;
2252 
2253 		DEBUG_PAGE_ACCESS_END(page);
2254 
2255 		cache->ReleaseRefAndUnlock();
2256 	}
2257 }
2258 
2259 
2260 static void
2261 full_scan_inactive_pages(page_stats& pageStats, int32 despairLevel)
2262 {
2263 	int32 pagesToFree = pageStats.unsatisfiedReservations
2264 		+ sFreeOrCachedPagesTarget
2265 		- (pageStats.totalFreePages + pageStats.cachedPages);
2266 	if (pagesToFree <= 0)
2267 		return;
2268 
2269 	bigtime_t time = system_time();
2270 	uint32 pagesScanned = 0;
2271 	uint32 pagesToCached = 0;
2272 	uint32 pagesToModified = 0;
2273 	uint32 pagesToActive = 0;
2274 
2275 	// Determine how many pages at maximum to send to the modified queue. Since
2276 	// it is relatively expensive to page out pages, we do that on a grander
2277 	// scale only when things get desperate.
2278 	uint32 maxToFlush = despairLevel <= 1 ? 32 : 10000;
2279 
2280 	vm_page marker;
2281 	init_page_marker(marker);
2282 
2283 	VMPageQueue& queue = sInactivePageQueue;
2284 	InterruptsSpinLocker queueLocker(queue.GetLock());
2285 	uint32 maxToScan = queue.Count();
2286 
2287 	vm_page* nextPage = queue.Head();
2288 
2289 	while (pagesToFree > 0 && maxToScan > 0) {
2290 		maxToScan--;
2291 
2292 		// get the next page
2293 		vm_page* page = nextPage;
2294 		if (page == NULL)
2295 			break;
2296 		nextPage = queue.Next(page);
2297 
2298 		if (page->busy)
2299 			continue;
2300 
2301 		// mark the position
2302 		queue.InsertAfter(page, &marker);
2303 		queueLocker.Unlock();
2304 
2305 		// lock the page's cache
2306 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2307 		if (cache == NULL || page->busy
2308 				|| page->State() != PAGE_STATE_INACTIVE) {
2309 			if (cache != NULL)
2310 				cache->ReleaseRefAndUnlock();
2311 			queueLocker.Lock();
2312 			nextPage = queue.Next(&marker);
2313 			queue.Remove(&marker);
2314 			continue;
2315 		}
2316 
2317 		pagesScanned++;
2318 
2319 		DEBUG_PAGE_ACCESS_START(page);
2320 
2321 		// Get the accessed count, clear the accessed/modified flags and
2322 		// unmap the page, if it hasn't been accessed.
2323 		int32 usageCount;
2324 		if (page->wired_count > 0)
2325 			usageCount = vm_clear_page_mapping_accessed_flags(page);
2326 		else
2327 			usageCount = vm_remove_all_page_mappings_if_unaccessed(page);
2328 
2329 		// update usage count
2330 		if (usageCount > 0) {
2331 			usageCount += page->usage_count + kPageUsageAdvance;
2332 			if (usageCount > kPageUsageMax)
2333 				usageCount = kPageUsageMax;
2334 		} else {
2335 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2336 			if (usageCount < 0)
2337 				usageCount = 0;
2338 		}
2339 
2340 		page->usage_count = usageCount;
2341 
2342 		// Move to fitting queue or requeue:
2343 		// * Active mapped pages go to the active queue.
2344 		// * Inactive mapped (i.e. wired) pages are requeued.
2345 		// * The remaining pages are cachable. Thus, if unmodified they go to
2346 		//   the cached queue, otherwise to the modified queue (up to a limit).
2347 		//   Note that, unlike in the idle scan, we don't exempt pages of
2348 		//   temporary caches here. Apparently we really need memory, so we
2349 		//   better page out memory as well.
2350 		bool isMapped = page->IsMapped();
2351 		if (usageCount > 0) {
2352 			if (isMapped) {
2353 				set_page_state(page, PAGE_STATE_ACTIVE);
2354 				pagesToActive++;
2355 			} else
2356 				vm_page_requeue(page, true);
2357 		} else if (isMapped) {
2358 			vm_page_requeue(page, true);
2359 		} else if (!page->modified) {
2360 			set_page_state(page, PAGE_STATE_CACHED);
2361 			pagesToFree--;
2362 			pagesToCached++;
2363 		} else if (maxToFlush > 0) {
2364 			set_page_state(page, PAGE_STATE_MODIFIED);
2365 			maxToFlush--;
2366 			pagesToModified++;
2367 		} else
2368 			vm_page_requeue(page, true);
2369 
2370 		DEBUG_PAGE_ACCESS_END(page);
2371 
2372 		cache->ReleaseRefAndUnlock();
2373 
2374 		// remove the marker
2375 		queueLocker.Lock();
2376 		nextPage = queue.Next(&marker);
2377 		queue.Remove(&marker);
2378 	}
2379 
2380 	queueLocker.Unlock();
2381 
2382 	time = system_time() - time;
2383 	TRACE_DAEMON("  -> inactive scan (%7lld us): scanned: %7lu, "
2384 		"moved: %lu -> cached, %lu -> modified, %lu -> active\n", time,
2385 		pagesScanned, pagesToCached, pagesToModified, pagesToActive);
2386 
2387 	// wake up the page writer, if we tossed it some pages
2388 	if (pagesToModified > 0)
2389 		sPageWriterCondition.WakeUp();
2390 }
2391 
2392 
2393 static void
2394 full_scan_active_pages(page_stats& pageStats, int32 despairLevel)
2395 {
2396 	vm_page marker;
2397 	init_page_marker(marker);
2398 
2399 	VMPageQueue& queue = sActivePageQueue;
2400 	InterruptsSpinLocker queueLocker(queue.GetLock());
2401 	uint32 maxToScan = queue.Count();
2402 
2403 	int32 pagesToDeactivate = pageStats.unsatisfiedReservations
2404 		+ sFreeOrCachedPagesTarget
2405 		- (pageStats.totalFreePages + pageStats.cachedPages)
2406 		+ std::max((int32)sInactivePagesTarget - (int32)maxToScan, (int32)0);
2407 	if (pagesToDeactivate <= 0)
2408 		return;
2409 
2410 	bigtime_t time = system_time();
2411 	uint32 pagesAccessed = 0;
2412 	uint32 pagesToInactive = 0;
2413 	uint32 pagesScanned = 0;
2414 
2415 	vm_page* nextPage = queue.Head();
2416 
2417 	while (pagesToDeactivate > 0 && maxToScan > 0) {
2418 		maxToScan--;
2419 
2420 		// get the next page
2421 		vm_page* page = nextPage;
2422 		if (page == NULL)
2423 			break;
2424 		nextPage = queue.Next(page);
2425 
2426 		if (page->busy)
2427 			continue;
2428 
2429 		// mark the position
2430 		queue.InsertAfter(page, &marker);
2431 		queueLocker.Unlock();
2432 
2433 		// lock the page's cache
2434 		VMCache* cache = vm_cache_acquire_locked_page_cache(page, true);
2435 		if (cache == NULL || page->busy || page->State() != PAGE_STATE_ACTIVE) {
2436 			if (cache != NULL)
2437 				cache->ReleaseRefAndUnlock();
2438 			queueLocker.Lock();
2439 			nextPage = queue.Next(&marker);
2440 			queue.Remove(&marker);
2441 			continue;
2442 		}
2443 
2444 		pagesScanned++;
2445 
2446 		DEBUG_PAGE_ACCESS_START(page);
2447 
2448 		// Get the page active/modified flags and update the page's usage count.
2449 		int32 usageCount = vm_clear_page_mapping_accessed_flags(page);
2450 
2451 		if (usageCount > 0) {
2452 			usageCount += page->usage_count + kPageUsageAdvance;
2453 			if (usageCount > kPageUsageMax)
2454 				usageCount = kPageUsageMax;
2455 			pagesAccessed++;
2456 // TODO: This would probably also be the place to reclaim swap space.
2457 		} else {
2458 			usageCount += page->usage_count - (int32)kPageUsageDecline;
2459 			if (usageCount <= 0) {
2460 				usageCount = 0;
2461 				set_page_state(page, PAGE_STATE_INACTIVE);
2462 				pagesToInactive++;
2463 			}
2464 		}
2465 
2466 		page->usage_count = usageCount;
2467 
2468 		DEBUG_PAGE_ACCESS_END(page);
2469 
2470 		cache->ReleaseRefAndUnlock();
2471 
2472 		// remove the marker
2473 		queueLocker.Lock();
2474 		nextPage = queue.Next(&marker);
2475 		queue.Remove(&marker);
2476 	}
2477 
2478 	time = system_time() - time;
2479 	TRACE_DAEMON("  ->   active scan (%7lld us): scanned: %7lu, "
2480 		"moved: %lu -> inactive, encountered %lu accessed ones\n", time,
2481 		pagesScanned, pagesToInactive, pagesAccessed);
2482 }
2483 
2484 
2485 static void
2486 page_daemon_idle_scan(page_stats& pageStats)
2487 {
2488 	TRACE_DAEMON("page daemon: idle run\n");
2489 
2490 	if (pageStats.totalFreePages < (int32)sFreePagesTarget) {
2491 		// We want more actually free pages, so free some from the cached
2492 		// ones.
2493 		uint32 freed = free_cached_pages(
2494 			sFreePagesTarget - pageStats.totalFreePages, false);
2495 		if (freed > 0)
2496 			unreserve_pages(freed);
2497 		get_page_stats(pageStats);
2498 	}
2499 
2500 	// Walk the active list and move pages to the inactive queue.
2501 	get_page_stats(pageStats);
2502 	idle_scan_active_pages(pageStats);
2503 }
2504 
2505 
2506 static void
2507 page_daemon_full_scan(page_stats& pageStats, int32 despairLevel)
2508 {
2509 	TRACE_DAEMON("page daemon: full run: free: %lu, cached: %lu, "
2510 		"to free: %lu\n", pageStats.totalFreePages, pageStats.cachedPages,
2511 		pageStats.unsatisfiedReservations + sFreeOrCachedPagesTarget
2512 			- (pageStats.totalFreePages + pageStats.cachedPages));
2513 
2514 	// Walk the inactive list and transfer pages to the cached and modified
2515 	// queues.
2516 	full_scan_inactive_pages(pageStats, despairLevel);
2517 
2518 	// Free cached pages. Also wake up reservation waiters.
2519 	get_page_stats(pageStats);
2520 	int32 pagesToFree = pageStats.unsatisfiedReservations + sFreePagesTarget
2521 		- (pageStats.totalFreePages);
2522 	if (pagesToFree > 0) {
2523 		uint32 freed = free_cached_pages(pagesToFree, true);
2524 		if (freed > 0)
2525 			unreserve_pages(freed);
2526 	}
2527 
2528 	// Walk the active list and move pages to the inactive queue.
2529 	get_page_stats(pageStats);
2530 	full_scan_active_pages(pageStats, despairLevel);
2531 }
2532 
2533 
2534 static status_t
2535 page_daemon(void* /*unused*/)
2536 {
2537 	int32 despairLevel = 0;
2538 
2539 	while (true) {
2540 		sPageDaemonCondition.ClearActivated();
2541 
2542 		// evaluate the free pages situation
2543 		page_stats pageStats;
2544 		get_page_stats(pageStats);
2545 
2546 		if (!do_active_paging(pageStats)) {
2547 			// Things look good -- just maintain statistics and keep the pool
2548 			// of actually free pages full enough.
2549 			despairLevel = 0;
2550 			page_daemon_idle_scan(pageStats);
2551 			sPageDaemonCondition.Wait(kIdleScanWaitInterval, false);
2552 		} else {
2553 			// Not enough free pages. We need to do some real work.
2554 			despairLevel = std::min(despairLevel + 1, (int32)3);
2555 			page_daemon_full_scan(pageStats, despairLevel);
2556 
2557 			// Don't wait after the first full scan, but rather immediately
2558 			// check whether we were successful in freeing enough pages and
2559 			// re-run with increased despair level. The first scan is
2560 			// conservative with respect to moving inactive modified pages to
2561 			// the modified list to avoid thrashing. The second scan, however,
2562 			// will not hold back.
2563 			if (despairLevel > 1)
2564 				snooze(kBusyScanWaitInterval);
2565 		}
2566 	}
2567 
2568 	return B_OK;
2569 }
2570 
2571 
2572 /*!	Returns how many pages could *not* be reserved.
2573 */
2574 static uint32
2575 reserve_pages(uint32 count, int priority, bool dontWait)
2576 {
2577 	int32 dontTouch = kPageReserveForPriority[priority];
2578 
2579 	while (true) {
2580 		count -= reserve_some_pages(count, dontTouch);
2581 		if (count == 0)
2582 			return 0;
2583 
2584 		if (sUnsatisfiedPageReservations == 0) {
2585 			count -= free_cached_pages(count, dontWait);
2586 			if (count == 0)
2587 				return count;
2588 		}
2589 
2590 		if (dontWait)
2591 			return count;
2592 
2593 		// we need to wait for pages to become available
2594 
2595 		MutexLocker pageDeficitLocker(sPageDeficitLock);
2596 
2597 		bool notifyDaemon = sUnsatisfiedPageReservations == 0;
2598 		sUnsatisfiedPageReservations += count;
2599 
2600 		if (sUnreservedFreePages > dontTouch) {
2601 			// the situation changed
2602 			sUnsatisfiedPageReservations -= count;
2603 			continue;
2604 		}
2605 
2606 		PageReservationWaiter waiter;
2607 		waiter.dontTouch = dontTouch;
2608 		waiter.missing = count;
2609 		waiter.thread = thread_get_current_thread();
2610 		waiter.threadPriority = waiter.thread->priority;
2611 
2612 		// insert ordered (i.e. after all waiters with higher or equal priority)
2613 		PageReservationWaiter* otherWaiter = NULL;
2614 		for (PageReservationWaiterList::Iterator it
2615 				= sPageReservationWaiters.GetIterator();
2616 			(otherWaiter = it.Next()) != NULL;) {
2617 			if (waiter < *otherWaiter)
2618 				break;
2619 		}
2620 
2621 		sPageReservationWaiters.InsertBefore(otherWaiter, &waiter);
2622 
2623 		thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
2624 			"waiting for pages");
2625 
2626 		if (notifyDaemon)
2627 			sPageDaemonCondition.WakeUp();
2628 
2629 		pageDeficitLocker.Unlock();
2630 
2631 		low_resource(B_KERNEL_RESOURCE_PAGES, count, B_RELATIVE_TIMEOUT, 0);
2632 		thread_block();
2633 
2634 		pageDeficitLocker.Lock();
2635 
2636 		return 0;
2637 	}
2638 }
2639 
2640 
2641 //	#pragma mark - private kernel API
2642 
2643 
2644 /*!	Writes a range of modified pages of a cache to disk.
2645 	You need to hold the VMCache lock when calling this function.
2646 	Note that the cache lock is temporarily released while I/O is scheduled.
2647 	\param cache The cache.
2648 	\param firstPage Offset (in page size units) of the first page in the range.
2649 	\param endPage End offset (in page size units) of the page range. The page
2650 		at this offset is not included.
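
	A minimal usage sketch (error handling omitted; the caller holds the cache
	lock, which this function reacquires before returning):
		cache->Lock();
		vm_page_write_modified_page_range(cache, firstPage, endPage);
		cache->Unlock();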
2651 */
2652 status_t
2653 vm_page_write_modified_page_range(struct VMCache* cache, uint32 firstPage,
2654 	uint32 endPage)
2655 {
2656 	static const int32 kMaxPages = 256;
2657 	int32 maxPages = cache->MaxPagesPerWrite();
2658 	if (maxPages < 0 || maxPages > kMaxPages)
2659 		maxPages = kMaxPages;
2660 
2661 	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
2662 		| HEAP_DONT_LOCK_KERNEL_SPACE;
2663 
2664 	PageWriteWrapper stackWrappers[2];
2665 	PageWriteWrapper* wrapperPool
2666 		= new(malloc_flags(allocationFlags)) PageWriteWrapper[maxPages + 1];
2667 	if (wrapperPool == NULL) {
2668 		// don't fail, just limit our capabilities
2669 		wrapperPool = stackWrappers;
2670 		maxPages = 1;
2671 	}
2672 
2673 	int32 nextWrapper = 0;
2674 
2675 	PageWriteWrapper* wrappers[maxPages];
2676 	int32 usedWrappers = 0;
2677 
2678 	PageWriteTransfer transfer;
2679 	bool transferEmpty = true;
2680 
2681 	VMCachePagesTree::Iterator it
2682 		= cache->pages.GetIterator(firstPage, true, true);
2683 
2684 	while (true) {
2685 		vm_page* page = it.Next();
2686 		if (page == NULL || page->cache_offset >= endPage) {
2687 			if (transferEmpty)
2688 				break;
2689 
2690 			page = NULL;
2691 		}
2692 
2693 		if (page != NULL) {
2694 			if (page->busy
2695 				|| (page->State() != PAGE_STATE_MODIFIED
2696 					&& !vm_test_map_modification(page))) {
2697 				page = NULL;
2698 			}
2699 		}
2700 
2701 		PageWriteWrapper* wrapper = NULL;
2702 		if (page != NULL) {
2703 			wrapper = &wrapperPool[nextWrapper++];
2704 			if (nextWrapper > maxPages)
2705 				nextWrapper = 0;
2706 
2707 			DEBUG_PAGE_ACCESS_START(page);
2708 
2709 			wrapper->SetTo(page);
2710 
2711 			if (transferEmpty || transfer.AddPage(page)) {
2712 				if (transferEmpty) {
2713 					transfer.SetTo(NULL, page, maxPages);
2714 					transferEmpty = false;
2715 				}
2716 
2717 				DEBUG_PAGE_ACCESS_END(page);
2718 
2719 				wrappers[usedWrappers++] = wrapper;
2720 				continue;
2721 			}
2722 
2723 			DEBUG_PAGE_ACCESS_END(page);
2724 		}
2725 
2726 		if (transferEmpty)
2727 			continue;
2728 
2729 		cache->Unlock();
2730 		status_t status = transfer.Schedule(0);
2731 		cache->Lock();
2732 
2733 		for (int32 i = 0; i < usedWrappers; i++)
2734 			wrappers[i]->Done(status);
2735 
2736 		usedWrappers = 0;
2737 
2738 		if (page != NULL) {
2739 			transfer.SetTo(NULL, page, maxPages);
2740 			wrappers[usedWrappers++] = wrapper;
2741 		} else
2742 			transferEmpty = true;
2743 	}
2744 
2745 	if (wrapperPool != stackWrappers)
2746 		delete[] wrapperPool;
2747 
2748 	return B_OK;
2749 }
2750 
2751 
2752 /*!	You need to hold the VMCache lock when calling this function.
2753 	Note that the cache lock is temporarily released while I/O is scheduled.
2754 */
2755 status_t
2756 vm_page_write_modified_pages(VMCache *cache)
2757 {
2758 	return vm_page_write_modified_page_range(cache, 0,
2759 		(cache->virtual_end + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
2760 }
2761 
2762 
2763 /*!	Schedules the page writer to write back the specified \a page.
2764 	Note, however, that it might not do this immediately, and it can well
2765 	take several seconds until the page is actually written out.
2766 */
2767 void
2768 vm_page_schedule_write_page(vm_page *page)
2769 {
2770 	PAGE_ASSERT(page, page->State() == PAGE_STATE_MODIFIED);
2771 
2772 	vm_page_requeue(page, false);
2773 
2774 	sPageWriterCondition.WakeUp();
2775 }
2776 
2777 
2778 /*!	Cache must be locked.
2779 */
2780 void
2781 vm_page_schedule_write_page_range(struct VMCache *cache, uint32 firstPage,
2782 	uint32 endPage)
2783 {
2784 	uint32 modified = 0;
2785 	for (VMCachePagesTree::Iterator it
2786 				= cache->pages.GetIterator(firstPage, true, true);
2787 			vm_page *page = it.Next();) {
2788 		if (page->cache_offset >= endPage)
2789 			break;
2790 
2791 		if (!page->busy && page->State() == PAGE_STATE_MODIFIED) {
2792 			DEBUG_PAGE_ACCESS_START(page);
2793 			vm_page_requeue(page, false);
2794 			modified++;
2795 			DEBUG_PAGE_ACCESS_END(page);
2796 		}
2797 	}
2798 
2799 	if (modified > 0)
2800 		sPageWriterCondition.WakeUp();
2801 }
2802 
2803 
2804 void
2805 vm_page_init_num_pages(kernel_args *args)
2806 {
2807 	// calculate the size of memory by looking at the physical_memory_range array
2808 	addr_t physicalPagesEnd = 0;
2809 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
2810 
2811 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
2812 		physicalPagesEnd = (args->physical_memory_range[i].start
2813 			+ args->physical_memory_range[i].size) / B_PAGE_SIZE;
2814 	}
2815 
2816 	TRACE(("first phys page = 0x%lx, end 0x%lx\n", sPhysicalPageOffset,
2817 		physicalPagesEnd));
2818 
2819 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
2820 
2821 #ifdef LIMIT_AVAILABLE_MEMORY
2822 	if (sNumPages > LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE))
2823 		sNumPages = LIMIT_AVAILABLE_MEMORY * (1024 * 1024 / B_PAGE_SIZE);
2824 #endif
2825 }
2826 
2827 
2828 status_t
2829 vm_page_init(kernel_args *args)
2830 {
2831 	TRACE(("vm_page_init: entry\n"));
2832 
2833 	// init page queues
2834 	sModifiedPageQueue.Init("modified pages queue");
2835 	sInactivePageQueue.Init("inactive pages queue");
2836 	sActivePageQueue.Init("active pages queue");
2837 	sCachedPageQueue.Init("cached pages queue");
2838 	sFreePageQueue.Init("free pages queue");
2839 	sClearPageQueue.Init("clear pages queue");
2840 
2841 	new (&sPageReservationWaiters) PageReservationWaiterList;
2842 
2843 	// map in the new free page table
2844 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
2845 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
2846 
2847 	TRACE(("vm_init: putting free_page_table @ %p, # ents %ld (size 0x%x)\n",
2848 		sPages, sNumPages, (unsigned int)(sNumPages * sizeof(vm_page))));
2849 
2850 	// initialize the free page table
2851 	for (uint32 i = 0; i < sNumPages; i++) {
2852 		sPages[i].physical_page_number = sPhysicalPageOffset + i;
2853 		sPages[i].InitState(PAGE_STATE_FREE);
2854 		new(&sPages[i].mappings) vm_page_mappings();
2855 		sPages[i].wired_count = 0;
2856 		sPages[i].usage_count = 0;
2857 		sPages[i].busy_writing = false;
2858 		sPages[i].SetCacheRef(NULL);
2859 		#if DEBUG_PAGE_QUEUE
2860 			sPages[i].queue = NULL;
2861 		#endif
2862 		#if DEBUG_PAGE_ACCESS
2863 			sPages[i].accessing_thread = -1;
2864 		#endif
2865 		sFreePageQueue.Append(&sPages[i]);
2866 	}
2867 
2868 	sUnreservedFreePages = sNumPages;
2869 
2870 	TRACE(("initialized table\n"));
2871 
2872 	// mark the ranges between usable physical memory unused
2873 	addr_t previousEnd = 0;
2874 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
2875 		addr_t base = args->physical_memory_range[i].start;
2876 		addr_t size = args->physical_memory_range[i].size;
2877 		if (base > previousEnd) {
2878 			mark_page_range_in_use(previousEnd / B_PAGE_SIZE,
2879 				(base - previousEnd) / B_PAGE_SIZE, false);
2880 		}
2881 		previousEnd = base + size;
2882 	}
2883 
2884 	// mark the allocated physical page ranges wired
2885 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
2886 		mark_page_range_in_use(
2887 			args->physical_allocated_range[i].start / B_PAGE_SIZE,
2888 			args->physical_allocated_range[i].size / B_PAGE_SIZE, true);
2889 	}
2890 
2891 	// The target of actually free pages. This must be at least the system
2892 	// reserve, but should be a few more pages, so we don't have to extract
2893 	// a cached page with each allocation.
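	// For example, with 512 MB of RAM (131072 pages of 4 KiB each) this adds
	// sNumPages / 1024 = 128 pages on top of VM_PAGE_RESERVE_USER.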
2894 	sFreePagesTarget = VM_PAGE_RESERVE_USER
2895 		+ std::max((uint32)32, sNumPages / 1024);
2896 
2897 	// The target of free + cached and inactive pages. On low-memory machines
2898 	// keep things tight. free + cached is the pool of immediately allocatable
2899 	// pages. We want a few inactive pages, so when we're actually paging, we
2900 	// have a reasonably large set of pages to work with.
2901 	if (sUnreservedFreePages < 16 * 1024) {
2902 		sFreeOrCachedPagesTarget = sFreePagesTarget + 128;
2903 		sInactivePagesTarget = sFreePagesTarget / 3;
2904 	} else {
2905 		sFreeOrCachedPagesTarget = 2 * sFreePagesTarget;
2906 		sInactivePagesTarget = sFreePagesTarget / 2;
2907 	}
2908 
2909 	TRACE(("vm_page_init: exit\n"));
2910 
2911 	return B_OK;
2912 }
2913 
2914 
2915 status_t
2916 vm_page_init_post_area(kernel_args *args)
2917 {
2918 	void *dummy;
2919 
2920 	dummy = sPages;
2921 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
2922 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
2923 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2924 
2925 	add_debugger_command("page_stats", &dump_page_stats,
2926 		"Dump statistics about page usage");
2927 	add_debugger_command_etc("page", &dump_page,
2928 		"Dump page info",
2929 		"[ \"-p\" | \"-v\" ] [ \"-m\" ] <address>\n"
2930 		"Prints information for the physical page. If neither \"-p\" nor\n"
2931 		"\"-v\" are given, the provided address is interpreted as address of\n"
2932 		"the vm_page data structure for the page in question. If \"-p\" is\n"
2933 		"given, the address is the physical address of the page. If \"-v\" is\n"
2934 		"given, the address is interpreted as virtual address in the current\n"
2935 		"thread's address space and information for the page it is mapped\n"
2936 		"to (if any) is printed. If \"-m\" is specified, the command will\n"
2937 		"search all known address spaces for mappings to that page and print\n"
2938 		"them.\n", 0);
2939 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
2940 	add_debugger_command("find_page", &find_page,
2941 		"Find out which queue a page is actually in");
2942 
2943 #ifdef TRACK_PAGE_USAGE_STATS
2944 	add_debugger_command_etc("page_usage", &dump_page_usage_stats,
2945 		"Dumps statistics about page usage counts",
2946 		"\n"
2947 		"Dumps statistics about page usage counts.\n",
2948 		B_KDEBUG_DONT_PARSE_ARGUMENTS);
2949 #endif
2950 
2951 	return B_OK;
2952 }
2953 
2954 
2955 status_t
2956 vm_page_init_post_thread(kernel_args *args)
2957 {
2958 	new (&sFreePageCondition) ConditionVariable;
2959 	sFreePageCondition.Publish(&sFreePageQueue, "free page");
2960 
2961 	// create a kernel thread to clear out pages
2962 
2963 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
2964 		B_LOWEST_ACTIVE_PRIORITY, NULL);
2965 	resume_thread(thread);
2966 
2967 	// start page writer
2968 
2969 	sPageWriterCondition.Init("page writer");
2970 
2971 	thread = spawn_kernel_thread(&page_writer, "page writer",
2972 		B_NORMAL_PRIORITY + 1, NULL);
2973 	resume_thread(thread);
2974 
2975 	// start page daemon
2976 
2977 	sPageDaemonCondition.Init("page daemon");
2978 
2979 	thread = spawn_kernel_thread(&page_daemon, "page daemon",
2980 		B_NORMAL_PRIORITY, NULL);
2981 	resume_thread(thread);
2982 
2983 	return B_OK;
2984 }
2985 
2986 
2987 status_t
2988 vm_mark_page_inuse(addr_t page)
2989 {
2990 	return vm_mark_page_range_inuse(page, 1);
2991 }
2992 
2993 
2994 status_t
2995 vm_mark_page_range_inuse(addr_t startPage, addr_t length)
2996 {
2997 	return mark_page_range_in_use(startPage, length, false);
2998 }
2999 
3000 
3001 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
3002 */
3003 void
3004 vm_page_unreserve_pages(vm_page_reservation* reservation)
3005 {
3006 	uint32 count = reservation->count;
3007 	reservation->count = 0;
3008 
3009 	if (count == 0)
3010 		return;
3011 
3012 	TA(UnreservePages(count));
3013 
3014 	unreserve_pages(count);
3015 }
3016 
3017 
3018 /*!	With this call, you can reserve a number of free pages in the system.
3019 	They will only be handed out to someone who has actually reserved them.
3020 	This call returns as soon as the number of requested pages has been
3021 	reached.
3022 	The caller must not hold any cache lock or the function might deadlock.
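
	A minimal usage sketch (assuming the usual VM_PRIORITY_USER constant;
	error handling omitted):
		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_USER);
		vm_page* page = vm_page_allocate_page(&reservation,
			PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
		vm_page_unreserve_pages(&reservation);
	The page would eventually be released again, e.g. via vm_page_set_state().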
3023 */
3024 void
3025 vm_page_reserve_pages(vm_page_reservation* reservation, uint32 count,
3026 	int priority)
3027 {
3028 	reservation->count = count;
3029 
3030 	if (count == 0)
3031 		return;
3032 
3033 	TA(ReservePages(count));
3034 
3035 	reserve_pages(count, priority, false);
3036 }
3037 
3038 
3039 bool
3040 vm_page_try_reserve_pages(vm_page_reservation* reservation, uint32 count,
3041 	int priority)
3042 {
3043 	if (count == 0) {
3044 		reservation->count = count;
3045 		return true;
3046 	}
3047 
3048 	uint32 remaining = reserve_pages(count, priority, true);
3049 	if (remaining == 0) {
3050 		TA(ReservePages(count));
3051 		reservation->count = count;
3052 		return true;
3053 	}
3054 
3055 	unreserve_pages(count - remaining);
3056 
3057 	return false;
3058 }
3059 
3060 
3061 vm_page *
3062 vm_page_allocate_page(vm_page_reservation* reservation, uint32 flags)
3063 {
3064 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3065 	ASSERT(pageState != PAGE_STATE_FREE);
3066 	ASSERT(pageState != PAGE_STATE_CLEAR);
3067 
3068 	ASSERT(reservation->count > 0);
3069 	reservation->count--;
3070 
3071 	VMPageQueue* queue;
3072 	VMPageQueue* otherQueue;
3073 
3074 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3075 		queue = &sClearPageQueue;
3076 		otherQueue = &sFreePageQueue;
3077 	} else {
3078 		queue = &sFreePageQueue;
3079 		otherQueue = &sClearPageQueue;
3080 	}
3081 
3082 	TA(AllocatePage());
3083 
3084 	ReadLocker locker(sFreePageQueuesLock);
3085 
3086 	vm_page* page = queue->RemoveHeadUnlocked();
3087 	if (page == NULL) {
3088 		// if the primary queue was empty, grab the page from the
3089 		// secondary queue
3090 		page = otherQueue->RemoveHeadUnlocked();
3091 
3092 		if (page == NULL) {
3093 			// Unlikely, but possible: the page we have reserved has moved
3094 			// between the queues after we checked the first queue. Grab the
3095 			// write locker to make sure this doesn't happen again.
3096 			locker.Unlock();
3097 			WriteLocker writeLocker(sFreePageQueuesLock);
3098 
3099 			page = queue->RemoveHead();
3100 			if (page == NULL)
3101 				page = otherQueue->RemoveHead();
3102 
3103 			if (page == NULL) {
3104 				panic("Had reserved page, but there is none!");
3105 				return NULL;
3106 			}
3107 
3108 			// downgrade to read lock
3109 			locker.Lock();
3110 		}
3111 	}
3112 
3113 	if (page->CacheRef() != NULL)
3114 		panic("supposed to be free page %p has cache\n", page);
3115 
3116 	DEBUG_PAGE_ACCESS_START(page);
3117 
3118 	int oldPageState = page->State();
3119 	page->SetState(pageState);
3120 	page->busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3121 	page->usage_count = 0;
3122 	page->accessed = false;
3123 	page->modified = false;
3124 
3125 	locker.Unlock();
3126 
3127 	if (pageState < PAGE_STATE_FIRST_UNQUEUED)
3128 		sPageQueues[pageState].AppendUnlocked(page);
3129 
3130 	// clear the page, if we had to take it from the free queue and a clear
3131 	// page was requested
3132 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0 && oldPageState != PAGE_STATE_CLEAR)
3133 		clear_page(page);
3134 
3135 	return page;
3136 }
3137 
3138 
3139 static void
3140 allocate_page_run_cleanup(VMPageQueue::PageList& freePages,
3141 	VMPageQueue::PageList& clearPages)
3142 {
3143 	while (vm_page* page = freePages.RemoveHead()) {
3144 		page->busy = false;
3145 		page->SetState(PAGE_STATE_FREE);
3146 		DEBUG_PAGE_ACCESS_END(page);
3147 		sFreePageQueue.PrependUnlocked(page);
3148 	}
3149 
3150 	while (vm_page* page = clearPages.RemoveHead()) {
3151 		page->busy = false;
3152 		page->SetState(PAGE_STATE_CLEAR);
3153 		DEBUG_PAGE_ACCESS_END(page);
3154 		sClearPageQueue.PrependUnlocked(page);
3155 	}
3157 }
3158 
3159 
3160 /*!	Tries to allocate a contiguous run of \a length pages starting at
3161 	index \a start.
3162 
3163 	The caller must have write-locked the free/clear page queues. The function
3164 	will unlock them regardless of whether it succeeds or fails.
3165 
3166 	If the function fails, it cleans up after itself, i.e. it will free all
3167 	pages it managed to allocate.
3168 
3169 	\param start The start index (into \c sPages) of the run.
3170 	\param length The number of pages to allocate.
3171 	\param flags Page allocation flags. Encodes the state the function shall
3172 		set the allocated pages to, whether the pages shall be marked busy
3173 		(VM_PAGE_ALLOC_BUSY), and whether the pages shall be cleared
3174 		(VM_PAGE_ALLOC_CLEAR).
3175 	\param freeClearQueueLocker WriteLocker for the free/clear page queues,
3176 		in locked state. Will be unlocked by the function.
3177 	\return The index of the first page that could not be allocated. \a length
3178 		is returned when the function was successful.
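		For example, if only the first three pages of an eight-page run could
		be allocated, 3 is returned and those three pages have been freed
		again.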
3179 */
3180 static page_num_t
3181 allocate_page_run(page_num_t start, page_num_t length, uint32 flags,
3182 	WriteLocker& freeClearQueueLocker)
3183 {
3184 	uint32 pageState = flags & VM_PAGE_ALLOC_STATE;
3185 	ASSERT(pageState != PAGE_STATE_FREE);
3186 	ASSERT(pageState != PAGE_STATE_CLEAR);
3187 
3188 	TA(AllocatePageRun(length));
3189 
3190 	// Pull the free/clear pages out of their respective queues. Cached pages
3191 	// are allocated later.
3192 	page_num_t cachedPages = 0;
3193 	VMPageQueue::PageList freePages;
3194 	VMPageQueue::PageList clearPages;
3195 	page_num_t i = 0;
3196 	for (; i < length; i++) {
3197 		bool pageAllocated = true;
3198 		bool noPage = false;
3199 		vm_page& page = sPages[start + i];
3200 		switch (page.State()) {
3201 			case PAGE_STATE_CLEAR:
3202 				DEBUG_PAGE_ACCESS_START(&page);
3203 				sClearPageQueue.Remove(&page);
3204 				clearPages.Add(&page);
3205 				break;
3206 			case PAGE_STATE_FREE:
3207 				DEBUG_PAGE_ACCESS_START(&page);
3208 				sFreePageQueue.Remove(&page);
3209 				freePages.Add(&page);
3210 				break;
3211 			case PAGE_STATE_CACHED:
3212 				// We allocate cached pages later.
3213 				cachedPages++;
3214 				pageAllocated = false;
3215 				break;
3216 
3217 			default:
3218 				// Probably a page was cached when our caller checked. Now it's
3219 				// gone and we have to abort.
3220 				noPage = true;
3221 				break;
3222 		}
3223 
3224 		if (noPage)
3225 			break;
3226 
3227 		if (pageAllocated) {
3228 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3229 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3230 			page.usage_count = 0;
3231 			page.accessed = false;
3232 			page.modified = false;
3233 		}
3234 	}
3235 
3236 	if (i < length) {
3237 		// failed to allocate a page -- free all that we've got
3238 		allocate_page_run_cleanup(freePages, clearPages);
3239 		return i;
3240 	}
3241 
3242 	freeClearQueueLocker.Unlock();
3243 
3244 	if (cachedPages > 0) {
3245 		// allocate the pages that weren't free but cached
3246 		page_num_t freedCachedPages = 0;
3247 		page_num_t nextIndex = start;
3248 		vm_page* freePage = freePages.Head();
3249 		vm_page* clearPage = clearPages.Head();
3250 		while (cachedPages > 0) {
3251 			// skip, if we've already got the page
3252 			if (freePage != NULL && size_t(freePage - sPages) == nextIndex) {
3253 				freePage = freePages.GetNext(freePage);
3254 				nextIndex++;
3255 				continue;
3256 			}
3257 			if (clearPage != NULL && size_t(clearPage - sPages) == nextIndex) {
3258 				clearPage = clearPages.GetNext(clearPage);
3259 				nextIndex++;
3260 				continue;
3261 			}
3262 
3263 			// free the page, if it is still cached
3264 			vm_page& page = sPages[nextIndex];
3265 			if (!free_cached_page(&page, false))
3266 				break;
3267 
3268 			page.SetState(flags & VM_PAGE_ALLOC_STATE);
3269 			page.busy = (flags & VM_PAGE_ALLOC_BUSY) != 0;
3270 			page.usage_count = 0;
3271 			page.accessed = false;
3272 			page.modified = false;
3273 
3274 			freePages.InsertBefore(freePage, &page);
3275 			freedCachedPages++;
3276 			cachedPages--;
3277 			nextIndex++;
3278 		}
3279 
3280 		// If we have freed cached pages, we need to balance things.
3281 		if (freedCachedPages > 0)
3282 			unreserve_pages(freedCachedPages);
3283 
3284 		if (nextIndex - start < length) {
3285 			// failed to allocate all cached pages -- free all that we've got
3286 			freeClearQueueLocker.Lock();
3287 			allocate_page_run_cleanup(freePages, clearPages);
3288 			freeClearQueueLocker.Unlock();
3289 
3290 			return nextIndex - start;
3291 		}
3292 	}
3293 
3294 	// clear pages, if requested
3295 	if ((flags & VM_PAGE_ALLOC_CLEAR) != 0) {
3296 		for (VMPageQueue::PageList::Iterator it = freePages.GetIterator();
3297 				vm_page* page = it.Next();) {
3298 			clear_page(page);
3299 		}
3300 	}
3301 
3302 	// add pages to target queue
3303 	if (pageState < PAGE_STATE_FIRST_UNQUEUED) {
3304 		freePages.MoveFrom(&clearPages);
3305 		sPageQueues[pageState].AppendUnlocked(freePages, length);
3306 	}
3307 
3308 	// Note: We don't unreserve the pages since we pulled them out of the
3309 	// free/clear queues without adjusting sUnreservedFreePages.
3310 
3311 	return length;
3312 }
3313 
3314 
3315 vm_page *
3316 vm_page_allocate_page_run(uint32 flags, addr_t base, size_t length,
3317 	int priority)
3318 {
3319 	uint32 start = base >> PAGE_SHIFT;
3320 
3321 	vm_page_reservation reservation;
3322 	vm_page_reserve_pages(&reservation, length, priority);
3323 
3324 	WriteLocker freeClearQueueLocker(sFreePageQueuesLock);
3325 
3326 	// First we try to get a run with free pages only. If that fails, we also
3327 	// consider cached pages. If there are only a few free pages and many cached
3328 	// ones, the odds are that we won't find enough contiguous ones, so we skip
3329 	// the first iteration in this case.
3330 	int32 freePages = sUnreservedFreePages;
3331 	int useCached = freePages > 0 && (page_num_t)freePages > 2 * length ? 0 : 1;
3332 
3333 	for (;;) {
3334 		bool foundRun = true;
3335 		if (start + length > sNumPages) {
3336 			if (useCached == 0) {
3337 				// The first iteration with free pages only was unsuccessful.
3338 				// Try again also considering cached pages.
3339 				useCached = 1;
3340 				start = base >> PAGE_SHIFT;
3341 				continue;
3342 			}
3343 
3344 			dprintf("vm_page_allocate_page_run(): Failed to allocate run of "
3345 				"length %" B_PRIuSIZE " in second iteration!", length);
3346 
3347 			freeClearQueueLocker.Unlock();
3348 			vm_page_unreserve_pages(&reservation);
3349 			return NULL;
3350 		}
3351 
3352 		uint32 i;
3353 		for (i = 0; i < length; i++) {
3354 			uint32 pageState = sPages[start + i].State();
3355 			if (pageState != PAGE_STATE_FREE
3356 				&& pageState != PAGE_STATE_CLEAR
3357 				&& (pageState != PAGE_STATE_CACHED || useCached == 0)) {
3358 				foundRun = false;
3359 				break;
3360 			}
3361 		}
3362 
3363 		if (foundRun) {
3364 			i = allocate_page_run(start, length, flags, freeClearQueueLocker);
3365 			if (i == length)
3366 				return &sPages[start];
3367 
3368 			// apparently a cached page couldn't be allocated -- skip it and
3369 			// continue
3370 			freeClearQueueLocker.Lock();
3371 		}
3372 
3373 		start += i + 1;
3374 	}
3375 }
3376 
3377 
3378 vm_page *
3379 vm_page_at_index(int32 index)
3380 {
3381 	return &sPages[index];
3382 }
3383 
3384 
3385 vm_page *
3386 vm_lookup_page(addr_t pageNumber)
3387 {
3388 	if (pageNumber < sPhysicalPageOffset)
3389 		return NULL;
3390 
3391 	pageNumber -= sPhysicalPageOffset;
3392 	if (pageNumber >= sNumPages)
3393 		return NULL;
3394 
3395 	return &sPages[pageNumber];
3396 }
3397 
3398 
3399 bool
3400 vm_page_is_dummy(struct vm_page *page)
3401 {
3402 	return page < sPages || page >= sPages + sNumPages;
3403 }
3404 
3405 
3406 /*!	Free the page that belonged to a certain cache.
3407 	You can use vm_page_set_state() manually if you prefer, but only
3408 	if the page's state does not equal PAGE_STATE_MODIFIED.
3409 */
3410 void
3411 vm_page_free(VMCache *cache, vm_page *page)
3412 {
3413 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3414 		&& page->State() != PAGE_STATE_CLEAR);
3415 
3416 	if (page->State() == PAGE_STATE_MODIFIED && cache->temporary)
3417 		atomic_add(&sModifiedTemporaryPages, -1);
3418 
3419 	free_page(page, false);
3420 }
3421 
3422 
3423 void
3424 vm_page_set_state(vm_page *page, int pageState)
3425 {
3426 	PAGE_ASSERT(page, page->State() != PAGE_STATE_FREE
3427 		&& page->State() != PAGE_STATE_CLEAR);
3428 
3429 	if (pageState == PAGE_STATE_FREE || pageState == PAGE_STATE_CLEAR)
3430 		free_page(page, pageState == PAGE_STATE_CLEAR);
3431 	else
3432 		set_page_state(page, pageState);
3433 }
3434 
3435 
3436 /*!	Moves a page to either the tail or the head of its current queue,
3437 	depending on \a tail.
3438 	The page must have a cache and the cache must be locked!
3439 */
3440 void
3441 vm_page_requeue(struct vm_page *page, bool tail)
3442 {
3443 	PAGE_ASSERT(page, page->Cache() != NULL);
3444 	page->Cache()->AssertLocked();
3445 	// DEBUG_PAGE_ACCESS_CHECK(page);
3446 		// TODO: This assertion cannot be satisfied by idle_scan_active_pages()
3447 		// when it requeues busy pages. The reason is that vm_soft_fault()
3448 		// (respectively fault_get_page()) and the file cache keep newly
3449 		// allocated pages accessed while they are reading them from disk. It
3450 		// would probably be better to change that code and reenable this
3451 		// check.
3452 
3453 	VMPageQueue *queue = NULL;
3454 
3455 	switch (page->State()) {
3456 		case PAGE_STATE_ACTIVE:
3457 			queue = &sActivePageQueue;
3458 			break;
3459 		case PAGE_STATE_INACTIVE:
3460 			queue = &sInactivePageQueue;
3461 			break;
3462 		case PAGE_STATE_MODIFIED:
3463 			queue = &sModifiedPageQueue;
3464 			break;
3465 		case PAGE_STATE_CACHED:
3466 			queue = &sCachedPageQueue;
3467 			break;
3468 		case PAGE_STATE_FREE:
3469 		case PAGE_STATE_CLEAR:
3470 			panic("vm_page_requeue() called for free/clear page %p", page);
3471 			return;
3472 		case PAGE_STATE_WIRED:
3473 		case PAGE_STATE_UNUSED:
3474 			return;
3475 		default:
3476 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
3477 				page, page->State());
3478 			break;
3479 	}
3480 
3481 	queue->RequeueUnlocked(page, tail);
3482 }
3483 
3484 
3485 size_t
3486 vm_page_num_pages(void)
3487 {
3488 	return sNumPages;
3489 }
3490 
3491 
3492 /*! There is a subtle distinction between the page counts returned by
3493 	this function and vm_page_num_free_pages():
3494 	The latter returns the number of pages that are completely uncommitted,
3495 	whereas this one returns the number of pages that are available for
3496 	use by being reclaimed as well (IOW it factors in things like cache pages
3497 	as available).
3498 */
3499 size_t
3500 vm_page_num_available_pages(void)
3501 {
3502 	return vm_available_memory() / B_PAGE_SIZE;
3503 }
3504 
3505 
3506 size_t
3507 vm_page_num_free_pages(void)
3508 {
3509 	int32 count = sUnreservedFreePages + sCachedPageQueue.Count();
3510 	return count > 0 ? count : 0;
3511 }
3512 
3513 
3514 size_t
3515 vm_page_num_unused_pages(void)
3516 {
3517 	int32 count = sUnreservedFreePages;
3518 	return count > 0 ? count : 0;
3519 }
3520 
3521 
3522 void
3523 vm_page_get_stats(system_info *info)
3524 {
3525 	// Get free pages count -- not really exact, since we don't know how many
3526 	// of the reserved pages have already been allocated, but good citizens
3527 	// unreserve chunk-wise as they are allocating the pages, if they have
3528 	// reserved a larger quantity.
3529 	int32 free = sUnreservedFreePages;
3530 	if (free < 0)
3531 		free = 0;
3532 
3533 	// The pages used for the block cache buffers. Those should not be counted
3534 	// as used but as cached pages.
3535 	// TODO: We should subtract the blocks that are in use ATM, since those
3536 	// can't really be freed in a low memory situation.
3537 	page_num_t blockCachePages = block_cache_used_memory() / B_PAGE_SIZE;
3538 
3539 	info->max_pages = sNumPages;
3540 	info->used_pages = gMappedPagesCount - blockCachePages;
3541 	info->cached_pages = sNumPages >= (uint32)free + info->used_pages
3542 		? sNumPages - free - info->used_pages : 0;
3543 	info->page_faults = vm_num_page_faults();
3544 
3545 	// TODO: We don't consider pages used for page directories/tables yet.
3546 }
3547