xref: /haiku/src/system/kernel/vm/vm_page.cpp (revision 4b3b81da9e459443d75329cfd08bc9a57ad02653)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 #include <signal.h>
10 #include <string.h>
11 #include <stdlib.h>
12 
13 #include <KernelExport.h>
14 #include <OS.h>
15 
16 #include <arch/cpu.h>
17 #include <arch/vm_translation_map.h>
18 #include <boot/kernel_args.h>
19 #include <condition_variable.h>
20 #include <kernel.h>
21 #include <thread.h>
22 #include <tracing.h>
23 #include <util/AutoLock.h>
24 #include <vm.h>
25 #include <vm_address_space.h>
26 #include <vm_low_memory.h>
27 #include <vm_priv.h>
28 #include <vm_page.h>
29 #include <vm_cache.h>
30 
31 #include "PageCacheLocker.h"
32 
33 
34 //#define TRACE_VM_PAGE
35 #ifdef TRACE_VM_PAGE
36 #	define TRACE(x) dprintf x
37 #else
38 #	define TRACE(x) ;
39 #endif
40 
41 #define SCRUB_SIZE 16
42 	// this many pages will be cleared at once in the page scrubber thread
43 
44 typedef struct page_queue {
45 	vm_page *head;
46 	vm_page *tail;
47 	uint32	count;
48 } page_queue;
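// The queues below are intrusive doubly linked lists: the links live in
// vm_page::queue_next/queue_prev, so a page can sit in at most one queue at
// a time, and every queue operation is done with sPageLock held and
// interrupts disabled. Illustrative sketch only (not part of the original
// source) of how such a queue is walked:
//
//	InterruptsSpinLocker locker(sPageLock);
//	for (vm_page *p = sActivePageQueue.head; p != NULL; p = p->queue_next) {
//		// inspect p here - don't block while holding the spinlock
//	}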
49 
50 static page_queue sFreePageQueue;
51 static page_queue sClearPageQueue;
52 static page_queue sModifiedPageQueue;
53 static page_queue sInactivePageQueue;
54 static page_queue sActivePageQueue;
55 
56 static vm_page *sPages;
57 static addr_t sPhysicalPageOffset;
58 static size_t sNumPages;
59 static size_t sReservedPages;
60 static vint32 sPageDeficit;
61 static size_t sModifiedTemporaryPages;
62 
63 static ConditionVariable<page_queue> sFreePageCondition;
64 static spinlock sPageLock;
65 
66 static sem_id sWriterWaitSem;
67 
68 
69 #ifdef PAGE_ALLOCATION_TRACING
70 
71 namespace PageAllocationTracing {
72 
73 class ReservePages : public AbstractTraceEntry {
74 	public:
75 		ReservePages(uint32 count)
76 			:
77 			fCount(count)
78 		{
79 			Initialized();
80 		}
81 
82 		virtual void AddDump(TraceOutput& out)
83 		{
84 			out.Print("page reserve:   %lu", fCount);
85 		}
86 
87 	private:
88 		uint32		fCount;
89 };
90 
91 
92 class UnreservePages : public AbstractTraceEntry {
93 	public:
94 		UnreservePages(uint32 count)
95 			:
96 			fCount(count)
97 		{
98 			Initialized();
99 		}
100 
101 		virtual void AddDump(TraceOutput& out)
102 		{
103 			out.Print("page unreserve: %lu", fCount);
104 		}
105 
106 	private:
107 		uint32		fCount;
108 };
109 
110 
111 class AllocatePage : public AbstractTraceEntry {
112 	public:
113 		AllocatePage(bool reserved)
114 			:
115 			fReserved(reserved)
116 		{
117 			Initialized();
118 		}
119 
120 		virtual void AddDump(TraceOutput& out)
121 		{
122 			out.Print("page alloc");
123 			if (fReserved)
124 				out.Print(" reserved");
125 		}
126 
127 	private:
128 		bool		fReserved;
129 };
130 
131 
132 class AllocatePageRun : public AbstractTraceEntry {
133 	public:
134 		AllocatePageRun(uint32 length)
135 			:
136 			fLength(length)
137 		{
138 			Initialized();
139 		}
140 
141 		virtual void AddDump(TraceOutput& out)
142 		{
143 			out.Print("page alloc run: length: %ld", fLength);
144 		}
145 
146 	private:
147 		uint32		fLength;
148 };
149 
150 
151 class FreePage : public AbstractTraceEntry {
152 	public:
153 		FreePage()
154 		{
155 			Initialized();
156 		}
157 
158 		virtual void AddDump(TraceOutput& out)
159 		{
160 			out.Print("page free");
161 		}
162 };
163 
164 
165 class ScrubbingPages : public AbstractTraceEntry {
166 	public:
167 		ScrubbingPages(uint32 count)
168 			:
169 			fCount(count)
170 		{
171 			Initialized();
172 		}
173 
174 		virtual void AddDump(TraceOutput& out)
175 		{
176 			out.Print("page scrubbing: %lu", fCount);
177 		}
178 
179 	private:
180 		uint32		fCount;
181 };
182 
183 
184 class ScrubbedPages : public AbstractTraceEntry {
185 	public:
186 		ScrubbedPages(uint32 count)
187 			:
188 			fCount(count)
189 		{
190 			Initialized();
191 		}
192 
193 		virtual void AddDump(TraceOutput& out)
194 		{
195 			out.Print("page scrubbed:  %lu", fCount);
196 		}
197 
198 	private:
199 		uint32		fCount;
200 };
201 
202 
203 class StolenPage : public AbstractTraceEntry {
204 	public:
205 		StolenPage()
206 		{
207 			Initialized();
208 		}
209 
210 		virtual void AddDump(TraceOutput& out)
211 		{
212 			out.Print("page stolen");
213 		}
214 };
215 
216 }	// namespace PageAllocationTracing
217 
218 #	define T(x)	new(std::nothrow) PageAllocationTracing::x
219 
220 #else
221 #	define T(x)
222 #endif	// PAGE_ALLOCATION_TRACING
223 
224 
225 /*!	Dequeues a page from the head of the given queue */
226 static vm_page *
227 dequeue_page(page_queue *queue)
228 {
229 	vm_page *page;
230 
231 	page = queue->head;
232 	if (page != NULL) {
233 		if (queue->tail == page)
234 			queue->tail = NULL;
235 		if (page->queue_next != NULL)
236 			page->queue_next->queue_prev = NULL;
237 
238 		queue->head = page->queue_next;
239 		queue->count--;
240 
241 #ifdef DEBUG_PAGE_QUEUE
242 		if (page->queue != queue) {
243 			panic("dequeue_page(queue: %p): page %p thinks it is in queue "
244 				"%p", queue, page, page->queue);
245 		}
246 
247 		page->queue = NULL;
248 #endif	// DEBUG_PAGE_QUEUE
249 	}
250 
251 	return page;
252 }
253 
254 
255 /*!	Enqueues a page to the tail of the given queue */
256 static void
257 enqueue_page(page_queue *queue, vm_page *page)
258 {
259 #ifdef DEBUG_PAGE_QUEUE
260 	if (page->queue != NULL) {
261 		panic("enqueue_page(queue: %p, page: %p): page thinks it is "
262 			"already in queue %p", queue, page, page->queue);
263 	}
264 #endif	// DEBUG_PAGE_QUEUE
265 
266 	if (queue->tail != NULL)
267 		queue->tail->queue_next = page;
268 	page->queue_prev = queue->tail;
269 	queue->tail = page;
270 	page->queue_next = NULL;
271 	if (queue->head == NULL)
272 		queue->head = page;
273 	queue->count++;
274 
275 #ifdef DEBUG_PAGE_QUEUE
276 	page->queue = queue;
277 #endif
278 }
279 
280 
281 /*!	Enqueues a page to the head of the given queue */
282 static void
283 enqueue_page_to_head(page_queue *queue, vm_page *page)
284 {
285 #ifdef DEBUG_PAGE_QUEUE
286 	if (page->queue != NULL) {
287 		panic("enqueue_page_to_head(queue: %p, page: %p): page thinks it is "
288 			"already in queue %p", queue, page, page->queue);
289 	}
290 #endif	// DEBUG_PAGE_QUEUE
291 
292 	if (queue->head != NULL)
293 		queue->head->queue_prev = page;
294 	page->queue_next = queue->head;
295 	queue->head = page;
296 	page->queue_prev = NULL;
297 	if (queue->tail == NULL)
298 		queue->tail = page;
299 	queue->count++;
300 
301 #ifdef DEBUG_PAGE_QUEUE
302 	page->queue = queue;
303 #endif
304 }
305 
306 
307 static void
308 remove_page_from_queue(page_queue *queue, vm_page *page)
309 {
310 #ifdef DEBUG_PAGE_QUEUE
311 	if (page->queue != queue) {
312 		panic("remove_page_from_queue(queue: %p, page: %p): page thinks it "
313 			"is in queue %p", queue, page, page->queue);
314 	}
315 #endif	// DEBUG_PAGE_QUEUE
316 
317 	if (page->queue_next != NULL)
318 		page->queue_next->queue_prev = page->queue_prev;
319 	else
320 		queue->tail = page->queue_prev;
321 
322 	if (page->queue_prev != NULL)
323 		page->queue_prev->queue_next = page->queue_next;
324 	else
325 		queue->head = page->queue_next;
326 
327 	queue->count--;
328 
329 #ifdef DEBUG_PAGE_QUEUE
330 	page->queue = NULL;
331 #endif
332 }
333 
334 
335 /*!	Moves a page to the tail of the given queue, but only does so if
336 	the page is currently in another queue.
337 */
338 static void
339 move_page_to_queue(page_queue *fromQueue, page_queue *toQueue, vm_page *page)
340 {
341 	if (fromQueue != toQueue) {
342 		remove_page_from_queue(fromQueue, page);
343 		enqueue_page(toQueue, page);
344 	}
345 }
346 
347 
348 /*! Inserts \a page after the \a before page in the \a queue. */
349 static void
350 insert_page_after(page_queue *queue, vm_page *before, vm_page *page)
351 {
352 #ifdef DEBUG_PAGE_QUEUE
353 	if (page->queue != NULL) {
354 		panic("insert_page_after(queue: %p, page: %p): page thinks it is "
355 			"already in queue %p", queue, page, page->queue);
356 	}
357 #endif	// DEBUG_PAGE_QUEUE
358 
359 	if (before == NULL) {
360 		enqueue_page(queue, page);
361 		return;
362 	}
363 
364 	page->queue_next = before->queue_next;
365 	if (page->queue_next != NULL)
366 		page->queue_next->queue_prev = page;
367 	page->queue_prev = before;
368 	before->queue_next = page;
369 
370 	if (queue->tail == before)
371 		queue->tail = page;
372 
373 	queue->count++;
374 
375 #ifdef DEBUG_PAGE_QUEUE
376 	page->queue = queue;
377 #endif
378 }
379 
380 
381 static int
382 find_page(int argc, char **argv)
383 {
384 	struct vm_page *page;
385 	addr_t address;
386 	int32 index = 1;
387 	int i;
388 
389 	struct {
390 		const char*	name;
391 		page_queue*	queue;
392 	} pageQueueInfos[] = {
393 		{ "free",		&sFreePageQueue },
394 		{ "clear",		&sClearPageQueue },
395 		{ "modified",	&sModifiedPageQueue },
396 		{ "active",		&sActivePageQueue },
		{ "inactive",	&sInactivePageQueue },
397 		{ NULL, NULL }
398 	};
399 
400 	if (argc < 2
401 		|| strlen(argv[index]) <= 2
402 		|| argv[index][0] != '0'
403 		|| argv[index][1] != 'x') {
404 		kprintf("usage: find_page <address>\n");
405 		return 0;
406 	}
407 
408 	address = strtoul(argv[index], NULL, 0);
409 	page = (vm_page*)address;
410 
411 	for (i = 0; pageQueueInfos[i].name; i++) {
412 		vm_page* p = pageQueueInfos[i].queue->head;
413 		while (p) {
414 			if (p == page) {
415 				kprintf("found page %p in queue %p (%s)\n", page,
416 					pageQueueInfos[i].queue, pageQueueInfos[i].name);
417 				return 0;
418 			}
419 			p = p->queue_next;
420 		}
421 	}
422 
423 	kprintf("page %p isn't in any queue\n", page);
424 
425 	return 0;
426 }
427 
428 
429 const char *
430 page_state_to_string(int state)
431 {
432 	switch(state) {
433 		case PAGE_STATE_ACTIVE:
434 			return "active";
435 		case PAGE_STATE_INACTIVE:
436 			return "inactive";
437 		case PAGE_STATE_BUSY:
438 			return "busy";
439 		case PAGE_STATE_MODIFIED:
440 			return "modified";
441 		case PAGE_STATE_FREE:
442 			return "free";
443 		case PAGE_STATE_CLEAR:
444 			return "clear";
445 		case PAGE_STATE_WIRED:
446 			return "wired";
447 		case PAGE_STATE_UNUSED:
448 			return "unused";
449 		default:
450 			return "unknown";
451 	}
452 }
453 
454 
455 static int
456 dump_page(int argc, char **argv)
457 {
458 	struct vm_page *page;
459 	addr_t address;
460 	bool physical = false;
461 	int32 index = 1;
462 
463 	if (argc > 2) {
464 		if (!strcmp(argv[1], "-p")) {
465 			physical = true;
466 			index++;
467 		} else if (!strcmp(argv[1], "-v"))
468 			index++;
469 	}
470 
471 	if (argc < 2
472 		|| strlen(argv[index]) <= 2
473 		|| argv[index][0] != '0'
474 		|| argv[index][1] != 'x') {
475 		kprintf("usage: page [-p|-v] <address>\n"
476 			"  -v looks up a virtual address for the page, -p a physical address.\n"
477 			"  Default is to look for the page structure address directly.\n");
478 		return 0;
479 	}
480 
481 	address = strtoul(argv[index], NULL, 0);
482 
483 	if (index == 2) {
484 		if (!physical) {
485 			vm_address_space *addressSpace = vm_kernel_address_space();
486 			uint32 flags;
487 
488 			if (thread_get_current_thread()->team->address_space != NULL)
489 				addressSpace = thread_get_current_thread()->team->address_space;
490 
491 			addressSpace->translation_map.ops->query_interrupt(
492 				&addressSpace->translation_map, address, &address, &flags);
493 		}
494 		page = vm_lookup_page(address / B_PAGE_SIZE);
495 	} else
496 		page = (struct vm_page *)address;
497 
498 	kprintf("PAGE: %p\n", page);
499 	kprintf("queue_next,prev: %p, %p\n", page->queue_next, page->queue_prev);
500 	kprintf("hash_next:       %p\n", page->hash_next);
501 	kprintf("physical_number: %lx\n", page->physical_page_number);
502 	kprintf("cache:           %p\n", page->cache);
503 	kprintf("cache_offset:    %ld\n", page->cache_offset);
504 	kprintf("cache_next,prev: %p, %p\n", page->cache_next, page->cache_prev);
505 	kprintf("type:            %d\n", page->type);
506 	kprintf("state:           %s\n", page_state_to_string(page->state));
507 	kprintf("wired_count:     %d\n", page->wired_count);
508 	kprintf("usage_count:     %d\n", page->usage_count);
509 	kprintf("busy_writing:    %d\n", page->busy_writing);
510 	#ifdef DEBUG_PAGE_QUEUE
511 		kprintf("queue:           %p\n", page->queue);
512 	#endif
513 	#ifdef DEBUG_PAGE_CACHE_TRANSITIONS
514 		kprintf("debug_flags:     0x%lx\n", page->debug_flags);
515 		kprintf("collided page:   %p\n", page->collided_page);
516 	#endif	// DEBUG_PAGE_CACHE_TRANSITIONS
517 	kprintf("area mappings:\n");
518 
519 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
520 	vm_page_mapping *mapping;
521 	while ((mapping = iterator.Next()) != NULL) {
522 		kprintf("  %p (%#lx)\n", mapping->area, mapping->area->id);
524 	}
525 
526 	return 0;
527 }
528 
529 
530 static int
531 dump_page_queue(int argc, char **argv)
532 {
533 	struct page_queue *queue;
534 
535 	if (argc < 2) {
536 		kprintf("usage: page_queue <address/name> [list]\n");
537 		return 0;
538 	}
539 
540 	if (strlen(argv[1]) >= 2 && argv[1][0] == '0' && argv[1][1] == 'x')
541 		queue = (struct page_queue *)strtoul(argv[1], NULL, 16);
542 	else if (!strcmp(argv[1], "free"))
543 		queue = &sFreePageQueue;
544 	else if (!strcmp(argv[1], "clear"))
545 		queue = &sClearPageQueue;
546 	else if (!strcmp(argv[1], "modified"))
547 		queue = &sModifiedPageQueue;
548 	else if (!strcmp(argv[1], "active"))
549 		queue = &sActivePageQueue;
550 	else if (!strcmp(argv[1], "inactive"))
551 		queue = &sInactivePageQueue;
552 	else {
553 		kprintf("page_queue: unknown queue \"%s\".\n", argv[1]);
554 		return 0;
555 	}
556 
557 	kprintf("queue = %p, queue->head = %p, queue->tail = %p, queue->count = %ld\n",
558 		queue, queue->head, queue->tail, queue->count);
559 
560 	if (argc == 3) {
561 		struct vm_page *page = queue->head;
562 		int i;
563 
564 		kprintf("page        cache       type       state  wired  usage\n");
565 		for (i = 0; page; i++, page = page->queue_next) {
566 			const char *type = "none";
567 			if (page->cache != NULL) {
568 				switch (page->cache->type) {
569 					case CACHE_TYPE_RAM:
570 						type = "RAM";
571 						break;
572 					case CACHE_TYPE_DEVICE:
573 						type = "device";
574 						break;
575 					case CACHE_TYPE_VNODE:
576 						type = "vnode";
577 						break;
578 					case CACHE_TYPE_NULL:
579 						type = "null";
580 						break;
581 					default:
582 						type = "???";
583 						break;
584 				}
585 			}
586 
587 			kprintf("%p  %p  %-7s %8s  %5d  %5d\n", page, page->cache,
588 				type, page_state_to_string(page->state),
589 				page->wired_count, page->usage_count);
590 		}
591 	}
592 	return 0;
593 }
594 
595 
596 static int
597 dump_page_stats(int argc, char **argv)
598 {
599 	uint32 counter[8];
600 	int32 totalActive;
601 	addr_t i;
602 
603 	memset(counter, 0, sizeof(counter));
604 
605 	for (i = 0; i < sNumPages; i++) {
606 		if (sPages[i].state > 7)
607 			panic("page %lu at %p has invalid state!\n", i, &sPages[i]);
608 
609 		counter[sPages[i].state]++;
610 	}
611 
612 	kprintf("page stats:\n");
613 	kprintf("active: %lu\ninactive: %lu\nbusy: %lu\nunused: %lu\n",
614 		counter[PAGE_STATE_ACTIVE], counter[PAGE_STATE_INACTIVE],
615 		counter[PAGE_STATE_BUSY], counter[PAGE_STATE_UNUSED]);
616 	kprintf("wired: %lu\nmodified: %lu\nfree: %lu\nclear: %lu\n",
617 		counter[PAGE_STATE_WIRED], counter[PAGE_STATE_MODIFIED],
618 		counter[PAGE_STATE_FREE], counter[PAGE_STATE_CLEAR]);
619 	kprintf("reserved pages: %lu\n", sReservedPages);
620 	kprintf("page deficit: %lu\n", sPageDeficit);
621 
622 	kprintf("\nfree queue: %p, count = %ld\n", &sFreePageQueue,
623 		sFreePageQueue.count);
624 	kprintf("clear queue: %p, count = %ld\n", &sClearPageQueue,
625 		sClearPageQueue.count);
626 	kprintf("modified queue: %p, count = %ld (%ld temporary)\n",
627 		&sModifiedPageQueue, sModifiedPageQueue.count, sModifiedTemporaryPages);
628 	kprintf("active queue: %p, count = %ld\n", &sActivePageQueue,
629 		sActivePageQueue.count);
630 	kprintf("inactive queue: %p, count = %ld\n", &sInactivePageQueue,
631 		sInactivePageQueue.count);
632 	return 0;
633 }
634 
635 
636 static inline size_t
637 free_page_queue_count(void)
638 {
639 	return sFreePageQueue.count + sClearPageQueue.count;
640 }
641 
642 
643 static status_t
644 set_page_state_nolock(vm_page *page, int pageState)
645 {
646 	if (pageState == page->state)
647 		return B_OK;
648 
649 	page_queue *fromQueue = NULL;
650 	page_queue *toQueue = NULL;
651 
652 	switch (page->state) {
653 		case PAGE_STATE_BUSY:
654 		case PAGE_STATE_ACTIVE:
655 		case PAGE_STATE_WIRED:
656 		case PAGE_STATE_UNUSED:
657 			fromQueue = &sActivePageQueue;
658 			break;
659 		case PAGE_STATE_INACTIVE:
660 			fromQueue = &sInactivePageQueue;
661 			break;
662 		case PAGE_STATE_MODIFIED:
663 			fromQueue = &sModifiedPageQueue;
664 			break;
665 		case PAGE_STATE_FREE:
666 			fromQueue = &sFreePageQueue;
667 			break;
668 		case PAGE_STATE_CLEAR:
669 			fromQueue = &sClearPageQueue;
670 			break;
671 		default:
672 			panic("vm_page_set_state: vm_page %p in invalid state %d\n",
673 				page, page->state);
674 			break;
675 	}
676 
677 	if (page->state == PAGE_STATE_CLEAR || page->state == PAGE_STATE_FREE) {
678 		if (page->cache != NULL)
679 			panic("free page %p has cache", page);
680 	}
681 
682 	switch (pageState) {
683 		case PAGE_STATE_BUSY:
684 		case PAGE_STATE_ACTIVE:
685 		case PAGE_STATE_WIRED:
686 		case PAGE_STATE_UNUSED:
687 			toQueue = &sActivePageQueue;
688 			break;
689 		case PAGE_STATE_INACTIVE:
690 			toQueue = &sInactivePageQueue;
691 			break;
692 		case PAGE_STATE_MODIFIED:
693 			toQueue = &sModifiedPageQueue;
694 			break;
695 		case PAGE_STATE_FREE:
696 			toQueue = &sFreePageQueue;
697 			break;
698 		case PAGE_STATE_CLEAR:
699 			toQueue = &sClearPageQueue;
700 			break;
701 		default:
702 			panic("vm_page_set_state: invalid target state %d\n", pageState);
703 	}
704 
705 	if (pageState == PAGE_STATE_CLEAR || pageState == PAGE_STATE_FREE
706 		|| pageState == PAGE_STATE_INACTIVE) {
707 		if (sPageDeficit > 0)
708 			sFreePageCondition.NotifyOne();
709 
710 		if (pageState != PAGE_STATE_INACTIVE && page->cache != NULL)
711 			panic("to be freed page %p has cache", page);
712 	}
713 	if (page->cache != NULL && page->cache->temporary) {
714 		if (pageState == PAGE_STATE_MODIFIED)
715 			sModifiedTemporaryPages++;
716 		else if (page->state == PAGE_STATE_MODIFIED)
717 			sModifiedTemporaryPages--;
718 	}
719 
720 #ifdef PAGE_ALLOCATION_TRACING
721 	if ((pageState == PAGE_STATE_CLEAR || pageState == PAGE_STATE_FREE)
722 		&& page->state != PAGE_STATE_CLEAR && page->state != PAGE_STATE_FREE) {
723 		T(FreePage());
724 	}
725 #endif	// PAGE_ALLOCATION_TRACING
726 
727 	page->state = pageState;
728 	move_page_to_queue(fromQueue, toQueue, page);
729 
730 	return B_OK;
731 }
732 
733 
734 /*! Moves a modified page into either the active or inactive page queue
735 	depending on its usage count and wiring.
736 */
737 static void
738 move_page_to_active_or_inactive_queue(vm_page *page, bool dequeued)
739 {
740 	// Note, this logic must be in sync with what the page daemon does
741 	int32 state;
742 	if (!page->mappings.IsEmpty() || page->usage_count >= 0
743 		|| page->wired_count)
744 		state = PAGE_STATE_ACTIVE;
745 	else
746 		state = PAGE_STATE_INACTIVE;
747 
748 	if (dequeued) {
749 		page->state = state;
750 		enqueue_page(state == PAGE_STATE_ACTIVE
751 			? &sActivePageQueue : &sInactivePageQueue, page);
752 		if (page->cache->temporary)
753 			sModifiedTemporaryPages--;
754 	} else
755 		set_page_state_nolock(page, state);
756 }
757 
758 
759 static void
760 clear_page(struct vm_page *page)
761 {
762 	addr_t virtualAddress;
763 	vm_get_physical_page(page->physical_page_number << PAGE_SHIFT,
764 		&virtualAddress, PHYSICAL_PAGE_CAN_WAIT);
765 
766 	memset((void *)virtualAddress, 0, B_PAGE_SIZE);
767 
768 	vm_put_physical_page(virtualAddress);
769 }
770 
771 
772 /*!
773 	This is a background thread that wakes up periodically (every 100 ms)
774 	and moves some pages from the free queue over to the clear queue.
775 	Given enough time, it will clear out all pages from the free queue - we
776 	could probably slow it down after having reached a certain threshold.
777 */
778 static int32
779 page_scrubber(void *unused)
780 {
781 	(void)(unused);
782 
783 	TRACE(("page_scrubber starting...\n"));
784 
785 	for (;;) {
786 		snooze(100000); // 100ms
787 
788 		if (sFreePageQueue.count > 0) {
789 			cpu_status state;
790 			vm_page *page[SCRUB_SIZE];
791 			int32 i, scrubCount;
792 
793 			// get some pages from the free queue
794 
795 			state = disable_interrupts();
796 			acquire_spinlock(&sPageLock);
797 
798 			// Since we temporarily remove pages from the free pages reserve,
799 			// we must make sure we don't cause a violation of the page
800 			// reservation guarantee. The following is usually stricter than
801 			// necessary, because we don't have information on how many of the
802 			// reserved pages have already been allocated.
803 			scrubCount = SCRUB_SIZE;
804 			uint32 freeCount = free_page_queue_count();
805 			if (freeCount < sReservedPages)
806 				scrubCount = 0;
807 			else if ((uint32)scrubCount > freeCount - sReservedPages)
808 				scrubCount = freeCount - sReservedPages;
809 
810 			for (i = 0; i < scrubCount; i++) {
811 				page[i] = dequeue_page(&sFreePageQueue);
812 				if (page[i] == NULL)
813 					break;
814 				page[i]->state = PAGE_STATE_BUSY;
815 			}
816 
817 			scrubCount = i;
818 
819 			if (scrubCount > 0) {
820 				T(ScrubbingPages(scrubCount));
821 			}
822 
823 			release_spinlock(&sPageLock);
824 			restore_interrupts(state);
825 
826 			// clear them
827 
828 			for (i = 0; i < scrubCount; i++) {
829 				clear_page(page[i]);
830 			}
831 
832 			state = disable_interrupts();
833 			acquire_spinlock(&sPageLock);
834 
835 			// and put them into the clear queue
836 
837 			for (i = 0; i < scrubCount; i++) {
838 				page[i]->state = PAGE_STATE_CLEAR;
839 				enqueue_page(&sClearPageQueue, page[i]);
840 			}
841 
842 			if (scrubCount > 0) {
843 				T(ScrubbedPages(scrubCount));
844 			}
845 
846 			release_spinlock(&sPageLock);
847 			restore_interrupts(state);
848 		}
849 	}
850 
851 	return 0;
852 }
853 
854 
855 static status_t
856 write_page(vm_page *page, bool fsReenter)
857 {
858 	vm_store *store = page->cache->store;
859 	size_t length = B_PAGE_SIZE;
860 	status_t status;
861 	iovec vecs[1];
862 
863 	TRACE(("write_page(page = %p): offset = %Ld\n", page, (off_t)page->cache_offset << PAGE_SHIFT));
864 
865 	status = vm_get_physical_page(page->physical_page_number * B_PAGE_SIZE,
866 		(addr_t *)&vecs[0].iov_base, PHYSICAL_PAGE_CAN_WAIT);
867 	if (status < B_OK)
868 		panic("could not map page!");
869 	vecs->iov_len = B_PAGE_SIZE;
870 
871 	status = store->ops->write(store, (off_t)page->cache_offset << PAGE_SHIFT,
872 		vecs, 1, &length, fsReenter);
873 
874 	vm_put_physical_page((addr_t)vecs[0].iov_base);
875 #if 0
876 	if (status < B_OK) {
877 		dprintf("write_page(page = %p): offset = %lx, status = %ld\n",
878 			page, page->cache_offset, status);
879 	}
880 #endif
881 	if (status == B_OK && length == 0)
882 		status = B_ERROR;
883 
884 	return status;
885 }
886 
887 
888 static void
889 remove_page_marker(struct vm_page &marker)
890 {
891 	if (marker.state == PAGE_STATE_UNUSED)
892 		return;
893 
894 	page_queue *queue;
895 	vm_page *page;
896 
897 	switch (marker.state) {
898 		case PAGE_STATE_ACTIVE:
899 			queue = &sActivePageQueue;
900 			break;
901 		case PAGE_STATE_INACTIVE:
902 			queue = &sInactivePageQueue;
903 			break;
904 		case PAGE_STATE_MODIFIED:
905 			queue = &sModifiedPageQueue;
906 			break;
907 
908 		default:
909 			return;
910 	}
911 
912 	remove_page_from_queue(queue, &marker);
913 	marker.state = PAGE_STATE_UNUSED;
914 }
915 
916 
917 static vm_page *
918 next_modified_page(struct vm_page &marker)
919 {
920 	InterruptsSpinLocker locker(sPageLock);
921 	vm_page *page;
922 
923 	if (marker.state == PAGE_STATE_MODIFIED) {
924 		page = marker.queue_next;
925 		remove_page_from_queue(&sModifiedPageQueue, &marker);
926 		marker.state = PAGE_STATE_UNUSED;
927 	} else
928 		page = sModifiedPageQueue.head;
929 
930 	for (; page != NULL; page = page->queue_next) {
931 		if (page->type != PAGE_TYPE_DUMMY && page->state != PAGE_STATE_BUSY) {
932 			// insert marker
933 			marker.state = PAGE_STATE_MODIFIED;
934 			insert_page_after(&sModifiedPageQueue, page, &marker);
935 			return page;
936 		}
937 	}
938 
939 	return NULL;
940 }
941 
942 
943 /*!	The page writer continuously takes some pages from the modified
944 	queue, writes them back, and moves them back to the active queue.
945 	It runs in its own thread, and is only there to keep the number
946 	of modified pages low, so that more pages can be reused at a
947 	lower cost.
948 */
949 status_t
950 page_writer(void* /*unused*/)
951 {
952 	vm_page marker;
953 	marker.type = PAGE_TYPE_DUMMY;
954 	marker.cache = NULL;
955 	marker.state = PAGE_STATE_UNUSED;
956 
957 	while (true) {
958 		if (sModifiedPageQueue.count - sModifiedTemporaryPages < 1024) {
959 			int32 count = 0;
960 			get_sem_count(sWriterWaitSem, &count);
961 			if (count == 0)
962 				count = 1;
963 
964 			acquire_sem_etc(sWriterWaitSem, count, B_RELATIVE_TIMEOUT, 3000000);
965 				// wake up at least every 3 seconds when no one triggers us
966 		}
967 
968 		const uint32 kNumPages = 32;
969 		ConditionVariable<vm_page> busyConditions[kNumPages];
970 		union {
971 			vm_page *pages[kNumPages];
972 			vm_cache *caches[kNumPages];
973 		} u;
974 		uint32 numPages = 0;
975 
976 		// TODO: once the I/O scheduler is there, we should write
977 		// a lot more pages back.
978 		// TODO: make this laptop friendly, too (ie. only start doing
979 		// something if someone else did something or there is really
980 		// enough to do).
981 
982 		// collect pages to be written
983 
984 		while (numPages < kNumPages) {
985 			vm_page *page = next_modified_page(marker);
986 			if (page == NULL)
987 				break;
988 
989 			PageCacheLocker cacheLocker(page, false);
990 			if (!cacheLocker.IsLocked())
991 				continue;
992 
993 			vm_cache *cache = page->cache;
994 			// TODO: write back temporary ones as soon as we have swap file support
995 			if (cache->temporary/* && vm_low_memory_state() == B_NO_LOW_MEMORY*/)
996 				continue;
997 
998 			if (cache->store->ops->acquire_unreferenced_ref != NULL) {
999 				// we need our own reference to the store, as it might
1000 				// currently be destructed
1001 				if (cache->store->ops->acquire_unreferenced_ref(cache->store)
1002 						!= B_OK) {
1003 					cacheLocker.Unlock();
1004 					thread_yield(true);
1005 					continue;
1006 				}
1007 			}
1008 
1009 			InterruptsSpinLocker locker(sPageLock);
1010 
1011 			// the page state might have changed while we were locking the cache
1012 			if (page->state != PAGE_STATE_MODIFIED)
1013 				continue;
1014 
1015 			remove_page_from_queue(&sModifiedPageQueue, page);
1016 			page->state = PAGE_STATE_BUSY;
1017 			page->busy_writing = true;
1018 
1019 			busyConditions[numPages].Publish(page, "page");
1020 
1021 			locker.Unlock();
1022 
1023 			//dprintf("write page %p, cache %p (%ld)\n", page, page->cache, page->cache->ref_count);
1024 			vm_clear_map_flags(page, PAGE_MODIFIED);
1025 			vm_cache_acquire_ref(cache);
1026 			u.pages[numPages++] = page;
1027 		}
1028 
1029 		if (numPages == 0)
1030 			continue;
1031 
1032 		// write pages to disk
1033 
1034 		// TODO: put this as requests into the I/O scheduler
1035 		status_t writeStatus[kNumPages];
1036 		for (uint32 i = 0; i < numPages; i++) {
1037 			writeStatus[i] = write_page(u.pages[i], false);
1038 		}
1039 
1040 		// mark pages depending on whether they could be written or not
1041 
1042 		for (uint32 i = 0; i < numPages; i++) {
1043 			vm_cache *cache = u.pages[i]->cache;
1044 			mutex_lock(&cache->lock);
1045 
1046 			if (writeStatus[i] == B_OK) {
1047 				// put it into the active queue
1048 				InterruptsSpinLocker locker(sPageLock);
1049 				move_page_to_active_or_inactive_queue(u.pages[i], true);
1050 				u.pages[i]->busy_writing = false;
1051 			} else {
1052 				// We don't have to put the PAGE_MODIFIED bit back, as it's
1053 				// still in the modified pages list.
1054 				{
1055 					InterruptsSpinLocker locker(sPageLock);
1056 					u.pages[i]->state = PAGE_STATE_MODIFIED;
1057 					enqueue_page(&sModifiedPageQueue, u.pages[i]);
1058 				}
1059 				if (!u.pages[i]->busy_writing) {
1060 					// someone has cleared the busy_writing flag which tells
1061 					// us our page has gone invalid
1062 					vm_cache_remove_page(cache, u.pages[i]);
1063 				} else
1064 					u.pages[i]->busy_writing = false;
1065 			}
1066 
1067 			busyConditions[i].Unpublish();
1068 
1069 			u.caches[i] = cache;
1070 			mutex_unlock(&cache->lock);
1071 		}
1072 
1073 		for (uint32 i = 0; i < numPages; i++) {
1074 			vm_cache *cache = u.caches[i];
1075 
1076 			// We release the cache references after all pages were made
1077 			// unbusy again - otherwise releasing a vnode could deadlock.
1078 			if (cache->store->ops->release_ref != NULL)
1079 				cache->store->ops->release_ref(cache->store);
1080 			vm_cache_release_ref(cache);
1081 		}
1082 	}
1083 
1084 	remove_page_marker(marker);
1085 	return B_OK;
1086 }
1087 
1088 
1089 static vm_page *
1090 find_page_candidate(struct vm_page &marker, bool stealActive)
1091 {
1092 	InterruptsSpinLocker locker(sPageLock);
1093 	page_queue *queue;
1094 	vm_page *page;
1095 
1096 	switch (marker.state) {
1097 		case PAGE_STATE_ACTIVE:
1098 			queue = &sActivePageQueue;
1099 			page = marker.queue_next;
1100 			remove_page_from_queue(queue, &marker);
1101 			marker.state = PAGE_STATE_UNUSED;
1102 			break;
1103 		case PAGE_STATE_INACTIVE:
1104 			queue = &sInactivePageQueue;
1105 			page = marker.queue_next;
1106 			remove_page_from_queue(queue, &marker);
1107 			marker.state = PAGE_STATE_UNUSED;
1108 			break;
1109 		default:
1110 			queue = &sInactivePageQueue;
1111 			page = sInactivePageQueue.head;
1112 			if (page == NULL && stealActive) {
1113 				queue = &sActivePageQueue;
1114 				page = sActivePageQueue.head;
1115 			}
1116 			break;
1117 	}
1118 
1119 	while (page != NULL) {
1120 		if (page->type != PAGE_TYPE_DUMMY
1121 			&& (page->state == PAGE_STATE_INACTIVE
1122 				|| (stealActive && page->state == PAGE_STATE_ACTIVE
1123 					&& page->wired_count == 0))) {
1124 			// insert marker
1125 			marker.state = queue == &sActivePageQueue ? PAGE_STATE_ACTIVE : PAGE_STATE_INACTIVE;
1126 			insert_page_after(queue, page, &marker);
1127 			return page;
1128 		}
1129 
1130 		page = page->queue_next;
1131 		if (page == NULL && stealActive && queue != &sActivePageQueue) {
1132 			queue = &sActivePageQueue;
1133 			page = sActivePageQueue.head;
1134 		}
1135 	}
1136 
1137 	return NULL;
1138 }
1139 
1140 
1141 static bool
1142 steal_page(vm_page *page, bool stealActive)
1143 {
1144 	// try to lock the page's cache
1145 
1146 	class PageCacheTryLocker {
1147 	public:
1148 		PageCacheTryLocker(vm_page *page)
1149 			:
1150 			fIsLocked(false),
1151 			fOwnsLock(false)
1152 		{
1153 			fCache = vm_cache_acquire_page_cache_ref(page);
1154 			if (fCache != NULL) {
1155 				if (fCache->lock.holder != thread_get_current_thread_id()) {
1156 					if (mutex_trylock(&fCache->lock) != B_OK)
1157 						return;
1158 
1159 					fOwnsLock = true;
1160 				}
1161 
1162 				if (fCache == page->cache)
1163 					fIsLocked = true;
1164 			}
1165 		}
1166 
1167 		~PageCacheTryLocker()
1168 		{
1169 			if (fOwnsLock)
1170 				mutex_unlock(&fCache->lock);
1171 			if (fCache != NULL)
1172 				vm_cache_release_ref(fCache);
1173 		}
1174 
1175 		bool IsLocked() { return fIsLocked; }
1176 
1177 	private:
1178 		vm_cache *fCache;
1179 		bool fIsLocked;
1180 		bool fOwnsLock;
1181 	} cacheLocker(page);
1182 
1183 	if (!cacheLocker.IsLocked())
1184 		return false;
1185 
1186 	// check again if that page is still a candidate
1187 	if (page->state != PAGE_STATE_INACTIVE
1188 		&& (!stealActive || page->state != PAGE_STATE_ACTIVE
1189 			|| page->wired_count != 0))
1190 		return false;
1191 
1192 	// check for any last-minute changes
1193 	uint32 flags;
1194 	vm_remove_all_page_mappings(page, &flags);
1195 	if ((flags & PAGE_MODIFIED) != 0) {
1196 		// page was modified, don't steal it
1197 		vm_page_set_state(page, PAGE_STATE_MODIFIED);
1198 		return false;
1199 	} else if ((flags & PAGE_ACCESSED) != 0) {
1200 		// page is in active use, don't steal it
1201 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
1202 		return false;
1203 	}
1204 
1205 	// we can now steal this page
1206 
1207 	//dprintf("  steal page %p from cache %p%s\n", page, page->cache,
1208 	//	page->state == PAGE_STATE_INACTIVE ? "" : " (ACTIVE)");
1209 
1210 	vm_cache_remove_page(page->cache, page);
1211 	remove_page_from_queue(page->state == PAGE_STATE_ACTIVE
1212 		? &sActivePageQueue : &sInactivePageQueue, page);
1213 	return true;
1214 }
1215 
1216 
1217 static size_t
1218 steal_pages(vm_page **pages, size_t count, bool reserve)
1219 {
1220 	size_t maxCount = count;
1221 
1222 	while (true) {
1223 		vm_page marker;
1224 		marker.type = PAGE_TYPE_DUMMY;
1225 		marker.cache = NULL;
1226 		marker.state = PAGE_STATE_UNUSED;
1227 
1228 		bool tried = false;
1229 		size_t stolen = 0;
1230 
1231 		while (count > 0) {
1232 			vm_page *page = find_page_candidate(marker, false);
1233 			if (page == NULL)
1234 				break;
1235 
1236 			if (steal_page(page, false)) {
1237 				if (reserve || stolen >= maxCount) {
1238 					InterruptsSpinLocker _(sPageLock);
1239 					enqueue_page(&sFreePageQueue, page);
1240 					page->state = PAGE_STATE_FREE;
1241 
1242 					T(StolenPage());
1243 				} else if (stolen < maxCount) {
1244 					pages[stolen] = page;
1245 				}
1246 				stolen++;
1247 				count--;
1248 			} else
1249 				tried = true;
1250 		}
1251 
1252 		InterruptsSpinLocker locker(sPageLock);
1253 		remove_page_marker(marker);
1254 
1255 		if ((reserve && sReservedPages <= free_page_queue_count())
1256 			|| count == 0
1257 			|| (!reserve && (sInactivePageQueue.count > 0
1258 				|| free_page_queue_count() > sReservedPages)))
1259 			return stolen;
1260 
1261 		if (stolen && !tried && sInactivePageQueue.count > 0) {
1262 			count++;
1263 			continue;
1264 		}
1265 		if (tried) {
1266 			// We tried all potential pages, but one or more couldn't be stolen
1267 			// at that time (likely because their cache was locked). No one
1268 			// else will have any better luck, so we'll just retry a little
1269 			// later.
1270 			// TODO: Think about better strategies. E.g. if our condition
1271 			// variables had timeouts, we could just wait with timeout on
1272 			// the free page queue condition variable, which might
1273 			// succeed earlier.
1274 			locker.Unlock();
1275 			snooze(10000);
1276 			continue;
1277 		}
1278 
1279 		// we need to wait for pages to become inactive
1280 
1281 		ConditionVariableEntry<page_queue> freeConditionEntry;
1282 		sPageDeficit++;
1283 		freeConditionEntry.Add(&sFreePageQueue);
1284 		locker.Unlock();
1285 
1286 		vm_low_memory(count);
1287 		//snooze(50000);
1288 			// sleep for 50ms
1289 
1290 		freeConditionEntry.Wait();
1291 
1292 		locker.Lock();
1293 		sPageDeficit--;
1294 
1295 		if (reserve && sReservedPages <= free_page_queue_count())
1296 			return stolen;
1297 	}
1298 }
1299 
1300 
1301 //	#pragma mark - private kernel API
1302 
1303 
1304 /*!	You need to hold the vm_cache lock when calling this function.
1305 	Note that the cache lock is temporarily released while the pages are written.
1306 */
1307 status_t
1308 vm_page_write_modified_pages(vm_cache *cache, bool fsReenter)
1309 {
1310 	// ToDo: join adjacent pages into one vec list
1311 
1312 	for (vm_page *page = cache->page_list; page; page = page->cache_next) {
1313 		bool dequeuedPage = false;
1314 
1315 		if (page->state == PAGE_STATE_MODIFIED) {
1316 			InterruptsSpinLocker locker(&sPageLock);
1317 			remove_page_from_queue(&sModifiedPageQueue, page);
1318 			dequeuedPage = true;
1319 		} else if (page->state == PAGE_STATE_BUSY
1320 				|| !vm_test_map_modification(page)) {
1321 			continue;
1322 		}
1323 
1324 		page->state = PAGE_STATE_BUSY;
1325 		page->busy_writing = true;
1326 
1327 		ConditionVariable<vm_page> busyCondition;
1328 		busyCondition.Publish(page, "page");
1329 
1330 		// We have a modified page - however, while we're writing it back,
1331 		// the page is still mapped. In order not to lose any changes to the
1332 		// page, we mark it clean before actually writing it back; if writing
1333 		// the page fails for some reason, we just keep it in the modified page
1334 		// list, but that should happen only rarely.
1335 
1336 		// If the page is changed after we cleared the dirty flag, but before we
1337 		// had the chance to write it back, then we'll write it again later -
1338 		// that will probably not happen that often, though.
1339 
1340 		// clear the modified flag
1341 		vm_clear_map_flags(page, PAGE_MODIFIED);
1342 
1343 		mutex_unlock(&cache->lock);
1344 		status_t status = write_page(page, fsReenter);
1345 		mutex_lock(&cache->lock);
1346 
1347 		InterruptsSpinLocker locker(&sPageLock);
1348 
1349 		if (status == B_OK) {
1350 			// put it into the active/inactive queue
1351 			move_page_to_active_or_inactive_queue(page, dequeuedPage);
1352 			page->busy_writing = false;
1353 		} else {
1354 			// We don't have to put the PAGE_MODIFIED bit back, as it's still
1355 			// in the modified pages list.
1356 			if (dequeuedPage) {
1357 				page->state = PAGE_STATE_MODIFIED;
1358 				enqueue_page(&sModifiedPageQueue, page);
1359 			}
1360 
1361 			if (!page->busy_writing) {
1362 				// someone has cleared the busy_writing flag which tells
1363 				// us our page has gone invalid
1364 				vm_cache_remove_page(cache, page);
1365 			} else {
1366 				if (!dequeuedPage)
1367 					set_page_state_nolock(page, PAGE_STATE_MODIFIED);
1368 
1369 				page->busy_writing = false;
1370 			}
1371 		}
1372 
1373 		busyCondition.Unpublish();
1374 	}
1375 
1376 	return B_OK;
1377 }
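// A minimal caller sketch for vm_page_write_modified_pages() (illustrative,
// not from the original source): the cache lock must be held on entry, and
// since it is temporarily dropped around the actual writes, the caller must
// not assume the cache's page list stayed unchanged across the call.
//
//	mutex_lock(&cache->lock);
//	vm_page_write_modified_pages(cache, false);
//	mutex_unlock(&cache->lock);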
1378 
1379 
1380 /*!	Schedules the page writer to write back the specified \a page.
1381 	Note, however, that it might not do this immediately, and it can well
1382 	take several seconds until the page is actually written out.
1383 */
1384 void
1385 vm_page_schedule_write_page(vm_page *page)
1386 {
1387 	ASSERT(page->state == PAGE_STATE_MODIFIED);
1388 
1389 	vm_page_requeue(page, false);
1390 
1391 	release_sem_etc(sWriterWaitSem, 1, B_DO_NOT_RESCHEDULE);
1392 }
1393 
1394 
1395 void
1396 vm_page_init_num_pages(kernel_args *args)
1397 {
1398 	uint32 i;
1399 
1400 	// calculate the size of memory by looking at the physical_memory_range array
1401 	addr_t physicalPagesEnd = 0;
1402 	sPhysicalPageOffset = args->physical_memory_range[0].start / B_PAGE_SIZE;
1403 
1404 	for (i = 0; i < args->num_physical_memory_ranges; i++) {
1405 		physicalPagesEnd = (args->physical_memory_range[i].start
1406 			+ args->physical_memory_range[i].size) / B_PAGE_SIZE;
1407 	}
1408 
1409 	TRACE(("first phys page = 0x%lx, end 0x%x\n", sPhysicalPageOffset,
1410 		physicalPagesEnd));
1411 
1412 	sNumPages = physicalPagesEnd - sPhysicalPageOffset;
1413 }
1414 
1415 
1416 status_t
1417 vm_page_init(kernel_args *args)
1418 {
1419 	TRACE(("vm_page_init: entry\n"));
1420 
1421 	// map in the new free page table
1422 	sPages = (vm_page *)vm_allocate_early(args, sNumPages * sizeof(vm_page),
1423 		~0L, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1424 
1425 	TRACE(("vm_init: putting free_page_table @ %p, # ents %d (size 0x%x)\n",
1426 		sPages, sNumPages, (unsigned int)(sNumPages * sizeof(vm_page))));
1427 
1428 	// initialize the free page table
1429 	for (uint32 i = 0; i < sNumPages; i++) {
1430 		sPages[i].physical_page_number = sPhysicalPageOffset + i;
1431 		sPages[i].type = PAGE_TYPE_PHYSICAL;
1432 		sPages[i].state = PAGE_STATE_FREE;
1433 		new(&sPages[i].mappings) vm_page_mappings();
1434 		sPages[i].wired_count = 0;
1435 		sPages[i].usage_count = 0;
1436 		sPages[i].busy_writing = false;
1437 		sPages[i].cache = NULL;
1438 		#ifdef DEBUG_PAGE_QUEUE
1439 			sPages[i].queue = NULL;
1440 		#endif
1441 		#ifdef DEBUG_PAGE_CACHE_TRANSITIONS
1442 			sPages[i].debug_flags = 0;
1443 			sPages[i].collided_page = NULL;
1444 		#endif	// DEBUG_PAGE_CACHE_TRANSITIONS
1445 		enqueue_page(&sFreePageQueue, &sPages[i]);
1446 	}
1447 
1448 	TRACE(("initialized table\n"));
1449 
1450 	// mark some of the page ranges inuse
1451 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
1452 		vm_mark_page_range_inuse(args->physical_allocated_range[i].start / B_PAGE_SIZE,
1453 			args->physical_allocated_range[i].size / B_PAGE_SIZE);
1454 	}
1455 
1456 	TRACE(("vm_page_init: exit\n"));
1457 
1458 	return B_OK;
1459 }
1460 
1461 
1462 status_t
1463 vm_page_init_post_area(kernel_args *args)
1464 {
1465 	void *dummy;
1466 
1467 	dummy = sPages;
1468 	create_area("page structures", &dummy, B_EXACT_ADDRESS,
1469 		PAGE_ALIGN(sNumPages * sizeof(vm_page)), B_ALREADY_WIRED,
1470 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1471 
1472 	add_debugger_command("page_stats", &dump_page_stats, "Dump statistics about page usage");
1473 	add_debugger_command("page", &dump_page, "Dump page info");
1474 	add_debugger_command("page_queue", &dump_page_queue, "Dump page queue");
1475 	add_debugger_command("find_page", &find_page,
1476 		"Find out which queue a page is actually in");
1477 
1478 	return B_OK;
1479 }
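// Example invocations of the debugger commands registered above (the
// addresses are made up for illustration):
//
//	page_stats                - dump the per-state page counters
//	page 0x80123450           - dump the vm_page structure at that address
//	page_queue modified list  - dump the modified queue and list its pages
//	find_page 0x80123450      - find out which queue a page structure is in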
1480 
1481 
1482 status_t
1483 vm_page_init_post_thread(kernel_args *args)
1484 {
1485 	new (&sFreePageCondition) ConditionVariable<page_queue>;
1486 	sFreePageCondition.Publish(&sFreePageQueue, "free page");
1487 
1488 	// create a kernel thread to clear out pages
1489 
1490 	thread_id thread = spawn_kernel_thread(&page_scrubber, "page scrubber",
1491 		B_LOWEST_ACTIVE_PRIORITY, NULL);
1492 	send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE);
1493 
1494 	// start page writer
1495 
1496 	sWriterWaitSem = create_sem(0, "page writer");
1497 
1498 	thread = spawn_kernel_thread(&page_writer, "page writer",
1499 		B_NORMAL_PRIORITY + 1, NULL);
1500 	send_signal_etc(thread, SIGCONT, B_DO_NOT_RESCHEDULE);
1501 
1502 	return B_OK;
1503 }
1504 
1505 
1506 status_t
1507 vm_mark_page_inuse(addr_t page)
1508 {
1509 	return vm_mark_page_range_inuse(page, 1);
1510 }
1511 
1512 
1513 status_t
1514 vm_mark_page_range_inuse(addr_t startPage, addr_t length)
1515 {
1516 	TRACE(("vm_mark_page_range_inuse: start 0x%lx, len 0x%lx\n",
1517 		startPage, length));
1518 
1519 	if (sPhysicalPageOffset > startPage) {
1520 		TRACE(("vm_mark_page_range_inuse: start page %ld is before free list\n",
1521 			startPage));
1522 		return B_BAD_VALUE;
1523 	}
1524 	startPage -= sPhysicalPageOffset;
1525 	if (startPage + length > sNumPages) {
1526 		TRACE(("vm_mark_page_range_inuse: range would extend past free list\n"));
1527 		return B_BAD_VALUE;
1528 	}
1529 
1530 	cpu_status state = disable_interrupts();
1531 	acquire_spinlock(&sPageLock);
1532 
1533 	for (addr_t i = 0; i < length; i++) {
1534 		vm_page *page = &sPages[startPage + i];
1535 		switch (page->state) {
1536 			case PAGE_STATE_FREE:
1537 			case PAGE_STATE_CLEAR:
1538 				set_page_state_nolock(page, PAGE_STATE_UNUSED);
1539 				break;
1540 			case PAGE_STATE_WIRED:
1541 				break;
1542 			case PAGE_STATE_ACTIVE:
1543 			case PAGE_STATE_INACTIVE:
1544 			case PAGE_STATE_BUSY:
1545 			case PAGE_STATE_MODIFIED:
1546 			case PAGE_STATE_UNUSED:
1547 			default:
1548 				// uh
1549 				dprintf("vm_mark_page_range_inuse: page 0x%lx in non-free state %d!\n",
1550 					startPage + i, page->state);
1551 				break;
1552 		}
1553 	}
1554 
1555 	release_spinlock(&sPageLock);
1556 	restore_interrupts(state);
1557 
1558 	return B_OK;
1559 }
1560 
1561 
1562 /*!	Unreserve pages previously reserved with vm_page_reserve_pages().
1563 	Note, you specify the same \a count here that you specified when
1564 	reserving the pages - you don't need to keep track how many pages
1565 	reserving the pages - you don't need to keep track of how many pages
1566 */
1567 void
1568 vm_page_unreserve_pages(uint32 count)
1569 {
1570 	if (count == 0)
1571 		return;
1572 
1573 	InterruptsSpinLocker locker(sPageLock);
1574 	ASSERT(sReservedPages >= count);
1575 
1576 	T(UnreservePages(count));
1577 
1578 	sReservedPages -= count;
1579 
1580 	if (sPageDeficit > 0)
1581 		sFreePageCondition.NotifyAll();
1582 }
1583 
1584 
1585 /*!	With this call, you can reserve a number of free pages in the system.
1586 	They will only be handed out to someone who has actually reserved them.
1587 	This call returns as soon as the number of requested pages has been
1588 	reached.
1589 */
1590 void
1591 vm_page_reserve_pages(uint32 count)
1592 {
1593 	if (count == 0)
1594 		return;
1595 
1596 	InterruptsSpinLocker locker(sPageLock);
1597 
1598 	T(ReservePages(count));
1599 
1600 	sReservedPages += count;
1601 	size_t freePages = free_page_queue_count();
1602 	if (sReservedPages <= freePages)
1603 		return;
1604 
1605 	locker.Unlock();
1606 
1607 	steal_pages(NULL, count + 1, true);
1608 		// we get one more, just in case we can do something someone
1609 		// else can't
1610 }
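// A minimal usage sketch of the reservation API (illustrative, not from the
// original source): reserve up front, allocate with the reserved flag set,
// and unreserve the same count when done - independently of how many pages
// were actually allocated.
//
//	vm_page_reserve_pages(2);
//	vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
//	// ... use the page ...
//	vm_page_unreserve_pages(2);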
1611 
1612 
1613 vm_page *
1614 vm_page_allocate_page(int pageState, bool reserved)
1615 {
1616 	ConditionVariableEntry<page_queue> freeConditionEntry;
1617 	page_queue *queue;
1618 	page_queue *otherQueue;
1619 
1620 	switch (pageState) {
1621 		case PAGE_STATE_FREE:
1622 			queue = &sFreePageQueue;
1623 			otherQueue = &sClearPageQueue;
1624 			break;
1625 		case PAGE_STATE_CLEAR:
1626 			queue = &sClearPageQueue;
1627 			otherQueue = &sFreePageQueue;
1628 			break;
1629 		default:
1630 			return NULL; // invalid
1631 	}
1632 
1633 	InterruptsSpinLocker locker(sPageLock);
1634 
1635 	T(AllocatePage(reserved));
1636 
1637 	vm_page *page = NULL;
1638 	while (true) {
1639 		if (reserved || sReservedPages < free_page_queue_count()) {
1640 			page = dequeue_page(queue);
1641 			if (page == NULL) {
1642 #ifdef DEBUG
1643 				if (queue->count != 0)
1644 					panic("queue %p corrupted, count = %d\n", queue, queue->count);
1645 #endif
1646 
1647 				// if the primary queue was empty, grab the page from the
1648 				// secondary queue
1649 				page = dequeue_page(otherQueue);
1650 			}
1651 		}
1652 
1653 		if (page != NULL)
1654 			break;
1655 
1656 		if (reserved)
1657 			panic("Had reserved page, but there is none!");
1658 
1659 		// steal one from the inactive list
1660 		locker.Unlock();
1661 		size_t stolen = steal_pages(&page, 1, false);
1662 		locker.Lock();
1663 
1664 		if (stolen > 0)
1665 			break;
1666 	}
1667 
1668 	if (page->cache != NULL)
1669 		panic("supposed to be free page %p has cache\n", page);
1670 
1671 	int oldPageState = page->state;
1672 	page->state = PAGE_STATE_BUSY;
1673 	page->usage_count = 2;
1674 
1675 	enqueue_page(&sActivePageQueue, page);
1676 
1677 	locker.Unlock();
1678 
1679 	// if a clear page was requested but the one we got isn't cleared, zero it now
1680 	if (pageState == PAGE_STATE_CLEAR && oldPageState != PAGE_STATE_CLEAR)
1681 		clear_page(page);
1682 
1683 	return page;
1684 }
1685 
1686 
1687 /*!	Allocates a number of pages and puts their pointers into the provided
1688 	array. All pages are marked busy.
1689 	Returns B_OK on success, and B_NO_MEMORY when there aren't any free
1690 	pages left to allocate.
1691 */
1692 status_t
1693 vm_page_allocate_pages(int pageState, vm_page **pages, uint32 numPages)
1694 {
1695 	uint32 i;
1696 
1697 	for (i = 0; i < numPages; i++) {
1698 		pages[i] = vm_page_allocate_page(pageState, false);
1699 		if (pages[i] == NULL) {
1700 			// allocation failed, we need to free what we already have
1701 			while (i-- > 0)
1702 				vm_page_set_state(pages[i], pageState);
1703 
1704 			return B_NO_MEMORY;
1705 		}
1706 	}
1707 
1708 	return B_OK;
1709 }
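// Hypothetical caller sketch for vm_page_allocate_pages(): allocate a small
// batch of busy pages and, mirroring the cleanup path above, return them via
// vm_page_set_state() once they are no longer needed.
//
//	vm_page *pages[4];
//	if (vm_page_allocate_pages(PAGE_STATE_CLEAR, pages, 4) == B_OK) {
//		// ... map and use the pages ...
//		for (uint32 i = 0; i < 4; i++)
//			vm_page_set_state(pages[i], PAGE_STATE_FREE);
//	}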
1710 
1711 
1712 vm_page *
1713 vm_page_allocate_page_run(int pageState, addr_t length)
1714 {
1715 	vm_page *firstPage = NULL;
1716 	uint32 start = 0;
1717 
1718 	InterruptsSpinLocker locker(sPageLock);
1719 
1720 	if (sFreePageQueue.count + sClearPageQueue.count - sReservedPages < length) {
1721 		// TODO: add more tries, ie. free some inactive, ...
1722 		// no free space
1723 		return NULL;
1724 	}
1725 
1726 	for (;;) {
1727 		bool foundRun = true;
1728 		if (start + length > sNumPages)
1729 			break;
1730 
1731 		uint32 i;
1732 		for (i = 0; i < length; i++) {
1733 			if (sPages[start + i].state != PAGE_STATE_FREE
1734 				&& sPages[start + i].state != PAGE_STATE_CLEAR) {
1735 				foundRun = false;
1736 				i++;
1737 				break;
1738 			}
1739 		}
1740 		if (foundRun) {
1741 			// pull the pages out of the appropriate queues
1742 			for (i = 0; i < length; i++) {
1743 				sPages[start + i].is_cleared
1744 					= sPages[start + i].state == PAGE_STATE_CLEAR;
1745 				set_page_state_nolock(&sPages[start + i], PAGE_STATE_BUSY);
1746 				sPages[start + i].usage_count = 2;
1747 			}
1748 			firstPage = &sPages[start];
1749 			break;
1750 		} else {
1751 			start += i;
1752 		}
1753 	}
1754 
1755 	T(AllocatePageRun(length));
1756 
1757 	locker.Unlock();
1758 
1759 	if (firstPage != NULL && pageState == PAGE_STATE_CLEAR) {
1760 		for (uint32 i = 0; i < length; i++) {
1761 			if (!sPages[start + i].is_cleared)
1762 	 			clear_page(&sPages[start + i]);
1763 		}
1764 	}
1765 
1766 	return firstPage;
1767 }
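// Illustrative sketch (assuming a 4 page physically contiguous buffer is
// wanted): the returned vm_page corresponds to the first page of the run,
// and the run occupies consecutive entries in the page array.
//
//	vm_page *run = vm_page_allocate_page_run(PAGE_STATE_CLEAR, 4);
//	if (run != NULL) {
//		addr_t physicalBase = run->physical_page_number * B_PAGE_SIZE;
//		// ... use the physically contiguous range, then release it:
//		for (uint32 i = 0; i < 4; i++)
//			vm_page_set_state(run + i, PAGE_STATE_FREE);
//	}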
1768 
1769 
1770 vm_page *
1771 vm_page_at_index(int32 index)
1772 {
1773 	return &sPages[index];
1774 }
1775 
1776 
1777 vm_page *
1778 vm_lookup_page(addr_t pageNumber)
1779 {
1780 	if (pageNumber < sPhysicalPageOffset)
1781 		return NULL;
1782 
1783 	pageNumber -= sPhysicalPageOffset;
1784 	if (pageNumber >= sNumPages)
1785 		return NULL;
1786 
1787 	return &sPages[pageNumber];
1788 }
1789 
1790 
1791 /*!	Free the page that belonged to a certain cache.
1792 	You can use vm_page_set_state() manually if you prefer, but only
1793 	if the page's state does not equal PAGE_STATE_MODIFIED.
1794 */
1795 void
1796 vm_page_free(vm_cache *cache, vm_page *page)
1797 {
1798 	InterruptsSpinLocker _(sPageLock);
1799 
1800 	if (page->cache == NULL && page->state == PAGE_STATE_MODIFIED
1801 		&& cache->temporary)
1802 		sModifiedTemporaryPages--;
1803 
1804 	set_page_state_nolock(page, PAGE_STATE_FREE);
1805 }
1806 
1807 
1808 status_t
1809 vm_page_set_state(vm_page *page, int pageState)
1810 {
1811 	InterruptsSpinLocker _(sPageLock);
1812 
1813 	return set_page_state_nolock(page, pageState);
1814 }
1815 
1816 
1817 /*!	Moves a page to either the tail or the head of its current queue,
1818 	depending on \a tail.
1819 */
1820 void
1821 vm_page_requeue(struct vm_page *page, bool tail)
1822 {
1823 	InterruptsSpinLocker _(sPageLock);
1824 	page_queue *queue = NULL;
1825 
1826 	switch (page->state) {
1827 		case PAGE_STATE_BUSY:
1828 		case PAGE_STATE_ACTIVE:
1829 		case PAGE_STATE_WIRED:
1830 		case PAGE_STATE_UNUSED:
1831 			queue = &sActivePageQueue;
1832 			break;
1833 		case PAGE_STATE_INACTIVE:
1834 			queue = &sInactivePageQueue;
1835 			break;
1836 		case PAGE_STATE_MODIFIED:
1837 			queue = &sModifiedPageQueue;
1838 			break;
1839 		case PAGE_STATE_FREE:
1840 			queue = &sFreePageQueue;
1841 			break;
1842 		case PAGE_STATE_CLEAR:
1843 			queue = &sClearPageQueue;
1844 			break;
1845 		default:
1846 			panic("vm_page_requeue: vm_page %p in invalid state %d\n",
1847 				page, page->state);
1848 			break;
1849 	}
1850 
1851 	remove_page_from_queue(queue, page);
1852 
1853 	if (tail)
1854 		enqueue_page(queue, page);
1855 	else
1856 		enqueue_page_to_head(queue, page);
1857 }
1858 
1859 
1860 size_t
1861 vm_page_num_pages(void)
1862 {
1863 	return sNumPages;
1864 }
1865 
1866 
1867 /*! There is a subtle distinction between the page counts returned by
1868 	this function and vm_page_num_free_pages():
1869 	The latter returns the number of pages that are completely uncommitted,
1870 	whereas this one also counts pages that could be made available by
1871 	reclaiming them (i.e. it factors in things like cache pages as
1872 	available).
1873 */
1874 size_t
1875 vm_page_num_available_pages(void)
1876 {
1877 	return vm_available_memory() / B_PAGE_SIZE;
1878 }
1879 
1880 
1881 size_t
1882 vm_page_num_free_pages(void)
1883 {
1884 	size_t reservedPages = sReservedPages;
1885 	size_t count = free_page_queue_count() + sInactivePageQueue.count;
1886 	if (reservedPages > count)
1887 		return 0;
1888 
1889 	return count - reservedPages;
1890 }
1891 
1892