xref: /haiku/src/system/kernel/vm/vm.cpp (revision a381c8a06378de22ff08adf4282b4e3f7e50d250)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <vm.h>
11 
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 #include <OS.h>
18 #include <KernelExport.h>
19 
20 #include <AutoDeleter.h>
21 
22 #include <vm_address_space.h>
23 #include <vm_priv.h>
24 #include <vm_page.h>
25 #include <vm_cache.h>
26 #include <vm_low_memory.h>
27 #include <file_cache.h>
28 #include <heap.h>
29 #include <condition_variable.h>
30 #include <debug.h>
31 #include <console.h>
32 #include <int.h>
33 #include <smp.h>
34 #include <lock.h>
35 #include <thread.h>
36 #include <team.h>
37 #include <util/AutoLock.h>
38 #include <util/khash.h>
39 
40 #include <boot/stage2.h>
41 #include <boot/elf.h>
42 
43 #include <arch/cpu.h>
44 #include <arch/vm.h>
45 
46 #include "vm_store_anonymous_noswap.h"
47 #include "vm_store_device.h"
48 #include "vm_store_null.h"
49 
50 
51 //#define TRACE_VM
52 //#define TRACE_FAULTS
53 #ifdef TRACE_VM
54 #	define TRACE(x) dprintf x
55 #else
56 #	define TRACE(x) ;
57 #endif
58 #ifdef TRACE_FAULTS
59 #	define FTRACE(x) dprintf x
60 #else
61 #	define FTRACE(x) ;
62 #endif
63 
64 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
65 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
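	// ROUNDUP relies on "b" being a power of two (which holds for the page
	// size and the alignments used in this file); for example,
	// ROUNDUP(0x1234, 0x1000) yields 0x2000, ROUNDOWN(0x1234, 0x1000) 0x1000.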
66 
67 
68 class AddressSpaceReadLocker {
69 public:
70 	AddressSpaceReadLocker(team_id team);
71 	AddressSpaceReadLocker(vm_address_space* space);
72 	AddressSpaceReadLocker();
73 	~AddressSpaceReadLocker();
74 
75 	status_t SetTo(team_id team);
76 	void SetTo(vm_address_space* space);
77 	status_t SetFromArea(area_id areaID, vm_area*& area);
78 
79 	bool IsLocked() const { return fLocked; }
80 	void Unlock();
81 
82 	void Unset();
83 
84 	vm_address_space* AddressSpace() { return fSpace; }
85 
86 private:
87 	vm_address_space* fSpace;
88 	bool	fLocked;
89 };
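
/*!	Illustrative usage -- a rough sketch only, where \c areaID stands for the
	caller's area ID; the real callers follow further down in this file:

		vm_area* area;
		AddressSpaceReadLocker locker;
		if (locker.SetFromArea(areaID, area) != B_OK)
			return B_BAD_VALUE;
		// ... inspect the area with the address space read-locked ...
		// the destructor releases the read lock and the space reference
*/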
90 
91 class AddressSpaceWriteLocker {
92 public:
93 	AddressSpaceWriteLocker(team_id team);
94 	AddressSpaceWriteLocker();
95 	~AddressSpaceWriteLocker();
96 
97 	status_t SetTo(team_id team);
98 	status_t SetFromArea(area_id areaID, vm_area*& area);
99 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
100 		vm_area*& area);
101 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
102 
103 	bool IsLocked() const { return fLocked; }
104 	void Unlock();
105 
106 	void DegradeToReadLock();
107 	void Unset();
108 
109 	vm_address_space* AddressSpace() { return fSpace; }
110 
111 private:
112 	vm_address_space* fSpace;
113 	bool	fLocked;
114 	bool	fDegraded;
115 };
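
/*!	Rough usage sketch (compare vm_reserve_address_range() and
	vm_create_anonymous_area() below): write-lock a team's address space,
	modify its area list, and optionally downgrade afterwards so that the
	remaining, longer-running work only holds the read lock:

		AddressSpaceWriteLocker locker(team);
		if (!locker.IsLocked())
			return B_BAD_TEAM_ID;
		// ... insert or remove areas ...
		locker.DegradeToReadLock();
		// ... e.g. map pages while holding only the read lock ...
*/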
116 
117 class MultiAddressSpaceLocker {
118 public:
119 	MultiAddressSpaceLocker();
120 	~MultiAddressSpaceLocker();
121 
122 	inline status_t AddTeam(team_id team, bool writeLock,
123 		vm_address_space** _space = NULL);
124 	inline status_t AddArea(area_id area, bool writeLock,
125 		vm_address_space** _space = NULL);
126 
127 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
128 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL,
129 		bool checkNoCacheChange = false);
130 
131 	status_t Lock();
132 	void Unlock();
133 	bool IsLocked() const { return fLocked; }
134 
135 	void Unset();
136 
137 private:
138 	struct lock_item {
139 		vm_address_space*	space;
140 		bool				write_lock;
141 	};
142 
143 	bool _ResizeIfNeeded();
144 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
145 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
146 		vm_address_space** _space);
147 
148 	static int _CompareItems(const void* _a, const void* _b);
149 
150 	lock_item*	fItems;
151 	int32		fCapacity;
152 	int32		fCount;
153 	bool		fLocked;
154 };
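
/*!	Sketch of the multi-space locking pattern (vm_clone_area() below is the
	concrete example): register all involved address spaces first, then let
	Lock() acquire their semaphores in a fixed (sorted by ID) order to avoid
	deadlocks:

		MultiAddressSpaceLocker locker;
		vm_address_space* sourceSpace;
		vm_address_space* targetSpace;
		status_t status = locker.AddArea(sourceID, false, &sourceSpace);
		if (status == B_OK)
			status = locker.AddTeam(team, true, &targetSpace);
		if (status == B_OK)
			status = locker.Lock();
		if (status != B_OK)
			return status;
		// the source space is now read-locked, the target space write-locked
*/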
155 
156 
157 class AreaCacheLocking {
158 public:
159 	inline bool Lock(vm_cache* lockable)
160 	{
161 		return false;
162 	}
163 
164 	inline void Unlock(vm_cache* lockable)
165 	{
166 		vm_area_put_locked_cache(lockable);
167 	}
168 };
169 
170 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
171 public:
172 	inline AreaCacheLocker(vm_cache* cache = NULL)
173 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
174 	{
175 	}
176 
177 	inline AreaCacheLocker(vm_area* area)
178 		: AutoLocker<vm_cache, AreaCacheLocking>()
179 	{
180 		SetTo(area);
181 	}
182 
183 	inline void SetTo(vm_area* area)
184 	{
185 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
186 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
187 	}
188 };
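
/*!	AreaCacheLocker is a thin RAII wrapper around vm_area_get_locked_cache()
	and vm_area_put_locked_cache(); a minimal sketch of its use:

		AreaCacheLocker cacheLocker(area);
			// references and locks area->cache
		// ... work with area->cache ...
		// the destructor unlocks the cache and releases the reference
*/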
189 
190 
191 #define REGION_HASH_TABLE_SIZE 1024
192 static area_id sNextAreaID;
193 static hash_table *sAreaHash;
194 static sem_id sAreaHashLock;
195 static mutex sMappingLock;
196 static mutex sAreaCacheLock;
197 
198 static off_t sAvailableMemory;
199 static benaphore sAvailableMemoryLock;
200 
201 // function declarations
202 static void delete_area(vm_address_space *addressSpace, vm_area *area);
203 static vm_address_space *get_address_space_by_area_id(area_id id);
204 static status_t vm_soft_fault(addr_t address, bool isWrite, bool isUser);
205 
206 
207 //	#pragma mark -
208 
209 
210 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
211 	:
212 	fSpace(NULL),
213 	fLocked(false)
214 {
215 	SetTo(team);
216 }
217 
218 
219 //! Takes over the reference of the address space
220 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space)
221 	:
222 	fSpace(NULL),
223 	fLocked(false)
224 {
225 	SetTo(space);
226 }
227 
228 
229 AddressSpaceReadLocker::AddressSpaceReadLocker()
230 	:
231 	fSpace(NULL),
232 	fLocked(false)
233 {
234 }
235 
236 
237 AddressSpaceReadLocker::~AddressSpaceReadLocker()
238 {
239 	Unset();
240 }
241 
242 
243 void
244 AddressSpaceReadLocker::Unset()
245 {
246 	Unlock();
247 	if (fSpace != NULL)
248 		vm_put_address_space(fSpace);
249 }
250 
251 
252 status_t
253 AddressSpaceReadLocker::SetTo(team_id team)
254 {
255 	fSpace = vm_get_address_space_by_id(team);
256 	if (fSpace == NULL)
257 		return B_BAD_TEAM_ID;
258 
259 	acquire_sem_etc(fSpace->sem, READ_COUNT, 0, 0);
260 	fLocked = true;
261 	return B_OK;
262 }
263 
264 
265 //! Takes over the reference of the address space
266 void
267 AddressSpaceReadLocker::SetTo(vm_address_space* space)
268 {
269 	fSpace = space;
270 	acquire_sem_etc(fSpace->sem, READ_COUNT, 0, 0);
271 	fLocked = true;
272 }
273 
274 
275 status_t
276 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
277 {
278 	fSpace = get_address_space_by_area_id(areaID);
279 	if (fSpace == NULL)
280 		return B_BAD_TEAM_ID;
281 
282 	acquire_sem_etc(fSpace->sem, READ_COUNT, 0, 0);
283 
284 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
285 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
286 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
287 
288 	if (area == NULL || area->address_space != fSpace) {
289 		release_sem_etc(fSpace->sem, READ_COUNT, 0);
290 		return B_BAD_VALUE;
291 	}
292 
293 	fLocked = true;
294 	return B_OK;
295 }
296 
297 
298 void
299 AddressSpaceReadLocker::Unlock()
300 {
301 	if (fLocked) {
302 		release_sem_etc(fSpace->sem, READ_COUNT, 0);
303 		fLocked = false;
304 	}
305 }
306 
307 
308 //	#pragma mark -
309 
310 
311 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
312 	:
313 	fSpace(NULL),
314 	fLocked(false),
315 	fDegraded(false)
316 {
317 	SetTo(team);
318 }
319 
320 
321 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
322 	:
323 	fSpace(NULL),
324 	fLocked(false),
325 	fDegraded(false)
326 {
327 }
328 
329 
330 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
331 {
332 	Unset();
333 }
334 
335 
336 void
337 AddressSpaceWriteLocker::Unset()
338 {
339 	Unlock();
340 	if (fSpace != NULL)
341 		vm_put_address_space(fSpace);
342 }
343 
344 
345 status_t
346 AddressSpaceWriteLocker::SetTo(team_id team)
347 {
348 	fSpace = vm_get_address_space_by_id(team);
349 	if (fSpace == NULL)
350 		return B_BAD_TEAM_ID;
351 
352 	acquire_sem_etc(fSpace->sem, WRITE_COUNT, 0, 0);
353 	fLocked = true;
354 	return B_OK;
355 }
356 
357 
358 status_t
359 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
360 {
361 	fSpace = get_address_space_by_area_id(areaID);
362 	if (fSpace == NULL)
363 		return B_BAD_VALUE;
364 
365 	acquire_sem_etc(fSpace->sem, WRITE_COUNT, 0, 0);
366 
367 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
368 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
369 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
370 
371 	if (area == NULL || area->address_space != fSpace) {
372 		release_sem_etc(fSpace->sem, WRITE_COUNT, 0);
373 		return B_BAD_VALUE;
374 	}
375 
376 	fLocked = true;
377 	return B_OK;
378 }
379 
380 
381 status_t
382 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
383 	bool allowKernel, vm_area*& area)
384 {
385 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
386 
387 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
388 	if (area != NULL
389 		&& (area->address_space->id == team
390 			|| (allowKernel && team == vm_kernel_address_space_id()))) {
391 		fSpace = area->address_space;
392 		atomic_add(&fSpace->ref_count, 1);
393 	}
394 
395 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
396 
397 	if (fSpace == NULL)
398 		return B_BAD_VALUE;
399 
400 	// Second try to get the area -- this time with the address space
401 	// write lock held
402 
403 	acquire_sem_etc(fSpace->sem, WRITE_COUNT, 0, 0);
404 
405 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
406 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
407 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
408 
409 	if (area == NULL) {
410 		release_sem_etc(fSpace->sem, WRITE_COUNT, 0);
411 		return B_BAD_VALUE;
412 	}
413 
414 	fLocked = true;
415 	return B_OK;
416 }
417 
418 
419 status_t
420 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
421 	vm_area*& area)
422 {
423 	return SetFromArea(team, areaID, false, area);
424 }
425 
426 
427 void
428 AddressSpaceWriteLocker::Unlock()
429 {
430 	if (fLocked) {
431 		release_sem_etc(fSpace->sem, fDegraded ? READ_COUNT : WRITE_COUNT, 0);
432 		fLocked = false;
433 		fDegraded = false;
434 	}
435 }
436 
437 
438 void
439 AddressSpaceWriteLocker::DegradeToReadLock()
440 {
441 	release_sem_etc(fSpace->sem, WRITE_COUNT - READ_COUNT, 0);
442 	fDegraded = true;
443 }
444 
445 
446 //	#pragma mark -
447 
448 
449 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
450 	:
451 	fItems(NULL),
452 	fCapacity(0),
453 	fCount(0),
454 	fLocked(false)
455 {
456 }
457 
458 
459 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
460 {
461 	Unset();
462 	free(fItems);
463 }
464 
465 
466 /*static*/ int
467 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
468 {
469 	lock_item* a = (lock_item*)_a;
470 	lock_item* b = (lock_item*)_b;
471 	return a->space->id - b->space->id;
472 }
473 
474 
475 bool
476 MultiAddressSpaceLocker::_ResizeIfNeeded()
477 {
478 	if (fCount == fCapacity) {
479 		lock_item* items = (lock_item*)realloc(fItems,
480 			(fCapacity + 4) * sizeof(lock_item));
481 		if (items == NULL)
482 			return false;
483 
484 		fCapacity += 4;
485 		fItems = items;
486 	}
487 
488 	return true;
489 }
490 
491 
492 int32
493 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
494 {
495 	for (int32 i = 0; i < fCount; i++) {
496 		if (fItems[i].space == space)
497 			return i;
498 	}
499 
500 	return -1;
501 }
502 
503 
504 status_t
505 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
506 	bool writeLock, vm_address_space** _space)
507 {
508 	if (!space)
509 		return B_BAD_VALUE;
510 
511 	int32 index = _IndexOfAddressSpace(space);
512 	if (index < 0) {
513 		if (!_ResizeIfNeeded()) {
514 			vm_put_address_space(space);
515 			return B_NO_MEMORY;
516 		}
517 
518 		lock_item& item = fItems[fCount++];
519 		item.space = space;
520 		item.write_lock = writeLock;
521 	} else {
522 
523 		// one reference is enough
524 		vm_put_address_space(space);
525 
526 		fItems[index].write_lock |= writeLock;
527 	}
528 
529 	if (_space != NULL)
530 		*_space = space;
531 
532 	return B_OK;
533 }
534 
535 
536 inline status_t
537 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
538 	vm_address_space** _space)
539 {
540 	return _AddAddressSpace(vm_get_address_space_by_id(team), writeLock,
541 		_space);
542 }
543 
544 
545 inline status_t
546 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
547 	vm_address_space** _space)
548 {
549 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
550 		_space);
551 }
552 
553 
554 void
555 MultiAddressSpaceLocker::Unset()
556 {
557 	Unlock();
558 
559 	for (int32 i = 0; i < fCount; i++)
560 		vm_put_address_space(fItems[i].space);
561 
562 	fCount = 0;
563 }
564 
565 
566 status_t
567 MultiAddressSpaceLocker::Lock()
568 {
569 	ASSERT(!fLocked);
570 
571 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
572 
573 	for (int32 i = 0; i < fCount; i++) {
574 		status_t status = acquire_sem_etc(fItems[i].space->sem,
575 			fItems[i].write_lock ? WRITE_COUNT : READ_COUNT, 0, 0);
576 		if (status < B_OK) {
577 			while (--i >= 0) {
578 				release_sem_etc(fItems[i].space->sem,
579 					fItems[i].write_lock ? WRITE_COUNT : READ_COUNT, 0);
580 			}
581 			return status;
582 		}
583 	}
584 
585 	fLocked = true;
586 	return B_OK;
587 }
588 
589 
590 void
591 MultiAddressSpaceLocker::Unlock()
592 {
593 	if (!fLocked)
594 		return;
595 
596 	for (int32 i = 0; i < fCount; i++) {
597 		release_sem_etc(fItems[i].space->sem,
598 			fItems[i].write_lock ? WRITE_COUNT : READ_COUNT, 0);
599 	}
600 
601 	fLocked = false;
602 }
603 
604 
605 /*!	Adds all address spaces of the areas associated with the given area's cache,
606 	locks them, and locks the cache (including a reference to it). It retries
607 	until the situation is stable (i.e. neither the cache nor its area list
608 	has changed) or an error occurs. If \c checkNoCacheChange is \c true, it
609 	does not return until all areas' \c no_cache_change flags are cleared.
610 */
611 status_t
612 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
613 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
614 	vm_cache** _cache, bool checkNoCacheChange)
615 {
616 	// remember the original state
617 	int originalCount = fCount;
618 	lock_item* originalItems = NULL;
619 	if (fCount > 0) {
620 		originalItems = new(nothrow) lock_item[fCount];
621 		if (originalItems == NULL)
622 			return B_NO_MEMORY;
623 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
624 	}
625 	ArrayDeleter<lock_item> _(originalItems);
626 
627 	// get the cache
628 	vm_cache* cache;
629 	vm_area* area;
630 	status_t error;
631 	{
632 		AddressSpaceReadLocker locker;
633 		error = locker.SetFromArea(areaID, area);
634 		if (error != B_OK)
635 			return error;
636 
637 		cache = vm_area_get_locked_cache(area);
638 	}
639 
640 	while (true) {
641 		// add all areas
642 		vm_area* firstArea = cache->areas;
643 		for (vm_area* current = firstArea; current;
644 				current = current->cache_next) {
645 			error = AddArea(current->id,
646 				current == area ? writeLockThisOne : writeLockOthers);
647 			if (error != B_OK) {
648 				vm_area_put_locked_cache(cache);
649 				return error;
650 			}
651 		}
652 
653 		// unlock the cache and attempt to lock the address spaces
654 		vm_area_put_locked_cache(cache);
655 
656 		error = Lock();
657 		if (error != B_OK)
658 			return error;
659 
660 		// lock the cache again and check whether anything has changed
661 
662 		// check whether the area is gone in the meantime
663 		acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
664 		area = (vm_area *)hash_lookup(sAreaHash, &areaID);
665 		release_sem_etc(sAreaHashLock, READ_COUNT, 0);
666 
667 		if (area == NULL) {
668 			Unlock();
669 			return B_BAD_VALUE;
670 		}
671 
672 		// lock the cache
673 		vm_cache* oldCache = cache;
674 		cache = vm_area_get_locked_cache(area);
675 
676 		// If neither the area's cache nor its area list has changed, we're
677 		// done...
678 		bool done = (cache == oldCache && firstArea == cache->areas);
679 
680 		// ... unless we're supposed to check the areas' "no_cache_change" flag
681 		bool yield = false;
682 		if (done && checkNoCacheChange) {
683 			for (vm_area *tempArea = cache->areas; tempArea != NULL;
684 					tempArea = tempArea->cache_next) {
685 				if (tempArea->no_cache_change) {
686 					done = false;
687 					yield = true;
688 					break;
689 				}
690 			}
691 		}
692 
693 		// If everything looks dandy, return the values.
694 		if (done) {
695 			_area = area;
696 			if (_cache != NULL)
697 				*_cache = cache;
698 			return B_OK;
699 		}
700 
701 		// Restore the original state and try again.
702 
703 		// Unlock the address spaces, but keep the cache locked for the next
704 		// iteration.
705 		Unlock();
706 
707 		// Get an additional reference to the original address spaces.
708 		for (int32 i = 0; i < originalCount; i++)
709 			atomic_add(&originalItems[i].space->ref_count, 1);
710 
711 		// Release all references to the current address spaces.
712 		for (int32 i = 0; i < fCount; i++)
713 			vm_put_address_space(fItems[i].space);
714 
715 		// Copy over the original state.
716 		fCount = originalCount;
717 		if (originalItems != NULL)
718 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
719 
720 		if (yield)
721 			thread_yield(true);
722 	}
723 }
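
/*!	Callers typically pair AddAreaCacheAndLock() with an AreaCacheLocker that
	adopts the already locked cache -- roughly (a sketch only):

		MultiAddressSpaceLocker locker;
		vm_area* area;
		vm_cache* cache;
		status_t status = locker.AddAreaCacheAndLock(areaID, true, true,
			area, &cache);
		if (status != B_OK)
			return status;
		AreaCacheLocker cacheLocker(cache);
			// the cache was returned locked and referenced
*/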
724 
725 
726 //	#pragma mark -
727 
728 
729 static int
730 area_compare(void *_area, const void *key)
731 {
732 	vm_area *area = (vm_area *)_area;
733 	const area_id *id = (const area_id *)key;
734 
735 	if (area->id == *id)
736 		return 0;
737 
738 	return -1;
739 }
740 
741 
742 static uint32
743 area_hash(void *_area, const void *key, uint32 range)
744 {
745 	vm_area *area = (vm_area *)_area;
746 	const area_id *id = (const area_id *)key;
747 
748 	if (area != NULL)
749 		return area->id % range;
750 
751 	return (uint32)*id % range;
752 }
753 
754 
755 static vm_address_space *
756 get_address_space_by_area_id(area_id id)
757 {
758 	vm_address_space* addressSpace = NULL;
759 
760 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
761 
762 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
763 	if (area != NULL) {
764 		addressSpace = area->address_space;
765 		atomic_add(&addressSpace->ref_count, 1);
766 	}
767 
768 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
769 
770 	return addressSpace;
771 }
772 
773 
774 //! You need to have the address space locked when calling this function
775 static vm_area *
776 lookup_area(vm_address_space* addressSpace, area_id id)
777 {
778 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
779 
780 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
781 	if (area != NULL && area->address_space != addressSpace)
782 		area = NULL;
783 
784 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
785 
786 	return area;
787 }
788 
789 
790 static vm_area *
791 create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags)
792 {
793 	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
794 	if (reserved == NULL)
795 		return NULL;
796 
797 	memset(reserved, 0, sizeof(vm_area));
798 	reserved->id = RESERVED_AREA_ID;
799 		// this marks it as reserved space
800 	reserved->protection = flags;
801 	reserved->address_space = addressSpace;
802 
803 	return reserved;
804 }
805 
806 
807 static vm_area *
808 create_area_struct(vm_address_space *addressSpace, const char *name,
809 	uint32 wiring, uint32 protection)
810 {
811 	// restrict the area name to B_OS_NAME_LENGTH
812 	size_t length = strlen(name) + 1;
813 	if (length > B_OS_NAME_LENGTH)
814 		length = B_OS_NAME_LENGTH;
815 
816 	vm_area *area = (vm_area *)malloc(sizeof(vm_area));
817 	if (area == NULL)
818 		return NULL;
819 
820 	area->name = (char *)malloc(length);
821 	if (area->name == NULL) {
822 		free(area);
823 		return NULL;
824 	}
825 	strlcpy(area->name, name, length);
826 
827 	area->id = atomic_add(&sNextAreaID, 1);
828 	area->base = 0;
829 	area->size = 0;
830 	area->protection = protection;
831 	area->wiring = wiring;
832 	area->memory_type = 0;
833 
834 	area->cache = NULL;
835 	area->no_cache_change = 0;
836 	area->cache_offset = 0;
837 
838 	area->address_space = addressSpace;
839 	area->address_space_next = NULL;
840 	area->cache_next = area->cache_prev = NULL;
841 	area->hash_next = NULL;
842 	new (&area->mappings) vm_area_mappings;
843 
844 	return area;
845 }
846 
847 
848 /**	Finds a reserved area that covers the region spanned by \a start and
849  *	\a size, inserts the \a area into that region and makes sure that
850  *	there are reserved regions for the remaining parts.
851  */
852 
853 static status_t
854 find_reserved_area(vm_address_space *addressSpace, addr_t start,
855 	addr_t size, vm_area *area)
856 {
857 	vm_area *next, *last = NULL;
858 
859 	next = addressSpace->areas;
860 	while (next) {
861 		if (next->base <= start && next->base + next->size >= start + size) {
862 			// this area covers the requested range
863 			if (next->id != RESERVED_AREA_ID) {
864 				// but it's not reserved space, it's a real area
865 				return B_BAD_VALUE;
866 			}
867 
868 			break;
869 		}
870 		last = next;
871 		next = next->address_space_next;
872 	}
873 	if (next == NULL)
874 		return B_ENTRY_NOT_FOUND;
875 
876 	// now we have to transfer the requested part of the reserved
877 	// range to the new area - and remove, resize or split the old
878 	// reserved area.
879 
880 	if (start == next->base) {
881 		// the area starts at the beginning of the reserved range
882 		if (last)
883 			last->address_space_next = area;
884 		else
885 			addressSpace->areas = area;
886 
887 		if (size == next->size) {
888 			// the new area fully covers the reserved range
889 			area->address_space_next = next->address_space_next;
890 			vm_put_address_space(addressSpace);
891 			free(next);
892 		} else {
893 			// resize the reserved range behind the area
894 			area->address_space_next = next;
895 			next->base += size;
896 			next->size -= size;
897 		}
898 	} else if (start + size == next->base + next->size) {
899 		// the area is at the end of the reserved range
900 		area->address_space_next = next->address_space_next;
901 		next->address_space_next = area;
902 
903 		// resize the reserved range before the area
904 		next->size = start - next->base;
905 	} else {
906 		// the area splits the reserved range into two separate ones
907 		// we need a new reserved area to cover this space
908 		vm_area *reserved = create_reserved_area_struct(addressSpace,
909 			next->protection);
910 		if (reserved == NULL)
911 			return B_NO_MEMORY;
912 
913 		atomic_add(&addressSpace->ref_count, 1);
914 		reserved->address_space_next = next->address_space_next;
915 		area->address_space_next = reserved;
916 		next->address_space_next = area;
917 
918 		// resize regions
919 		reserved->size = next->base + next->size - start - size;
920 		next->size = start - next->base;
921 		reserved->base = start + size;
922 		reserved->cache_offset = next->cache_offset;
923 	}
924 
925 	area->base = start;
926 	area->size = size;
927 	addressSpace->change_count++;
928 
929 	return B_OK;
930 }
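
// A worked example for the split case in find_reserved_area() above
// (addresses illustrative only): with a reserved range covering
// [0x1000, 0x9000) and a request for [0x3000, 0x5000), the range is split
// into reserved [0x1000, 0x3000), the new area at [0x3000, 0x5000), and a
// fresh reserved area at [0x5000, 0x9000).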
931 
932 
933 /*!	Must be called with this address space's sem held */
934 static status_t
935 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start,
936 	addr_t size, addr_t end, uint32 addressSpec, vm_area *area)
937 {
938 	vm_area *last = NULL;
939 	vm_area *next;
940 	bool foundSpot = false;
941 
942 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
943 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
944 		size, end, addressSpec, area));
945 
946 	// do some sanity checking
947 	if (start < addressSpace->base || size == 0
948 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
949 		|| start + size > end)
950 		return B_BAD_ADDRESS;
951 
952 	if (addressSpec == B_EXACT_ADDRESS) {
953 		// search for a reserved area
954 		status_t status = find_reserved_area(addressSpace, start, size, area);
955 		if (status == B_OK || status == B_BAD_VALUE)
956 			return status;
957 
958 		// There was no reserved area, and the slot doesn't seem to be used
959 		// already
960 		// ToDo: this could be further optimized.
961 	}
962 
963 	size_t alignment = B_PAGE_SIZE;
964 	if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) {
965 		// align the memory to the next power of two of the size
966 		while (alignment < size)
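		// (with 4 KB pages, a requested size of 0x3000, for example, ends up
		// with an alignment of 0x4000)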
967 			alignment <<= 1;
968 	}
969 
970 	start = ROUNDUP(start, alignment);
971 
972 	// walk up to the spot where we should start searching
973 second_chance:
974 	next = addressSpace->areas;
975 	while (next) {
976 		if (next->base >= start + size) {
977 			// we have a winner
978 			break;
979 		}
980 		last = next;
981 		next = next->address_space_next;
982 	}
983 
984 	// find the right spot depending on the address specification - the area
985 	// will be inserted directly after "last" ("next" is not referenced anymore)
986 
987 	switch (addressSpec) {
988 		case B_ANY_ADDRESS:
989 		case B_ANY_KERNEL_ADDRESS:
990 		case B_ANY_KERNEL_BLOCK_ADDRESS:
991 			// find a hole big enough for a new area
992 			if (!last) {
993 				// see if we can build it at the beginning of the virtual map
994 				if (!next || (next->base >= ROUNDUP(addressSpace->base,
995 						alignment) + size)) {
996 					foundSpot = true;
997 					area->base = ROUNDUP(addressSpace->base, alignment);
998 					break;
999 				}
1000 				last = next;
1001 				next = next->address_space_next;
1002 			}
1003 			// keep walking
1004 			while (next) {
1005 				if (next->base >= ROUNDUP(last->base + last->size, alignment)
1006 						+ size) {
1007 					// we found a spot (it'll be filled up below)
1008 					break;
1009 				}
1010 				last = next;
1011 				next = next->address_space_next;
1012 			}
1013 
1014 			if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP(
1015 					last->base + last->size, alignment) + (size - 1))) {
1016 				// got a spot
1017 				foundSpot = true;
1018 				area->base = ROUNDUP(last->base + last->size, alignment);
1019 				break;
1020 			} else {
1021 				// We didn't find a free spot - if there were any reserved areas
1022 				// with the RESERVED_AVOID_BASE flag set, we can now test those
1023 				// for free space
1024 				// ToDo: it would make sense to start with the biggest of them
1025 				next = addressSpace->areas;
1026 				last = NULL;
1027 				for (; next != NULL; last = next,
1028 						next = next->address_space_next) {
1029 					// ToDo: take free space after the reserved area into account!
1030 					if (next->base == ROUNDUP(next->base, alignment)
1031 						&& next->size == size) {
1032 						// The reserved area is entirely covered, and thus,
1033 						// removed
1034 						if (last)
1035 							last->address_space_next = next->address_space_next;
1036 						else
1037 							addressSpace->areas = next->address_space_next;
1038 
1039 						foundSpot = true;
1040 						area->base = next->base;
1041 						free(next);
1042 						break;
1043 					}
1044 					if (next->size - (ROUNDUP(next->base, alignment)
1045 							- next->base) >= size) {
1046 						// The new area will be placed at the end of the
1047 						// reserved area, and the reserved area will be resized
1048 						// to make space
1049 						foundSpot = true;
1050 						next->size -= size;
1051 						last = next;
1052 						area->base = next->base + next->size;
1053 						break;
1054 					}
1055 				}
1056 			}
1057 			break;
1058 
1059 		case B_BASE_ADDRESS:
1060 			// find a hole big enough for a new area beginning with "start"
1061 			if (!last) {
1062 				// see if we can build it at the specified start address
1063 				if (!next || (next->base >= start + size)) {
1064 					foundSpot = true;
1065 					area->base = start;
1066 					break;
1067 				}
1068 				last = next;
1069 				next = next->address_space_next;
1070 			}
1071 			// keep walking
1072 			while (next) {
1073 				if (next->base >= last->base + last->size + size) {
1074 					// we found a spot (it'll be filled up below)
1075 					break;
1076 				}
1077 				last = next;
1078 				next = next->address_space_next;
1079 			}
1080 
1081 			if ((addressSpace->base + (addressSpace->size - 1))
1082 					>= (last->base + last->size + (size - 1))) {
1083 				// got a spot
1084 				foundSpot = true;
1085 				if (last->base + last->size <= start)
1086 					area->base = start;
1087 				else
1088 					area->base = last->base + last->size;
1089 				break;
1090 			}
1091 			// we didn't find a free spot in the requested range, so we'll
1092 			// try again without any restrictions
1093 			start = addressSpace->base;
1094 			addressSpec = B_ANY_ADDRESS;
1095 			last = NULL;
1096 			goto second_chance;
1097 
1098 		case B_EXACT_ADDRESS:
1099 			// see if we can create it exactly here
1100 			if (!last) {
1101 				if (!next || (next->base >= start + size)) {
1102 					foundSpot = true;
1103 					area->base = start;
1104 					break;
1105 				}
1106 			} else {
1107 				if (next) {
1108 					if (last->base + last->size <= start && next->base >= start + size) {
1109 						foundSpot = true;
1110 						area->base = start;
1111 						break;
1112 					}
1113 				} else {
1114 					if ((last->base + (last->size - 1)) <= start - 1) {
1115 						foundSpot = true;
1116 						area->base = start;
1117 					}
1118 				}
1119 			}
1120 			break;
1121 		default:
1122 			return B_BAD_VALUE;
1123 	}
1124 
1125 	if (!foundSpot)
1126 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1127 
1128 	area->size = size;
1129 	if (last) {
1130 		area->address_space_next = last->address_space_next;
1131 		last->address_space_next = area;
1132 	} else {
1133 		area->address_space_next = addressSpace->areas;
1134 		addressSpace->areas = area;
1135 	}
1136 	addressSpace->change_count++;
1137 	return B_OK;
1138 }
1139 
1140 
1141 /**	This inserts the area you pass into the specified address space.
1142  *	It will also set the "_address" argument to its base address when
1143  *	the call succeeds.
1144  *	You need to hold the vm_address_space semaphore.
1145  */
1146 
1147 static status_t
1148 insert_area(vm_address_space *addressSpace, void **_address,
1149 	uint32 addressSpec, addr_t size, vm_area *area)
1150 {
1151 	addr_t searchBase, searchEnd;
1152 	status_t status;
1153 
1154 	switch (addressSpec) {
1155 		case B_EXACT_ADDRESS:
1156 			searchBase = (addr_t)*_address;
1157 			searchEnd = (addr_t)*_address + size;
1158 			break;
1159 
1160 		case B_BASE_ADDRESS:
1161 			searchBase = (addr_t)*_address;
1162 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1163 			break;
1164 
1165 		case B_ANY_ADDRESS:
1166 		case B_ANY_KERNEL_ADDRESS:
1167 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1168 			searchBase = addressSpace->base;
1169 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1170 			break;
1171 
1172 		default:
1173 			return B_BAD_VALUE;
1174 	}
1175 
1176 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1177 				searchEnd, addressSpec, area);
1178 	if (status == B_OK) {
1179 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
1180 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
1181 		*_address = (void *)area->base;
1182 	}
1183 
1184 	return status;
1185 }
1186 
1187 
1188 /*! You need to hold the lock of the cache and the write lock of the address
1189 	space when calling this function.
1190 	Note that in case of error your cache will be temporarily unlocked.
1191 */
1192 static status_t
1193 map_backing_store(vm_address_space *addressSpace, vm_cache *cache,
1194 	void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1195 	int wiring, int protection, int mapping, vm_area **_area,
1196 	const char *areaName)
1197 {
1198 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
1199 		addressSpace, cache, *_virtualAddress, offset, size, addressSpec,
1200 		wiring, protection, _area, areaName));
1201 	ASSERT_LOCKED_MUTEX(&cache->lock);
1202 
1203 	vm_area *area = create_area_struct(addressSpace, areaName, wiring,
1204 		protection);
1205 	if (area == NULL)
1206 		return B_NO_MEMORY;
1207 
1208 	vm_store *store = cache->store;
1209 	status_t status;
1210 
1211 	// if this is a private map, we need to create a new cache & store object
1212 	// pair to handle the private copies of pages as they are written to
1213 	vm_cache* sourceCache = cache;
1214 	if (mapping == REGION_PRIVATE_MAP) {
1215 		vm_cache *newCache;
1216 		vm_store *newStore;
1217 
1218 		// create an anonymous store object
1219 		newStore = vm_store_create_anonymous_noswap(
1220 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES);
1221 		if (newStore == NULL) {
1222 			status = B_NO_MEMORY;
1223 			goto err1;
1224 		}
1225 		newCache = vm_cache_create(newStore);
1226 		if (newCache == NULL) {
1227 			status = B_NO_MEMORY;
1228 			newStore->ops->destroy(newStore);
1229 			goto err1;
1230 		}
1231 
1232 		mutex_lock(&newCache->lock);
1233 		newCache->type = CACHE_TYPE_RAM;
1234 		newCache->temporary = 1;
1235 		newCache->scan_skip = cache->scan_skip;
1236 		newCache->virtual_base = offset;
1237 		newCache->virtual_size = offset + size;
1238 
1239 		vm_cache_add_consumer_locked(cache, newCache);
1240 
1241 		cache = newCache;
1242 		store = newStore;
1243 	}
1244 
1245 	status = vm_cache_set_minimal_commitment_locked(cache, offset + size);
1246 	if (status != B_OK)
1247 		goto err2;
1248 
1249 	// check to see if this address space has entered DELETE state
1250 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1251 		// okay, someone is trying to delete this address space now, so we can't
1252 		// insert the area, so back out
1253 		status = B_BAD_TEAM_ID;
1254 		goto err2;
1255 	}
1256 
1257 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1258 	if (status < B_OK)
1259 		goto err2;
1260 
1261 	// attach the cache to the area
1262 	area->cache = cache;
1263 	area->cache_offset = offset;
1264 
1265 	// point the cache back to the area
1266 	vm_cache_insert_area_locked(cache, area);
1267 	if (mapping == REGION_PRIVATE_MAP)
1268 		mutex_unlock(&cache->lock);
1269 
1270 	// insert the area in the global area hash table
1271 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
1272 	hash_insert(sAreaHash, area);
1273 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
1274 
1275 	// grab a ref to the address space (the area holds this)
1276 	atomic_add(&addressSpace->ref_count, 1);
1277 
1278 	*_area = area;
1279 	return B_OK;
1280 
1281 err2:
1282 	if (mapping == REGION_PRIVATE_MAP) {
1283 		// We created this cache, so we must delete it again. Note that we
1284 		// need to temporarily unlock the source cache or we'll otherwise
1285 		// deadlock, since vm_cache_remove_consumer will try to lock it too.
1286 		mutex_unlock(&cache->lock);
1287 		mutex_unlock(&sourceCache->lock);
1288 		vm_cache_release_ref(cache);
1289 		mutex_lock(&sourceCache->lock);
1290 	}
1291 err1:
1292 	free(area->name);
1293 	free(area);
1294 	return status;
1295 }
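
/*!	The callers in this file follow essentially the same pattern around
	map_backing_store() (vm_create_null_area() below is the shortest real
	example); roughly:

		mutex_lock(&cache->lock);
		status = map_backing_store(addressSpace, cache, &address, 0, size,
			addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area,
			name);
		mutex_unlock(&cache->lock);
		if (status < B_OK)
			vm_cache_release_ref(cache);
*/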
1296 
1297 
1298 status_t
1299 vm_unreserve_address_range(team_id team, void *address, addr_t size)
1300 {
1301 	AddressSpaceWriteLocker locker(team);
1302 	if (!locker.IsLocked())
1303 		return B_BAD_TEAM_ID;
1304 
1305 	// check to see if this address space has entered DELETE state
1306 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1307 		// okay, someone is trying to delete this address space now, so we can't
1308 		// insert the area, so back out
1309 		return B_BAD_TEAM_ID;
1310 	}
1311 
1312 	// search area list and remove any matching reserved ranges
1313 
1314 	vm_area* area = locker.AddressSpace()->areas;
1315 	vm_area* last = NULL;
1316 	while (area) {
1317 		// the area must be completely part of the reserved range
1318 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1319 			&& area->base + area->size <= (addr_t)address + size) {
1320 			// remove reserved range
1321 			vm_area *reserved = area;
1322 			if (last)
1323 				last->address_space_next = reserved->address_space_next;
1324 			else
1325 				locker.AddressSpace()->areas = reserved->address_space_next;
1326 
1327 			area = reserved->address_space_next;
1328 			vm_put_address_space(locker.AddressSpace());
1329 			free(reserved);
1330 			continue;
1331 		}
1332 
1333 		last = area;
1334 		area = area->address_space_next;
1335 	}
1336 
1337 	return B_OK;
1338 }
1339 
1340 
1341 status_t
1342 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec,
1343 	addr_t size, uint32 flags)
1344 {
1345 	if (size == 0)
1346 		return B_BAD_VALUE;
1347 
1348 	AddressSpaceWriteLocker locker(team);
1349 	if (!locker.IsLocked())
1350 		return B_BAD_TEAM_ID;
1351 
1352 	// check to see if this address space has entered DELETE state
1353 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1354 		// okay, someone is trying to delete this address space now, so we
1355 		// can't insert the area, let's back out
1356 		return B_BAD_TEAM_ID;
1357 	}
1358 
1359 	vm_area *area = create_reserved_area_struct(locker.AddressSpace(), flags);
1360 	if (area == NULL)
1361 		return B_NO_MEMORY;
1362 
1363 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1364 		size, area);
1365 	if (status < B_OK) {
1366 		free(area);
1367 		return status;
1368 	}
1369 
1370 	// the area is now reserved!
1371 
1372 	area->cache_offset = area->base;
1373 		// we cache the original base address here
1374 
1375 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1376 	return B_OK;
1377 }
1378 
1379 
1380 area_id
1381 vm_create_anonymous_area(team_id team, const char *name, void **address,
1382 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
1383 {
1384 	vm_area *area;
1385 	vm_cache *cache;
1386 	vm_store *store;
1387 	vm_page *page = NULL;
1388 	bool isStack = (protection & B_STACK_AREA) != 0;
1389 	bool canOvercommit = false;
1390 
1391 	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));
1392 
1393 	if (size == 0)
1394 		return B_BAD_VALUE;
1395 	if (!arch_vm_supports_protection(protection))
1396 		return B_NOT_SUPPORTED;
1397 
1398 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1399 		canOvercommit = true;
1400 
1401 #ifdef DEBUG_KERNEL_STACKS
1402 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1403 		isStack = true;
1404 #endif
1405 
1406 	/* check parameters */
1407 	switch (addressSpec) {
1408 		case B_ANY_ADDRESS:
1409 		case B_EXACT_ADDRESS:
1410 		case B_BASE_ADDRESS:
1411 		case B_ANY_KERNEL_ADDRESS:
1412 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1413 			break;
1414 
1415 		default:
1416 			return B_BAD_VALUE;
1417 	}
1418 
1419 	switch (wiring) {
1420 		case B_NO_LOCK:
1421 		case B_FULL_LOCK:
1422 		case B_LAZY_LOCK:
1423 		case B_CONTIGUOUS:
1424 		case B_ALREADY_WIRED:
1425 			break;
1426 		case B_LOMEM:
1427 		//case B_SLOWMEM:
1428 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1429 			wiring = B_FULL_LOCK;
1430 			break;
1431 		default:
1432 			return B_BAD_VALUE;
1433 	}
1434 
1435 	AddressSpaceWriteLocker locker;
1436 	status_t status = locker.SetTo(team);
1437 	if (status != B_OK)
1438 		return status;
1439 
1440 	vm_address_space *addressSpace = locker.AddressSpace();
1441 	size = PAGE_ALIGN(size);
1442 
1443 	if (wiring == B_CONTIGUOUS) {
1444 		// we try to allocate the page run here upfront as this may easily
1445 		// fail for obvious reasons
1446 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
1447 		if (page == NULL)
1448 			return B_NO_MEMORY;
1449 	}
1450 
1451 	// create an anonymous store object
1452 	// if it's a stack, make sure that at least two pages are available
1453 	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ? 2 : 0,
1454 		isStack ? ((protection & B_USER_PROTECTION) != 0 ?
1455 			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
1456 	if (store == NULL) {
1457 		status = B_NO_MEMORY;
1458 		goto err1;
1459 	}
1460 	cache = vm_cache_create(store);
1461 	if (cache == NULL) {
1462 		status = B_NO_MEMORY;
1463 		goto err2;
1464 	}
1465 
1466 	cache->temporary = 1;
1467 	cache->type = CACHE_TYPE_RAM;
1468 	cache->virtual_size = size;
1469 
1470 	switch (wiring) {
1471 		case B_LAZY_LOCK:
1472 		case B_FULL_LOCK:
1473 		case B_CONTIGUOUS:
1474 		case B_ALREADY_WIRED:
1475 			cache->scan_skip = 1;
1476 			break;
1477 		case B_NO_LOCK:
1478 			cache->scan_skip = 0;
1479 			break;
1480 	}
1481 
1482 	mutex_lock(&cache->lock);
1483 
1484 	status = map_backing_store(addressSpace, cache, address, 0, size,
1485 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name);
1486 
1487 	mutex_unlock(&cache->lock);
1488 
1489 	if (status < B_OK) {
1490 		vm_cache_release_ref(cache);
1491 		goto err1;
1492 	}
1493 
1494 	locker.DegradeToReadLock();
1495 
1496 	switch (wiring) {
1497 		case B_NO_LOCK:
1498 		case B_LAZY_LOCK:
1499 			// do nothing - the pages are mapped in as needed
1500 			break;
1501 
1502 		case B_FULL_LOCK:
1503 		{
1504 			vm_translation_map *map = &addressSpace->translation_map;
1505 			size_t reservePages = map->ops->map_max_pages_need(map,
1506 				area->base, area->base + (area->size - 1));
1507 			vm_page_reserve_pages(reservePages);
1508 
1509 			// Allocate and map all pages for this area
1510 			mutex_lock(&cache->lock);
1511 
1512 			off_t offset = 0;
1513 			for (addr_t address = area->base; address < area->base + (area->size - 1);
1514 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1515 #ifdef DEBUG_KERNEL_STACKS
1516 #	ifdef STACK_GROWS_DOWNWARDS
1517 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
1518 						* B_PAGE_SIZE)
1519 #	else
1520 				if (isStack && address >= area->base + area->size
1521 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1522 #	endif
1523 					continue;
1524 #endif
1525 				vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR, false);
1526 				if (page == NULL) {
1527 					// this shouldn't really happen, as we reserve the memory upfront
1528 					panic("couldn't fulfill B_FULL lock!");
1529 				}
1530 
1531 				vm_cache_insert_page(cache, page, offset);
1532 				vm_map_page(area, page, address, protection);
1533 			}
1534 
1535 			mutex_unlock(&cache->lock);
1536 			vm_page_unreserve_pages(reservePages);
1537 			break;
1538 		}
1539 
1540 		case B_ALREADY_WIRED:
1541 		{
1542 			// the pages should already be mapped. This is only really useful during
1543 			// boot time. Find the appropriate vm_page objects and stick them in
1544 			// the cache object.
1545 			vm_translation_map *map = &addressSpace->translation_map;
1546 			off_t offset = 0;
1547 
1548 			if (!kernel_startup)
1549 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1550 
1551 			mutex_lock(&cache->lock);
1552 			map->ops->lock(map);
1553 
1554 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
1555 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1556 					offset += B_PAGE_SIZE) {
1557 				addr_t physicalAddress;
1558 				uint32 flags;
1559 				status = map->ops->query(map, virtualAddress,
1560 					&physicalAddress, &flags);
1561 				if (status < B_OK) {
1562 					panic("looking up mapping failed for va 0x%lx\n",
1563 						virtualAddress);
1564 				}
1565 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1566 				if (page == NULL) {
1567 					panic("looking up page failed for pa 0x%lx\n",
1568 						physicalAddress);
1569 				}
1570 
1571 				page->wired_count++;
1572 					// TODO: needs to be atomic on all platforms!
1573 				vm_page_set_state(page, PAGE_STATE_WIRED);
1574 				vm_cache_insert_page(cache, page, offset);
1575 			}
1576 
1577 			map->ops->unlock(map);
1578 			mutex_unlock(&cache->lock);
1579 			break;
1580 		}
1581 
1582 		case B_CONTIGUOUS:
1583 		{
1584 			// We have already allocated our contiguous page run, so we can now just
1585 			// map them in the address space
1586 			vm_translation_map *map = &addressSpace->translation_map;
1587 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
1588 			addr_t virtualAddress = area->base;
1589 			size_t reservePages = map->ops->map_max_pages_need(map,
1590 				virtualAddress, virtualAddress + (area->size - 1));
1591 			off_t offset = 0;
1592 
1593 			vm_page_reserve_pages(reservePages);
1594 			mutex_lock(&cache->lock);
1595 			map->ops->lock(map);
1596 
1597 			for (virtualAddress = area->base; virtualAddress < area->base
1598 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1599 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1600 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1601 				if (page == NULL)
1602 					panic("couldn't lookup physical page just allocated\n");
1603 
1604 				status = map->ops->map(map, virtualAddress, physicalAddress,
1605 					protection);
1606 				if (status < B_OK)
1607 					panic("couldn't map physical page in page run\n");
1608 
1609 				page->wired_count++;
1610 					// TODO: needs to be atomic on all platforms!
1611 				vm_page_set_state(page, PAGE_STATE_WIRED);
1612 				vm_cache_insert_page(cache, page, offset);
1613 			}
1614 
1615 			map->ops->unlock(map);
1616 			mutex_unlock(&cache->lock);
1617 			vm_page_unreserve_pages(reservePages);
1618 			break;
1619 		}
1620 
1621 		default:
1622 			break;
1623 	}
1624 
1625 	TRACE(("vm_create_anonymous_area: done\n"));
1626 
1627 	area->cache_type = CACHE_TYPE_RAM;
1628 	return area->id;
1629 
1630 err2:
1631 	store->ops->destroy(store);
1632 err1:
1633 	if (wiring == B_CONTIGUOUS) {
1634 		// we had reserved the area space upfront...
1635 		addr_t pageNumber = page->physical_page_number;
1636 		int32 i;
1637 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1638 			page = vm_lookup_page(pageNumber);
1639 			if (page == NULL)
1640 				panic("couldn't lookup physical page just allocated\n");
1641 
1642 			vm_page_set_state(page, PAGE_STATE_FREE);
1643 		}
1644 	}
1645 
1646 	return status;
1647 }
1648 
1649 
1650 area_id
1651 vm_map_physical_memory(team_id team, const char *name, void **_address,
1652 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
1653 {
1654 	vm_area *area;
1655 	vm_cache *cache;
1656 	vm_store *store;
1657 	addr_t mapOffset;
1658 
1659 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1660 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1661 		name, _address, addressSpec, size, protection, physicalAddress));
1662 
1663 	if (!arch_vm_supports_protection(protection))
1664 		return B_NOT_SUPPORTED;
1665 
1666 	AddressSpaceWriteLocker locker(team);
1667 	if (!locker.IsLocked())
1668 		return B_BAD_TEAM_ID;
1669 
1670 	// if the physical address is not page aligned,
1671 	// move the actual area down to align it on a page boundary
1672 	mapOffset = physicalAddress % B_PAGE_SIZE;
1673 	size += mapOffset;
1674 	physicalAddress -= mapOffset;
1675 
1676 	size = PAGE_ALIGN(size);
1677 
1678 	// create a device store object
1679 
1680 	store = vm_store_create_device(physicalAddress);
1681 	if (store == NULL)
1682 		return B_NO_MEMORY;
1683 
1684 	cache = vm_cache_create(store);
1685 	if (cache == NULL) {
1686 		store->ops->destroy(store);
1687 		return B_NO_MEMORY;
1688 	}
1689 
1690 	// tell the page scanner to skip over this area, its pages are special
1691 	cache->scan_skip = 1;
1692 	cache->type = CACHE_TYPE_DEVICE;
1693 	cache->virtual_size = size;
1694 
1695 	mutex_lock(&cache->lock);
1696 
1697 	status_t status = map_backing_store(locker.AddressSpace(), cache, _address,
1698 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1699 		REGION_NO_PRIVATE_MAP, &area, name);
1700 
1701 	mutex_unlock(&cache->lock);
1702 
1703 	if (status < B_OK)
1704 		vm_cache_release_ref(cache);
1705 
1706 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
1707 		// set requested memory type
1708 		status = arch_vm_set_memory_type(area, physicalAddress,
1709 			addressSpec & B_MTR_MASK);
1710 		if (status < B_OK)
1711 			delete_area(locker.AddressSpace(), area);
1712 	}
1713 
1714 	if (status >= B_OK) {
1715 		// make sure our area is mapped in completely
1716 
1717 		vm_translation_map *map = &locker.AddressSpace()->translation_map;
1718 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
1719 			area->base + (size - 1));
1720 
1721 		vm_page_reserve_pages(reservePages);
1722 		map->ops->lock(map);
1723 
1724 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1725 			map->ops->map(map, area->base + offset, physicalAddress + offset,
1726 				protection);
1727 		}
1728 
1729 		map->ops->unlock(map);
1730 		vm_page_unreserve_pages(reservePages);
1731 	}
1732 
1733 	if (status < B_OK)
1734 		return status;
1735 
1736 	// modify the pointer returned to be offset back into the new area
1737 	// the same way the physical address passed in was offset
1738 	*_address = (void *)((addr_t)*_address + mapOffset);
1739 
1740 	area->cache_type = CACHE_TYPE_DEVICE;
1741 	return area->id;
1742 }
1743 
1744 
1745 area_id
1746 vm_create_null_area(team_id team, const char *name, void **address,
1747 	uint32 addressSpec, addr_t size)
1748 {
1749 	vm_area *area;
1750 	vm_cache *cache;
1751 	vm_store *store;
1752 	status_t status;
1753 
1754 	AddressSpaceWriteLocker locker(team);
1755 	if (!locker.IsLocked())
1756 		return B_BAD_TEAM_ID;
1757 
1758 	size = PAGE_ALIGN(size);
1759 
1760 	// create a null store object
1761 
1762 	store = vm_store_create_null();
1763 	if (store == NULL)
1764 		return B_NO_MEMORY;
1765 
1766 	cache = vm_cache_create(store);
1767 	if (cache == NULL) {
1768 		store->ops->destroy(store);
1769 		return B_NO_MEMORY;
1770 	}
1771 
1772 	// tell the page scanner to skip over this area, no pages will be mapped here
1773 	cache->scan_skip = 1;
1774 	cache->type = CACHE_TYPE_NULL;
1775 	cache->virtual_size = size;
1776 
1777 	mutex_lock(&cache->lock);
1778 
1779 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
1780 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);
1781 
1782 	mutex_unlock(&cache->lock);
1783 
1784 	if (status < B_OK) {
1785 		vm_cache_release_ref(cache);
1786 		return status;
1787 	}
1788 
1789 	area->cache_type = CACHE_TYPE_NULL;
1790 	return area->id;
1791 }
1792 
1793 
1794 /*!	Creates the vnode cache for the specified \a vnode.
1795 	The vnode has to be marked busy when calling this function.
1796 */
1797 status_t
1798 vm_create_vnode_cache(struct vnode *vnode, struct vm_cache **_cache)
1799 {
1800 	status_t status;
1801 
1802 	// create a vnode store object
1803 	vm_store *store = vm_create_vnode_store(vnode);
1804 	if (store == NULL)
1805 		return B_NO_MEMORY;
1806 
1807 	vm_cache *cache = vm_cache_create(store);
1808 	if (cache == NULL) {
1809 		status = B_NO_MEMORY;
1810 		goto err1;
1811 	}
1812 
1813 	cache->type = CACHE_TYPE_VNODE;
1814 
1815 	*_cache = cache;
1816 	return B_OK;
1817 
1818 err1:
1819 	store->ops->destroy(store);
1820 	return status;
1821 }
1822 
1823 
1824 /*!	Will map the file at the path specified by \a name to an area in memory.
1825 	The file will be mirrored beginning at the specified \a offset. The \a offset
1826 	and \a size arguments have to be page aligned.
1827 */
1828 static area_id
1829 _vm_map_file(team_id team, const char *name, void **_address, uint32 addressSpec,
1830 	size_t size, uint32 protection, uint32 mapping, const char *path,
1831 	off_t offset, bool kernel)
1832 {
1833 	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
1834 	// ToDo: check file access permissions (would be already done if the above were true)
1835 	// ToDo: for binary files, we want to make sure that they get the
1836 	//	copy of a file at a given time, i.e. later changes should not
1837 	//	make it into the mapped copy -- this will need quite some changes
1838 	//	to be done in a nice way
1839 	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n",
1840 		path, offset, size, mapping));
1841 
1842 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
1843 	size = PAGE_ALIGN(size);
1844 
1845 	// get the vnode for the object, this also grabs a ref to it
1846 	struct vnode *vnode;
1847 	status_t status = vfs_get_vnode_from_path(path, kernel, &vnode);
1848 	if (status < B_OK)
1849 		return status;
1850 
1851 	AddressSpaceWriteLocker locker(team);
1852 	if (!locker.IsLocked()) {
1853 		vfs_put_vnode(vnode);
1854 		return B_BAD_TEAM_ID;
1855 	}
1856 
1857 	// ToDo: this only works for file systems that use the file cache
1858 	vm_cache *cache;
1859 	status = vfs_get_vnode_cache(vnode, &cache, false);
1860 	if (status < B_OK) {
1861 		vfs_put_vnode(vnode);
1862 		return status;
1863 	}
1864 
1865 	mutex_lock(&cache->lock);
1866 
1867 	vm_area *area;
1868 	status = map_backing_store(locker.AddressSpace(), cache, _address,
1869 		offset, size, addressSpec, 0, protection, mapping, &area, name);
1870 
1871 	mutex_unlock(&cache->lock);
1872 
1873 	vfs_put_vnode(vnode);
1874 		// we don't need this vnode anymore - if the above call was
1875 		// successful, the store already has a ref to it
1876 
1877 	if (status < B_OK || mapping == REGION_PRIVATE_MAP) {
1878 		// map_backing_store() cannot know we no longer need the ref
1879 		vm_cache_release_ref(cache);
1880 	}
1881 	if (status < B_OK)
1882 		return status;
1883 
1884 	area->cache_type = CACHE_TYPE_VNODE;
1885 	return area->id;
1886 }
1887 
1888 
1889 area_id
1890 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec,
1891 	addr_t size, uint32 protection, uint32 mapping, const char *path,
1892 	off_t offset)
1893 {
1894 	if (!arch_vm_supports_protection(protection))
1895 		return B_NOT_SUPPORTED;
1896 
1897 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1898 		mapping, path, offset, true);
1899 }
1900 
1901 
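/*!	Returns the area's cache, locked and with a reference added. Since
	\c area->cache may change while we wait for the cache's mutex, the lookup
	is retried (guarded by \c sAreaCacheLock) until the cache we locked is
	still the area's current cache. Pair each call with
	vm_area_put_locked_cache().
*/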
1902 vm_cache *
1903 vm_area_get_locked_cache(vm_area *area)
1904 {
1905 	MutexLocker locker(sAreaCacheLock);
1906 	while (true) {
1907 		vm_cache* cache = area->cache;
1908 		vm_cache_acquire_ref(cache);
1909 		locker.Unlock();
1910 
1911 		mutex_lock(&cache->lock);
1912 
1913 		locker.Lock();
1914 		if (cache == area->cache)
1915 			return cache;
1916 
1917 		// the cache changed in the meantime
1918 		mutex_unlock(&cache->lock);
1919 		vm_cache_release_ref(cache);
1920 	}
1921 }
1922 
1923 
1924 void
1925 vm_area_put_locked_cache(vm_cache *cache)
1926 {
1927 	mutex_unlock(&cache->lock);
1928 	vm_cache_release_ref(cache);
1929 }
1930 
1931 
1932 area_id
1933 vm_clone_area(team_id team, const char *name, void **address,
1934 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID)
1935 {
1936 	vm_area *newArea = NULL;
1937 	vm_area *sourceArea;
1938 
1939 	MultiAddressSpaceLocker locker;
1940 	vm_address_space *sourceAddressSpace;
1941 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
1942 	if (status != B_OK)
1943 		return status;
1944 
1945 	vm_address_space *targetAddressSpace;
1946 	status = locker.AddTeam(team, true, &targetAddressSpace);
1947 	if (status != B_OK)
1948 		return status;
1949 
1950 	status = locker.Lock();
1951 	if (status != B_OK)
1952 		return status;
1953 
1954 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
1955 	if (sourceArea == NULL)
1956 		return B_BAD_VALUE;
1957 
1958 	vm_cache *cache = vm_area_get_locked_cache(sourceArea);
1959 
1960 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
1961 	//	have been adapted. Maybe it should be part of the kernel settings,
1962 	//	anyway (so that old drivers can always work).
1963 #if 0
1964 	if (sourceArea->aspace == vm_kernel_address_space() && addressSpace != vm_kernel_address_space()
1965 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1966 		// kernel areas must not be cloned in userland, unless explicitly
1967 		// declared user-cloneable upon construction
1968 		status = B_NOT_ALLOWED;
1969 	} else
1970 #endif
1971 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1972 		status = B_NOT_ALLOWED;
1973 	else {
1974 		status = map_backing_store(targetAddressSpace, cache, address,
1975 			sourceArea->cache_offset, sourceArea->size, addressSpec,
1976 			sourceArea->wiring, protection, mapping, &newArea, name);
1977 	}
1978 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1979 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1980 		// to create a new ref, and has therefore already acquired a reference
1981 		// to the source cache - but otherwise it has no idea that we need
1982 		// one.
1983 		vm_cache_acquire_ref(cache);
1984 	}
1985 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
1986 		// we need to map in everything at this point
1987 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
1988 			// we don't have actual pages to map but a physical area
1989 			vm_translation_map *map = &sourceArea->address_space->translation_map;
1990 			map->ops->lock(map);
1991 
1992 			addr_t physicalAddress;
1993 			uint32 oldProtection;
1994 			map->ops->query(map, sourceArea->base, &physicalAddress,
1995 				&oldProtection);
1996 
1997 			map->ops->unlock(map);
1998 
1999 			map = &targetAddressSpace->translation_map;
2000 			size_t reservePages = map->ops->map_max_pages_need(map,
2001 				newArea->base, newArea->base + (newArea->size - 1));
2002 
2003 			vm_page_reserve_pages(reservePages);
2004 			map->ops->lock(map);
2005 
2006 			for (addr_t offset = 0; offset < newArea->size;
2007 					offset += B_PAGE_SIZE) {
2008 				map->ops->map(map, newArea->base + offset,
2009 					physicalAddress + offset, protection);
2010 			}
2011 
2012 			map->ops->unlock(map);
2013 			vm_page_unreserve_pages(reservePages);
2014 		} else {
2015 			vm_translation_map *map = &targetAddressSpace->translation_map;
2016 			size_t reservePages = map->ops->map_max_pages_need(map,
2017 				newArea->base, newArea->base + (newArea->size - 1));
2018 			vm_page_reserve_pages(reservePages);
2019 
2020 			// map in all pages from source
2021 			for (vm_page *page = cache->page_list; page != NULL;
2022 					page = page->cache_next) {
2023 				vm_map_page(newArea, page, newArea->base
2024 					+ ((page->cache_offset << PAGE_SHIFT) - newArea->cache_offset),
2025 					protection);
2026 			}
2027 
2028 			vm_page_unreserve_pages(reservePages);
2029 		}
2030 	}
2031 	if (status == B_OK)
2032 		newArea->cache_type = sourceArea->cache_type;
2033 
2034 	vm_area_put_locked_cache(cache);
2035 
2036 	if (status < B_OK)
2037 		return status;
2038 
2039 	return newArea->id;
2040 }
2041 
2042 
2043 //! The address space must be write locked at this point
2044 static void
2045 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area)
2046 {
2047 	vm_area *temp, *last = NULL;
2048 
2049 	temp = addressSpace->areas;
2050 	while (temp != NULL) {
2051 		if (area == temp) {
2052 			if (last != NULL) {
2053 				last->address_space_next = temp->address_space_next;
2054 			} else {
2055 				addressSpace->areas = temp->address_space_next;
2056 			}
2057 			addressSpace->change_count++;
2058 			break;
2059 		}
2060 		last = temp;
2061 		temp = temp->address_space_next;
2062 	}
2063 	if (area == addressSpace->area_hint)
2064 		addressSpace->area_hint = NULL;
2065 
2066 	if (temp == NULL)
2067 		panic("remove_area_from_address_space: area not found in aspace's area list\n");
2068 }
2069 
2070 
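/*!	Deletes the given area: removes it from the global area hash and from its
	address space (which must be write locked), unmaps its address range, and
	releases its cache reference before freeing the area structure itself.
*/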
2071 static void
2072 delete_area(vm_address_space *addressSpace, vm_area *area)
2073 {
2074 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
2075 	hash_remove(sAreaHash, area);
2076 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
2077 
2078 	// At this point the area is removed from the global hash table, but
2079 	// still exists in the area list.
2080 
2081 	// Unmap the virtual address space the area occupied
2082 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2083 
2084 	if (!area->cache->temporary)
2085 		vm_cache_write_modified(area->cache, false);
2086 
2087 	arch_vm_unset_memory_type(area);
2088 	remove_area_from_address_space(addressSpace, area);
2089 	vm_put_address_space(addressSpace);
2090 
2091 	vm_cache_remove_area(area->cache, area);
2092 	vm_cache_release_ref(area->cache);
2093 
2094 	free(area->name);
2095 	free(area);
2096 }
2097 
2098 
2099 status_t
2100 vm_delete_area(team_id team, area_id id)
2101 {
2102 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2103 
2104 	AddressSpaceWriteLocker locker;
2105 	vm_area *area;
2106 	status_t status = locker.SetFromArea(team, id, area);
2107 	if (status < B_OK)
2108 		return status;
2109 
2110 	delete_area(locker.AddressSpace(), area);
2111 	return B_OK;
2112 }
2113 
2114 
2115 /*!	Creates a new cache on top of the given cache, moves all areas from
2116 	the old cache to the new one, and changes the protection of all affected
2117 	areas' pages to read-only.
2118 	Preconditions:
2119 	- The given cache must be locked.
2120 	- All of the cache's areas' address spaces must be read locked.
2121 	- All of the cache's areas must have a clear \c no_cache_change flag.
2122 */
2123 static status_t
2124 vm_copy_on_write_area(vm_cache* lowerCache)
2125 {
2126 	vm_store *store;
2127 	vm_cache *upperCache;
2128 	vm_page *page;
2129 	status_t status;
2130 
2131 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2132 
2133 	// We need to separate the cache from its areas. The cache goes one level
2134 	// deeper and we create a new cache in between.
2135 
2136 	// create an anonymous store object
2137 	store = vm_store_create_anonymous_noswap(false, 0, 0);
2138 	if (store == NULL)
2139 		return B_NO_MEMORY;
2140 
2141 	upperCache = vm_cache_create(store);
2142 	if (upperCache == NULL) {
2143 		store->ops->destroy(store);
2144 		return B_NO_MEMORY;
2145 	}
2146 
2147 	mutex_lock(&upperCache->lock);
2148 
2149 	upperCache->type = CACHE_TYPE_RAM;
2150 	upperCache->temporary = 1;
2151 	upperCache->scan_skip = lowerCache->scan_skip;
2152 	upperCache->virtual_base = lowerCache->virtual_base;
2153 	upperCache->virtual_size = lowerCache->virtual_size;
2154 
2155 	// transfer the lower cache areas to the upper cache
2156 	mutex_lock(&sAreaCacheLock);
2157 
2158 	upperCache->areas = lowerCache->areas;
2159 	lowerCache->areas = NULL;
2160 
2161 	for (vm_area *tempArea = upperCache->areas; tempArea != NULL;
2162 			tempArea = tempArea->cache_next) {
2163 		ASSERT(!tempArea->no_cache_change);
2164 
2165 		tempArea->cache = upperCache;
2166 		atomic_add(&upperCache->ref_count, 1);
2167 		atomic_add(&lowerCache->ref_count, -1);
2168 	}
2169 
2170 	mutex_unlock(&sAreaCacheLock);
2171 
2172 	vm_cache_add_consumer_locked(lowerCache, upperCache);
2173 
2174 	// We now need to remap all pages from all of the cache's areas read-only, so that
2175 	// a copy will be created on next write access
2176 
2177 	for (vm_area *tempArea = upperCache->areas; tempArea != NULL;
2178 			tempArea = tempArea->cache_next) {
2179 		// The area must be readable in the same way it was previously writable
2180 		uint32 protection = B_KERNEL_READ_AREA;
2181 		if (tempArea->protection & B_READ_AREA)
2182 			protection |= B_READ_AREA;
2183 
2184 		vm_translation_map *map = &tempArea->address_space->translation_map;
2185 		map->ops->lock(map);
2186 		map->ops->protect(map, tempArea->base, tempArea->base - 1 + tempArea->size, protection);
2187 		map->ops->unlock(map);
2188 	}
2189 
2190 	vm_area_put_locked_cache(upperCache);
2191 
2192 	return B_OK;
2193 }
2194 
2195 
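/*!	Creates a private copy-on-write copy of the source area in the target
	team. If the source area is writable, vm_copy_on_write_area() additionally
	pushes the shared cache one level down, so that writes to either area
	fault in private page copies.
*/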
2196 area_id
2197 vm_copy_area(team_id team, const char *name, void **_address,
2198 	uint32 addressSpec, uint32 protection, area_id sourceID)
2199 {
2200 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2201 
2202 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2203 		// set the same protection for the kernel as for userland
2204 		protection |= B_KERNEL_READ_AREA;
2205 		if (writableCopy)
2206 			protection |= B_KERNEL_WRITE_AREA;
2207 	}
2208 
2209 	// Do the locking: target address space, all address spaces associated with
2210 	// the source cache, and the cache itself.
2211 	MultiAddressSpaceLocker locker;
2212 	vm_address_space *targetAddressSpace;
2213 	vm_cache *cache;
2214 	vm_area* source;
2215 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2216 	if (status == B_OK) {
2217 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2218 			&cache, true);
2219 	}
2220 	if (status != B_OK)
2221 		return status;
2222 
2223 	AreaCacheLocker cacheLocker(cache);	// already locked
2224 
2225 	if (addressSpec == B_CLONE_ADDRESS) {
2226 		addressSpec = B_EXACT_ADDRESS;
2227 		*_address = (void *)source->base;
2228 	}
2229 
2230 	// First, create a cache on top of the source area
2231 
2232 	vm_area *target;
2233 	status = map_backing_store(targetAddressSpace, cache, _address,
2234 		source->cache_offset, source->size, addressSpec, source->wiring,
2235 		protection, REGION_PRIVATE_MAP, &target, name);
2236 
2237 	if (status < B_OK)
2238 		return status;
2239 
2240 	// If the source area is writable, we need to move it one layer up as well
2241 
2242 	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2243 		// ToDo: do something more useful if this fails!
2244 		if (vm_copy_on_write_area(cache) < B_OK)
2245 			panic("vm_copy_on_write_area() failed!\n");
2246 	}
2247 
2248 	// we return the ID of the newly created area
2249 	return target->id;
2250 }
2251 
2252 
2253 //! You need to hold the cache lock when calling this function
2254 static int32
2255 count_writable_areas(vm_cache *cache, vm_area *ignoreArea)
2256 {
2257 	struct vm_area *area = cache->areas;
2258 	uint32 count = 0;
2259 
2260 	for (; area != NULL; area = area->cache_next) {
2261 		if (area != ignoreArea
2262 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2263 			count++;
2264 	}
2265 
2266 	return count;
2267 }
2268 
2269 
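/*!	Changes the protection of the given area. Transitions between writable
	and read-only may also adjust the cache's commitment or insert a new
	cache via vm_copy_on_write_area(); afterwards the existing page mappings
	are reprotected where that is safe to do.
*/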
2270 static status_t
2271 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection)
2272 {
2273 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = %#lx)\n",
2274 		team, areaID, newProtection));
2275 
2276 	if (!arch_vm_supports_protection(newProtection))
2277 		return B_NOT_SUPPORTED;
2278 
2279 	// lock address spaces and cache
2280 	MultiAddressSpaceLocker locker;
2281 	vm_cache *cache;
2282 	vm_area* area;
2283 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2284 		&cache, true);
	if (status != B_OK)
		return status;

2285 	AreaCacheLocker cacheLocker(cache);	// already locked
2286 
2287 	if (area->protection == newProtection)
2288 		return B_OK;
2289 
2290 	if (team != vm_kernel_address_space_id()
2291 		&& area->address_space->id != team) {
2292 		// unless you're the kernel, you are only allowed to set
2293 		// the protection of your own areas
2294 		return B_NOT_ALLOWED;
2295 	}
2296 
2297 	bool changePageProtection = true;
2298 
2299 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2300 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2301 		// writable -> !writable
2302 
2303 		if (cache->source != NULL && cache->temporary) {
2304 			if (count_writable_areas(cache, area) == 0) {
2305 				// Since this cache now gets its pages from its source cache,
2306 				// we can reduce the cache's commitment to cover only the pages
2307 				// that are actually in this cache.
2308 
2309 				// count existing pages in this cache
2310 				struct vm_page *page = cache->page_list;
2311 				uint32 count = 0;
2312 
2313 				for (; page != NULL; page = page->cache_next) {
2314 					count++;
2315 				}
2316 
2317 				status = cache->store->ops->commit(cache->store,
2318 					cache->virtual_base + count * B_PAGE_SIZE);
2319 
2320 				// ToDo: we may be able to join with our source cache, if count == 0
2321 			}
2322 		}
2323 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
2324 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
2325 		// !writable -> writable
2326 
2327 		if (!list_is_empty(&cache->consumers)) {
2328 			// There are consumers -- we have to insert a new cache. Fortunately
2329 			// vm_copy_on_write_area() does everything that's needed.
2330 			changePageProtection = false;
2331 			status = vm_copy_on_write_area(cache);
2332 		} else {
2333 			// No consumers, so we don't need to insert a new one.
2334 			if (cache->source != NULL && cache->temporary) {
2335 				// the cache's commitment must contain all possible pages
2336 				status = cache->store->ops->commit(cache->store,
2337 					cache->virtual_size);
2338 			}
2339 
2340 			if (status == B_OK && cache->source != NULL) {
2341 				// There's a source cache, hence we can't just change all pages'
2342 				// protection or we might allow writing into pages belonging to
2343 				// a lower cache.
2344 				changePageProtection = false;
2345 
2346 				struct vm_translation_map *map
2347 					= &area->address_space->translation_map;
2348 				map->ops->lock(map);
2349 
2350 				vm_page* page = cache->page_list;
2351 				while (page) {
2352 					addr_t address = area->base
2353 						+ (page->cache_offset << PAGE_SHIFT);
2354 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
2355 						newProtection);
2356 					page = page->cache_next;
2357 				}
2358 
2359 				map->ops->unlock(map);
2360 			}
2361 		}
2362 	} else {
2363 		// we don't have anything special to do in all other cases
2364 	}
2365 
2366 	if (status == B_OK) {
2367 		// remap existing pages in this cache
2368 		struct vm_translation_map *map = &area->address_space->translation_map;
2369 
2370 		if (changePageProtection) {
2371 			map->ops->lock(map);
2372 			map->ops->protect(map, area->base, area->base - 1 + area->size,
2373 				newProtection);
2374 			map->ops->unlock(map);
2375 		}
2376 
2377 		area->protection = newProtection;
2378 	}
2379 
2380 	return status;
2381 }
2382 
2383 
2384 status_t
2385 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t *paddr)
2386 {
2387 	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
2388 	if (addressSpace == NULL)
2389 		return B_BAD_TEAM_ID;
2390 
2391 	uint32 dummyFlags;
2392 	status_t status = addressSpace->translation_map.ops->query(
2393 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
2394 
2395 	vm_put_address_space(addressSpace);
2396 	return status;
2397 }
2398 
2399 
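//!	Returns the virtual address at which \a page is mapped within \a area.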
2400 static inline addr_t
2401 virtual_page_address(vm_area *area, vm_page *page)
2402 {
2403 	return area->base
2404 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
2405 }
2406 
2407 
2408 bool
2409 vm_test_map_modification(vm_page *page)
2410 {
2411 	MutexLocker locker(sMappingLock);
2412 
2413 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2414 	vm_page_mapping *mapping;
2415 	while ((mapping = iterator.Next()) != NULL) {
2416 		vm_area *area = mapping->area;
2417 		vm_translation_map *map = &area->address_space->translation_map;
2418 
2419 		addr_t physicalAddress;
2420 		uint32 flags;
2421 		map->ops->lock(map);
2422 		map->ops->query(map, virtual_page_address(area, page),
2423 			&physicalAddress, &flags);
2424 		map->ops->unlock(map);
2425 
2426 		if (flags & PAGE_MODIFIED)
2427 			return true;
2428 	}
2429 
2430 	return false;
2431 }
2432 
2433 
2434 int32
2435 vm_test_map_activation(vm_page *page, bool *_modified)
2436 {
2437 	int32 activation = 0;
2438 	bool modified = false;
2439 
2440 	MutexLocker locker(sMappingLock);
2441 
2442 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2443 	vm_page_mapping *mapping;
2444 	while ((mapping = iterator.Next()) != NULL) {
2445 		vm_area *area = mapping->area;
2446 		vm_translation_map *map = &area->address_space->translation_map;
2447 
2448 		addr_t physicalAddress;
2449 		uint32 flags;
2450 		map->ops->lock(map);
2451 		map->ops->query(map, virtual_page_address(area, page),
2452 			&physicalAddress, &flags);
2453 		map->ops->unlock(map);
2454 
2455 		if (flags & PAGE_ACCESSED)
2456 			activation++;
2457 		if (flags & PAGE_MODIFIED)
2458 			modified = true;
2459 	}
2460 
2461 	if (_modified != NULL)
2462 		*_modified = modified;
2463 
2464 	return activation;
2465 }
2466 
2467 
2468 void
2469 vm_clear_map_flags(vm_page *page, uint32 flags)
2470 {
2471 	MutexLocker locker(sMappingLock);
2472 
2473 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2474 	vm_page_mapping *mapping;
2475 	while ((mapping = iterator.Next()) != NULL) {
2476 		vm_area *area = mapping->area;
2477 		vm_translation_map *map = &area->address_space->translation_map;
2478 
2479 		map->ops->lock(map);
2480 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
2481 		map->ops->unlock(map);
2482 	}
2483 }
2484 
2485 
2486 /*!	Removes all mappings from a page.
2487 	After you've called this function, the page is unmapped from memory.
2488 	The accumulated page flags of all mappings can be found in \a _flags.
2489 */
2490 void
2491 vm_remove_all_page_mappings(vm_page *page, uint32 *_flags)
2492 {
2493 	uint32 accumulatedFlags = 0;
2494 	MutexLocker locker(sMappingLock);
2495 
2496 	vm_page_mappings queue;
2497 	queue.MoveFrom(&page->mappings);
2498 
2499 	vm_page_mappings::Iterator iterator = queue.GetIterator();
2500 	vm_page_mapping *mapping;
2501 	while ((mapping = iterator.Next()) != NULL) {
2502 		vm_area *area = mapping->area;
2503 		vm_translation_map *map = &area->address_space->translation_map;
2504 		addr_t physicalAddress;
2505 		uint32 flags;
2506 
2507 		map->ops->lock(map);
2508 		addr_t address = virtual_page_address(area, page);
2509 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
2510 		map->ops->flush(map);
2511 		map->ops->query(map, address, &physicalAddress, &flags);
2512 		map->ops->unlock(map);
2513 
2514 		area->mappings.Remove(mapping);
2515 
2516 		accumulatedFlags |= flags;
2517 	}
2518 
2519 	locker.Unlock();
2520 
2521 	// free now unused mappings
2522 
2523 	while ((mapping = queue.RemoveHead()) != NULL) {
2524 		free(mapping);
2525 	}
2526 
2527 	if (_flags != NULL)
2528 		*_flags = accumulatedFlags;
2529 }
2530 
2531 
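/*!	Unmaps the given address range of the area. For wired (non-device) areas
	the pages' wired counts are decreased. If \a preserveModified is \c true,
	pages with dirty mappings are moved to the modified page state before the
	mappings are gone. For B_NO_LOCK areas the corresponding vm_page_mapping
	objects are removed from both page and area and freed.
*/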
2532 status_t
2533 vm_unmap_pages(vm_area *area, addr_t base, size_t size, bool preserveModified)
2534 {
2535 	vm_translation_map *map = &area->address_space->translation_map;
2536 	addr_t end = base + (size - 1);
2537 
2538 	map->ops->lock(map);
2539 
2540 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
2541 		// iterate through all pages and decrease their wired count
2542 		for (addr_t virtualAddress = base; virtualAddress < end;
2543 				virtualAddress += B_PAGE_SIZE) {
2544 			addr_t physicalAddress;
2545 			uint32 flags;
2546 			status_t status = map->ops->query(map, virtualAddress,
2547 				&physicalAddress, &flags);
2548 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
2549 				continue;
2550 
2551 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2552 			if (page == NULL) {
2553 				panic("area %p looking up page failed for pa 0x%lx\n", area,
2554 					physicalAddress);
2555 			}
2556 
2557 			page->wired_count--;
2558 				// TODO: needs to be atomic on all platforms!
2559 		}
2560 	}
2561 
2562 	map->ops->unmap(map, base, end);
2563 	if (preserveModified) {
2564 		map->ops->flush(map);
2565 
2566 		for (addr_t virtualAddress = base; virtualAddress < end;
2567 				virtualAddress += B_PAGE_SIZE) {
2568 			addr_t physicalAddress;
2569 			uint32 flags;
2570 			status_t status = map->ops->query(map, virtualAddress,
2571 				&physicalAddress, &flags);
2572 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
2573 				continue;
2574 
2575 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2576 			if (page == NULL) {
2577 				panic("area %p looking up page failed for pa 0x%lx\n", area,
2578 					physicalAddress);
2579 			}
2580 
2581 			if ((flags & PAGE_MODIFIED) != 0
2582 				&& page->state != PAGE_STATE_MODIFIED)
2583 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
2584 		}
2585 	}
2586 	map->ops->unlock(map);
2587 
2588 	if (area->wiring == B_NO_LOCK) {
2589 		uint32 startOffset = (area->cache_offset + base - area->base)
2590 			>> PAGE_SHIFT;
2591 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
2592 		vm_page_mapping *mapping;
2593 		vm_area_mappings queue;
2594 
2595 		mutex_lock(&sMappingLock);
2596 		map->ops->lock(map);
2597 
2598 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
2599 		while (iterator.HasNext()) {
2600 			mapping = iterator.Next();
2601 
2602 			vm_page *page = mapping->page;
2603 			if (page->cache_offset < startOffset
2604 				|| page->cache_offset >= endOffset)
2605 				continue;
2606 
2607 			mapping->page->mappings.Remove(mapping);
2608 			iterator.Remove();
2609 
2610 			queue.Add(mapping);
2611 		}
2612 
2613 		map->ops->unlock(map);
2614 		mutex_unlock(&sMappingLock);
2615 
2616 		while ((mapping = queue.RemoveHead()) != NULL) {
2617 			free(mapping);
2618 		}
2619 	}
2620 
2621 	return B_OK;
2622 }
2623 
2624 
2625 /*!	When calling this function, you need to have pages reserved! */
2626 status_t
2627 vm_map_page(vm_area *area, vm_page *page, addr_t address, uint32 protection)
2628 {
2629 	vm_translation_map *map = &area->address_space->translation_map;
2630 	vm_page_mapping *mapping = NULL;
2631 
2632 	if (area->wiring == B_NO_LOCK) {
2633 		mapping = (vm_page_mapping *)malloc(sizeof(vm_page_mapping));
2634 		if (mapping == NULL)
2635 			return B_NO_MEMORY;
2636 
2637 		mapping->page = page;
2638 		mapping->area = area;
2639 	}
2640 
2641 	map->ops->lock(map);
2642 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
2643 		protection);
2644 	map->ops->unlock(map);
2645 
2646 	if (area->wiring != B_NO_LOCK) {
2647 		page->wired_count++;
2648 			// TODO: needs to be atomic on all platforms!
2649 	} else {
2650 		// insert mapping into lists
2651 		MutexLocker locker(sMappingLock);
2652 
2653 		page->mappings.Add(mapping);
2654 		area->mappings.Add(mapping);
2655 	}
2656 
2657 	if (page->usage_count < 0)
2658 		page->usage_count = 1;
2659 
2660 	if (page->state != PAGE_STATE_MODIFIED)
2661 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
2662 
2663 	return B_OK;
2664 }
2665 
2666 
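/*!	Debugger command backing "dl"/"dw"/"ds"/"db": dumps memory at the given
	virtual (or, with -p/--physical, physical) address in units of 8, 4, 2,
	or 1 byte(s) respectively.
*/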
2667 static int
2668 display_mem(int argc, char **argv)
2669 {
2670 	bool physical = false;
2671 	addr_t copyAddress;
2672 	int32 displayWidth;
2673 	int32 itemSize;
2674 	int32 num = -1;
2675 	addr_t address;
2676 	int i = 1, j;
2677 
2678 	if (argc > 1 && argv[1][0] == '-') {
2679 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2680 			physical = true;
2681 			i++;
2682 		} else
2683 			i = 99;
2684 	}
2685 
2686 	if (argc < i + 1 || argc > i + 2) {
2687 		kprintf("usage: dl/dw/ds/db [-p|--physical] <address> [num]\n"
2688 			"\tdl - 8 bytes\n"
2689 			"\tdw - 4 bytes\n"
2690 			"\tds - 2 bytes\n"
2691 			"\tdb - 1 byte\n"
2692 			"  -p or --physical only allows memory from a single page to be displayed.\n");
2693 		return 0;
2694 	}
2695 
2696 	address = strtoul(argv[i], NULL, 0);
2697 
2698 	if (argc > i + 1)
2699 		num = atoi(argv[i + 1]);
2700 
2701 	// build the format string
2702 	if (strcmp(argv[0], "db") == 0) {
2703 		itemSize = 1;
2704 		displayWidth = 16;
2705 	} else if (strcmp(argv[0], "ds") == 0) {
2706 		itemSize = 2;
2707 		displayWidth = 8;
2708 	} else if (strcmp(argv[0], "dw") == 0) {
2709 		itemSize = 4;
2710 		displayWidth = 4;
2711 	} else if (strcmp(argv[0], "dl") == 0) {
2712 		itemSize = 8;
2713 		displayWidth = 2;
2714 	} else {
2715 		kprintf("display_mem called in an invalid way!\n");
2716 		return 0;
2717 	}
2718 
2719 	if (num <= 0)
2720 		num = displayWidth;
2721 
2722 	if (physical) {
2723 		int32 offset = address & (B_PAGE_SIZE - 1);
2724 		if (num * itemSize + offset > B_PAGE_SIZE) {
2725 			num = (B_PAGE_SIZE - offset) / itemSize;
2726 			kprintf("NOTE: number of bytes has been cut to page size\n");
2727 		}
2728 
2729 		address = ROUNDOWN(address, B_PAGE_SIZE);
2730 
2731 		kernel_startup = true;
2732 			// vm_get_physical_page() needs to lock...
2733 
2734 		if (vm_get_physical_page(address, &copyAddress, PHYSICAL_PAGE_NO_WAIT) != B_OK) {
2735 			kprintf("getting the hardware page failed.\n");
2736 			kernel_startup = false;
2737 			return 0;
2738 		}
2739 
2740 		kernel_startup = false;
2741 		address += offset;
2742 		copyAddress += offset;
2743 	} else
2744 		copyAddress = address;
2745 
2746 	for (i = 0; i < num; i++) {
2747 		uint32 value;
2748 
2749 		if ((i % displayWidth) == 0) {
2750 			int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2751 			if (i != 0)
2752 				kprintf("\n");
2753 
2754 			kprintf("[0x%lx]  ", address + i * itemSize);
2755 
2756 			for (j = 0; j < displayed; j++) {
2757 				char c;
2758 				if (user_memcpy(&c, (char *)copyAddress + i * itemSize + j, 1) != B_OK) {
2759 					displayed = j;
2760 					break;
2761 				}
2762 				if (!isprint(c))
2763 					c = '.';
2764 
2765 				kprintf("%c", c);
2766 			}
2767 			if (num > displayWidth) {
2768 				// make sure the spacing in the last line is correct
2769 				for (j = displayed; j < displayWidth * itemSize; j++)
2770 					kprintf(" ");
2771 			}
2772 			kprintf("  ");
2773 		}
2774 
2775 		if (user_memcpy(&value, (uint8 *)copyAddress + i * itemSize, itemSize) != B_OK) {
2776 			kprintf("read fault");
2777 			break;
2778 		}
2779 
2780 		switch (itemSize) {
2781 			case 1:
2782 				kprintf(" %02x", *(uint8 *)&value);
2783 				break;
2784 			case 2:
2785 				kprintf(" %04x", *(uint16 *)&value);
2786 				break;
2787 			case 4:
2788 				kprintf(" %08lx", *(uint32 *)&value);
2789 				break;
2790 			case 8:
2791 				kprintf(" %016Lx", *(uint64 *)&value);
2792 				break;
2793 		}
2794 	}
2795 
2796 	kprintf("\n");
2797 
2798 	if (physical) {
2799 		copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE);
2800 		kernel_startup = true;
2801 		vm_put_physical_page(copyAddress);
2802 		kernel_startup = false;
2803 	}
2804 	return 0;
2805 }
2806 
2807 
2808 static void
2809 dump_cache_tree_recursively(vm_cache* cache, int level,
2810 	vm_cache* highlightCache)
2811 {
2812 	// print this cache
2813 	for (int i = 0; i < level; i++)
2814 		kprintf("  ");
2815 	if (cache == highlightCache)
2816 		kprintf("%p <--\n", cache);
2817 	else
2818 		kprintf("%p\n", cache);
2819 
2820 	// recursively print its consumers
2821 	vm_cache* consumer = NULL;
2822 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers,
2823 			consumer)) != NULL) {
2824 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
2825 	}
2826 }
2827 
2828 
2829 static int
2830 dump_cache_tree(int argc, char **argv)
2831 {
2832 	if (argc < 2 || strlen(argv[1]) < 2
2833 		|| argv[1][0] != '0'
2834 		|| argv[1][1] != 'x') {
2835 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2836 		return 0;
2837 	}
2838 
2839 	addr_t address = strtoul(argv[1], NULL, 0);
2840 	if (address == NULL)
2841 	if (address == 0)
2842 
2843 	vm_cache *cache = (vm_cache *)address;
2844 	vm_cache *root = cache;
2845 
2846 	// find the root cache (the transitive source)
2847 	while (root->source != NULL)
2848 		root = root->source;
2849 
2850 	dump_cache_tree_recursively(root, 0, cache);
2851 
2852 	return 0;
2853 }
2854 
2855 
2856 #if DEBUG_CACHE_LIST
2857 
2858 static int
2859 dump_caches(int argc, char **argv)
2860 {
2861 	kprintf("caches:");
2862 
2863 	vm_cache* cache = gDebugCacheList;
2864 	while (cache) {
2865 		kprintf(" %p", cache);
2866 		cache = cache->debug_next;
2867 	}
2868 
2869 	kprintf("\n");
2870 
2871 	return 0;
2872 }
2873 
2874 #endif	// DEBUG_CACHE_LIST
2875 
2876 
2877 static const char *
2878 cache_type_to_string(int32 type)
2879 {
2880 	switch (type) {
2881 		case CACHE_TYPE_RAM:
2882 			return "RAM";
2883 		case CACHE_TYPE_DEVICE:
2884 			return "device";
2885 		case CACHE_TYPE_VNODE:
2886 			return "vnode";
2887 		case CACHE_TYPE_NULL:
2888 			return "null";
2889 
2890 		default:
2891 			return "unknown";
2892 	}
2893 }
2894 
2895 
2896 static int
2897 dump_cache(int argc, char **argv)
2898 {
2899 	vm_cache *cache;
2900 	bool showPages = false;
2901 	int i = 1;
2902 
2903 	if (argc < 2) {
2904 		kprintf("usage: %s [-ps] <address>\n"
2905 			"  If -p is specified, all pages are shown; if -s is used,\n"
2906 			"  only the cache info is shown.\n", argv[0]);
2907 		return 0;
2908 	}
2909 	while (argv[i][0] == '-') {
2910 		char *arg = argv[i] + 1;
2911 		while (arg[0]) {
2912 			if (arg[0] == 'p')
2913 				showPages = true;
2914 			arg++;
2915 		}
2916 		i++;
2917 	}
2918 	if (argv[i] == NULL || strlen(argv[i]) < 2
2919 		|| argv[i][0] != '0'
2920 		|| argv[i][1] != 'x') {
2921 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2922 		return 0;
2923 	}
2924 
2925 	addr_t address = strtoul(argv[i], NULL, 0);
2926 	if (address == NULL)
2927 	if (address == 0)
2928 
2929 	cache = (vm_cache *)address;
2930 
2931 	kprintf("CACHE %p:\n", cache);
2932 	kprintf("  ref_count:    %ld\n", cache->ref_count);
2933 	kprintf("  source:       %p\n", cache->source);
2934 	kprintf("  store:        %p\n", cache->store);
2935 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
2936 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
2937 	kprintf("  virtual_size: 0x%Lx\n", cache->virtual_size);
2938 	kprintf("  temporary:    %ld\n", cache->temporary);
2939 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
2940 	kprintf("  lock.holder:  %ld\n", cache->lock.holder);
2941 	kprintf("  lock.sem:     0x%lx\n", cache->lock.sem);
2942 	kprintf("  areas:\n");
2943 
2944 	for (vm_area *area = cache->areas; area != NULL; area = area->cache_next) {
2945 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
2946 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
2947 		kprintf("\tprotection: 0x%lx\n", area->protection);
2948 		kprintf("\towner:      0x%lx\n", area->address_space->id);
2949 	}
2950 
2951 	kprintf("  consumers:\n");
2952 	vm_cache *consumer = NULL;
2953 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) {
2954 		kprintf("\t%p\n", consumer);
2955 	}
2956 
2957 	kprintf("  pages:\n");
2958 	int32 count = 0;
2959 	for (vm_page *page = cache->page_list; page != NULL; page = page->cache_next) {
2960 		count++;
2961 		if (!showPages)
2962 			continue;
2963 
2964 		if (page->type == PAGE_TYPE_PHYSICAL) {
2965 			kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) wired_count %u\n",
2966 				page, page->physical_page_number, page->cache_offset, page->type, page->state,
2967 				page_state_to_string(page->state), page->wired_count);
2968 		} else if (page->type == PAGE_TYPE_DUMMY) {
2969 			kprintf("\t%p DUMMY PAGE state %u (%s)\n",
2970 				page, page->state, page_state_to_string(page->state));
2971 		} else
2972 			kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
2973 	}
2974 
2975 	if (!showPages)
2976 		kprintf("\t%ld in cache\n", count);
2977 
2978 	return 0;
2979 }
2980 
2981 
2982 static void
2983 dump_area_struct(vm_area *area, bool mappings)
2984 {
2985 	kprintf("AREA: %p\n", area);
2986 	kprintf("name:\t\t'%s'\n", area->name);
2987 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
2988 	kprintf("id:\t\t0x%lx\n", area->id);
2989 	kprintf("base:\t\t0x%lx\n", area->base);
2990 	kprintf("size:\t\t0x%lx\n", area->size);
2991 	kprintf("protection:\t0x%lx\n", area->protection);
2992 	kprintf("wiring:\t\t0x%x\n", area->wiring);
2993 	kprintf("memory_type:\t0x%x\n", area->memory_type);
2994 	kprintf("cache:\t\t%p\n", area->cache);
2995 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
2996 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
2997 	kprintf("cache_next:\t%p\n", area->cache_next);
2998 	kprintf("cache_prev:\t%p\n", area->cache_prev);
2999 
3000 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3001 	if (mappings) {
3002 		kprintf("page mappings:\n");
3003 		while (iterator.HasNext()) {
3004 			vm_page_mapping *mapping = iterator.Next();
3005 			kprintf("  %p", mapping->page);
3006 		}
3007 		kprintf("\n");
3008 	} else {
3009 		uint32 count = 0;
3010 		while (iterator.Next() != NULL) {
3011 			count++;
3012 		}
3013 		kprintf("page mappings:\t%lu\n", count);
3014 	}
3015 }
3016 
3017 
3018 static int
3019 dump_area(int argc, char **argv)
3020 {
3021 	bool mappings = false;
3022 	bool found = false;
3023 	int32 index = 1;
3024 	vm_area *area;
3025 	addr_t num;
3026 
3027 	if (argc < 2) {
3028 		kprintf("usage: area [-m] <id|address|name>\n");
3029 		return 0;
3030 	}
3031 
3032 	if (!strcmp(argv[1], "-m")) {
3033 		mappings = true;
3034 		index++;
3035 	}
3036 
3037 	num = strtoul(argv[index], NULL, 0);
3038 
3039 	// walk through the area list, looking for the arguments as a name
3040 	struct hash_iterator iter;
3041 
3042 	hash_open(sAreaHash, &iter);
3043 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
3044 		if ((area->name != NULL && !strcmp(argv[index], area->name))
3045 			|| (num != 0
3046 				&& ((addr_t)area->id == num
3047 					|| (area->base <= num && area->base + area->size > num)))) {
3048 			dump_area_struct(area, mappings);
3049 			found = true;
3050 		}
3051 	}
3052 
3053 	if (!found)
3054 		kprintf("could not find area %s (%ld)\n", argv[index], num);
3055 	return 0;
3056 }
3057 
3058 
3059 static int
3060 dump_area_list(int argc, char **argv)
3061 {
3062 	vm_area *area;
3063 	struct hash_iterator iter;
3064 	const char *name = NULL;
3065 	int32 id = 0;
3066 
3067 	if (argc > 1) {
3068 		id = strtoul(argv[1], NULL, 0);
3069 		if (id == 0)
3070 			name = argv[1];
3071 	}
3072 
3073 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3074 
3075 	hash_open(sAreaHash, &iter);
3076 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
3077 		if ((id != 0 && area->address_space->id != id)
3078 			|| (name != NULL && strstr(area->name, name) == NULL))
3079 			continue;
3080 
3081 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id, (void *)area->base,
3082 			(void *)area->size, area->protection, area->wiring, area->name);
3083 	}
3084 	hash_close(sAreaHash, &iter, false);
3085 	return 0;
3086 }
3087 
3088 
3089 static int
3090 dump_available_memory(int argc, char **argv)
3091 {
3092 	kprintf("Available memory: %Ld/%lu bytes\n",
3093 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
3094 	return 0;
3095 }
3096 
3097 
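/*!	Deletes all areas of the given address space: reserved address ranges are
	simply removed and freed, all remaining areas are deleted via
	delete_area(). The address space is write locked for the duration.
*/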
3098 status_t
3099 vm_delete_areas(struct vm_address_space *addressSpace)
3100 {
3101 	vm_area *area;
3102 	vm_area *next, *last = NULL;
3103 
3104 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3105 		addressSpace->id));
3106 
3107 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
3108 
3109 	// remove all reserved areas in this address space
3110 
3111 	for (area = addressSpace->areas; area; area = next) {
3112 		next = area->address_space_next;
3113 
3114 		if (area->id == RESERVED_AREA_ID) {
3115 			// just remove it
3116 			if (last)
3117 				last->address_space_next = area->address_space_next;
3118 			else
3119 				addressSpace->areas = area->address_space_next;
3120 
3121 			vm_put_address_space(addressSpace);
3122 			free(area);
3123 			continue;
3124 		}
3125 
3126 		last = area;
3127 	}
3128 
3129 	// delete all the areas in this address space
3130 
3131 	for (area = addressSpace->areas; area; area = next) {
3132 		next = area->address_space_next;
3133 		delete_area(addressSpace, area);
3134 	}
3135 
3136 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
3137 	return B_OK;
3138 }
3139 
3140 
3141 static area_id
3142 vm_area_for(team_id team, addr_t address)
3143 {
3144 	AddressSpaceReadLocker locker(team);
3145 	if (!locker.IsLocked())
3146 		return B_BAD_TEAM_ID;
3147 
3148 	vm_area *area = vm_area_lookup(locker.AddressSpace(), address);
3149 	if (area != NULL)
3150 		return area->id;
3151 
3152 	return B_ERROR;
3153 }
3154 
3155 
3156 /*!
3157 	Frees physical pages that were used during the boot process.
3158 */
3159 static void
3160 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
3161 {
3162 	// free all physical pages in the specified range
3163 
3164 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3165 		addr_t physicalAddress;
3166 		uint32 flags;
3167 
3168 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
3169 			vm_page *page = vm_lookup_page(current / B_PAGE_SIZE);
3170 			if (page != NULL)
3171 				vm_page_set_state(page, PAGE_STATE_FREE);
3172 		}
3173 	}
3174 
3175 	// unmap the memory
3176 	map->ops->unmap(map, start, end - 1);
3177 }
3178 
3179 
3180 void
3181 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3182 {
3183 	vm_translation_map *map = &vm_kernel_address_space()->translation_map;
3184 	addr_t end = start + size;
3185 	addr_t lastEnd = start;
3186 	vm_area *area;
3187 
3188 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
3189 
3190 	// The areas are sorted in virtual address space order, so
3191 	// we just have to find the holes between them that fall
3192 	// into the area we should dispose
3193 
3194 	map->ops->lock(map);
3195 
3196 	for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) {
3197 		addr_t areaStart = area->base;
3198 		addr_t areaEnd = areaStart + area->size;
3199 
3200 		if (area->id == RESERVED_AREA_ID)
3201 			continue;
3202 
3203 		if (areaEnd >= end) {
3204 			// we are done, the areas are already beyond what we have to free
3205 			lastEnd = end;
3206 			break;
3207 		}
3208 
3209 		if (areaStart > lastEnd) {
3210 			// this is something we can free
3211 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
3212 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
3213 		}
3214 
3215 		lastEnd = areaEnd;
3216 	}
3217 
3218 	if (lastEnd < end) {
3219 		// we can also get rid of some space at the end of the area
3220 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
3221 		unmap_and_free_physical_pages(map, lastEnd, end);
3222 	}
3223 
3224 	map->ops->unlock(map);
3225 }
3226 
3227 
3228 static void
3229 create_preloaded_image_areas(struct preloaded_image *image)
3230 {
3231 	char name[B_OS_NAME_LENGTH];
3232 	void *address;
3233 	int32 length;
3234 
3235 	// use file name to create a good area name
3236 	char *fileName = strrchr(image->name, '/');
3237 	if (fileName == NULL)
3238 		fileName = image->name;
3239 	else
3240 		fileName++;
3241 
3242 	length = strlen(fileName);
3243 	// make sure there is enough space for the suffix
3244 	if (length > 25)
3245 		length = 25;
3246 
3247 	memcpy(name, fileName, length);
3248 	strcpy(name + length, "_text");
3249 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
3250 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3251 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3252 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3253 		// this will later be remapped read-only/executable by the
3254 		// ELF initialization code
3255 
3256 	strcpy(name + length, "_data");
3257 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
3258 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3259 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3260 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3261 }
3262 
3263 
3264 /**	Frees all previously allocated kernel arguments areas from the kernel_args
3265  *	structure. Any boot loader resources contained in these arguments must not
3266  *	be accessed anymore past this point.
3267  */
3268 
3269 void
3270 vm_free_kernel_args(kernel_args *args)
3271 {
3272 	uint32 i;
3273 
3274 	TRACE(("vm_free_kernel_args()\n"));
3275 
3276 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3277 		area_id area = area_for((void *)args->kernel_args_range[i].start);
3278 		if (area >= B_OK)
3279 			delete_area(area);
3280 	}
3281 }
3282 
3283 
3284 static void
3285 allocate_kernel_args(kernel_args *args)
3286 {
3287 	uint32 i;
3288 
3289 	TRACE(("allocate_kernel_args()\n"));
3290 
3291 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3292 		void *address = (void *)args->kernel_args_range[i].start;
3293 
3294 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
3295 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3296 	}
3297 }
3298 
3299 
3300 static void
3301 unreserve_boot_loader_ranges(kernel_args *args)
3302 {
3303 	uint32 i;
3304 
3305 	TRACE(("unreserve_boot_loader_ranges()\n"));
3306 
3307 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
3308 		vm_unreserve_address_range(vm_kernel_address_space_id(),
3309 			(void *)args->virtual_allocated_range[i].start,
3310 			args->virtual_allocated_range[i].size);
3311 	}
3312 }
3313 
3314 
3315 static void
3316 reserve_boot_loader_ranges(kernel_args *args)
3317 {
3318 	uint32 i;
3319 
3320 	TRACE(("reserve_boot_loader_ranges()\n"));
3321 
3322 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
3323 		void *address = (void *)args->virtual_allocated_range[i].start;
3324 
3325 		// If the address is not a kernel address, we just skip it. The
3326 		// architecture specific code has to deal with it.
3327 		if (!IS_KERNEL_ADDRESS(address)) {
3328 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3329 				address, args->virtual_allocated_range[i].size);
3330 			continue;
3331 		}
3332 
3333 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address,
3334 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3335 		if (status < B_OK)
3336 			panic("could not reserve boot loader ranges\n");
3337 	}
3338 }
3339 
3340 
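/*!	Finds a free range of kernel virtual address space between (or next to)
	the boot loader's virtual_allocated_range entries and extends one of the
	entries to cover it. Returns the start of the range, or 0 if no large
	enough gap was found.
*/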
3341 static addr_t
3342 allocate_early_virtual(kernel_args *args, size_t size)
3343 {
3344 	addr_t spot = 0;
3345 	uint32 i;
3346 	int last_valloc_entry = 0;
3347 
3348 	size = PAGE_ALIGN(size);
3349 	// find a slot in the virtual allocation addr range
3350 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
3351 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3352 			+ args->virtual_allocated_range[i - 1].size;
3353 		last_valloc_entry = i;
3354 		// check to see if the space between this one and the last is big enough
3355 		if (previousRangeEnd >= KERNEL_BASE
3356 			&& args->virtual_allocated_range[i].start
3357 				- previousRangeEnd >= size) {
3358 			spot = previousRangeEnd;
3359 			args->virtual_allocated_range[i - 1].size += size;
3360 			goto out;
3361 		}
3362 	}
3363 	if (spot == 0) {
3364 		// we didn't find a gap between the allocation ranges. This is OK;
3365 		// see if there's a gap after the last one
3366 		addr_t lastRangeEnd
3367 			= args->virtual_allocated_range[last_valloc_entry].start
3368 				+ args->virtual_allocated_range[last_valloc_entry].size;
3369 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
3370 			spot = lastRangeEnd;
3371 			args->virtual_allocated_range[last_valloc_entry].size += size;
3372 			goto out;
3373 		}
3374 		// see if there's a gap before the first one
3375 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
3376 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
3377 				args->virtual_allocated_range[0].start -= size;
3378 				spot = args->virtual_allocated_range[0].start;
3379 				goto out;
3380 			}
3381 		}
3382 	}
3383 
3384 out:
3385 	return spot;
3386 }
3387 
3388 
3389 static bool
3390 is_page_in_physical_memory_range(kernel_args *args, addr_t address)
3391 {
3392 	// TODO: horrible brute-force method of determining if the page can be allocated
3393 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3394 		if (address >= args->physical_memory_range[i].start
3395 			&& address < args->physical_memory_range[i].start
3396 				+ args->physical_memory_range[i].size)
3397 			return true;
3398 	}
3399 	return false;
3400 }
3401 
3402 
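/*!	Allocates one physical page by extending one of the boot loader's
	physical_allocated_range entries by a page. Returns the physical page
	number, or 0 if no suitable page could be found.
*/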
3403 static addr_t
3404 allocate_early_physical_page(kernel_args *args)
3405 {
3406 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3407 		addr_t nextPage;
3408 
3409 		nextPage = args->physical_allocated_range[i].start
3410 			+ args->physical_allocated_range[i].size;
3411 		// see if the page after the next allocated paddr run can be allocated
3412 		if (i + 1 < args->num_physical_allocated_ranges
3413 			&& args->physical_allocated_range[i + 1].size != 0) {
3414 			// see if the next page will collide with the next allocated range
3415 			if (nextPage >= args->physical_allocated_range[i+1].start)
3416 				continue;
3417 		}
3418 		// see if the next physical page fits in the memory block
3419 		if (is_page_in_physical_memory_range(args, nextPage)) {
3420 			// we got one!
3421 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3422 			return nextPage / B_PAGE_SIZE;
3423 		}
3424 	}
3425 
3426 	return 0;
3427 		// could not allocate a block
3428 }
3429 
3430 
3431 /*!
3432 	This one uses the kernel_args' physical and virtual memory ranges to
3433 	allocate some pages before the VM is completely up.
3434 */
3435 addr_t
3436 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize,
3437 	uint32 attributes)
3438 {
3439 	if (physicalSize > virtualSize)
3440 		physicalSize = virtualSize;
3441 
3442 	// find the vaddr to allocate at
3443 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
3444 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3445 
3446 	// map the pages
3447 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3448 		addr_t physicalAddress = allocate_early_physical_page(args);
3449 		if (physicalAddress == 0)
3450 			panic("error allocating early page!\n");
3451 
3452 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3453 
3454 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3455 			physicalAddress * B_PAGE_SIZE, attributes,
3456 			&allocate_early_physical_page);
3457 	}
3458 
3459 	return virtualBase;
3460 }
3461 
3462 
3463 status_t
3464 vm_init(kernel_args *args)
3465 {
3466 	struct preloaded_image *image;
3467 	void *address;
3468 	status_t err = 0;
3469 	uint32 i;
3470 
3471 	TRACE(("vm_init: entry\n"));
3472 	err = arch_vm_translation_map_init(args);
3473 	err = arch_vm_init(args);
3474 
3475 	// initialize some globals
3476 	sNextAreaID = 1;
3477 	sAreaHashLock = -1;
3478 	sAvailableMemoryLock.sem = -1;
3479 
3480 	vm_page_init_num_pages(args);
3481 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3482 
3483 	// map in the new heap and initialize it
3484 	size_t heapSize = INITIAL_HEAP_SIZE;
3485 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3486 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3487 	TRACE(("heap at 0x%lx\n", heapBase));
3488 	heap_init(heapBase, heapSize);
3489 
3490 	vm_low_memory_init();
3491 
3492 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
3493 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
3494 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3495 	slab_init(args, slabInitialBase, slabInitialSize);
3496 
3497 	// initialize the free page list and physical page mapper
3498 	vm_page_init(args);
3499 
3500 	// initialize the hash table that stores the pages mapped to caches
3501 	vm_cache_init(args);
3502 
3503 	{
3504 		vm_area *area;
3505 		sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area,
3506 			&area_compare, &area_hash);
3507 		if (sAreaHash == NULL)
3508 			panic("vm_init: error creating area hash table\n");
3509 	}
3510 
3511 	vm_address_space_init();
3512 	reserve_boot_loader_ranges(args);
3513 
3514 	// do any further initialization that the architecture dependent layers may need now
3515 	arch_vm_translation_map_init_post_area(args);
3516 	arch_vm_init_post_area(args);
3517 	vm_page_init_post_area(args);
3518 
3519 	// allocate areas to represent stuff that already exists
3520 
3521 	address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE);
3522 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3523 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3524 
3525 	address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
3526 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
3527 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
3528 		| B_KERNEL_WRITE_AREA);
3529 
3530 	allocate_kernel_args(args);
3531 
3532 	args->kernel_image.name = "kernel";
3533 		// the lazy boot loader currently doesn't set the kernel's name...
3534 	create_preloaded_image_areas(&args->kernel_image);
3535 
3536 	// allocate areas for preloaded images
3537 	for (image = args->preloaded_images; image != NULL; image = image->next) {
3538 		create_preloaded_image_areas(image);
3539 	}
3540 
3541 	// allocate kernel stacks
3542 	for (i = 0; i < args->num_cpus; i++) {
3543 		char name[64];
3544 
3545 		sprintf(name, "idle thread %lu kstack", i + 1);
3546 		address = (void *)args->cpu_kstack[i].start;
3547 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3548 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3549 	}
3550 
3551 	// add some debugger commands
3552 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3553 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
3554 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
3555 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
3556 #if DEBUG_CACHE_LIST
3557 	add_debugger_command("caches", &dump_caches, "List vm_cache structures");
3558 #endif
3559 	add_debugger_command("avail", &dump_available_memory, "Dump available memory");
3560 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3561 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3562 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3563 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3564 
3565 	TRACE(("vm_init: exit\n"));
3566 
3567 	return err;
3568 }
3569 
3570 
3571 status_t
3572 vm_init_post_sem(kernel_args *args)
3573 {
3574 	vm_area *area;
3575 
3576 	// This frees all unused boot loader resources and makes its space available again
3577 	arch_vm_init_end(args);
3578 	unreserve_boot_loader_ranges(args);
3579 
3580 	// fill in all of the semaphores that were not allocated before
3581 	// since we're still single threaded and only the kernel address space exists,
3582 	// it isn't that hard to find all of the ones we need to create
3583 
3584 	benaphore_init(&sAvailableMemoryLock, "available memory lock");
3585 	arch_vm_translation_map_init_post_sem(args);
3586 	vm_address_space_init_post_sem();
3587 
3588 	for (area = vm_kernel_address_space()->areas; area;
3589 			area = area->address_space_next) {
3590 		if (area->id == RESERVED_AREA_ID)
3591 			continue;
3592 
3593 		if (area->cache->lock.sem < 0)
3594 			mutex_init(&area->cache->lock, "vm_cache");
3595 	}
3596 
3597 	sAreaHashLock = create_sem(WRITE_COUNT, "area hash");
3598 	mutex_init(&sAreaCacheLock, "area->cache");
3599 	mutex_init(&sMappingLock, "page mappings");
3600 
3601 	slab_init_post_sem();
3602 	return heap_init_post_sem();
3603 }
3604 
3605 
3606 status_t
3607 vm_init_post_thread(kernel_args *args)
3608 {
3609 	vm_page_init_post_thread(args);
3610 	vm_daemon_init();
3611 	vm_low_memory_init_post_thread();
3612 	return heap_init_post_thread();
3613 }
3614 
3615 
3616 status_t
3617 vm_init_post_modules(kernel_args *args)
3618 {
3619 	return arch_vm_init_post_modules(args);
3620 }
3621 
3622 
3623 void
3624 permit_page_faults(void)
3625 {
3626 	struct thread *thread = thread_get_current_thread();
3627 	if (thread != NULL)
3628 		atomic_add(&thread->page_faults_allowed, 1);
3629 }
3630 
3631 
3632 void
3633 forbid_page_faults(void)
3634 {
3635 	struct thread *thread = thread_get_current_thread();
3636 	if (thread != NULL)
3637 		atomic_add(&thread->page_faults_allowed, -1);
3638 }
3639 
3640 
3641 status_t
3642 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3643 	addr_t *newIP)
3644 {
3645 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, faultAddress));
3646 
3647 	*newIP = 0;
3648 
3649 	status_t status = vm_soft_fault(address, isWrite, isUser);
3650 	if (status < B_OK) {
3651 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3652 			strerror(status), address, faultAddress, isWrite, isUser,
3653 			thread_get_current_thread_id());
3654 		if (!isUser) {
3655 			struct thread *thread = thread_get_current_thread();
3656 			if (thread != NULL && thread->fault_handler != 0) {
3657 				// this will cause the arch dependent page fault handler to
3658 				// modify the IP on the interrupt frame or whatever to return
3659 				// to this address
3660 				*newIP = thread->fault_handler;
3661 			} else {
3662 				// unhandled page fault in the kernel
3663 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
3664 					address, faultAddress);
3665 			}
3666 		} else {
3667 #if 1
3668 			// ToDo: remove me once we have proper userland debugging support (and tools)
3669 			vm_address_space *addressSpace = vm_get_current_user_address_space();
3670 			vm_area *area;
3671 
3672 			acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3673 			area = vm_area_lookup(addressSpace, faultAddress);
3674 
3675 			dprintf("vm_page_fault: sending team \"%s\" 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n",
3676 				thread_get_current_thread()->team->name,
3677 				thread_get_current_thread()->team->id, faultAddress,
3678 				area ? area->name : "???", faultAddress - (area ? area->base : 0x0));
3679 
3680 			// We can print a stack trace of the userland thread here.
3681 #if 1
3682 			if (area) {
3683 				struct stack_frame {
3684 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
3685 						struct stack_frame*	previous;
3686 						void*				return_address;
3687 					#else
3688 						// ...
3689 					#warning writeme
3690 					#endif
3691 				} frame;
3692 #ifdef __INTEL__
3693 				struct iframe *iframe = i386_get_user_iframe();
3694 				if (iframe == NULL)
3695 					panic("iframe is NULL!");
3696 
3697 				status_t status = user_memcpy(&frame, (void *)iframe->ebp,
3698 					sizeof(struct stack_frame));
3699 #elif defined(__POWERPC__)
3700 				struct iframe *iframe = ppc_get_user_iframe();
3701 				if (iframe == NULL)
3702 					panic("iframe is NULL!");
3703 
3704 				status_t status = user_memcpy(&frame, (void *)iframe->r1,
3705 					sizeof(struct stack_frame));
3706 #else
3707 #	warning "vm_page_fault() stack trace won't work"
3708 				status = B_ERROR;
3709 #endif
3710 
3711 				dprintf("stack trace:\n");
3712 				int32 maxFrames = 50;
3713 				while (status == B_OK && --maxFrames >= 0
3714 						&& frame.return_address != NULL) {
3715 					dprintf("  %p", frame.return_address);
3716 					area = vm_area_lookup(addressSpace,
3717 						(addr_t)frame.return_address);
3718 					if (area) {
3719 						dprintf(" (%s + %#lx)", area->name,
3720 							(addr_t)frame.return_address - area->base);
3721 					}
3722 					dprintf("\n");
3723 
3724 					status = user_memcpy(&frame, frame.previous,
3725 						sizeof(struct stack_frame));
3726 				}
3727 			}
3728 #endif	// 1 (stack trace)
3729 
3730 			release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3731 			vm_put_address_space(addressSpace);
3732 #endif
3733 			if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV))
3734 				send_signal(team_get_current_team_id(), SIGSEGV);
3735 		}
3736 	}
3737 
3738 	return B_HANDLED_INTERRUPT;
3739 }
3740 
3741 
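/*!	Acquires a reference to and locks the source of the given cache.
	Returns \c B_ERROR if there is no source, \c B_BUSY if the source is
	busy; if the source only becomes busy while its lock is being acquired,
	the operation is retried.
*/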
3742 static inline status_t
3743 fault_acquire_locked_source(vm_cache *cache, vm_cache **_source)
3744 {
3745 retry:
3746 	vm_cache *source = cache->source;
3747 	if (source == NULL)
3748 		return B_ERROR;
3749 	if (source->busy)
3750 		return B_BUSY;
3751 
3752 	vm_cache_acquire_ref(source);
3753 
3754 	mutex_lock(&source->lock);
3755 
3756 	if (source->busy) {
3757 		mutex_unlock(&source->lock);
3758 		vm_cache_release_ref(source);
3759 		goto retry;
3760 	}
3761 
3762 	*_source = source;
3763 	return B_OK;
3764 }
3765 
3766 
3767 /*!
3768 	Inserts a busy dummy page into a cache, and makes sure the cache won't go
3769 	away by grabbing a reference to it.
3770 */
3771 static inline void
3772 fault_insert_dummy_page(vm_cache *cache, vm_dummy_page &dummyPage,
3773 	off_t cacheOffset)
3774 {
3775 	dummyPage.state = PAGE_STATE_BUSY;
3776 	vm_cache_acquire_ref(cache);
3777 	vm_cache_insert_page(cache, &dummyPage, cacheOffset);
3778 	dummyPage.busy_condition.Publish(&dummyPage, "page");
3779 }
3780 
3781 
3782 /*!
3783 	Removes the busy dummy page from a cache, and releases its reference to
3784 	the cache.
3785 */
3786 static inline void
3787 fault_remove_dummy_page(vm_dummy_page &dummyPage, bool isLocked)
3788 {
3789 	vm_cache *cache = dummyPage.cache;
3790 	if (!isLocked)
3791 		mutex_lock(&cache->lock);
3792 
3793 	if (dummyPage.state == PAGE_STATE_BUSY) {
3794 		vm_cache_remove_page(cache, &dummyPage);
3795 		dummyPage.state = PAGE_STATE_INACTIVE;
3796 		dummyPage.busy_condition.Unpublish();
3797 	}
3798 
3799 	if (!isLocked)
3800 		mutex_unlock(&cache->lock);
3801 
3802 	vm_cache_release_ref(cache);
3803 }
3804 
3805 
3806 /*!
3807 	Finds a page at the specified \a cacheOffset in either \a topCache
3808 	or in its source chain. Will also page in a missing page from the backing
3809 	store in case there is a cache whose store has the page.
3810 	If it couldn't find a page, it will return the vm_cache that should get it,
3811 	otherwise, it will return the vm_cache that contains the page.
3812 	It always grabs a reference to the vm_cache that it returns, and also locks it.
3813 */
3814 static inline status_t
3815 fault_find_page(vm_translation_map *map, vm_cache *topCache,
3816 	off_t cacheOffset, bool isWrite, vm_dummy_page &dummyPage,
3817 	vm_cache **_pageCache, vm_page** _page, bool* _restart)
3818 {
3819 	*_restart = false;
3820 	vm_cache *cache = topCache;
3821 	vm_cache *lastCache = NULL;
3822 	vm_page *page = NULL;
3823 
3824 	vm_cache_acquire_ref(cache);
3825 	mutex_lock(&cache->lock);
3826 		// we release this later in the loop
3827 
3828 	while (cache != NULL) {
3829 		if (lastCache != NULL)
3830 			vm_cache_release_ref(lastCache);
3831 
3832 		// we hold the lock of the cache at this point
3833 
3834 		lastCache = cache;
3835 
3836 		for (;;) {
3837 			page = vm_cache_lookup_page(cache, cacheOffset);
3838 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
3839 				// we found the page
3840 				break;
3841 			}
3842 			if (page == NULL || page == &dummyPage)
3843 				break;
3844 
3845 			// page must be busy -- wait for it to become unbusy
3846 			{
3847 				ConditionVariableEntry<vm_page> entry;
3848 				entry.Add(page);
3849 				mutex_unlock(&cache->lock);
3850 				entry.Wait();
3851 				mutex_lock(&cache->lock);
3852 			}
3853 
3854 			if (cache->busy) {
3855 				// The cache became busy, which means it is about to be
3856 				// removed by vm_cache_remove_consumer(). We start again with
3857 				// the top cache.
3858 				ConditionVariableEntry<vm_cache> entry;
3859 				entry.Add(cache);
3860 				mutex_unlock(&cache->lock);
3861 				vm_cache_release_ref(cache);
3862 				entry.Wait();
3863 				*_restart = true;
3864 				return B_OK;
3865 			}
3866 		}
3867 
3868 		if (page != NULL && page != &dummyPage)
3869 			break;
3870 
3871 		// The current cache does not contain the page we're looking for
3872 
3873 		// see if the vm_store has it
3874 		vm_store *store = cache->store;
3875 		if (store->ops->has_page != NULL
3876 			&& store->ops->has_page(store, cacheOffset)) {
3877 			// insert a fresh page and mark it busy -- we're going to read it in
3878 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
3879 			vm_cache_insert_page(cache, page, cacheOffset);
3880 
3881 			ConditionVariable<vm_page> busyCondition;
3882 			busyCondition.Publish(page, "page");
3883 
3884 			mutex_unlock(&cache->lock);
3885 
3886 			// get a virtual address for the page
3887 			iovec vec;
3888 			map->ops->get_physical_page(
3889 				page->physical_page_number * B_PAGE_SIZE,
3890 				(addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT);
3891 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
3892 
3893 			// read it in
3894 			status_t status = store->ops->read(store, cacheOffset, &vec, 1,
3895 				&bytesRead, false);
3896 
3897 			map->ops->put_physical_page((addr_t)vec.iov_base);
3898 
3899 			mutex_lock(&cache->lock);
3900 
3901 			if (status < B_OK) {
3902 				// on error remove and free the page
3903 				dprintf("reading page from store %p (cache %p) returned: %s!\n",
3904 					store, cache, strerror(status));
3905 
3906 				busyCondition.Unpublish();
3907 				vm_cache_remove_page(cache, page);
3908 				vm_page_set_state(page, PAGE_STATE_FREE);
3909 
3910 				mutex_unlock(&cache->lock);
3911 				vm_cache_release_ref(cache);
3912 				return status;
3913 			}
3914 
3915 			// mark the page unbusy again
3916 			page->state = PAGE_STATE_ACTIVE;
3917 			busyCondition.Unpublish();
3918 			break;
3919 		}
3920 
3921 		// If we're at the top most cache, insert the dummy page here to keep
3922 		// other threads from faulting on the same address and chasing us up the
3923 		// cache chain
3924 		if (cache == topCache && dummyPage.state != PAGE_STATE_BUSY)
3925 			fault_insert_dummy_page(cache, dummyPage, cacheOffset);
3926 
3927 		vm_cache *nextCache;
3928 		status_t status = fault_acquire_locked_source(cache, &nextCache);
3929 		if (status == B_BUSY) {
3930 			// the source cache is currently in the process of being merged
3931 			// with its only consumer (this cache); since its pages are moved
3932 			// upwards, too, we try this cache again
3933 			mutex_unlock(&cache->lock);
3934 			thread_yield(true);
3935 			mutex_lock(&cache->lock);
3936 			if (cache->busy) {
3937 				// The cache became busy, which means it is about to be
3938 				// removed by vm_cache_remove_consumer(). We start again with
3939 				// the top cache.
3940 				ConditionVariableEntry<vm_cache> entry;
3941 				entry.Add(cache);
3942 				mutex_unlock(&cache->lock);
3943 				vm_cache_release_ref(cache);
3944 				entry.Wait();
3945 				*_restart = true;
3946 				return B_OK;
3947 			}
3948 			lastCache = NULL;
3949 			continue;
3950 		} else if (status < B_OK)
3951 			nextCache = NULL;
3952 
3953 		mutex_unlock(&cache->lock);
3954 			// at this point, we still hold a ref to this cache (through lastCache)
3955 
3956 		cache = nextCache;
3957 	}
3958 
3959 	if (page == NULL) {
3960 		// there was no adequate page, determine the cache for a clean one
3961 		if (cache == NULL) {
3962 			// We rolled off the end of the cache chain, so we need to decide which
3963 			// cache will get the new page we're about to create.
3964 			cache = isWrite ? topCache : lastCache;
3965 				// Read-only pages go into the deepest cache - only the
3966 				// top most cache may have direct write access.
3967 			vm_cache_acquire_ref(cache);
3968 			mutex_lock(&cache->lock);
3969 
3970 			if (cache->busy) {
3971 				// The cache became busy, which means it is about to be
3972 				// removed by vm_cache_remove_consumer(). We start again with
3973 				// the top cache.
3974 				ConditionVariableEntry<vm_cache> entry;
3975 				entry.Add(cache);
3976 				mutex_unlock(&cache->lock);
3977 				vm_cache_release_ref(cache);
3978 				entry.Wait();
3979 				*_restart = true;
3980 			} else {
3981 				vm_page* newPage = vm_cache_lookup_page(cache, cacheOffset);
3982 				if (newPage && newPage != &dummyPage) {
3983 					// A new page turned up. It could be the one we're looking
3984 					// for, but it could as well be a dummy page from someone
3985 					// else or an otherwise busy page. We can't really handle
3986 					// that here. Hence we completely restart this function.
3987 					mutex_unlock(&cache->lock);
3988 					vm_cache_release_ref(cache);
3989 					*_restart = true;
3990 				}
3991 			}
3992 		}
3993 
3994 		// release the reference of the last vm_cache we still have from the loop above
3995 		if (lastCache != NULL)
3996 			vm_cache_release_ref(lastCache);
3997 	} else {
3998 		// we still own a reference to the cache
3999 	}
4000 
4001 	*_pageCache = cache;
4002 	*_page = page;
4003 	return B_OK;
4004 }
4005 
4006 
4007 /*!
4008 	Returns the page that should be mapped into the area that got the fault.
4009 	It returns the owner of the page in \a sourceCache - it keeps a reference
4010 	to it, and has also locked it on exit.
4011 */
4012 static inline status_t
4013 fault_get_page(vm_translation_map *map, vm_cache *topCache, off_t cacheOffset,
4014 	bool isWrite, vm_dummy_page &dummyPage, vm_cache **_sourceCache,
4015 	vm_cache **_copiedSource, vm_page** _page)
4016 {
4017 	vm_cache *cache;
4018 	vm_page *page;
4019 	bool restart;
4020 	for (;;) {
4021 		status_t status = fault_find_page(map, topCache, cacheOffset, isWrite,
4022 			dummyPage, &cache, &page, &restart);
4023 		if (status != B_OK)
4024 			return status;
4025 
4026 		if (!restart)
4027 			break;
4028 
4029 		// Remove the dummy page, if it has been inserted.
4030 		mutex_lock(&topCache->lock);
4031 
4032 		if (dummyPage.state == PAGE_STATE_BUSY) {
4033 			ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4034 				&dummyPage);
4035 			fault_remove_dummy_page(dummyPage, true);
4036 		}
4037 
4038 		mutex_unlock(&topCache->lock);
4039 	}
4040 
4041 	if (page == NULL) {
4042 		// we still haven't found a page, so we allocate a clean one
4043 
4044 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4045 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number));
4046 
4047 		// Insert the new page into our cache and remove the dummy page, if necessary
4048 
4049 		// If we inserted a dummy page into this cache (i.e. if it is the top
4050 		// cache), we have to remove it now
4051 		if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cache) {
4052 #ifdef DEBUG_PAGE_CACHE_TRANSITIONS
4053 			page->debug_flags = dummyPage.debug_flags | 0x8;
4054 			if (dummyPage.collided_page != NULL) {
4055 				dummyPage.collided_page->collided_page = page;
4056 				page->collided_page = dummyPage.collided_page;
4057 			}
4058 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4059 
4060 			fault_remove_dummy_page(dummyPage, true);
4061 		}
4062 
4063 		vm_cache_insert_page(cache, page, cacheOffset);
4064 
4065 		if (dummyPage.state == PAGE_STATE_BUSY) {
4066 #ifdef DEBUG_PAGE_CACHE_TRANSITIONS
4067 			page->debug_flags = dummyPage.debug_flags | 0x10;
4068 			if (dummyPage.collided_page != NULL) {
4069 				dummyPage.collided_page->collided_page = page;
4070 				page->collided_page = dummyPage.collided_page;
4071 			}
4072 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4073 
4074 			// This is not the top cache into which we inserted the dummy page,
4075 			// let's remove it from there. We need to temporarily unlock our
4076 			// cache to comply with the cache locking policy.
4077 			mutex_unlock(&cache->lock);
4078 			fault_remove_dummy_page(dummyPage, false);
4079 			mutex_lock(&cache->lock);
4080 		}
4081 	}
4082 
4083 	// We now have the page and a cache it belongs to - we now need to make
4084 	// sure that the area's cache can access it, too, and sees the correct data
4085 
4086 	if (page->cache != topCache && isWrite) {
4087 		// Now we have a page that has the data we want, but in the wrong cache
4088 		// object so we need to copy it and stick it into the top cache.
4089 		// Note that this and the "if" before are mutually exclusive. If
4090 		// fault_find_page() didn't find the page, it would return the top cache
4091 		// for write faults.
4092 		vm_page *sourcePage = page;
4093 		void *source, *dest;
4094 
4095 		// ToDo: if memory is low, it might be a good idea to steal the page
4096 		//	from our source cache - if possible, that is
4097 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4098 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4099 #if 0
4100 if (cacheOffset == 0x12000)
4101 	dprintf("%ld: copy page %p to page %p from cache %p to cache %p\n", find_thread(NULL),
4102 		sourcePage, page, sourcePage->cache, topCache);
4103 #endif
4104 
4105 		// try to get a mapping for the src and dest page so we can copy it
4106 		for (;;) {
4107 			map->ops->get_physical_page(sourcePage->physical_page_number * B_PAGE_SIZE,
4108 				(addr_t *)&source, PHYSICAL_PAGE_CAN_WAIT);
4109 
4110 			if (map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE,
4111 					(addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT) == B_OK)
4112 				break;
4113 
4114 			// it couldn't map the second one, so sleep and retry
4115 			// keeps an extremely rare deadlock from occurring
4116 			map->ops->put_physical_page((addr_t)source);
4117 			snooze(5000);
4118 		}
4119 
4120 		memcpy(dest, source, B_PAGE_SIZE);
4121 		map->ops->put_physical_page((addr_t)source);
4122 		map->ops->put_physical_page((addr_t)dest);
4123 
4124 		if (sourcePage->state != PAGE_STATE_MODIFIED)
4125 			vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE);
4126 
4127 		mutex_unlock(&cache->lock);
4128 		mutex_lock(&topCache->lock);
4129 
4130 		// Since the top cache has been unlocked for a while, someone else
4131 		// (vm_cache_remove_consumer()) might have replaced our dummy page.
4132 		vm_page* newPage = NULL;
4133 		for (;;) {
4134 			newPage = vm_cache_lookup_page(topCache, cacheOffset);
4135 			if (newPage == NULL || newPage == &dummyPage) {
4136 				newPage = NULL;
4137 				break;
4138 			}
4139 
4140 			if (newPage->state != PAGE_STATE_BUSY)
4141 				break;
4142 
4143 			// The page is busy, wait till it becomes unbusy.
4144 			ConditionVariableEntry<vm_page> entry;
4145 			entry.Add(newPage);
4146 			mutex_unlock(&topCache->lock);
4147 			entry.Wait();
4148 			mutex_lock(&topCache->lock);
4149 		}
4150 
4151 		if (newPage) {
4152 			// Indeed someone else threw in a page. We free ours and are happy.
4153 			vm_page_set_state(page, PAGE_STATE_FREE);
4154 			page = newPage;
4155 		} else {
4156 			// Insert the new page into our cache and remove the dummy page, if
4157 			// necessary.
4158 
4159 			// if we inserted a dummy page into this cache, we have to remove it now
4160 			if (dummyPage.state == PAGE_STATE_BUSY) {
4161 				ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4162 					&dummyPage);
4163 				fault_remove_dummy_page(dummyPage, true);
4164 			}
4165 
4166 			vm_cache_insert_page(topCache, page, cacheOffset);
4167 		}
4168 
4169 		*_copiedSource = cache;
4170 
4171 		cache = topCache;
4172 		vm_cache_acquire_ref(cache);
4173 	}
4174 
4175 	*_sourceCache = cache;
4176 	*_page = page;
4177 	return B_OK;
4178 }
4179 
4180 
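/*!
	Resolves the page fault at \a originalAddress. Determines the address
	space and area the fault happened in, validates the access, and then
	finds (or allocates/copies) the page via fault_get_page() and maps it
	with the appropriate protection. \a isWrite and \a isUser describe the
	access that triggered the fault; copy-on-write faults end up with a
	private copy of the page in the area's top cache.
*/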
4181 static status_t
4182 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser)
4183 {
4184 	vm_address_space *addressSpace;
4185 
4186 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4187 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4188 
4189 	addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
4190 
4191 	if (IS_KERNEL_ADDRESS(address)) {
4192 		addressSpace = vm_get_kernel_address_space();
4193 	} else if (IS_USER_ADDRESS(address)) {
4194 		addressSpace = vm_get_current_user_address_space();
4195 		if (addressSpace == NULL) {
4196 			if (!isUser) {
4197 				dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n");
4198 				return B_BAD_ADDRESS;
4199 			} else {
4200 				// XXX weird state.
4201 				panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n");
4202 			}
4203 		}
4204 	} else {
4205 		// the hit was probably in the 64k DMZ between kernel and user space
4206 		// this keeps a user space thread from passing a buffer that crosses
4207 		// into kernel space
4208 		return B_BAD_ADDRESS;
4209 	}
4210 
4211 	AddressSpaceReadLocker locker(addressSpace);
4212 
4213 	atomic_add(&addressSpace->fault_count, 1);
4214 
4215 	// Get the area the fault was in
4216 
4217 	vm_area *area = vm_area_lookup(addressSpace, address);
4218 	if (area == NULL) {
4219 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n",
4220 			originalAddress);
4221 		return B_BAD_ADDRESS;
4222 	}
4223 
4224 	// check permissions
4225 	if (isUser && (area->protection & B_USER_PROTECTION) == 0) {
4226 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
4227 		return B_PERMISSION_DENIED;
4228 	}
4229 	if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4230 		dprintf("write access attempted on read-only area 0x%lx at %p\n",
4231 			area->id, (void *)originalAddress);
4232 		return B_PERMISSION_DENIED;
4233 	}
4234 
4235 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
4236 	// At first, the top most cache from the area is investigated
4237 
4238 	vm_cache *topCache = vm_area_get_locked_cache(area);
4239 	off_t cacheOffset = address - area->base + area->cache_offset;
4240 	int32 changeCount = addressSpace->change_count;
4241 
4242 	atomic_add(&area->no_cache_change, 1);
4243 		// make sure the area's cache isn't replaced during the page fault
4244 
4245 	// See if this cache has a fault handler - this will do all the work for us
4246 	{
4247 		vm_store *store = topCache->store;
4248 		if (store->ops->fault != NULL) {
4249 			// Note, since the page fault is resolved with interrupts enabled, the
4250 			// fault handler could be called more than once for the same reason -
4251 			// the store must take this into account
4252 			status_t status = store->ops->fault(store, addressSpace, cacheOffset);
4253 			if (status != B_BAD_HANDLER) {
4254 				vm_area_put_locked_cache(topCache);
4255 				return status;
4256 			}
4257 		}
4258 	}
4259 
4260 	mutex_unlock(&topCache->lock);
4261 
4262 	// The top most cache has no fault handler, so let's see if the cache or its sources
4263 	// already have the page we're searching for (we're going from top to bottom)
4264 
4265 	vm_translation_map *map = &addressSpace->translation_map;
4266 	size_t reservePages = 2 + map->ops->map_max_pages_need(map,
4267 		originalAddress, originalAddress);
4268 	vm_page_reserve_pages(reservePages);
4269 		// we may need up to 2 pages - reserving them upfront while we don't
4270 		// hold any cache lock yet makes sure the page daemon/thief can do
4271 		// their job without problems
4272 
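	// The dummy page is inserted into the top cache while we walk the cache
	// chain, so that other threads faulting on the same address block on its
	// condition variable instead of racing us up the chain.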
4273 	vm_dummy_page dummyPage;
4274 	dummyPage.cache = NULL;
4275 	dummyPage.state = PAGE_STATE_INACTIVE;
4276 	dummyPage.type = PAGE_TYPE_DUMMY;
4277 	dummyPage.wired_count = 0;
4278 #ifdef DEBUG_PAGE_CACHE_TRANSITIONS
4279 	dummyPage.debug_flags = 0;
4280 	dummyPage.collided_page = NULL;
4281 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4282 
4283 	vm_cache *copiedPageSource = NULL;
4284 	vm_cache *pageSource;
4285 	vm_page *page;
4286 	// TODO: We keep the address space read lock during the whole operation
4287 	// which might be rather expensive depending on where the data has to
4288 	// be retrieved from.
4289 	status_t status = fault_get_page(map, topCache, cacheOffset, isWrite,
4290 		dummyPage, &pageSource, &copiedPageSource, &page);
4291 
4292 	if (status == B_OK) {
4293 		// All went fine, all that is left to do is to map the page into the address space
4294 
4295 		// In case this is a copy-on-write page, we need to unmap it from the area now
4296 		if (isWrite && page->cache == topCache)
4297 			vm_unmap_pages(area, address, B_PAGE_SIZE, true);
4298 
4299 		// TODO: there is currently no mechanism to prevent a page being mapped
4300 		//	more than once in case of a second page fault!
4301 
4302 		// If the page doesn't reside in the area's cache, we need to make sure it's
4303 		// mapped read-only, so that we cannot overwrite someone else's data (copy-on-write)
4304 		uint32 newProtection = area->protection;
4305 		if (page->cache != topCache && !isWrite)
4306 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4307 
4308 		vm_map_page(area, page, address, newProtection);
4309 
4310 		mutex_unlock(&pageSource->lock);
4311 		vm_cache_release_ref(pageSource);
4312 	}
4313 
4314 	atomic_add(&area->no_cache_change, -1);
4315 
4316 	if (copiedPageSource)
4317 		vm_cache_release_ref(copiedPageSource);
4318 
4319 	if (dummyPage.state == PAGE_STATE_BUSY) {
4320 		// We still have the dummy page in the cache - that happens if we didn't need
4321 		// to allocate a new page before, but could use one in another cache
4322 		fault_remove_dummy_page(dummyPage, false);
4323 	}
4324 
4325 	vm_cache_release_ref(topCache);
4326 	vm_page_unreserve_pages(reservePages);
4327 
4328 	return status;
4329 }
4330 
4331 
4332 /*! You must have the address space's sem held */
4333 vm_area *
4334 vm_area_lookup(vm_address_space *addressSpace, addr_t address)
4335 {
4336 	vm_area *area;
4337 
4338 	// check the areas list first
4339 	area = addressSpace->area_hint;
4340 	if (area && area->base <= address && area->base + (area->size - 1) >= address)
4341 		goto found;
4342 
4343 	for (area = addressSpace->areas; area != NULL; area = area->address_space_next) {
4344 		if (area->id == RESERVED_AREA_ID)
4345 			continue;
4346 
4347 		if (area->base <= address && area->base + (area->size - 1) >= address)
4348 			break;
4349 	}
4350 
4351 found:
4352 	if (area)
4353 		addressSpace->area_hint = area;
4354 
4355 	return area;
4356 }
4357 
4358 
4359 status_t
4360 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, uint32 flags)
4361 {
4362 	return (*vm_kernel_address_space()->translation_map.ops->get_physical_page)(paddr, _vaddr, flags);
4363 }
4364 
4365 
4366 status_t
4367 vm_put_physical_page(addr_t vaddr)
4368 {
4369 	return (*vm_kernel_address_space()->translation_map.ops->put_physical_page)(vaddr);
4370 }
4371 
4372 
4373 void
4374 vm_unreserve_memory(size_t amount)
4375 {
4376 	benaphore_lock(&sAvailableMemoryLock);
4377 
4378 	sAvailableMemory += amount;
4379 
4380 	benaphore_unlock(&sAvailableMemoryLock);
4381 }
4382 
4383 
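/*!
	Tries to reserve \a amount bytes of the available memory for the caller.
	Returns \c B_NO_MEMORY if not enough memory is left; a successful
	reservation is given back via vm_unreserve_memory().
*/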
4384 status_t
4385 vm_try_reserve_memory(size_t amount)
4386 {
4387 	status_t status;
4388 	benaphore_lock(&sAvailableMemoryLock);
4389 
4390 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4391 
4392 	if (sAvailableMemory > amount) {
4393 		sAvailableMemory -= amount;
4394 		status = B_OK;
4395 	} else
4396 		status = B_NO_MEMORY;
4397 
4398 	benaphore_unlock(&sAvailableMemoryLock);
4399 	return status;
4400 }
4401 
4402 
4403 status_t
4404 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
4405 {
4406 	AddressSpaceReadLocker locker;
4407 	vm_area *area;
4408 	status_t status = locker.SetFromArea(id, area);
4409 	if (status != B_OK)
4410 		return status;
4411 
4412 	return arch_vm_set_memory_type(area, physicalBase, type);
4413 }
4414 
4415 
4416 /**	This function enforces some protection properties:
4417  *	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4418  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4419  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4420  *	   and B_KERNEL_WRITE_AREA.
4421  */
4422 
4423 static void
4424 fix_protection(uint32 *protection)
4425 {
4426 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4427 		if ((*protection & B_USER_PROTECTION) == 0
4428 			|| (*protection & B_WRITE_AREA) != 0)
4429 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4430 		else
4431 			*protection |= B_KERNEL_READ_AREA;
4432 	}
4433 }
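
// For illustration: a caller passing only B_READ_AREA ends up with
// B_READ_AREA | B_KERNEL_READ_AREA, while anything containing B_WRITE_AREA
// (or no protection at all) is promoted to include both B_KERNEL_READ_AREA
// and B_KERNEL_WRITE_AREA. Protections that already contain kernel bits are
// left untouched.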
4434 
4435 
4436 static void
4437 fill_area_info(struct vm_area *area, area_info *info, size_t size)
4438 {
4439 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4440 	info->area = area->id;
4441 	info->address = (void *)area->base;
4442 	info->size = area->size;
4443 	info->protection = area->protection;
4444 	info->lock = B_FULL_LOCK;
4445 	info->team = area->address_space->id;
4446 	info->copy_count = 0;
4447 	info->in_count = 0;
4448 	info->out_count = 0;
4449 		// ToDo: retrieve real values here!
4450 
4451 	vm_cache *cache = vm_area_get_locked_cache(area);
4452 
4453 	// Note, this is a simplification; the cache could be larger than this area
4454 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4455 
4456 	vm_area_put_locked_cache(cache);
4457 }
4458 
4459 
4460 /*!
4461 	Tests whether or not the area that contains the specified address
4462 	needs any kind of locking, and actually exists.
4463 	Used by both lock_memory() and unlock_memory().
4464 */
4465 static status_t
4466 test_lock_memory(vm_address_space *addressSpace, addr_t address,
4467 	bool &needsLocking)
4468 {
4469 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
4470 
4471 	vm_area *area = vm_area_lookup(addressSpace, address);
4472 	if (area != NULL) {
4473 		// This determines if we need to lock the memory at all
4474 		needsLocking = area->cache_type != CACHE_TYPE_NULL
4475 			&& area->cache_type != CACHE_TYPE_DEVICE
4476 			&& area->wiring != B_FULL_LOCK
4477 			&& area->wiring != B_CONTIGUOUS;
4478 	}
4479 
4480 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
4481 
4482 	if (area == NULL)
4483 		return B_BAD_ADDRESS;
4484 
4485 	return B_OK;
4486 }
4487 
4488 
4489 //	#pragma mark - kernel public API
4490 
4491 
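/**	\brief Copies \a size bytes from \a from to \a to, using the current
 *	thread's fault handler to access userland memory safely.
 *
 *	\return \c B_OK on success, \c B_BAD_ADDRESS if an invalid address was
 *	encountered.
 */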
4492 status_t
4493 user_memcpy(void *to, const void *from, size_t size)
4494 {
4495 	if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK)
4496 		return B_BAD_ADDRESS;
4497 	return B_OK;
4498 }
4499 
4500 
4501 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
4502  *	the string in \a to, NULL-terminating the result.
4503  *
4504  *	\param to Pointer to the destination C-string.
4505  *	\param from Pointer to the source C-string.
4506  *	\param size Size in bytes of the string buffer pointed to by \a to.
4507  *
4508  *	\return strlen(\a from), or an error code if an invalid address was encountered.
4509  */
4510 
4511 ssize_t
4512 user_strlcpy(char *to, const char *from, size_t size)
4513 {
4514 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
4515 }
4516 
4517 
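/**	\brief Sets \a count bytes starting at \a s to the value \a c, using the
 *	current thread's fault handler to access userland memory safely.
 *
 *	\return \c B_OK on success, \c B_BAD_ADDRESS if an invalid address was
 *	encountered.
 */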
4518 status_t
4519 user_memset(void *s, char c, size_t count)
4520 {
4521 	if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK)
4522 		return B_BAD_ADDRESS;
4523 	return B_OK;
4524 }
4525 
4526 
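/*!
	Wires down the pages covering the given range: pages that are not yet
	mapped (or not mapped writable although B_READ_DEVICE indicates a write
	intent) are soft-faulted in first, and each page's wired_count is
	incremented so it won't be paged out. Areas that are fully locked,
	contiguous, or backed by device/null caches don't need any locking
	(see test_lock_memory()).
*/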
4527 long
4528 lock_memory(void *address, ulong numBytes, ulong flags)
4529 {
4530 	vm_address_space *addressSpace = NULL;
4531 	struct vm_translation_map *map;
4532 	addr_t unalignedBase = (addr_t)address;
4533 	addr_t end = unalignedBase + numBytes;
4534 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
4535 	bool isUser = IS_USER_ADDRESS(address);
4536 	bool needsLocking = true;
4537 
4538 	if (isUser)
4539 		addressSpace = vm_get_current_user_address_space();
4540 	else
4541 		addressSpace = vm_get_kernel_address_space();
4542 	if (addressSpace == NULL)
4543 		return B_ERROR;
4544 
4545 	// test if we're on an area that allows faults at all
4546 
4547 	map = &addressSpace->translation_map;
4548 
4549 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
4550 	if (status < B_OK)
4551 		goto out;
4552 	if (!needsLocking)
4553 		goto out;
4554 
4555 	for (; base < end; base += B_PAGE_SIZE) {
4556 		addr_t physicalAddress;
4557 		uint32 protection;
4558 		status_t status;
4559 
4560 		map->ops->lock(map);
4561 		status = map->ops->query(map, base, &physicalAddress, &protection);
4562 		map->ops->unlock(map);
4563 
4564 		if (status < B_OK)
4565 			goto out;
4566 
4567 		if ((protection & PAGE_PRESENT) != 0) {
4568 			// if B_READ_DEVICE is set, the caller intends to write to the locked
4569 			// memory, so if it hasn't been mapped writable, we'll try the soft
4570 			// fault anyway
4571 			if ((flags & B_READ_DEVICE) == 0
4572 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
4573 				// update wiring
4574 				vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4575 				if (page == NULL)
4576 					panic("couldn't lookup physical page just allocated\n");
4577 
4578 				page->wired_count++;
4579 					// TODO: needs to be atomic on all platforms!
4580 				continue;
4581 			}
4582 		}
4583 
4584 		status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser);
4585 		if (status != B_OK)	{
4586 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
4587 				(void *)unalignedBase, numBytes, flags, strerror(status));
4588 			goto out;
4589 		}
4590 
4591 		map->ops->lock(map);
4592 		status = map->ops->query(map, base, &physicalAddress, &protection);
4593 		map->ops->unlock(map);
4594 
4595 		if (status < B_OK)
4596 			goto out;
4597 
4598 		// update wiring
4599 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4600 		if (page == NULL)
4601 			panic("couldn't lookup physical page");
4602 
4603 		page->wired_count++;
4604 			// TODO: needs to be atomic on all platforms!
4605 	}
4606 
4607 out:
4608 	vm_put_address_space(addressSpace);
4609 	return status;
4610 }
4611 
4612 
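/*!
	Reverses the effect of lock_memory() by decrementing the wired_count of
	every page covering the given range. The range is expected to still be
	mapped; unmapped pages cause a panic.
*/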
4613 long
4614 unlock_memory(void *address, ulong numBytes, ulong flags)
4615 {
4616 	vm_address_space *addressSpace = NULL;
4617 	struct vm_translation_map *map;
4618 	addr_t unalignedBase = (addr_t)address;
4619 	addr_t end = unalignedBase + numBytes;
4620 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
4621 	bool needsLocking = true;
4622 
4623 	if (IS_USER_ADDRESS(address))
4624 		addressSpace = vm_get_current_user_address_space();
4625 	else
4626 		addressSpace = vm_get_kernel_address_space();
4627 	if (addressSpace == NULL)
4628 		return B_ERROR;
4629 
4630 	map = &addressSpace->translation_map;
4631 
4632 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
4633 	if (status < B_OK)
4634 		goto out;
4635 	if (!needsLocking)
4636 		goto out;
4637 
4638 	for (; base < end; base += B_PAGE_SIZE) {
4639 		map->ops->lock(map);
4640 
4641 		addr_t physicalAddress;
4642 		uint32 protection;
4643 		status = map->ops->query(map, base, &physicalAddress,
4644 			&protection);
4645 
4646 		map->ops->unlock(map);
4647 
4648 		if (status < B_OK)
4649 			goto out;
4650 		if ((protection & PAGE_PRESENT) == 0)
4651 			panic("calling unlock_memory() on unmapped memory!");
4652 
4653 		// update wiring
4654 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4655 		if (page == NULL)
4656 			panic("couldn't lookup physical page");
4657 
4658 		page->wired_count--;
4659 			// TODO: needs to be atomic on all platforms!
4660 	}
4661 
4662 out:
4663 	vm_put_address_space(addressSpace);
4664 	return status;
4665 }
4666 
4667 
4668 /** According to the BeBook, this function should always succeed.
4669  *	This is no longer the case.
4670  */
4671 
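// Usage sketch (hypothetical buffer/length): translating a locked buffer
// into its physical runs:
//
//	physical_entry table[8];
//	if (get_memory_map(buffer, length, table, 8) == B_OK) {
//		for (int32 i = 0; i < 8 && table[i].size != 0; i++) {
//			dprintf("run %ld: %p, %lu bytes\n", i, table[i].address,
//				table[i].size);
//		}
//	}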
4672 long
4673 get_memory_map(const void *address, ulong numBytes, physical_entry *table,
4674 	long numEntries)
4675 {
4676 	vm_address_space *addressSpace;
4677 	addr_t virtualAddress = (addr_t)address;
4678 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
4679 	addr_t physicalAddress;
4680 	status_t status = B_OK;
4681 	int32 index = -1;
4682 	addr_t offset = 0;
4683 	bool interrupts = are_interrupts_enabled();
4684 
4685 	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes,
4686 		numEntries));
4687 
4688 	if (numEntries == 0 || numBytes == 0)
4689 		return B_BAD_VALUE;
4690 
4691 	// in which address space is the address to be found?
4692 	if (IS_USER_ADDRESS(virtualAddress))
4693 		addressSpace = thread_get_current_thread()->team->address_space;
4694 	else
4695 		addressSpace = vm_kernel_address_space();
4696 
4697 	if (addressSpace == NULL)
4698 		return B_ERROR;
4699 
4700 	vm_translation_map *map = &addressSpace->translation_map;
4701 
4702 	if (interrupts)
4703 		map->ops->lock(map);
4704 
4705 	while (offset < numBytes) {
4706 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
4707 		uint32 flags;
4708 
4709 		if (interrupts) {
4710 			status = map->ops->query(map, (addr_t)address + offset,
4711 				&physicalAddress, &flags);
4712 		} else {
4713 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
4714 				&physicalAddress, &flags);
4715 		}
4716 		if (status < B_OK)
4717 			break;
4718 		if ((flags & PAGE_PRESENT) == 0) {
4719 			panic("get_memory_map() called on unmapped memory!");
4720 			return B_BAD_ADDRESS;
4721 		}
4722 
4723 		if (index < 0 && pageOffset > 0) {
4724 			physicalAddress += pageOffset;
4725 			if (bytes > B_PAGE_SIZE - pageOffset)
4726 				bytes = B_PAGE_SIZE - pageOffset;
4727 		}
4728 
4729 		// need to switch to the next physical_entry?
4730 		if (index < 0 || (addr_t)table[index].address
4731 				!= physicalAddress - table[index].size) {
4732 			if (++index + 1 > numEntries) {
4733 				// table too small
4734 				status = B_BUFFER_OVERFLOW;
4735 				break;
4736 			}
4737 			table[index].address = (void *)physicalAddress;
4738 			table[index].size = bytes;
4739 		} else {
4740 			// page does fit in current entry
4741 			table[index].size += bytes;
4742 		}
4743 
4744 		offset += bytes;
4745 	}
4746 
4747 	if (interrupts)
4748 		map->ops->unlock(map);
4749 
4750 	// close the entry list
4751 
4752 	if (status == B_OK) {
4753 		// if it's only one entry, we will silently accept the missing ending
4754 		if (numEntries == 1)
4755 			return B_OK;
4756 
4757 		if (++index + 1 > numEntries)
4758 			return B_BUFFER_OVERFLOW;
4759 
4760 		table[index].address = NULL;
4761 		table[index].size = 0;
4762 	}
4763 
4764 	return status;
4765 }
4766 
4767 
4768 area_id
4769 area_for(void *address)
4770 {
4771 	team_id space;
4772 
4773 	if (IS_USER_ADDRESS(address)) {
4774 		// we try the user team address space, if any
4775 		space = vm_current_user_address_space_id();
4776 		if (space < B_OK)
4777 			return space;
4778 	} else
4779 		space = vm_kernel_address_space_id();
4780 
4781 	return vm_area_for(space, (addr_t)address);
4782 }
4783 
4784 
4785 area_id
4786 find_area(const char *name)
4787 {
4788 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
4789 	struct hash_iterator iterator;
4790 	hash_open(sAreaHash, &iterator);
4791 
4792 	vm_area *area;
4793 	area_id id = B_NAME_NOT_FOUND;
4794 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
4795 		if (area->id == RESERVED_AREA_ID)
4796 			continue;
4797 
4798 		if (!strcmp(area->name, name)) {
4799 			id = area->id;
4800 			break;
4801 		}
4802 	}
4803 
4804 	hash_close(sAreaHash, &iterator, false);
4805 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
4806 
4807 	return id;
4808 }
4809 
4810 
4811 status_t
4812 _get_area_info(area_id id, area_info *info, size_t size)
4813 {
4814 	if (size != sizeof(area_info) || info == NULL)
4815 		return B_BAD_VALUE;
4816 
4817 	AddressSpaceReadLocker locker;
4818 	vm_area *area;
4819 	status_t status = locker.SetFromArea(id, area);
4820 	if (status != B_OK)
4821 		return status;
4822 
4823 	fill_area_info(area, info, size);
4824 	return B_OK;
4825 }
4826 
4827 
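/*!
	Iterates over the areas of \a team in ascending base address order. The
	\a cookie holds the base address of the area reported last, so each call
	continues with the first area above it.
*/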
4828 status_t
4829 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
4830 {
4831 	addr_t nextBase = *(addr_t *)cookie;
4832 
4833 	// we're already through the list
4834 	if (nextBase == (addr_t)-1)
4835 		return B_ENTRY_NOT_FOUND;
4836 
4837 	if (team == B_CURRENT_TEAM)
4838 		team = team_get_current_team_id();
4839 
4840 	AddressSpaceReadLocker locker(team);
4841 	if (!locker.IsLocked())
4842 		return B_BAD_TEAM_ID;
4843 
4844 	vm_area *area;
4845 	for (area = locker.AddressSpace()->areas; area != NULL;
4846 			area = area->address_space_next) {
4847 		if (area->id == RESERVED_AREA_ID)
4848 			continue;
4849 
4850 		if (area->base > nextBase)
4851 			break;
4852 	}
4853 
4854 	if (area == NULL) {
4855 		*cookie = (int32)-1;
4856 		return B_ENTRY_NOT_FOUND;
4857 	}
4858 
4859 	fill_area_info(area, info, size);
4860 	*cookie = (int32)(area->base);
4861 
4862 	return B_OK;
4863 }
4864 
4865 
4866 status_t
4867 set_area_protection(area_id area, uint32 newProtection)
4868 {
4869 	fix_protection(&newProtection);
4870 
4871 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
4872 		newProtection);
4873 }
4874 
4875 
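/*!
	Resizes the area to \a newSize, which must be a multiple of B_PAGE_SIZE.
	Since all areas attached to the same RAM cache share that cache, all of
	them are resized; growing only succeeds if every area has enough room
	(or an adjoining reserved range) behind it.
*/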
4876 status_t
4877 resize_area(area_id areaID, size_t newSize)
4878 {
4879 	// is newSize a multiple of B_PAGE_SIZE?
4880 	if (newSize & (B_PAGE_SIZE - 1))
4881 		return B_BAD_VALUE;
4882 
4883 	// lock all affected address spaces and the cache
4884 	vm_area* area;
4885 	vm_cache* cache;
4886 
4887 	MultiAddressSpaceLocker locker;
4888 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
4889 		&cache);
4890 	if (status != B_OK)
4891 		return status;
4892 	AreaCacheLocker cacheLocker(cache);	// already locked
4893 
4894 	size_t oldSize = area->size;
4895 	if (newSize == oldSize)
4896 		return B_OK;
4897 
4898 	// Resize all areas of this area's cache
4899 
4900 	if (cache->type != CACHE_TYPE_RAM)
4901 		return B_NOT_ALLOWED;
4902 
4903 	if (oldSize < newSize) {
4904 		// We need to check if all areas of this cache can be resized
4905 
4906 		for (vm_area* current = cache->areas; current != NULL;
4907 				current = current->cache_next) {
4908 			if (current->address_space_next
4909 				&& current->address_space_next->base <= (current->base
4910 					+ newSize)) {
4911 				// If the area was created inside a reserved area, it can
4912 				// also be resized in that area
4913 				// ToDo: if there is free space after the reserved area, it could be used as well...
4914 				vm_area *next = current->address_space_next;
4915 				if (next->id == RESERVED_AREA_ID
4916 					&& next->cache_offset <= current->base
4917 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
4918 					continue;
4919 
4920 				return B_ERROR;
4921 			}
4922 		}
4923 	}
4924 
4925 	// Okay, looks good so far, so let's do it
4926 
4927 	for (vm_area* current = cache->areas; current != NULL;
4928 			current = current->cache_next) {
4929 		if (current->address_space_next
4930 			&& current->address_space_next->base <= (current->base + newSize)) {
4931 			vm_area *next = current->address_space_next;
4932 			if (next->id == RESERVED_AREA_ID
4933 				&& next->cache_offset <= current->base
4934 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
4935 				// resize reserved area
4936 				addr_t offset = current->base + newSize - next->base;
4937 				if (next->size <= offset) {
4938 					current->address_space_next = next->address_space_next;
4939 					free(next);
4940 				} else {
4941 					next->size -= offset;
4942 					next->base += offset;
4943 				}
4944 			} else {
4945 				status = B_ERROR;
4946 				break;
4947 			}
4948 		}
4949 
4950 		current->size = newSize;
4951 
4952 		// we also need to unmap all pages beyond the new size, if the area has shrunk
4953 		if (newSize < oldSize) {
4954 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
4955 				false);
4956 		}
4957 	}
4958 
4959 	if (status == B_OK)
4960 		status = vm_cache_resize(cache, newSize);
4961 
4962 	if (status < B_OK) {
4963 		// This shouldn't really be possible, but hey, who knows
4964 		for (vm_area* current = cache->areas; current != NULL;
4965 				current = current->cache_next) {
4966 			current->size = oldSize;
4967 		}
4968 	}
4969 
4970 	// ToDo: we must honour the lock restrictions of this area
4971 	return status;
4972 }
4973 
4974 
4975 /**	Transfers the specified area to a new team. The caller must be the owner
4976  *	of the area (not yet enforced but probably should be).
4977  *	This function is currently not exported to the kernel namespace, but is
4978  *	only accessible using the _kern_transfer_area() syscall.
4979  */
4980 
4981 static status_t
4982 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
4983 {
4984 	// TODO: implement like clone_area(), just atomically (ie. hand out a new area ID)!
4985 	return B_ERROR;
4986 #if 0
4987 	vm_address_space *sourceAddressSpace;
4988 	vm_address_space *targetAddressSpace;
4989 	void *reservedAddress = NULL;
4990 	vm_area *reserved;
4991 	vm_area *area = vm_get_area(id);
4992 	if (area == NULL)
4993 		return B_BAD_VALUE;
4994 
4995 	// ToDo: check if the current team owns the area
4996 	status_t status = team_get_address_space(target, &targetAddressSpace);
4997 	if (status != B_OK)
4998 		goto err1;
4999 
5000 	// We will first remove the area, and then reserve its former
5001 	// address range so that we can later reclaim it if the
5002 	// transfer failed.
5003 
5004 	sourceAddressSpace = area->address_space;
5005 	reserved = create_reserved_area_struct(sourceAddressSpace, 0);
5006 	if (reserved == NULL) {
5007 		status = B_NO_MEMORY;
5008 		goto err2;
5009 	}
5010 
5011 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
5012 
5013 	// unmap the area in the source address space
5014 	vm_unmap_pages(area, area->base, area->size);
5015 
5016 	// TODO: there might be additional page faults at this point!
5017 
5018 	reservedAddress = (void *)area->base;
5019 	remove_area_from_address_space(sourceAddressSpace, area);
5020 	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
5021 		area->size, reserved);
5022 		// famous last words: this cannot fail :)
5023 
5024 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
5025 
5026 	if (status != B_OK)
5027 		goto err3;
5028 
5029 	// insert the area into the target address space
5030 
5031 	acquire_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0, 0);
5032 	// check to see if this address space has entered DELETE state
5033 	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
5034 		// okay, someone is trying to delete this address space now, so we can't
5035 		// insert the area, so back out
5036 		status = B_BAD_TEAM_ID;
5037 		goto err4;
5038 	}
5039 
5040 	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
5041 	if (status < B_OK)
5042 		goto err4;
5043 
5044 	// The area was successfully transferred to the new team when we got here
5045 	area->address_space = targetAddressSpace;
5046 
5047 	// TODO: take area lock/wiring into account!
5048 
5049 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
5050 
5051 	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress,
5052 		area->size);
5053 	vm_put_address_space(sourceAddressSpace);
5054 		// we keep the reference of the target address space for the
5055 		// area, so we only have to put the one from the source
5056 	vm_put_area(area);
5057 
5058 	return B_OK;
5059 
5060 err4:
5061 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
5062 err3:
5063 	// insert the area again into the source address space
5064 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
5065 	// check to see if this address space has entered DELETE state
5066 	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
5067 		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
5068 				area->size, area) != B_OK) {
5069 		// We can't insert the area anymore - we have to delete it manually
5070 		vm_cache *cache = vm_area_get_locked_cache(area);
5071 		atomic_add(&area->no_cache_change, 1);
5072 		vm_area_put_locked_cache(cache);
5073 
5074 		vm_cache_remove_area(cache, area);
5075 		vm_cache_release_ref(cache);
5076 		free(area->name);
5077 		free(area);
5078 		area = NULL;
5079 	}
5080 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
5081 err2:
5082 	vm_put_address_space(targetAddressSpace);
5083 err1:
5084 	if (area != NULL)
5085 		vm_put_area(area);
5086 	return status;
5087 #endif
5088 }
5089 
5090 
5091 area_id
5092 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
5093 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
5094 {
5095 	if (!arch_vm_supports_protection(protection))
5096 		return B_NOT_SUPPORTED;
5097 
5098 	fix_protection(&protection);
5099 
5100 	return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress,
5101 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
5102 }
5103 
5104 
5105 area_id
5106 clone_area(const char *name, void **_address, uint32 addressSpec,
5107 	uint32 protection, area_id source)
5108 {
5109 	if ((protection & B_KERNEL_PROTECTION) == 0)
5110 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5111 
5112 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
5113 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source);
5114 }
5115 
5116 
5117 area_id
5118 create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
5119 	uint32 size, uint32 lock, uint32 protection)
5120 {
5121 	fix_protection(&protection);
5122 
5123 	return vm_create_anonymous_area(team->id, (char *)name, address,
5124 		addressSpec, size, lock, protection);
5125 }
5126 
5127 
5128 area_id
5129 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
5130 	uint32 protection)
5131 {
5132 	fix_protection(&protection);
5133 
5134 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address,
5135 		addressSpec, size, lock, protection);
5136 }
5137 
5138 
5139 status_t
5140 delete_area_etc(struct team *team, area_id area)
5141 {
5142 	return vm_delete_area(team->id, area);
5143 }
5144 
5145 
5146 status_t
5147 delete_area(area_id area)
5148 {
5149 	return vm_delete_area(vm_kernel_address_space_id(), area);
5150 }
5151 
5152 
5153 //	#pragma mark - Userland syscalls
5154 
5155 
5156 status_t
5157 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size)
5158 {
5159 	// filter out some unavailable values (for userland)
5160 	switch (addressSpec) {
5161 		case B_ANY_KERNEL_ADDRESS:
5162 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5163 			return B_BAD_VALUE;
5164 	}
5165 
5166 	addr_t address;
5167 
5168 	if (!IS_USER_ADDRESS(userAddress)
5169 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5170 		return B_BAD_ADDRESS;
5171 
5172 	status_t status = vm_reserve_address_range(vm_current_user_address_space_id(),
5173 		(void **)&address, addressSpec, size, RESERVED_AVOID_BASE);
5174 	if (status < B_OK)
5175 		return status;
5176 
5177 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5178 		vm_unreserve_address_range(vm_current_user_address_space_id(),
5179 			(void *)address, size);
5180 		return B_BAD_ADDRESS;
5181 	}
5182 
5183 	return B_OK;
5184 }
5185 
5186 
5187 area_id
5188 _user_area_for(void *address)
5189 {
5190 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
5191 }
5192 
5193 
5194 area_id
5195 _user_find_area(const char *userName)
5196 {
5197 	char name[B_OS_NAME_LENGTH];
5198 
5199 	if (!IS_USER_ADDRESS(userName)
5200 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5201 		return B_BAD_ADDRESS;
5202 
5203 	return find_area(name);
5204 }
5205 
5206 
5207 status_t
5208 _user_get_area_info(area_id area, area_info *userInfo)
5209 {
5210 	if (!IS_USER_ADDRESS(userInfo))
5211 		return B_BAD_ADDRESS;
5212 
5213 	area_info info;
5214 	status_t status = get_area_info(area, &info);
5215 	if (status < B_OK)
5216 		return status;
5217 
5218 	// TODO: do we want to prevent userland from seeing kernel protections?
5219 	//info.protection &= B_USER_PROTECTION;
5220 
5221 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5222 		return B_BAD_ADDRESS;
5223 
5224 	return status;
5225 }
5226 
5227 
5228 status_t
5229 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
5230 {
5231 	int32 cookie;
5232 
5233 	if (!IS_USER_ADDRESS(userCookie)
5234 		|| !IS_USER_ADDRESS(userInfo)
5235 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5236 		return B_BAD_ADDRESS;
5237 
5238 	area_info info;
5239 	status_t status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
5240 	if (status != B_OK)
5241 		return status;
5242 
5243 	//info.protection &= B_USER_PROTECTION;
5244 
5245 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5246 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5247 		return B_BAD_ADDRESS;
5248 
5249 	return status;
5250 }
5251 
5252 
5253 status_t
5254 _user_set_area_protection(area_id area, uint32 newProtection)
5255 {
5256 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5257 		return B_BAD_VALUE;
5258 
5259 	fix_protection(&newProtection);
5260 
5261 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
5262 		newProtection);
5263 }
5264 
5265 
5266 status_t
5267 _user_resize_area(area_id area, size_t newSize)
5268 {
5269 	// ToDo: Since we restrict deleting of areas to those owned by the team,
5270 	// we should also do that for resizing (check other functions, too).
5271 	return resize_area(area, newSize);
5272 }
5273 
5274 
5275 status_t
5276 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
5277 {
5278 	// filter out some unavailable values (for userland)
5279 	switch (addressSpec) {
5280 		case B_ANY_KERNEL_ADDRESS:
5281 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5282 			return B_BAD_VALUE;
5283 	}
5284 
5285 	void *address;
5286 	if (!IS_USER_ADDRESS(userAddress)
5287 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5288 		return B_BAD_ADDRESS;
5289 
5290 	status_t status = transfer_area(area, &address, addressSpec, target);
5291 	if (status < B_OK)
5292 		return status;
5293 
5294 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5295 		return B_BAD_ADDRESS;
5296 
5297 	return status;
5298 }
5299 
5300 
5301 area_id
5302 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
5303 	uint32 protection, area_id sourceArea)
5304 {
5305 	char name[B_OS_NAME_LENGTH];
5306 	void *address;
5307 
5308 	// filter out some unavailable values (for userland)
5309 	switch (addressSpec) {
5310 		case B_ANY_KERNEL_ADDRESS:
5311 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5312 			return B_BAD_VALUE;
5313 	}
5314 	if ((protection & ~B_USER_PROTECTION) != 0)
5315 		return B_BAD_VALUE;
5316 
5317 	if (!IS_USER_ADDRESS(userName)
5318 		|| !IS_USER_ADDRESS(userAddress)
5319 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5320 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5321 		return B_BAD_ADDRESS;
5322 
5323 	fix_protection(&protection);
5324 
5325 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address,
5326 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
5327 	if (clonedArea < B_OK)
5328 		return clonedArea;
5329 
5330 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5331 		delete_area(clonedArea);
5332 		return B_BAD_ADDRESS;
5333 	}
5334 
5335 	return clonedArea;
5336 }
5337 
5338 
5339 area_id
5340 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
5341 	size_t size, uint32 lock, uint32 protection)
5342 {
5343 	char name[B_OS_NAME_LENGTH];
5344 	void *address;
5345 
5346 	// filter out some unavailable values (for userland)
5347 	switch (addressSpec) {
5348 		case B_ANY_KERNEL_ADDRESS:
5349 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5350 			return B_BAD_VALUE;
5351 	}
5352 	if ((protection & ~B_USER_PROTECTION) != 0)
5353 		return B_BAD_VALUE;
5354 
5355 	if (!IS_USER_ADDRESS(userName)
5356 		|| !IS_USER_ADDRESS(userAddress)
5357 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5358 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5359 		return B_BAD_ADDRESS;
5360 
5361 	if (addressSpec == B_EXACT_ADDRESS
5362 		&& IS_KERNEL_ADDRESS(address))
5363 		return B_BAD_VALUE;
5364 
5365 	fix_protection(&protection);
5366 
5367 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
5368 		(char *)name, &address, addressSpec, size, lock, protection);
5369 
5370 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5371 		delete_area(area);
5372 		return B_BAD_ADDRESS;
5373 	}
5374 
5375 	return area;
5376 }
5377 
5378 
5379 status_t
5380 _user_delete_area(area_id area)
5381 {
5382 	// Unlike the BeOS implementation, you can now only delete areas
5383 	// that you have created yourself from userland.
5384 	// The documentation of delete_area() explicitly states that this
5385 	// will be restricted in the future, and so it will.
5386 	return vm_delete_area(vm_current_user_address_space_id(), area);
5387 }
5388 
5389 
5390 // ToDo: create a BeOS style call for this!
5391 
5392 area_id
5393 _user_vm_map_file(const char *userName, void **userAddress, int addressSpec,
5394 	addr_t size, int protection, int mapping, const char *userPath, off_t offset)
5395 {
5396 	char name[B_OS_NAME_LENGTH];
5397 	char path[B_PATH_NAME_LENGTH];
5398 	void *address;
5399 	area_id area;
5400 
5401 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
5402 		|| !IS_USER_ADDRESS(userPath)
5403 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
5404 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
5405 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5406 		return B_BAD_ADDRESS;
5407 
5408 	// userland created areas can always be accessed by the kernel
5409 	protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
5410 
5411 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
5412 		addressSpec, size, protection, mapping, path, offset, false);
5413 	if (area < B_OK)
5414 		return area;
5415 
5416 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5417 		return B_BAD_ADDRESS;
5418 
5419 	return area;
5420 }
5421 
5422