xref: /haiku/src/system/kernel/vm/vm.cpp (revision 1214ef1b2100f2b3299fc9d8d6142e46f70a4c3f)
1 /*
2  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <vm.h>
11 
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 #include <OS.h>
18 #include <KernelExport.h>
19 
20 #include <AutoDeleter.h>
21 
22 #include <vm_address_space.h>
23 #include <vm_priv.h>
24 #include <vm_page.h>
25 #include <vm_cache.h>
26 #include <vm_low_memory.h>
27 #include <file_cache.h>
28 #include <memheap.h>
29 #include <condition_variable.h>
30 #include <debug.h>
31 #include <console.h>
32 #include <int.h>
33 #include <smp.h>
34 #include <lock.h>
35 #include <thread.h>
36 #include <team.h>
37 #include <util/AutoLock.h>
38 #include <util/khash.h>
39 
40 #include <boot/stage2.h>
41 #include <boot/elf.h>
42 
43 #include <arch/cpu.h>
44 #include <arch/vm.h>
45 
46 #include "vm_store_anonymous_noswap.h"
47 #include "vm_store_device.h"
48 #include "vm_store_null.h"
49 
50 
51 //#define TRACE_VM
52 //#define TRACE_FAULTS
53 #ifdef TRACE_VM
54 #	define TRACE(x) dprintf x
55 #else
56 #	define TRACE(x) ;
57 #endif
58 #ifdef TRACE_FAULTS
59 #	define FTRACE(x) dprintf x
60 #else
61 #	define FTRACE(x) ;
62 #endif
63 
64 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
65 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
66 
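// For example, with B_PAGE_SIZE == 4096, ROUNDUP(5000, B_PAGE_SIZE) yields
// 8192 and ROUNDOWN(5000, B_PAGE_SIZE) yields 4096. Note that ROUNDUP only
// works for power-of-two values of "b" (it masks), while ROUNDOWN also
// handles arbitrary values (it divides).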
67 
68 class AddressSpaceReadLocker {
69 public:
70 	AddressSpaceReadLocker(team_id team);
71 	AddressSpaceReadLocker(vm_address_space* space);
72 	AddressSpaceReadLocker();
73 	~AddressSpaceReadLocker();
74 
75 	status_t SetTo(team_id team);
76 	void SetTo(vm_address_space* space);
77 	status_t SetFromArea(area_id areaID, vm_area*& area);
78 
79 	bool IsLocked() const { return fLocked; }
80 	void Unlock();
81 
82 	void Unset();
83 
84 	vm_address_space* AddressSpace() { return fSpace; }
85 
86 private:
87 	vm_address_space* fSpace;
88 	bool	fLocked;
89 };
90 
91 class AddressSpaceWriteLocker {
92 public:
93 	AddressSpaceWriteLocker(team_id team);
94 	AddressSpaceWriteLocker();
95 	~AddressSpaceWriteLocker();
96 
97 	status_t SetTo(team_id team);
98 	status_t SetFromArea(area_id areaID, vm_area*& area);
99 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
100 		vm_area*& area);
101 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
102 
103 	bool IsLocked() const { return fLocked; }
104 	void Unlock();
105 
106 	void DegradeToReadLock();
107 	void Unset();
108 
109 	vm_address_space* AddressSpace() { return fSpace; }
110 
111 private:
112 	vm_address_space* fSpace;
113 	bool	fLocked;
114 	bool	fDegraded;
115 };
116 
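// A minimal usage sketch for the two lockers above (hypothetical caller;
// real call sites such as vm_delete_area() below follow the same pattern):
//
//	status_t
//	example_operation_on_area(team_id team, area_id id)
//	{
//		AddressSpaceWriteLocker locker;
//		vm_area* area;
//		status_t status = locker.SetFromArea(team, id, area);
//		if (status != B_OK)
//			return status;
//
//		// ... manipulate "area" while the address space is write locked ...
//
//		return B_OK;
//			// the destructor unlocks the space and drops its reference
//	}
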
117 class MultiAddressSpaceLocker {
118 public:
119 	MultiAddressSpaceLocker();
120 	~MultiAddressSpaceLocker();
121 
122 	inline status_t AddTeam(team_id team, bool writeLock,
123 		vm_address_space** _space = NULL);
124 	inline status_t AddArea(area_id area, bool writeLock,
125 		vm_address_space** _space = NULL);
126 
127 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
128 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL,
129 		bool checkNoCacheChange = false);
130 
131 	status_t Lock();
132 	void Unlock();
133 	bool IsLocked() const { return fLocked; }
134 
135 	void Unset();
136 
137 private:
138 	struct lock_item {
139 		vm_address_space*	space;
140 		bool				write_lock;
141 	};
142 
143 	bool _ResizeIfNeeded();
144 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
145 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
146 		vm_address_space** _space);
147 
148 	static int _CompareItems(const void* _a, const void* _b);
149 
150 	lock_item*	fItems;
151 	int32		fCapacity;
152 	int32		fCount;
153 	bool		fLocked;
154 };
155 
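// MultiAddressSpaceLocker collects several address spaces and, in Lock(),
// sorts them by address space ID before acquiring their semaphores, so that
// two threads locking overlapping sets of spaces cannot deadlock each other.
// A sketch of the pattern used by vm_clone_area() further below (identifiers
// are placeholders):
//
//	MultiAddressSpaceLocker locker;
//	vm_address_space* sourceSpace;
//	vm_address_space* targetSpace;
//	if (locker.AddArea(sourceID, false, &sourceSpace) == B_OK
//		&& locker.AddTeam(targetTeam, true, &targetSpace) == B_OK
//		&& locker.Lock() == B_OK) {
//		// both spaces are now locked (source: read, target: write)
//	}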
156 
157 class AreaCacheLocking {
158 public:
159 	inline bool Lock(vm_cache* lockable)
160 	{
161 		return false;
162 	}
163 
164 	inline void Unlock(vm_cache* lockable)
165 	{
166 		vm_area_put_locked_cache(lockable);
167 	}
168 };
169 
170 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
171 public:
172 	inline AreaCacheLocker(vm_cache* cache = NULL)
173 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
174 	{
175 	}
176 
177 	inline AreaCacheLocker(vm_area* area)
178 		: AutoLocker<vm_cache, AreaCacheLocking>()
179 	{
180 		SetTo(area);
181 	}
182 
183 	inline void SetTo(vm_area* area)
184 	{
185 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
186 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
187 	}
188 };
189 
190 
191 #define REGION_HASH_TABLE_SIZE 1024
192 static area_id sNextAreaID;
193 static hash_table *sAreaHash;
194 static sem_id sAreaHashLock;
195 static mutex sMappingLock;
196 static mutex sAreaCacheLock;
197 
198 static off_t sAvailableMemory;
199 static benaphore sAvailableMemoryLock;
200 
201 // function declarations
202 static void delete_area(vm_address_space *addressSpace, vm_area *area);
203 static vm_address_space *get_address_space_by_area_id(area_id id);
204 static status_t vm_soft_fault(addr_t address, bool isWrite, bool isUser);
205 
206 
207 //	#pragma mark -
208 
209 
210 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
211 	:
212 	fSpace(NULL),
213 	fLocked(false)
214 {
215 	SetTo(team);
216 }
217 
218 
219 //! Takes over the reference of the address space
220 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space)
221 	:
222 	fSpace(NULL),
223 	fLocked(false)
224 {
225 	SetTo(space);
226 }
227 
228 
229 AddressSpaceReadLocker::AddressSpaceReadLocker()
230 	:
231 	fSpace(NULL),
232 	fLocked(false)
233 {
234 }
235 
236 
237 AddressSpaceReadLocker::~AddressSpaceReadLocker()
238 {
239 	Unset();
240 }
241 
242 
243 void
244 AddressSpaceReadLocker::Unset()
245 {
246 	Unlock();
247 	if (fSpace != NULL)
248 		vm_put_address_space(fSpace);
249 }
250 
251 
252 status_t
253 AddressSpaceReadLocker::SetTo(team_id team)
254 {
255 	fSpace = vm_get_address_space_by_id(team);
256 	if (fSpace == NULL)
257 		return B_BAD_TEAM_ID;
258 
259 	acquire_sem_etc(fSpace->sem, READ_COUNT, 0, 0);
260 	fLocked = true;
261 	return B_OK;
262 }
263 
264 
265 //! Takes over the reference of the address space
266 void
267 AddressSpaceReadLocker::SetTo(vm_address_space* space)
268 {
269 	fSpace = space;
270 	acquire_sem_etc(fSpace->sem, READ_COUNT, 0, 0);
271 	fLocked = true;
272 }
273 
274 
275 status_t
276 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
277 {
278 	fSpace = get_address_space_by_area_id(areaID);
279 	if (fSpace == NULL)
280 		return B_BAD_TEAM_ID;
281 
282 	acquire_sem_etc(fSpace->sem, READ_COUNT, 0, 0);
283 
284 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
285 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
286 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
287 
288 	if (area == NULL || area->address_space != fSpace) {
289 		release_sem_etc(fSpace->sem, READ_COUNT, 0);
290 		return B_BAD_VALUE;
291 	}
292 
293 	fLocked = true;
294 	return B_OK;
295 }
296 
297 
298 void
299 AddressSpaceReadLocker::Unlock()
300 {
301 	if (fLocked) {
302 		release_sem_etc(fSpace->sem, READ_COUNT, 0);
303 		fLocked = false;
304 	}
305 }
306 
307 
308 //	#pragma mark -
309 
310 
311 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
312 	:
313 	fSpace(NULL),
314 	fLocked(false),
315 	fDegraded(false)
316 {
317 	SetTo(team);
318 }
319 
320 
321 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
322 	:
323 	fSpace(NULL),
324 	fLocked(false),
325 	fDegraded(false)
326 {
327 }
328 
329 
330 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
331 {
332 	Unset();
333 }
334 
335 
336 void
337 AddressSpaceWriteLocker::Unset()
338 {
339 	Unlock();
340 	if (fSpace != NULL)
341 		vm_put_address_space(fSpace);
342 }
343 
344 
345 status_t
346 AddressSpaceWriteLocker::SetTo(team_id team)
347 {
348 	fSpace = vm_get_address_space_by_id(team);
349 	if (fSpace == NULL)
350 		return B_BAD_TEAM_ID;
351 
352 	acquire_sem_etc(fSpace->sem, WRITE_COUNT, 0, 0);
353 	fLocked = true;
354 	return B_OK;
355 }
356 
357 
358 status_t
359 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
360 {
361 	fSpace = get_address_space_by_area_id(areaID);
362 	if (fSpace == NULL)
363 		return B_BAD_VALUE;
364 
365 	acquire_sem_etc(fSpace->sem, WRITE_COUNT, 0, 0);
366 
367 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
368 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
369 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
370 
371 	if (area == NULL || area->address_space != fSpace) {
372 		release_sem_etc(fSpace->sem, WRITE_COUNT, 0);
373 		return B_BAD_VALUE;
374 	}
375 
376 	fLocked = true;
377 	return B_OK;
378 }
379 
380 
381 status_t
382 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
383 	bool allowKernel, vm_area*& area)
384 {
385 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
386 
387 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
388 	if (area != NULL
389 		&& (area->address_space->id == team
390 			|| allowKernel && team == vm_kernel_address_space_id())) {
391 		fSpace = area->address_space;
392 		atomic_add(&fSpace->ref_count, 1);
393 	}
394 
395 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
396 
397 	if (fSpace == NULL)
398 		return B_BAD_VALUE;
399 
400 	// Second try to get the area -- this time with the address space
401 	// write lock held
402 
403 	acquire_sem_etc(fSpace->sem, WRITE_COUNT, 0, 0);
404 
405 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
406 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
407 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
408 
409 	if (area == NULL) {
410 		release_sem_etc(fSpace->sem, WRITE_COUNT, 0);
411 		return B_BAD_VALUE;
412 	}
413 
414 	fLocked = true;
415 	return B_OK;
416 }
417 
418 
419 status_t
420 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
421 	vm_area*& area)
422 {
423 	return SetFromArea(team, areaID, false, area);
424 }
425 
426 
427 void
428 AddressSpaceWriteLocker::Unlock()
429 {
430 	if (fLocked) {
431 		release_sem_etc(fSpace->sem, fDegraded ? READ_COUNT : WRITE_COUNT, 0);
432 		fLocked = false;
433 		fDegraded = false;
434 	}
435 }
436 
437 
438 void
439 AddressSpaceWriteLocker::DegradeToReadLock()
440 {
441 	release_sem_etc(fSpace->sem, WRITE_COUNT - READ_COUNT, 0);
442 	fDegraded = true;
443 }
444 
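// DegradeToReadLock() releases WRITE_COUNT - READ_COUNT units of the
// semaphore in a single call, so the exclusive lock becomes a shared one
// without any window in which the address space is unlocked.
// vm_create_anonymous_area() below uses this once the area has been
// inserted but its pages still have to be mapped.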
445 
446 //	#pragma mark -
447 
448 
449 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
450 	:
451 	fItems(NULL),
452 	fCapacity(0),
453 	fCount(0),
454 	fLocked(false)
455 {
456 }
457 
458 
459 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
460 {
461 	Unset();
462 	free(fItems);
463 }
464 
465 
466 /*static*/ int
467 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
468 {
469 	lock_item* a = (lock_item*)_a;
470 	lock_item* b = (lock_item*)_b;
471 	return a->space->id - b->space->id;
472 }
473 
474 
475 bool
476 MultiAddressSpaceLocker::_ResizeIfNeeded()
477 {
478 	if (fCount == fCapacity) {
479 		lock_item* items = (lock_item*)realloc(fItems,
480 			(fCapacity + 4) * sizeof(lock_item));
481 		if (items == NULL)
482 			return false;
483 
484 		fCapacity += 4;
485 		fItems = items;
486 	}
487 
488 	return true;
489 }
490 
491 
492 int32
493 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
494 {
495 	for (int32 i = 0; i < fCount; i++) {
496 		if (fItems[i].space == space)
497 			return i;
498 	}
499 
500 	return -1;
501 }
502 
503 
504 status_t
505 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
506 	bool writeLock, vm_address_space** _space)
507 {
508 	if (!space)
509 		return B_BAD_VALUE;
510 
511 	int32 index = _IndexOfAddressSpace(space);
512 	if (index < 0) {
513 		if (!_ResizeIfNeeded()) {
514 			vm_put_address_space(space);
515 			return B_NO_MEMORY;
516 		}
517 
518 		lock_item& item = fItems[fCount++];
519 		item.space = space;
520 		item.write_lock = writeLock;
521 	} else {
522 
523 		// one reference is enough
524 		vm_put_address_space(space);
525 
526 		fItems[index].write_lock |= writeLock;
527 	}
528 
529 	if (_space != NULL)
530 		*_space = space;
531 
532 	return B_OK;
533 }
534 
535 
536 inline status_t
537 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
538 	vm_address_space** _space)
539 {
540 	return _AddAddressSpace(vm_get_address_space_by_id(team), writeLock,
541 		_space);
542 }
543 
544 
545 inline status_t
546 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
547 	vm_address_space** _space)
548 {
549 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
550 		_space);
551 }
552 
553 
554 void
555 MultiAddressSpaceLocker::Unset()
556 {
557 	Unlock();
558 
559 	for (int32 i = 0; i < fCount; i++)
560 		vm_put_address_space(fItems[i].space);
561 
562 	fCount = 0;
563 }
564 
565 
566 status_t
567 MultiAddressSpaceLocker::Lock()
568 {
569 	ASSERT(!fLocked);
570 
571 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
572 
573 	for (int32 i = 0; i < fCount; i++) {
574 		status_t status = acquire_sem_etc(fItems[i].space->sem,
575 			fItems[i].write_lock ? WRITE_COUNT : READ_COUNT, 0, 0);
576 		if (status < B_OK) {
577 			while (--i >= 0) {
578 				release_sem_etc(fItems[i].space->sem,
579 					fItems[i].write_lock ? WRITE_COUNT : READ_COUNT, 0);
580 			}
581 			return status;
582 		}
583 	}
584 
585 	fLocked = true;
586 	return B_OK;
587 }
588 
589 
590 void
591 MultiAddressSpaceLocker::Unlock()
592 {
593 	if (!fLocked)
594 		return;
595 
596 	for (int32 i = 0; i < fCount; i++) {
597 		release_sem_etc(fItems[i].space->sem,
598 			fItems[i].write_lock ? WRITE_COUNT : READ_COUNT, 0);
599 	}
600 
601 	fLocked = false;
602 }
603 
604 
605 /*!	Adds all address spaces of the areas associated with the given area's cache,
606 	locks them, and locks the cache (including a reference to it). It retries
607 	until the situation is stable (i.e. neither the cache nor the cache's
608 	areas changed) or an error occurs. If \c checkNoCacheChange is \c true it
609 	does not return until all areas' \c no_cache_change flags are cleared.
610 */
611 status_t
612 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
613 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
614 	vm_cache** _cache, bool checkNoCacheChange)
615 {
616 	// remember the original state
617 	int originalCount = fCount;
618 	lock_item* originalItems = NULL;
619 	if (fCount > 0) {
620 		originalItems = new(nothrow) lock_item[fCount];
621 		if (originalItems == NULL)
622 			return B_NO_MEMORY;
623 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
624 	}
625 	ArrayDeleter<lock_item> _(originalItems);
626 
627 	// get the cache
628 	vm_cache* cache;
629 	vm_area* area;
630 	status_t error;
631 	{
632 		AddressSpaceReadLocker locker;
633 		error = locker.SetFromArea(areaID, area);
634 		if (error != B_OK)
635 			return error;
636 
637 		cache = vm_area_get_locked_cache(area);
638 	}
639 
640 	while (true) {
641 		// add all areas
642 		vm_area* firstArea = cache->areas;
643 		for (vm_area* current = firstArea; current;
644 				current = current->cache_next) {
645 			error = AddArea(current->id,
646 				current == area ? writeLockThisOne : writeLockOthers);
647 			if (error != B_OK) {
648 				vm_area_put_locked_cache(cache);
649 				return error;
650 			}
651 		}
652 
653 		// unlock the cache and attempt to lock the address spaces
654 		vm_area_put_locked_cache(cache);
655 
656 		error = Lock();
657 		if (error != B_OK)
658 			return error;
659 
660 		// lock the cache again and check whether anything has changed
661 
662 		// check whether the area is gone in the meantime
663 		acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
664 		area = (vm_area *)hash_lookup(sAreaHash, &areaID);
665 		release_sem_etc(sAreaHashLock, READ_COUNT, 0);
666 
667 		if (area == NULL) {
668 			Unlock();
669 			return B_BAD_VALUE;
670 		}
671 
672 		// lock the cache
673 		vm_cache* oldCache = cache;
674 		cache = vm_area_get_locked_cache(area);
675 
676 		// If neither the area's cache has changed nor its area list we're
677 		// done...
678 		bool done = (cache == oldCache && firstArea == cache->areas);
679 
680 		// ... unless we're supposed to check the areas' "no_cache_change" flag
681 		bool yield = false;
682 		if (done && checkNoCacheChange) {
683 			for (vm_area *tempArea = cache->areas; tempArea != NULL;
684 					tempArea = tempArea->cache_next) {
685 				if (tempArea->no_cache_change) {
686 					done = false;
687 					yield = true;
688 					break;
689 				}
690 			}
691 		}
692 
693 		// If everything looks dandy, return the values.
694 		if (done) {
695 			_area = area;
696 			if (_cache != NULL)
697 				*_cache = cache;
698 			return B_OK;
699 		}
700 
701 		// Restore the original state and try again.
702 
703 		// Unlock the address spaces, but keep the cache locked for the next
704 		// iteration.
705 		Unlock();
706 
707 		// Get an additional reference to the original address spaces.
708 		for (int32 i = 0; i < originalCount; i++)
709 			atomic_add(&originalItems[i].space->ref_count, 1);
710 
711 		// Release all references to the current address spaces.
712 		for (int32 i = 0; i < fCount; i++)
713 			vm_put_address_space(fItems[i].space);
714 
715 		// Copy over the original state.
716 		fCount = originalCount;
717 		if (originalItems != NULL)
718 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
719 
720 		if (yield)
721 			thread_yield();
722 	}
723 }
724 
725 
726 //	#pragma mark -
727 
728 
729 static int
730 area_compare(void *_area, const void *key)
731 {
732 	vm_area *area = (vm_area *)_area;
733 	const area_id *id = (const area_id *)key;
734 
735 	if (area->id == *id)
736 		return 0;
737 
738 	return -1;
739 }
740 
741 
742 static uint32
743 area_hash(void *_area, const void *key, uint32 range)
744 {
745 	vm_area *area = (vm_area *)_area;
746 	const area_id *id = (const area_id *)key;
747 
748 	if (area != NULL)
749 		return area->id % range;
750 
751 	return (uint32)*id % range;
752 }
753 
754 
755 static vm_address_space *
756 get_address_space_by_area_id(area_id id)
757 {
758 	vm_address_space* addressSpace = NULL;
759 
760 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
761 
762 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
763 	if (area != NULL) {
764 		addressSpace = area->address_space;
765 		atomic_add(&addressSpace->ref_count, 1);
766 	}
767 
768 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
769 
770 	return addressSpace;
771 }
772 
773 
774 //! You need to have the address space locked when calling this function
775 static vm_area *
776 lookup_area(vm_address_space* addressSpace, area_id id)
777 {
778 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
779 
780 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
781 	if (area != NULL && area->address_space != addressSpace)
782 		area = NULL;
783 
784 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
785 
786 	return area;
787 }
788 
789 
790 static vm_area *
791 create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags)
792 {
793 	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
794 	if (reserved == NULL)
795 		return NULL;
796 
797 	memset(reserved, 0, sizeof(vm_area));
798 	reserved->id = RESERVED_AREA_ID;
799 		// this marks it as reserved space
800 	reserved->protection = flags;
801 	reserved->address_space = addressSpace;
802 
803 	return reserved;
804 }
805 
806 
807 static vm_area *
808 create_area_struct(vm_address_space *addressSpace, const char *name,
809 	uint32 wiring, uint32 protection)
810 {
811 	// restrict the area name to B_OS_NAME_LENGTH
812 	size_t length = strlen(name) + 1;
813 	if (length > B_OS_NAME_LENGTH)
814 		length = B_OS_NAME_LENGTH;
815 
816 	vm_area *area = (vm_area *)malloc(sizeof(vm_area));
817 	if (area == NULL)
818 		return NULL;
819 
820 	area->name = (char *)malloc(length);
821 	if (area->name == NULL) {
822 		free(area);
823 		return NULL;
824 	}
825 	strlcpy(area->name, name, length);
826 
827 	area->id = atomic_add(&sNextAreaID, 1);
828 	area->base = 0;
829 	area->size = 0;
830 	area->protection = protection;
831 	area->wiring = wiring;
832 	area->memory_type = 0;
833 
834 	area->cache = NULL;
835 	area->no_cache_change = 0;
836 	area->cache_offset = 0;
837 
838 	area->address_space = addressSpace;
839 	area->address_space_next = NULL;
840 	area->cache_next = area->cache_prev = NULL;
841 	area->hash_next = NULL;
842 	new (&area->mappings) vm_area_mappings;
843 
844 	return area;
845 }
846 
847 
848 /**	Finds a reserved area that covers the region spanned by \a start and
849  *	\a size, inserts the \a area into that region and makes sure that
850  *	there are reserved regions for the remaining parts.
851  */
852 
853 static status_t
854 find_reserved_area(vm_address_space *addressSpace, addr_t start,
855 	addr_t size, vm_area *area)
856 {
857 	vm_area *next, *last = NULL;
858 
859 	next = addressSpace->areas;
860 	while (next) {
861 		if (next->base <= start && next->base + next->size >= start + size) {
862 			// this area covers the requested range
863 			if (next->id != RESERVED_AREA_ID) {
864 				// but it's not reserved space, it's a real area
865 				return B_BAD_VALUE;
866 			}
867 
868 			break;
869 		}
870 		last = next;
871 		next = next->address_space_next;
872 	}
873 	if (next == NULL)
874 		return B_ENTRY_NOT_FOUND;
875 
876 	// now we have to transfer the requested part of the reserved
877 	// range to the new area - and remove, resize or split the old
878 	// reserved area.
879 
880 	if (start == next->base) {
881 		// the area starts at the beginning of the reserved range
882 		if (last)
883 			last->address_space_next = area;
884 		else
885 			addressSpace->areas = area;
886 
887 		if (size == next->size) {
888 			// the new area fully covers the reserved range
889 			area->address_space_next = next->address_space_next;
890 			vm_put_address_space(addressSpace);
891 			free(next);
892 		} else {
893 			// resize the reserved range behind the area
894 			area->address_space_next = next;
895 			next->base += size;
896 			next->size -= size;
897 		}
898 	} else if (start + size == next->base + next->size) {
899 		// the area is at the end of the reserved range
900 		area->address_space_next = next->address_space_next;
901 		next->address_space_next = area;
902 
903 		// resize the reserved range before the area
904 		next->size = start - next->base;
905 	} else {
906 		// the area splits the reserved range into two separate ones
907 		// we need a new reserved area to cover this space
908 		vm_area *reserved = create_reserved_area_struct(addressSpace,
909 			next->protection);
910 		if (reserved == NULL)
911 			return B_NO_MEMORY;
912 
913 		atomic_add(&addressSpace->ref_count, 1);
914 		reserved->address_space_next = next->address_space_next;
915 		area->address_space_next = reserved;
916 		next->address_space_next = area;
917 
918 		// resize regions
919 		reserved->size = next->base + next->size - start - size;
920 		next->size = start - next->base;
921 		reserved->base = start + size;
922 		reserved->cache_offset = next->cache_offset;
923 	}
924 
925 	area->base = start;
926 	area->size = size;
927 	addressSpace->change_count++;
928 
929 	return B_OK;
930 }
931 
932 
933 /*!	Must be called with this address space's sem held */
934 static status_t
935 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start,
936 	addr_t size, addr_t end, uint32 addressSpec, vm_area *area)
937 {
938 	vm_area *last = NULL;
939 	vm_area *next;
940 	bool foundSpot = false;
941 
942 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
943 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
944 		size, end, addressSpec, area));
945 
946 	// do some sanity checking
947 	if (start < addressSpace->base || size == 0
948 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
949 		|| start + size > end)
950 		return B_BAD_ADDRESS;
951 
952 	if (addressSpec == B_EXACT_ADDRESS) {
953 		// search for a reserved area
954 		status_t status = find_reserved_area(addressSpace, start, size, area);
955 		if (status == B_OK || status == B_BAD_VALUE)
956 			return status;
957 
958 		// there was no reserved area, and the slot doesn't seem to be used already
959 		// ToDo: this could be further optimized.
960 	}
961 
962 	// walk up to the spot where we should start searching
963 second_chance:
964 	next = addressSpace->areas;
965 	while (next) {
966 		if (next->base >= start + size) {
967 			// we have a winner
968 			break;
969 		}
970 		last = next;
971 		next = next->address_space_next;
972 	}
973 
974 	// find the right spot depending on the address specification - the area
975 	// will be inserted directly after "last" ("next" is not referenced anymore)
976 
977 	switch (addressSpec) {
978 		case B_ANY_ADDRESS:
979 		case B_ANY_KERNEL_ADDRESS:
980 		case B_ANY_KERNEL_BLOCK_ADDRESS:
981 			// find a hole big enough for a new area
982 			if (!last) {
983 				// see if we can build it at the beginning of the virtual map
984 				if (!next || (next->base >= addressSpace->base + size)) {
985 					foundSpot = true;
986 					area->base = addressSpace->base;
987 					break;
988 				}
989 				last = next;
990 				next = next->address_space_next;
991 			}
992 			// keep walking
993 			while (next) {
994 				if (next->base >= last->base + last->size + size) {
995 					// we found a spot (it'll be filled up below)
996 					break;
997 				}
998 				last = next;
999 				next = next->address_space_next;
1000 			}
1001 
1002 			if ((addressSpace->base + (addressSpace->size - 1))
1003 					>= (last->base + last->size + (size - 1))) {
1004 				// got a spot
1005 				foundSpot = true;
1006 				area->base = last->base + last->size;
1007 				break;
1008 			} else {
1009 				// we didn't find a free spot - if there were any reserved areas with
1010 				// the RESERVED_AVOID_BASE flag set, we can now test those for free
1011 				// space
1012 				// ToDo: it would make sense to start with the biggest of them
1013 				next = addressSpace->areas;
1014 				last = NULL;
1015 				for (last = NULL; next; last = next, next = next->address_space_next) {
1016 					// ToDo: take free space after the reserved area into account!
1017 					if (next->size == size) {
1018 						// the reserved area is entirely covered, and thus, removed
1019 						if (last)
1020 							last->address_space_next = next->address_space_next;
1021 						else
1022 							addressSpace->areas = next->address_space_next;
1023 
1024 						foundSpot = true;
1025 						area->base = next->base;
1026 						free(next);
1027 						break;
1028 					}
1029 					if (next->size >= size) {
1030 						// the new area will be placed at the end of the reserved
1031 						// area, and the reserved area will be resized to make space
1032 						foundSpot = true;
1033 						next->size -= size;
1034 						last = next;
1035 						area->base = next->base + next->size;
1036 						break;
1037 					}
1038 				}
1039 			}
1040 			break;
1041 
1042 		case B_BASE_ADDRESS:
1043 			// find a hole big enough for a new area beginning with "start"
1044 			if (!last) {
1045 				// see if we can build it at the specified start address
1046 				if (!next || (next->base >= start + size)) {
1047 					foundSpot = true;
1048 					area->base = start;
1049 					break;
1050 				}
1051 				last = next;
1052 				next = next->address_space_next;
1053 			}
1054 			// keep walking
1055 			while (next) {
1056 				if (next->base >= last->base + last->size + size) {
1057 					// we found a spot (it'll be filled up below)
1058 					break;
1059 				}
1060 				last = next;
1061 				next = next->address_space_next;
1062 			}
1063 
1064 			if ((addressSpace->base + (addressSpace->size - 1))
1065 					>= (last->base + last->size + (size - 1))) {
1066 				// got a spot
1067 				foundSpot = true;
1068 				if (last->base + last->size <= start)
1069 					area->base = start;
1070 				else
1071 					area->base = last->base + last->size;
1072 				break;
1073 			}
1074 			// we didn't find a free spot in the requested range, so we'll
1075 			// try again without any restrictions
1076 			start = addressSpace->base;
1077 			addressSpec = B_ANY_ADDRESS;
1078 			last = NULL;
1079 			goto second_chance;
1080 
1081 		case B_EXACT_ADDRESS:
1082 			// see if we can create it exactly here
1083 			if (!last) {
1084 				if (!next || (next->base >= start + size)) {
1085 					foundSpot = true;
1086 					area->base = start;
1087 					break;
1088 				}
1089 			} else {
1090 				if (next) {
1091 					if (last->base + last->size <= start && next->base >= start + size) {
1092 						foundSpot = true;
1093 						area->base = start;
1094 						break;
1095 					}
1096 				} else {
1097 					if ((last->base + (last->size - 1)) <= start - 1) {
1098 						foundSpot = true;
1099 						area->base = start;
1100 					}
1101 				}
1102 			}
1103 			break;
1104 		default:
1105 			return B_BAD_VALUE;
1106 	}
1107 
1108 	if (!foundSpot)
1109 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1110 
1111 	area->size = size;
1112 	if (last) {
1113 		area->address_space_next = last->address_space_next;
1114 		last->address_space_next = area;
1115 	} else {
1116 		area->address_space_next = addressSpace->areas;
1117 		addressSpace->areas = area;
1118 	}
1119 	addressSpace->change_count++;
1120 	return B_OK;
1121 }
1122 
1123 
1124 /**	This inserts the area you pass into the specified address space.
1125  *	It will also set the "_address" argument to its base address when
1126  *	the call succeeds.
1127  *	You need to hold the vm_address_space semaphore.
1128  */
1129 
1130 static status_t
1131 insert_area(vm_address_space *addressSpace, void **_address,
1132 	uint32 addressSpec, addr_t size, vm_area *area)
1133 {
1134 	addr_t searchBase, searchEnd;
1135 	status_t status;
1136 
1137 	switch (addressSpec) {
1138 		case B_EXACT_ADDRESS:
1139 			searchBase = (addr_t)*_address;
1140 			searchEnd = (addr_t)*_address + size;
1141 			break;
1142 
1143 		case B_BASE_ADDRESS:
1144 			searchBase = (addr_t)*_address;
1145 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1146 			break;
1147 
1148 		case B_ANY_ADDRESS:
1149 		case B_ANY_KERNEL_ADDRESS:
1150 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1151 			searchBase = addressSpace->base;
1152 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1153 			break;
1154 
1155 		default:
1156 			return B_BAD_VALUE;
1157 	}
1158 
1159 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1160 				searchEnd, addressSpec, area);
1161 	if (status == B_OK) {
1162 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
1163 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
1164 		*_address = (void *)area->base;
1165 	}
1166 
1167 	return status;
1168 }
1169 
1170 
1171 /*! You need to hold the lock of the cache and the write lock of the address
1172 	space when calling this function.
1173 	Note that in case of error your cache will be temporarily unlocked.
1174 */
1175 static status_t
1176 map_backing_store(vm_address_space *addressSpace, vm_cache *cache,
1177 	void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1178 	int wiring, int protection, int mapping, vm_area **_area,
1179 	const char *areaName)
1180 {
1181 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
1182 		addressSpace, cache, *_virtualAddress, offset, size, addressSpec,
1183 		wiring, protection, _area, areaName));
1184 	ASSERT_LOCKED_MUTEX(&cache->lock);
1185 
1186 	vm_area *area = create_area_struct(addressSpace, areaName, wiring,
1187 		protection);
1188 	if (area == NULL)
1189 		return B_NO_MEMORY;
1190 
1191 	vm_store *store = cache->store;
1192 	status_t status;
1193 
1194 	// if this is a private map, we need to create a new cache & store object
1195 	// pair to handle the private copies of pages as they are written to
1196 	vm_cache* sourceCache = cache;
1197 	if (mapping == REGION_PRIVATE_MAP) {
1198 		vm_cache *newCache;
1199 		vm_store *newStore;
1200 
1201 		// create an anonymous store object
1202 		newStore = vm_store_create_anonymous_noswap(
1203 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES);
1204 		if (newStore == NULL) {
1205 			status = B_NO_MEMORY;
1206 			goto err1;
1207 		}
1208 		newCache = vm_cache_create(newStore);
1209 		if (newCache == NULL) {
1210 			status = B_NO_MEMORY;
1211 			newStore->ops->destroy(newStore);
1212 			goto err1;
1213 		}
1214 
1215 		mutex_lock(&newCache->lock);
1216 		newCache->type = CACHE_TYPE_RAM;
1217 		newCache->temporary = 1;
1218 		newCache->scan_skip = cache->scan_skip;
1219 		newCache->virtual_base = offset;
1220 		newCache->virtual_size = offset + size;
1221 
1222 		vm_cache_add_consumer_locked(cache, newCache);
1223 
1224 		cache = newCache;
1225 		store = newStore;
1226 	}
1227 
1228 	status = vm_cache_set_minimal_commitment_locked(cache, offset + size);
1229 	if (status != B_OK)
1230 		goto err2;
1231 
1232 	// check to see if this address space has entered DELETE state
1233 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1234 		// okay, someone is trying to delete this address space now, so we can't
1235 		// insert the area, so back out
1236 		status = B_BAD_TEAM_ID;
1237 		goto err2;
1238 	}
1239 
1240 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1241 	if (status < B_OK)
1242 		goto err2;
1243 
1244 	// attach the cache to the area
1245 	area->cache = cache;
1246 	area->cache_offset = offset;
1247 
1248 	// point the cache back to the area
1249 	vm_cache_insert_area_locked(cache, area);
1250 	if (mapping == REGION_PRIVATE_MAP)
1251 		mutex_unlock(&cache->lock);
1252 
1253 	// insert the area in the global area hash table
1254 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0 ,0);
1255 	hash_insert(sAreaHash, area);
1256 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
1257 
1258 	// grab a ref to the address space (the area holds this)
1259 	atomic_add(&addressSpace->ref_count, 1);
1260 
1261 	*_area = area;
1262 	return B_OK;
1263 
1264 err2:
1265 	if (mapping == REGION_PRIVATE_MAP) {
1266 		// We created this cache, so we must delete it again. Note, that we
1267 		// need to temporarily unlock the source cache or we'll otherwise
1268 		// deadlock, since vm_cache_remove_consumer will try to lock it too.
1269 		mutex_unlock(&cache->lock);
1270 		mutex_unlock(&sourceCache->lock);
1271 		vm_cache_release_ref(cache);
1272 		mutex_lock(&sourceCache->lock);
1273 	}
1274 err1:
1275 	free(area->name);
1276 	free(area);
1277 	return status;
1278 }
1279 
1280 
1281 status_t
1282 vm_unreserve_address_range(team_id team, void *address, addr_t size)
1283 {
1284 	AddressSpaceWriteLocker locker(team);
1285 	if (!locker.IsLocked())
1286 		return B_BAD_TEAM_ID;
1287 
1288 	// check to see if this address space has entered DELETE state
1289 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1290 		// okay, someone is trying to delete this address space now, so we can't
1291 		// insert the area, so back out
1292 		return B_BAD_TEAM_ID;
1293 	}
1294 
1295 	// search area list and remove any matching reserved ranges
1296 
1297 	vm_area* area = locker.AddressSpace()->areas;
1298 	vm_area* last = NULL;
1299 	while (area) {
1300 		// the area must be completely part of the reserved range
1301 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1302 			&& area->base + area->size <= (addr_t)address + size) {
1303 			// remove reserved range
1304 			vm_area *reserved = area;
1305 			if (last)
1306 				last->address_space_next = reserved->address_space_next;
1307 			else
1308 				locker.AddressSpace()->areas = reserved->address_space_next;
1309 
1310 			area = reserved->address_space_next;
1311 			vm_put_address_space(locker.AddressSpace());
1312 			free(reserved);
1313 			continue;
1314 		}
1315 
1316 		last = area;
1317 		area = area->address_space_next;
1318 	}
1319 
1320 	return B_OK;
1321 }
1322 
1323 
1324 status_t
1325 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec,
1326 	addr_t size, uint32 flags)
1327 {
1328 	if (size == 0)
1329 		return B_BAD_VALUE;
1330 
1331 	AddressSpaceWriteLocker locker(team);
1332 	if (!locker.IsLocked())
1333 		return B_BAD_TEAM_ID;
1334 
1335 	// check to see if this address space has entered DELETE state
1336 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1337 		// okay, someone is trying to delete this address space now, so we
1338 		// can't insert the area, let's back out
1339 		return B_BAD_TEAM_ID;
1340 	}
1341 
1342 	vm_area *area = create_reserved_area_struct(locker.AddressSpace(), flags);
1343 	if (area == NULL)
1344 		return B_NO_MEMORY;
1345 
1346 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1347 		size, area);
1348 	if (status < B_OK) {
1349 		free(area);
1350 		return status;
1351 	}
1352 
1353 	// the area is now reserved!
1354 
1355 	area->cache_offset = area->base;
1356 		// we cache the original base address here
1357 
1358 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1359 	return B_OK;
1360 }
1361 
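// Illustrative pairing of the two functions above (the base address and size
// are made up): a range reserved this way is later carved up by
// find_reserved_area() when an area is created inside it with
// B_EXACT_ADDRESS.
//
//	void* base = (void*)0x60000000;
//	if (vm_reserve_address_range(team, &base, B_EXACT_ADDRESS,
//			16 * B_PAGE_SIZE, 0) == B_OK) {
//		// ... create areas within [base, base + 16 * B_PAGE_SIZE) ...
//		vm_unreserve_address_range(team, base, 16 * B_PAGE_SIZE);
//	}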
1362 
1363 area_id
1364 vm_create_anonymous_area(team_id team, const char *name, void **address,
1365 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
1366 {
1367 	vm_area *area;
1368 	vm_cache *cache;
1369 	vm_store *store;
1370 	vm_page *page = NULL;
1371 	bool isStack = (protection & B_STACK_AREA) != 0;
1372 	bool canOvercommit = false;
1373 
1374 	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));
1375 
1376 	if (size == 0)
1377 		return B_BAD_VALUE;
1378 	if (!arch_vm_supports_protection(protection))
1379 		return B_NOT_SUPPORTED;
1380 
1381 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1382 		canOvercommit = true;
1383 
1384 #ifdef DEBUG_KERNEL_STACKS
1385 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1386 		isStack = true;
1387 #endif
1388 
1389 	/* check parameters */
1390 	switch (addressSpec) {
1391 		case B_ANY_ADDRESS:
1392 		case B_EXACT_ADDRESS:
1393 		case B_BASE_ADDRESS:
1394 		case B_ANY_KERNEL_ADDRESS:
1395 			break;
1396 
1397 		default:
1398 			return B_BAD_VALUE;
1399 	}
1400 
1401 	switch (wiring) {
1402 		case B_NO_LOCK:
1403 		case B_FULL_LOCK:
1404 		case B_LAZY_LOCK:
1405 		case B_CONTIGUOUS:
1406 		case B_ALREADY_WIRED:
1407 			break;
1408 		case B_LOMEM:
1409 		//case B_SLOWMEM:
1410 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1411 			wiring = B_FULL_LOCK;
1412 			break;
1413 		default:
1414 			return B_BAD_VALUE;
1415 	}
1416 
1417 	AddressSpaceWriteLocker locker;
1418 	status_t status = locker.SetTo(team);
1419 	if (status != B_OK)
1420 		return status;
1421 
1422 	vm_address_space *addressSpace = locker.AddressSpace();
1423 	size = PAGE_ALIGN(size);
1424 
1425 	if (wiring == B_CONTIGUOUS) {
1426 		// we try to allocate the page run here upfront as this may easily
1427 		// fail for obvious reasons
1428 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
1429 		if (page == NULL)
1430 			return B_NO_MEMORY;
1431 	}
1432 
1433 	// create an anonymous store object
1434 	// if it's a stack, make sure that two pages are available at least
1435 	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ? 2 : 0,
1436 		isStack ? ((protection & B_USER_PROTECTION) != 0 ?
1437 			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
1438 	if (store == NULL) {
1439 		status = B_NO_MEMORY;
1440 		goto err1;
1441 	}
1442 	cache = vm_cache_create(store);
1443 	if (cache == NULL) {
1444 		status = B_NO_MEMORY;
1445 		goto err2;
1446 	}
1447 
1448 	cache->temporary = 1;
1449 	cache->type = CACHE_TYPE_RAM;
1450 	cache->virtual_size = size;
1451 
1452 	switch (wiring) {
1453 		case B_LAZY_LOCK:
1454 		case B_FULL_LOCK:
1455 		case B_CONTIGUOUS:
1456 		case B_ALREADY_WIRED:
1457 			cache->scan_skip = 1;
1458 			break;
1459 		case B_NO_LOCK:
1460 			cache->scan_skip = 0;
1461 			break;
1462 	}
1463 
1464 	mutex_lock(&cache->lock);
1465 
1466 	status = map_backing_store(addressSpace, cache, address, 0, size,
1467 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name);
1468 
1469 	mutex_unlock(&cache->lock);
1470 
1471 	if (status < B_OK) {
1472 		vm_cache_release_ref(cache);
1473 		goto err1;
1474 	}
1475 
1476 	locker.DegradeToReadLock();
1477 
1478 	switch (wiring) {
1479 		case B_NO_LOCK:
1480 		case B_LAZY_LOCK:
1481 			// do nothing - the pages are mapped in as needed
1482 			break;
1483 
1484 		case B_FULL_LOCK:
1485 		{
1486 			vm_translation_map *map = &addressSpace->translation_map;
1487 			size_t reservePages = map->ops->map_max_pages_need(map,
1488 				area->base, area->base + (area->size - 1));
1489 			vm_page_reserve_pages(reservePages);
1490 
1491 			// Allocate and map all pages for this area
1492 			mutex_lock(&cache->lock);
1493 
1494 			off_t offset = 0;
1495 			for (addr_t address = area->base; address < area->base + (area->size - 1);
1496 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1497 #ifdef DEBUG_KERNEL_STACKS
1498 #	ifdef STACK_GROWS_DOWNWARDS
1499 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
1500 						* B_PAGE_SIZE)
1501 #	else
1502 				if (isStack && address >= area->base + area->size
1503 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1504 #	endif
1505 					continue;
1506 #endif
1507 				vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR, false);
1508 				if (page == NULL) {
1509 					// this shouldn't really happen, as we reserve the memory upfront
1510 					panic("couldn't fulfill B_FULL lock!");
1511 					panic("couldn't fulfill B_FULL_LOCK!");
1512 
1513 				vm_cache_insert_page(cache, page, offset);
1514 				vm_map_page(area, page, address, protection);
1515 			}
1516 
1517 			mutex_unlock(&cache->lock);
1518 			vm_page_unreserve_pages(reservePages);
1519 			break;
1520 		}
1521 
1522 		case B_ALREADY_WIRED:
1523 		{
1524 			// the pages should already be mapped. This is only really useful during
1525 			// boot time. Find the appropriate vm_page objects and stick them in
1526 			// the cache object.
1527 			vm_translation_map *map = &addressSpace->translation_map;
1528 			off_t offset = 0;
1529 
1530 			if (!kernel_startup)
1531 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1532 
1533 			mutex_lock(&cache->lock);
1534 			map->ops->lock(map);
1535 
1536 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
1537 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1538 					offset += B_PAGE_SIZE) {
1539 				addr_t physicalAddress;
1540 				uint32 flags;
1541 				status = map->ops->query(map, virtualAddress,
1542 					&physicalAddress, &flags);
1543 				if (status < B_OK) {
1544 					panic("looking up mapping failed for va 0x%lx\n",
1545 						virtualAddress);
1546 				}
1547 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1548 				if (page == NULL) {
1549 					panic("looking up page failed for pa 0x%lx\n",
1550 						physicalAddress);
1551 				}
1552 
1553 				page->wired_count++;
1554 					// TODO: needs to be atomic on all platforms!
1555 				vm_page_set_state(page, PAGE_STATE_WIRED);
1556 				vm_cache_insert_page(cache, page, offset);
1557 			}
1558 
1559 			map->ops->unlock(map);
1560 			mutex_unlock(&cache->lock);
1561 			break;
1562 		}
1563 
1564 		case B_CONTIGUOUS:
1565 		{
1566 			// We have already allocated our contiguous page run, so we can now just
1567 			// map them in the address space
1568 			vm_translation_map *map = &addressSpace->translation_map;
1569 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
1570 			addr_t virtualAddress = area->base;
1571 			size_t reservePages = map->ops->map_max_pages_need(map,
1572 				virtualAddress, virtualAddress + (area->size - 1));
1573 			off_t offset = 0;
1574 
1575 			vm_page_reserve_pages(reservePages);
1576 			mutex_lock(&cache->lock);
1577 			map->ops->lock(map);
1578 
1579 			for (virtualAddress = area->base; virtualAddress < area->base
1580 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1581 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1582 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1583 				if (page == NULL)
1584 					panic("couldn't lookup physical page just allocated\n");
1585 
1586 				status = map->ops->map(map, virtualAddress, physicalAddress,
1587 					protection);
1588 				if (status < B_OK)
1589 					panic("couldn't map physical page in page run\n");
1590 
1591 				page->wired_count++;
1592 					// TODO: needs to be atomic on all platforms!
1593 				vm_page_set_state(page, PAGE_STATE_WIRED);
1594 				vm_cache_insert_page(cache, page, offset);
1595 			}
1596 
1597 			map->ops->unlock(map);
1598 			mutex_unlock(&cache->lock);
1599 			vm_page_unreserve_pages(reservePages);
1600 			break;
1601 		}
1602 
1603 		default:
1604 			break;
1605 	}
1606 
1607 	TRACE(("vm_create_anonymous_area: done\n"));
1608 
1609 	area->cache_type = CACHE_TYPE_RAM;
1610 	return area->id;
1611 
1612 err2:
1613 	store->ops->destroy(store);
1614 err1:
1615 	if (wiring == B_CONTIGUOUS) {
1616 		// we had allocated the contiguous page run upfront; free those pages again
1617 		addr_t pageNumber = page->physical_page_number;
1618 		int32 i;
1619 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1620 			page = vm_lookup_page(pageNumber);
1621 			if (page == NULL)
1622 				panic("couldn't lookup physical page just allocated\n");
1623 
1624 			vm_page_set_state(page, PAGE_STATE_FREE);
1625 		}
1626 	}
1627 
1628 	return status;
1629 }
1630 
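// Example call (sketch; the buffer name is made up): allocate a fully
// locked, zero-filled kernel buffer of four pages:
//
//	void* buffer;
//	area_id bufferArea = vm_create_anonymous_area(
//		vm_kernel_address_space_id(), "example buffer", &buffer,
//		B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);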
1631 
1632 area_id
1633 vm_map_physical_memory(team_id team, const char *name, void **_address,
1634 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
1635 {
1636 	vm_area *area;
1637 	vm_cache *cache;
1638 	vm_store *store;
1639 	addr_t mapOffset;
1640 
1641 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1642 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1643 		name, _address, addressSpec, size, protection, physicalAddress));
1644 
1645 	if (!arch_vm_supports_protection(protection))
1646 		return B_NOT_SUPPORTED;
1647 
1648 	AddressSpaceWriteLocker locker(team);
1649 	if (!locker.IsLocked())
1650 		return B_BAD_TEAM_ID;
1651 
1652 	// if the physical address is not page aligned,
1653 	// move the actual area down to align on a page boundary
1654 	mapOffset = physicalAddress % B_PAGE_SIZE;
1655 	size += mapOffset;
1656 	physicalAddress -= mapOffset;
1657 
1658 	size = PAGE_ALIGN(size);
1659 
1660 	// create a device store object
1661 
1662 	store = vm_store_create_device(physicalAddress);
1663 	if (store == NULL)
1664 		return B_NO_MEMORY;
1665 
1666 	cache = vm_cache_create(store);
1667 	if (cache == NULL) {
1668 		store->ops->destroy(store);
1669 		return B_NO_MEMORY;
1670 	}
1671 
1672 	// tell the page scanner to skip over this area, its pages are special
1673 	cache->scan_skip = 1;
1674 	cache->type = CACHE_TYPE_DEVICE;
1675 	cache->virtual_size = size;
1676 
1677 	mutex_lock(&cache->lock);
1678 
1679 	status_t status = map_backing_store(locker.AddressSpace(), cache, _address,
1680 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1681 		REGION_NO_PRIVATE_MAP, &area, name);
1682 
1683 	mutex_unlock(&cache->lock);
1684 
1685 	if (status < B_OK)
1686 		vm_cache_release_ref(cache);
1687 
1688 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
1689 		// set requested memory type
1690 		status = arch_vm_set_memory_type(area, physicalAddress,
1691 			addressSpec & B_MTR_MASK);
1692 		if (status < B_OK)
1693 			delete_area(locker.AddressSpace(), area);
1694 	}
1695 
1696 	if (status >= B_OK) {
1697 		// make sure our area is mapped in completely
1698 
1699 		vm_translation_map *map = &locker.AddressSpace()->translation_map;
1700 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
1701 			area->base + (size - 1));
1702 
1703 		vm_page_reserve_pages(reservePages);
1704 		map->ops->lock(map);
1705 
1706 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1707 			map->ops->map(map, area->base + offset, physicalAddress + offset,
1708 				protection);
1709 		}
1710 
1711 		map->ops->unlock(map);
1712 		vm_page_unreserve_pages(reservePages);
1713 	}
1714 
1715 	if (status < B_OK)
1716 		return status;
1717 
1718 	// modify the pointer returned to be offset back into the new area
1719 	// the same way the physical address passed in was offset
1720 	*_address = (void *)((addr_t)*_address + mapOffset);
1721 
1722 	area->cache_type = CACHE_TYPE_DEVICE;
1723 	return area->id;
1724 }
1725 
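// Sketch of how a driver might use the function above to map device
// registers or a frame buffer (physicalBase and length are hypothetical;
// one of the B_MTR_* flags may additionally be ORed into the address spec,
// see the B_MTR_MASK handling above):
//
//	void* registers;
//	area_id regsArea = vm_map_physical_memory(
//		vm_kernel_address_space_id(), "device registers", &registers,
//		B_ANY_KERNEL_ADDRESS, length,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase);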
1726 
1727 area_id
1728 vm_create_null_area(team_id team, const char *name, void **address,
1729 	uint32 addressSpec, addr_t size)
1730 {
1731 	vm_area *area;
1732 	vm_cache *cache;
1733 	vm_store *store;
1734 	status_t status;
1735 
1736 	AddressSpaceWriteLocker locker(team);
1737 	if (!locker.IsLocked())
1738 		return B_BAD_TEAM_ID;
1739 
1740 	size = PAGE_ALIGN(size);
1741 
1742 	// create a null store object
1743 
1744 	store = vm_store_create_null();
1745 	if (store == NULL)
1746 		return B_NO_MEMORY;
1747 
1748 	cache = vm_cache_create(store);
1749 	if (cache == NULL) {
1750 		store->ops->destroy(store);
1751 		return B_NO_MEMORY;
1752 	}
1753 
1754 	// tell the page scanner to skip over this area, no pages will be mapped here
1755 	cache->scan_skip = 1;
1756 	cache->type = CACHE_TYPE_NULL;
1757 	cache->virtual_size = size;
1758 
1759 	mutex_lock(&cache->lock);
1760 
1761 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
1762 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);
1763 
1764 	mutex_unlock(&cache->lock);
1765 
1766 	if (status < B_OK) {
1767 		vm_cache_release_ref(cache);
1768 		return status;
1769 	}
1770 
1771 	area->cache_type = CACHE_TYPE_NULL;
1772 	return area->id;
1773 }
1774 
1775 
1776 /*!	Creates the vnode cache for the specified \a vnode.
1777 	The vnode has to be marked busy when calling this function.
1778 */
1779 status_t
1780 vm_create_vnode_cache(struct vnode *vnode, struct vm_cache **_cache)
1781 {
1782 	status_t status;
1783 
1784 	// create a vnode store object
1785 	vm_store *store = vm_create_vnode_store(vnode);
1786 	if (store == NULL)
1787 		return B_NO_MEMORY;
1788 
1789 	vm_cache *cache = vm_cache_create(store);
1790 	if (cache == NULL) {
1791 		status = B_NO_MEMORY;
1792 		goto err1;
1793 	}
1794 
1795 	cache->type = CACHE_TYPE_VNODE;
1796 
1797 	*_cache = cache;
1798 	return B_OK;
1799 
1800 err1:
1801 	store->ops->destroy(store);
1802 	return status;
1803 }
1804 
1805 
1806 /*!	Will map the file at the path specified by \a name to an area in memory.
1807 	The file will be mirrored beginning at the specified \a offset. The \a offset
1808 	and \a size arguments have to be page aligned.
1809 */
1810 static area_id
1811 _vm_map_file(team_id team, const char *name, void **_address, uint32 addressSpec,
1812 	size_t size, uint32 protection, uint32 mapping, const char *path,
1813 	off_t offset, bool kernel)
1814 {
1815 	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
1816 	// ToDo: check file access permissions (would be already done if the above were true)
1817 	// ToDo: for binary files, we want to make sure that they get the
1818 	//	copy of a file at a given time, ie. later changes should not
1819 	//	make it into the mapped copy -- this will need quite some changes
1820 	//	to be done in a nice way
1821 	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n",
1822 		path, offset, size, mapping));
1823 
1824 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
1825 	size = PAGE_ALIGN(size);
1826 
1827 	// get the vnode for the object, this also grabs a ref to it
1828 	struct vnode *vnode;
1829 	status_t status = vfs_get_vnode_from_path(path, kernel, &vnode);
1830 	if (status < B_OK)
1831 		return status;
1832 
1833 	AddressSpaceWriteLocker locker(team);
1834 	if (!locker.IsLocked()) {
1835 		vfs_put_vnode(vnode);
1836 		return B_BAD_TEAM_ID;
1837 	}
1838 
1839 	// ToDo: this only works for file systems that use the file cache
1840 	vm_cache *cache;
1841 	status = vfs_get_vnode_cache(vnode, &cache, false);
1842 	if (status < B_OK) {
1843 		vfs_put_vnode(vnode);
1844 		return status;
1845 	}
1846 
1847 	mutex_lock(&cache->lock);
1848 
1849 	vm_area *area;
1850 	status = map_backing_store(locker.AddressSpace(), cache, _address,
1851 		offset, size, addressSpec, 0, protection, mapping, &area, name);
1852 
1853 	mutex_unlock(&cache->lock);
1854 
1855 	vfs_put_vnode(vnode);
1856 		// we don't need this vnode anymore - if the above call was
1857 		// successful, the store already has a ref to it
1858 
1859 	if (status < B_OK || mapping == REGION_PRIVATE_MAP) {
1860 		// map_backing_store() cannot know we no longer need the ref
1861 		vm_cache_release_ref(cache);
1862 	}
1863 	if (status < B_OK)
1864 		return status;
1865 
1866 	area->cache_type = CACHE_TYPE_VNODE;
1867 	return area->id;
1868 }
1869 
1870 
1871 area_id
1872 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec,
1873 	addr_t size, uint32 protection, uint32 mapping, const char *path,
1874 	off_t offset)
1875 {
1876 	if (!arch_vm_supports_protection(protection))
1877 		return B_NOT_SUPPORTED;
1878 
1879 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1880 		mapping, path, offset, true);
1881 }
1882 
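// Sketch (hypothetical path and size): map a file read-only for the kernel
// team, sharing pages with the file cache via REGION_NO_PRIVATE_MAP:
//
//	void* address;
//	area_id area = vm_map_file(vm_kernel_address_space_id(), "mapped file",
//		&address, B_ANY_ADDRESS, fileSize, B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, "/path/to/file", 0);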
1883 
1884 vm_cache *
1885 vm_area_get_locked_cache(vm_area *area)
1886 {
1887 	MutexLocker locker(sAreaCacheLock);
1888 	while (true) {
1889 		vm_cache* cache = area->cache;
1890 		vm_cache_acquire_ref(cache);
1891 		locker.Unlock();
1892 
1893 		mutex_lock(&cache->lock);
1894 
1895 		locker.Lock();
1896 		if (cache == area->cache)
1897 			return cache;
1898 
1899 		// the cache changed in the meantime
1900 		mutex_unlock(&cache->lock);
1901 		vm_cache_release_ref(cache);
1902 	}
1903 }
1904 
1905 
1906 void
1907 vm_area_put_locked_cache(vm_cache *cache)
1908 {
1909 	mutex_unlock(&cache->lock);
1910 	vm_cache_release_ref(cache);
1911 }
1912 
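// vm_area_get_locked_cache() and vm_area_put_locked_cache() are meant to be
// used as a pair; the AreaCacheLocker class near the top of this file wraps
// them in RAII form. Manual usage sketch:
//
//	vm_cache* cache = vm_area_get_locked_cache(area);
//	// ... the cache is referenced and its lock is held here ...
//	vm_area_put_locked_cache(cache);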
1913 
1914 area_id
1915 vm_clone_area(team_id team, const char *name, void **address,
1916 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID)
1917 {
1918 	vm_area *newArea = NULL;
1919 	vm_area *sourceArea;
1920 
1921 	MultiAddressSpaceLocker locker;
1922 	vm_address_space *sourceAddressSpace;
1923 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
1924 	if (status != B_OK)
1925 		return status;
1926 
1927 	vm_address_space *targetAddressSpace;
1928 	status = locker.AddTeam(team, true, &targetAddressSpace);
1929 	if (status != B_OK)
1930 		return status;
1931 
1932 	status = locker.Lock();
1933 	if (status != B_OK)
1934 		return status;
1935 
1936 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
1937 	if (sourceArea == NULL)
1938 		return B_BAD_VALUE;
1939 
1940 	vm_cache *cache = vm_area_get_locked_cache(sourceArea);
1941 
1942 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
1943 	//	have been adapted. Maybe it should be part of the kernel settings,
1944 	//	anyway (so that old drivers can always work).
1945 #if 0
1946 	if (sourceArea->aspace == vm_kernel_address_space() && addressSpace != vm_kernel_address_space()
1947 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1948 		// kernel areas must not be cloned in userland, unless explicitly
1949 		// declared user-cloneable upon construction
1950 		status = B_NOT_ALLOWED;
1951 	} else
1952 #endif
1953 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1954 		status = B_NOT_ALLOWED;
1955 	else {
1956 		status = map_backing_store(targetAddressSpace, cache, address,
1957 			sourceArea->cache_offset, sourceArea->size, addressSpec,
1958 			sourceArea->wiring, protection, mapping, &newArea, name);
1959 	}
1960 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1961 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1962 		// to create a new ref, and has therefore already acquired a reference
1963 		// to the source cache - but otherwise it has no idea that we need
1964 		// one.
1965 		vm_cache_acquire_ref(cache);
1966 	}
1967 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
1968 		// we need to map in everything at this point
1969 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
1970 			// we don't have actual pages to map but a physical area
1971 			vm_translation_map *map = &sourceArea->address_space->translation_map;
1972 			map->ops->lock(map);
1973 
1974 			addr_t physicalAddress;
1975 			uint32 oldProtection;
1976 			map->ops->query(map, sourceArea->base, &physicalAddress,
1977 				&oldProtection);
1978 
1979 			map->ops->unlock(map);
1980 
1981 			map = &targetAddressSpace->translation_map;
1982 			size_t reservePages = map->ops->map_max_pages_need(map,
1983 				newArea->base, newArea->base + (newArea->size - 1));
1984 
1985 			vm_page_reserve_pages(reservePages);
1986 			map->ops->lock(map);
1987 
1988 			for (addr_t offset = 0; offset < newArea->size;
1989 					offset += B_PAGE_SIZE) {
1990 				map->ops->map(map, newArea->base + offset,
1991 					physicalAddress + offset, protection);
1992 			}
1993 
1994 			map->ops->unlock(map);
1995 			vm_page_unreserve_pages(reservePages);
1996 		} else {
1997 			vm_translation_map *map = &targetAddressSpace->translation_map;
1998 			size_t reservePages = map->ops->map_max_pages_need(map,
1999 				newArea->base, newArea->base + (newArea->size - 1));
2000 			vm_page_reserve_pages(reservePages);
2001 
2002 			// map in all pages from source
2003 			for (vm_page *page = cache->page_list; page != NULL;
2004 					page = page->cache_next) {
2005 				vm_map_page(newArea, page, newArea->base
2006 					+ ((page->cache_offset << PAGE_SHIFT) - newArea->cache_offset),
2007 					protection);
2008 			}
2009 
2010 			vm_page_unreserve_pages(reservePages);
2011 		}
2012 	}
2013 	if (status == B_OK)
2014 		newArea->cache_type = sourceArea->cache_type;
2015 
2016 	vm_area_put_locked_cache(cache);
2017 
2018 	if (status < B_OK)
2019 		return status;
2020 
2021 	return newArea->id;
2022 }
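
/*!	Annotation (not part of the original source): for a B_FULL_LOCK clone the
	code above maps the new area eagerly instead of letting it fault in
	lazily. CACHE_TYPE_DEVICE caches have no vm_page structures, so the
	physical base is queried from the source area's translation map and
	mapped linearly into the target; every other cache type walks the locked
	cache's page list and maps each page via vm_map_page().
*/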
2023 
2024 
2025 //! The address space must be write locked at this point
2026 static void
2027 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area)
2028 {
2029 	vm_area *temp, *last = NULL;
2030 
2031 	temp = addressSpace->areas;
2032 	while (temp != NULL) {
2033 		if (area == temp) {
2034 			if (last != NULL) {
2035 				last->address_space_next = temp->address_space_next;
2036 			} else {
2037 				addressSpace->areas = temp->address_space_next;
2038 			}
2039 			addressSpace->change_count++;
2040 			break;
2041 		}
2042 		last = temp;
2043 		temp = temp->address_space_next;
2044 	}
2045 	if (area == addressSpace->area_hint)
2046 		addressSpace->area_hint = NULL;
2047 
2048 	if (temp == NULL)
2049 		panic("remove_area_from_address_space: area not found in aspace's area list\n");
2050 }
2051 
2052 
2053 static void
2054 delete_area(vm_address_space *addressSpace, vm_area *area)
2055 {
2056 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
2057 	hash_remove(sAreaHash, area);
2058 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
2059 
2060 	// At this point the area is removed from the global hash table, but
2061 	// still exists in the area list.
2062 
2063 	// Unmap the virtual address space the area occupied
2064 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2065 
2066 	if (!area->cache->temporary)
2067 		vm_cache_write_modified(area->cache, false);
2068 
2069 	arch_vm_unset_memory_type(area);
2070 	remove_area_from_address_space(addressSpace, area);
2071 	vm_put_address_space(addressSpace);
2072 
2073 	vm_cache_remove_area(area->cache, area);
2074 	vm_cache_release_ref(area->cache);
2075 
2076 	free(area->name);
2077 	free(area);
2078 }
2079 
2080 
2081 status_t
2082 vm_delete_area(team_id team, area_id id)
2083 {
2084 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2085 
2086 	AddressSpaceWriteLocker locker;
2087 	vm_area *area;
2088 	status_t status = locker.SetFromArea(team, id, area);
2089 	if (status < B_OK)
2090 		return status;
2091 
2092 	delete_area(locker.AddressSpace(), area);
2093 	return B_OK;
2094 }
2095 
2096 
2097 /*!	Creates a new cache on top of the given cache, moves all areas from
2098 	the old cache to the new one, and changes the protection of all affected
2099 	areas' pages to read-only.
2100 	Preconditions:
2101 	- The given cache must be locked.
2102 	- All of the cache's areas' address spaces must be read locked.
2103 	- All of the cache's areas must have their \c no_cache_change flag cleared.
2104 */
2105 static status_t
2106 vm_copy_on_write_area(vm_cache* lowerCache)
2107 {
2108 	vm_store *store;
2109 	vm_cache *upperCache;
2110 	vm_page *page;
2111 	status_t status;
2112 
2113 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2114 
2115 	// We need to separate the cache from its areas. The cache goes one level
2116 	// deeper and we create a new cache in between.
2117 
2118 	// create an anonymous store object
2119 	store = vm_store_create_anonymous_noswap(false, 0, 0);
2120 	if (store == NULL)
2121 		return B_NO_MEMORY;
2122 
2123 	upperCache = vm_cache_create(store);
2124 	if (upperCache == NULL) {
2125 		store->ops->destroy(store);
2126 		return B_NO_MEMORY;
2127 	}
2128 
2129 	mutex_lock(&upperCache->lock);
2130 
2131 	upperCache->type = CACHE_TYPE_RAM;
2132 	upperCache->temporary = 1;
2133 	upperCache->scan_skip = lowerCache->scan_skip;
2134 	upperCache->virtual_base = lowerCache->virtual_base;
2135 	upperCache->virtual_size = lowerCache->virtual_size;
2136 
2137 	// transfer the lower cache areas to the upper cache
2138 	mutex_lock(&sAreaCacheLock);
2139 
2140 	upperCache->areas = lowerCache->areas;
2141 	lowerCache->areas = NULL;
2142 
2143 	for (vm_area *tempArea = upperCache->areas; tempArea != NULL;
2144 			tempArea = tempArea->cache_next) {
2145 		ASSERT(!tempArea->no_cache_change);
2146 
2147 		tempArea->cache = upperCache;
2148 		atomic_add(&upperCache->ref_count, 1);
2149 		atomic_add(&lowerCache->ref_count, -1);
2150 	}
2151 
2152 	mutex_unlock(&sAreaCacheLock);
2153 
2154 	vm_cache_add_consumer_locked(lowerCache, upperCache);
2155 
2156 	// We now need to remap all pages from all of the cache's areas read-only,
2157 	// so that a copy will be created on next write access
2158 
2159 	for (vm_area *tempArea = upperCache->areas; tempArea != NULL;
2160 			tempArea = tempArea->cache_next) {
2161 		// The area must be readable in the same way it was previously writable
2162 		uint32 protection = B_KERNEL_READ_AREA;
2163 		if (tempArea->protection & B_READ_AREA)
2164 			protection |= B_READ_AREA;
2165 
2166 		vm_translation_map *map = &tempArea->address_space->translation_map;
2167 		map->ops->lock(map);
2168 		map->ops->protect(map, tempArea->base, tempArea->base - 1 + tempArea->size, protection);
2169 		map->ops->unlock(map);
2170 	}
2171 
2172 	vm_area_put_locked_cache(upperCache);
2173 
2174 	return B_OK;
2175 }
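
/*!	Annotation (not part of the original source): after
	vm_copy_on_write_area() the hierarchy for the affected areas is

		area(s) -> upperCache (anonymous RAM, temporary) -> lowerCache

	All pages still physically live in lowerCache; since the areas were just
	remapped read-only, the first write access faults and the soft fault path
	copies the touched page up into upperCache.
*/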
2176 
2177 
2178 area_id
2179 vm_copy_area(team_id team, const char *name, void **_address,
2180 	uint32 addressSpec, uint32 protection, area_id sourceID)
2181 {
2182 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2183 
2184 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2185 		// set the same protection for the kernel as for userland
2186 		protection |= B_KERNEL_READ_AREA;
2187 		if (writableCopy)
2188 			protection |= B_KERNEL_WRITE_AREA;
2189 	}
2190 
2191 	// Do the locking: target address space, all address spaces associated with
2192 	// the source cache, and the cache itself.
2193 	MultiAddressSpaceLocker locker;
2194 	vm_address_space *targetAddressSpace;
2195 	vm_cache *cache;
2196 	vm_area* source;
2197 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2198 	if (status == B_OK) {
2199 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2200 			&cache, true);
2201 	}
2202 	if (status != B_OK)
2203 		return status;
2204 
2205 	AreaCacheLocker cacheLocker(cache);	// already locked
2206 
2207 	if (addressSpec == B_CLONE_ADDRESS) {
2208 		addressSpec = B_EXACT_ADDRESS;
2209 		*_address = (void *)source->base;
2210 	}
2211 
2212 	// First, create a cache on top of the source area
2213 
2214 	vm_area *target;
2215 	status = map_backing_store(targetAddressSpace, cache, _address,
2216 		source->cache_offset, source->size, addressSpec, source->wiring,
2217 		protection, REGION_PRIVATE_MAP, &target, name);
2218 
2219 	if (status < B_OK)
2220 		return status;
2221 
2222 	// If the source area is writable, we need to move it one layer up as well
2223 
2224 	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2225 		// ToDo: do something more useful if this fails!
2226 		if (vm_copy_on_write_area(cache) < B_OK)
2227 			panic("vm_copy_on_write_area() failed!\n");
2228 	}
2229 
2230 	// we return the ID of the newly created area
2231 	return target->id;
2232 }
2233 
2234 
2235 //! You need to hold the cache lock when calling this function
2236 static int32
2237 count_writable_areas(vm_cache *cache, vm_area *ignoreArea)
2238 {
2239 	struct vm_area *area = cache->areas;
2240 	uint32 count = 0;
2241 
2242 	for (; area != NULL; area = area->cache_next) {
2243 		if (area != ignoreArea
2244 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2245 			count++;
2246 	}
2247 
2248 	return count;
2249 }
2250 
2251 
2252 static status_t
2253 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection)
2254 {
2255 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = %#lx)\n",
2256 		team, areaID, newProtection));
2257 
2258 	if (!arch_vm_supports_protection(newProtection))
2259 		return B_NOT_SUPPORTED;
2260 
2261 	// lock address spaces and cache
2262 	MultiAddressSpaceLocker locker;
2263 	vm_cache *cache;
2264 	vm_area* area;
2265 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2266 		&cache, true);
	if (status != B_OK)
		return status;

2267 	AreaCacheLocker cacheLocker(cache);	// already locked
2268 
2269 	if (area->protection == newProtection)
2270 		return B_OK;
2271 
2272 	if (team != vm_kernel_address_space_id()
2273 		&& area->address_space->id != team) {
2274 		// unless you're the kernel, you are only allowed to set
2275 		// the protection of your own areas
2276 		return B_NOT_ALLOWED;
2277 	}
2278 
2279 	bool changePageProtection = true;
2280 
2281 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2282 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2283 		// writable -> !writable
2284 
2285 		if (cache->source != NULL && cache->temporary) {
2286 			if (count_writable_areas(cache, area) == 0) {
2287 				// Since this cache now lives from the pages in its source cache,
2288 				// we can change the cache's commitment to take only those pages
2289 				// into account that really are in this cache.
2290 
2291 				// count existing pages in this cache
2292 				struct vm_page *page = cache->page_list;
2293 				uint32 count = 0;
2294 
2295 				for (; page != NULL; page = page->cache_next) {
2296 					count++;
2297 				}
2298 
2299 				status = cache->store->ops->commit(cache->store,
2300 					cache->virtual_base + count * B_PAGE_SIZE);
2301 
2302 				// ToDo: we may be able to join with our source cache, if count == 0
2303 			}
2304 		}
2305 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
2306 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
2307 		// !writable -> writable
2308 
2309 		if (!list_is_empty(&cache->consumers)) {
2310 			// There are consumers -- we have to insert a new cache. Fortunately
2311 			// vm_copy_on_write_area() does everything that's needed.
2312 			changePageProtection = false;
2313 			status = vm_copy_on_write_area(cache);
2314 		} else {
2315 			// No consumers, so we don't need to insert a new one.
2316 			if (cache->source != NULL && cache->temporary) {
2317 				// the cache's commitment must contain all possible pages
2318 				status = cache->store->ops->commit(cache->store,
2319 					cache->virtual_size);
2320 			}
2321 
2322 			if (status == B_OK && cache->source != NULL) {
2323 				// There's a source cache, hence we can't just change all pages'
2324 				// protection or we might allow writing into pages belonging to
2325 				// a lower cache.
2326 				changePageProtection = false;
2327 
2328 				struct vm_translation_map *map
2329 					= &area->address_space->translation_map;
2330 				map->ops->lock(map);
2331 
2332 				vm_page* page = cache->page_list;
2333 				while (page) {
2334 					addr_t address = area->base
2335 						+ (page->cache_offset << PAGE_SHIFT);
2336 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
2337 						newProtection);
2338 					page = page->cache_next;
2339 				}
2340 
2341 				map->ops->unlock(map);
2342 			}
2343 		}
2344 	} else {
2345 		// we don't have anything special to do in all other cases
2346 	}
2347 
2348 	if (status == B_OK) {
2349 		// remap existing pages in this cache
2350 		struct vm_translation_map *map = &area->address_space->translation_map;
2351 
2352 		if (changePageProtection) {
2353 			map->ops->lock(map);
2354 			map->ops->protect(map, area->base, area->base + area->size,
2355 				newProtection);
2356 			map->ops->unlock(map);
2357 		}
2358 
2359 		area->protection = newProtection;
2360 	}
2361 
2362 	return status;
2363 }
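
/*!	Annotation (not part of the original source): the \c changePageProtection
	special cases above exist because currently mapped pages may belong to a
	source cache further down the chain. Making such mappings writable
	directly would allow writes into shared lower-cache pages, so in that
	case only the pages that already live in this cache are remapped and all
	other pages keep faulting (and being copied up) on their first write.
*/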
2364 
2365 
2366 status_t
2367 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t *paddr)
2368 {
2369 	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
2370 	if (addressSpace == NULL)
2371 		return B_BAD_TEAM_ID;
2372 
2373 	uint32 dummyFlags;
2374 	status_t status = addressSpace->translation_map.ops->query(
2375 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
2376 
2377 	vm_put_address_space(addressSpace);
2378 	return status;
2379 }
2380 
2381 
2382 static inline addr_t
2383 virtual_page_address(vm_area *area, vm_page *page)
2384 {
2385 	return area->base
2386 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
2387 }
2388 
2389 
2390 bool
2391 vm_test_map_modification(vm_page *page)
2392 {
2393 	MutexLocker locker(sMappingLock);
2394 
2395 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2396 	vm_page_mapping *mapping;
2397 	while ((mapping = iterator.Next()) != NULL) {
2398 		vm_area *area = mapping->area;
2399 		vm_translation_map *map = &area->address_space->translation_map;
2400 
2401 		addr_t physicalAddress;
2402 		uint32 flags;
2403 		map->ops->lock(map);
2404 		map->ops->query(map, virtual_page_address(area, page),
2405 			&physicalAddress, &flags);
2406 		map->ops->unlock(map);
2407 
2408 		if (flags & PAGE_MODIFIED)
2409 			return true;
2410 	}
2411 
2412 	return false;
2413 }
2414 
2415 
2416 int32
2417 vm_test_map_activation(vm_page *page, bool *_modified)
2418 {
2419 	int32 activation = 0;
2420 	bool modified = false;
2421 
2422 	MutexLocker locker(sMappingLock);
2423 
2424 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2425 	vm_page_mapping *mapping;
2426 	while ((mapping = iterator.Next()) != NULL) {
2427 		vm_area *area = mapping->area;
2428 		vm_translation_map *map = &area->address_space->translation_map;
2429 
2430 		addr_t physicalAddress;
2431 		uint32 flags;
2432 		map->ops->lock(map);
2433 		map->ops->query(map, virtual_page_address(area, page),
2434 			&physicalAddress, &flags);
2435 		map->ops->unlock(map);
2436 
2437 		if (flags & PAGE_ACCESSED)
2438 			activation++;
2439 		if (flags & PAGE_MODIFIED)
2440 			modified = true;
2441 	}
2442 
2443 	if (_modified != NULL)
2444 		*_modified = modified;
2445 
2446 	return activation;
2447 }
2448 
2449 
2450 void
2451 vm_clear_map_flags(vm_page *page, uint32 flags)
2452 {
2453 	MutexLocker locker(sMappingLock);
2454 
2455 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2456 	vm_page_mapping *mapping;
2457 	while ((mapping = iterator.Next()) != NULL) {
2458 		vm_area *area = mapping->area;
2459 		vm_translation_map *map = &area->address_space->translation_map;
2460 
2461 		map->ops->lock(map);
2462 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
2463 		map->ops->unlock(map);
2464 	}
2465 }
2466 
2467 
2468 /*!	Removes all mappings from a page.
2469 	After you've called this function, the page is unmapped from memory.
2470 	The accumulated page flags of all mappings can be found in \a _flags.
2471 */
2472 void
2473 vm_remove_all_page_mappings(vm_page *page, uint32 *_flags)
2474 {
2475 	uint32 accumulatedFlags = 0;
2476 	MutexLocker locker(sMappingLock);
2477 
2478 	vm_page_mappings queue;
2479 	queue.MoveFrom(&page->mappings);
2480 
2481 	vm_page_mappings::Iterator iterator = queue.GetIterator();
2482 	vm_page_mapping *mapping;
2483 	while ((mapping = iterator.Next()) != NULL) {
2484 		vm_area *area = mapping->area;
2485 		vm_translation_map *map = &area->address_space->translation_map;
2486 		addr_t physicalAddress;
2487 		uint32 flags;
2488 
2489 		map->ops->lock(map);
2490 		addr_t address = virtual_page_address(area, page);
2491 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
2492 		map->ops->flush(map);
2493 		map->ops->query(map, address, &physicalAddress, &flags);
2494 		map->ops->unlock(map);
2495 
2496 		area->mappings.Remove(mapping);
2497 
2498 		accumulatedFlags |= flags;
2499 	}
2500 
2501 	locker.Unlock();
2502 
2503 	// free now unused mappings
2504 
2505 	while ((mapping = queue.RemoveHead()) != NULL) {
2506 		free(mapping);
2507 	}
2508 
2509 	if (_flags != NULL)
2510 		*_flags = accumulatedFlags;
2511 }
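
/*!	Usage sketch (annotation, not part of the original source): a typical
	caller uses the accumulated flags to decide whether the page still needs
	to be written back before it is reused, for example:

		uint32 flags;
		vm_remove_all_page_mappings(page, &flags);
		if ((flags & PAGE_MODIFIED) != 0)
			vm_page_set_state(page, PAGE_STATE_MODIFIED);

	(Hypothetical caller; the real consumers live in the page daemon/writer
	code, not in this file.)
*/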
2512 
2513 
2514 status_t
2515 vm_unmap_pages(vm_area *area, addr_t base, size_t size, bool preserveModified)
2516 {
2517 	vm_translation_map *map = &area->address_space->translation_map;
2518 	addr_t end = base + (size - 1);
2519 
2520 	map->ops->lock(map);
2521 
2522 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
2523 		// iterate through all pages and decrease their wired count
2524 		for (addr_t virtualAddress = base; virtualAddress < end;
2525 				virtualAddress += B_PAGE_SIZE) {
2526 			addr_t physicalAddress;
2527 			uint32 flags;
2528 			status_t status = map->ops->query(map, virtualAddress,
2529 				&physicalAddress, &flags);
2530 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
2531 				continue;
2532 
2533 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2534 			if (page == NULL) {
2535 				panic("area %p looking up page failed for pa 0x%lx\n", area,
2536 					physicalAddress);
2537 			}
2538 
2539 			page->wired_count--;
2540 				// TODO: needs to be atomic on all platforms!
2541 		}
2542 	}
2543 
2544 	map->ops->unmap(map, base, end);
2545 	if (preserveModified) {
2546 		map->ops->flush(map);
2547 
2548 		for (addr_t virtualAddress = base; virtualAddress < end;
2549 				virtualAddress += B_PAGE_SIZE) {
2550 			addr_t physicalAddress;
2551 			uint32 flags;
2552 			status_t status = map->ops->query(map, virtualAddress,
2553 				&physicalAddress, &flags);
2554 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
2555 				continue;
2556 
2557 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2558 			if (page == NULL) {
2559 				panic("area %p looking up page failed for pa 0x%lx\n", area,
2560 					physicalAddress);
2561 			}
2562 
2563 			if ((flags & PAGE_MODIFIED) != 0
2564 				&& page->state != PAGE_STATE_MODIFIED)
2565 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
2566 		}
2567 	}
2568 	map->ops->unlock(map);
2569 
2570 	if (area->wiring == B_NO_LOCK) {
2571 		uint32 startOffset = (area->cache_offset + base - area->base)
2572 			>> PAGE_SHIFT;
2573 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
2574 		vm_page_mapping *mapping;
2575 		vm_area_mappings queue;
2576 
2577 		mutex_lock(&sMappingLock);
2578 		map->ops->lock(map);
2579 
2580 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
2581 		while (iterator.HasNext()) {
2582 			mapping = iterator.Next();
2583 
2584 			vm_page *page = mapping->page;
2585 			if (page->cache_offset < startOffset
2586 				|| page->cache_offset >= endOffset)
2587 				continue;
2588 
2589 			mapping->page->mappings.Remove(mapping);
2590 			iterator.Remove();
2591 
2592 			queue.Add(mapping);
2593 		}
2594 
2595 		map->ops->unlock(map);
2596 		mutex_unlock(&sMappingLock);
2597 
2598 		while ((mapping = queue.RemoveHead()) != NULL) {
2599 			free(mapping);
2600 		}
2601 	}
2602 
2603 	return B_OK;
2604 }
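
/*!	Annotation (not part of the original source): \a preserveModified should
	be \c true whenever the cache behind the area can be written back --
	compare the call in delete_area() above, which passes
	!area->cache->temporary. In that case the hardware dirty bits are
	harvested after the unmap/flush and transferred to the vm_page states,
	so no modification is lost.
*/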
2605 
2606 
2607 /*!	When calling this function, you need to have pages reserved! */
2608 status_t
2609 vm_map_page(vm_area *area, vm_page *page, addr_t address, uint32 protection)
2610 {
2611 	vm_translation_map *map = &area->address_space->translation_map;
2612 	vm_page_mapping *mapping = NULL;
2613 
2614 	if (area->wiring == B_NO_LOCK) {
2615 		mapping = (vm_page_mapping *)malloc(sizeof(vm_page_mapping));
2616 		if (mapping == NULL)
2617 			return B_NO_MEMORY;
2618 
2619 		mapping->page = page;
2620 		mapping->area = area;
2621 	}
2622 
2623 	map->ops->lock(map);
2624 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
2625 		protection);
2626 	map->ops->unlock(map);
2627 
2628 	if (area->wiring != B_NO_LOCK) {
2629 		page->wired_count++;
2630 			// TODO: needs to be atomic on all platforms!
2631 	} else {
2632 		// insert mapping into lists
2633 		MutexLocker locker(sMappingLock);
2634 
2635 		page->mappings.Add(mapping);
2636 		area->mappings.Add(mapping);
2637 	}
2638 
2639 	if (page->usage_count < 0)
2640 		page->usage_count = 1;
2641 
2642 	if (page->state != PAGE_STATE_MODIFIED)
2643 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
2644 
2645 	return B_OK;
2646 }
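
/*!	Usage sketch (annotation, not part of the original source): as the note
	above says, callers have to reserve pages for the translation map first,
	mirroring the pattern used in vm_clone_area() (base/size/page are
	placeholders here):

		size_t reservePages = map->ops->map_max_pages_need(map, base,
			base + (size - 1));
		vm_page_reserve_pages(reservePages);
		// ... vm_map_page(area, page, address, protection) per page ...
		vm_page_unreserve_pages(reservePages);
*/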
2647 
2648 
2649 static int
2650 display_mem(int argc, char **argv)
2651 {
2652 	bool physical = false;
2653 	addr_t copyAddress;
2654 	int32 displayWidth;
2655 	int32 itemSize;
2656 	int32 num = -1;
2657 	addr_t address;
2658 	int i = 1, j;
2659 
2660 	if (argc > 1 && argv[1][0] == '-') {
2661 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2662 			physical = true;
2663 			i++;
2664 		} else
2665 			i = 99;
2666 	}
2667 
2668 	if (argc < i + 1 || argc > i + 2) {
2669 		kprintf("usage: dl/dw/ds/db [-p|--physical] <address> [num]\n"
2670 			"\tdl - 8 bytes\n"
2671 			"\tdw - 4 bytes\n"
2672 			"\tds - 2 bytes\n"
2673 			"\tdb - 1 byte\n"
2674 			"  -p or --physical only allows memory from a single page to be displayed.\n");
2675 		return 0;
2676 	}
2677 
2678 	address = strtoul(argv[i], NULL, 0);
2679 
2680 	if (argc > i + 1)
2681 		num = atoi(argv[i + 1]);
2682 
2683 	// build the format string
2684 	if (strcmp(argv[0], "db") == 0) {
2685 		itemSize = 1;
2686 		displayWidth = 16;
2687 	} else if (strcmp(argv[0], "ds") == 0) {
2688 		itemSize = 2;
2689 		displayWidth = 8;
2690 	} else if (strcmp(argv[0], "dw") == 0) {
2691 		itemSize = 4;
2692 		displayWidth = 4;
2693 	} else if (strcmp(argv[0], "dl") == 0) {
2694 		itemSize = 8;
2695 		displayWidth = 2;
2696 	} else {
2697 		kprintf("display_mem called in an invalid way!\n");
2698 		return 0;
2699 	}
2700 
2701 	if (num <= 0)
2702 		num = displayWidth;
2703 
2704 	if (physical) {
2705 		int32 offset = address & (B_PAGE_SIZE - 1);
2706 		if (num * itemSize + offset > B_PAGE_SIZE) {
2707 			num = (B_PAGE_SIZE - offset) / itemSize;
2708 			kprintf("NOTE: number of bytes has been cut to page size\n");
2709 		}
2710 
2711 		address = ROUNDOWN(address, B_PAGE_SIZE);
2712 
2713 		kernel_startup = true;
2714 			// vm_get_physical_page() needs to lock...
2715 
2716 		if (vm_get_physical_page(address, &copyAddress, PHYSICAL_PAGE_NO_WAIT) != B_OK) {
2717 			kprintf("getting the hardware page failed.\n");
2718 			kernel_startup = false;
2719 			return 0;
2720 		}
2721 
2722 		kernel_startup = false;
2723 		address += offset;
2724 		copyAddress += offset;
2725 	} else
2726 		copyAddress = address;
2727 
2728 	for (i = 0; i < num; i++) {
2729 		uint64 value;
2730 
2731 		if ((i % displayWidth) == 0) {
2732 			int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2733 			if (i != 0)
2734 				kprintf("\n");
2735 
2736 			kprintf("[0x%lx]  ", address + i * itemSize);
2737 
2738 			for (j = 0; j < displayed; j++) {
2739 				char c;
2740 				if (user_memcpy(&c, (char *)copyAddress + i * itemSize + j, 1) != B_OK) {
2741 					displayed = j;
2742 					break;
2743 				}
2744 				if (!isprint(c))
2745 					c = '.';
2746 
2747 				kprintf("%c", c);
2748 			}
2749 			if (num > displayWidth) {
2750 				// make sure the spacing in the last line is correct
2751 				for (j = displayed; j < displayWidth * itemSize; j++)
2752 					kprintf(" ");
2753 			}
2754 			kprintf("  ");
2755 		}
2756 
2757 		if (user_memcpy(&value, (uint8 *)copyAddress + i * itemSize, itemSize) != B_OK) {
2758 			kprintf("read fault");
2759 			break;
2760 		}
2761 
2762 		switch (itemSize) {
2763 			case 1:
2764 				kprintf(" %02x", *(uint8 *)&value);
2765 				break;
2766 			case 2:
2767 				kprintf(" %04x", *(uint16 *)&value);
2768 				break;
2769 			case 4:
2770 				kprintf(" %08lx", *(uint32 *)&value);
2771 				break;
2772 			case 8:
2773 				kprintf(" %016Lx", *(uint64 *)&value);
2774 				break;
2775 		}
2776 	}
2777 
2778 	kprintf("\n");
2779 
2780 	if (physical) {
2781 		copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE);
2782 		kernel_startup = true;
2783 		vm_put_physical_page(copyAddress);
2784 		kernel_startup = false;
2785 	}
2786 	return 0;
2787 }
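
/*!	Annotation (not part of the original source): example KDL usage of the
	commands registered for display_mem() in vm_init() below (addresses are
	placeholders):

		dw 0x80001000 8
			(eight 32-bit words at a virtual address)
		db -p 0x00102000 16
			(16 bytes read via a physical page mapping)
*/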
2788 
2789 
2790 static void
2791 dump_cache_tree_recursively(vm_cache* cache, int level,
2792 	vm_cache* highlightCache)
2793 {
2794 	// print this cache
2795 	for (int i = 0; i < level; i++)
2796 		kprintf("  ");
2797 	if (cache == highlightCache)
2798 		kprintf("%p <--\n", cache);
2799 	else
2800 		kprintf("%p\n", cache);
2801 
2802 	// recursively print its consumers
2803 	vm_cache* consumer = NULL;
2804 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers,
2805 			consumer)) != NULL) {
2806 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
2807 	}
2808 }
2809 
2810 
2811 static int
2812 dump_cache_tree(int argc, char **argv)
2813 {
2814 	if (argc < 2 || strlen(argv[1]) < 2
2815 		|| argv[1][0] != '0'
2816 		|| argv[1][1] != 'x') {
2817 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2818 		return 0;
2819 	}
2820 
2821 	addr_t address = strtoul(argv[1], NULL, 0);
2822 	if (address == 0)
2823 		return 0;
2824 
2825 	vm_cache *cache = (vm_cache *)address;
2826 	vm_cache *root = cache;
2827 
2828 	// find the root cache (the transitive source)
2829 	while (root->source != NULL)
2830 		root = root->source;
2831 
2832 	dump_cache_tree_recursively(root, 0, cache);
2833 
2834 	return 0;
2835 }
2836 
2837 
2838 #if DEBUG_CACHE_LIST
2839 
2840 static int
2841 dump_caches(int argc, char **argv)
2842 {
2843 	kprintf("caches:");
2844 
2845 	vm_cache* cache = gDebugCacheList;
2846 	while (cache) {
2847 		kprintf(" %p", cache);
2848 		cache = cache->debug_next;
2849 	}
2850 
2851 	kprintf("\n");
2852 
2853 	return 0;
2854 }
2855 
2856 #endif	// DEBUG_CACHE_LIST
2857 
2858 
2859 static const char *
2860 cache_type_to_string(int32 type)
2861 {
2862 	switch (type) {
2863 		case CACHE_TYPE_RAM:
2864 			return "RAM";
2865 		case CACHE_TYPE_DEVICE:
2866 			return "device";
2867 		case CACHE_TYPE_VNODE:
2868 			return "vnode";
2869 		case CACHE_TYPE_NULL:
2870 			return "null";
2871 
2872 		default:
2873 			return "unknown";
2874 	}
2875 }
2876 
2877 
2878 static int
2879 dump_cache(int argc, char **argv)
2880 {
2881 	vm_cache *cache;
2882 	bool showPages = false;
2883 	int i = 1;
2884 
2885 	if (argc < 2) {
2886 		kprintf("usage: %s [-ps] <address>\n"
2887 			"  if -p is specified, all pages are shown; if -s is used,\n"
2888 			"  only the cache info is shown.\n", argv[0]);
2889 		return 0;
2890 	}
2891 	while (argv[i][0] == '-') {
2892 		char *arg = argv[i] + 1;
2893 		while (arg[0]) {
2894 			if (arg[0] == 'p')
2895 				showPages = true;
2896 			arg++;
2897 		}
2898 		i++;
2899 	}
2900 	if (argv[i] == NULL || strlen(argv[i]) < 2
2901 		|| argv[i][0] != '0'
2902 		|| argv[i][1] != 'x') {
2903 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2904 		return 0;
2905 	}
2906 
2907 	addr_t address = strtoul(argv[i], NULL, 0);
2908 	if (address == 0)
2909 		return 0;
2910 
2911 	cache = (vm_cache *)address;
2912 
2913 	kprintf("CACHE %p:\n", cache);
2914 	kprintf("  ref_count:    %ld\n", cache->ref_count);
2915 	kprintf("  source:       %p\n", cache->source);
2916 	kprintf("  store:        %p\n", cache->store);
2917 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
2918 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
2919 	kprintf("  virtual_size: 0x%Lx\n", cache->virtual_size);
2920 	kprintf("  temporary:    %ld\n", cache->temporary);
2921 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
2922 	kprintf("  lock.holder:  %ld\n", cache->lock.holder);
2923 	kprintf("  lock.sem:     0x%lx\n", cache->lock.sem);
2924 	kprintf("  areas:\n");
2925 
2926 	for (vm_area *area = cache->areas; area != NULL; area = area->cache_next) {
2927 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
2928 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
2929 		kprintf("\tprotection: 0x%lx\n", area->protection);
2930 		kprintf("\towner:      0x%lx\n", area->address_space->id);
2931 	}
2932 
2933 	kprintf("  consumers:\n");
2934 	vm_cache *consumer = NULL;
2935 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) {
2936 		kprintf("\t%p\n", consumer);
2937 	}
2938 
2939 	kprintf("  pages:\n");
2940 	int32 count = 0;
2941 	for (vm_page *page = cache->page_list; page != NULL; page = page->cache_next) {
2942 		count++;
2943 		if (!showPages)
2944 			continue;
2945 
2946 		if (page->type == PAGE_TYPE_PHYSICAL) {
2947 			kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) wired_count %u\n",
2948 				page, page->physical_page_number, page->cache_offset, page->type, page->state,
2949 				page_state_to_string(page->state), page->wired_count);
2950 		} else if (page->type == PAGE_TYPE_DUMMY) {
2951 			kprintf("\t%p DUMMY PAGE state %u (%s)\n",
2952 				page, page->state, page_state_to_string(page->state));
2953 		} else
2954 			kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
2955 	}
2956 
2957 	if (!showPages)
2958 		kprintf("\t%ld in cache\n", count);
2959 
2960 	return 0;
2961 }
2962 
2963 
2964 static void
2965 dump_area_struct(vm_area *area, bool mappings)
2966 {
2967 	kprintf("AREA: %p\n", area);
2968 	kprintf("name:\t\t'%s'\n", area->name);
2969 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
2970 	kprintf("id:\t\t0x%lx\n", area->id);
2971 	kprintf("base:\t\t0x%lx\n", area->base);
2972 	kprintf("size:\t\t0x%lx\n", area->size);
2973 	kprintf("protection:\t0x%lx\n", area->protection);
2974 	kprintf("wiring:\t\t0x%x\n", area->wiring);
2975 	kprintf("memory_type:\t0x%x\n", area->memory_type);
2976 	kprintf("cache:\t\t%p\n", area->cache);
2977 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
2978 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
2979 	kprintf("cache_next:\t%p\n", area->cache_next);
2980 	kprintf("cache_prev:\t%p\n", area->cache_prev);
2981 
2982 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
2983 	if (mappings) {
2984 		kprintf("page mappings:\n");
2985 		while (iterator.HasNext()) {
2986 			vm_page_mapping *mapping = iterator.Next();
2987 			kprintf("  %p", mapping->page);
2988 		}
2989 		kprintf("\n");
2990 	} else {
2991 		uint32 count = 0;
2992 		while (iterator.Next() != NULL) {
2993 			count++;
2994 		}
2995 		kprintf("page mappings:\t%lu\n", count);
2996 	}
2997 }
2998 
2999 
3000 static int
3001 dump_area(int argc, char **argv)
3002 {
3003 	bool mappings = false;
3004 	bool found = false;
3005 	int32 index = 1;
3006 	vm_area *area;
3007 	addr_t num;
3008 
3009 	if (argc < 2) {
3010 		kprintf("usage: area [-m] <id|address|name>\n");
3011 		return 0;
3012 	}
3013 
3014 	if (!strcmp(argv[1], "-m")) {
3015 		mappings = true;
3016 		index++;
3017 	}
3018 
3019 	num = strtoul(argv[index], NULL, 0);
3020 
3021 	// walk through the area list, looking for the arguments as a name
3022 	struct hash_iterator iter;
3023 
3024 	hash_open(sAreaHash, &iter);
3025 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
3026 		if ((area->name != NULL && !strcmp(argv[index], area->name))
3027 			|| (num != 0
3028 				&& ((addr_t)area->id == num
3029 					|| (area->base <= num && area->base + area->size > num)))) {
3030 			dump_area_struct(area, mappings);
3031 			found = true;
3032 		}
3033 	}
3034 
3035 	if (!found)
3036 		kprintf("could not find area %s (%ld)\n", argv[index], num);
3037 	return 0;
3038 }
3039 
3040 
3041 static int
3042 dump_area_list(int argc, char **argv)
3043 {
3044 	vm_area *area;
3045 	struct hash_iterator iter;
3046 	const char *name = NULL;
3047 	int32 id = 0;
3048 
3049 	if (argc > 1) {
3050 		id = strtoul(argv[1], NULL, 0);
3051 		if (id == 0)
3052 			name = argv[1];
3053 	}
3054 
3055 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3056 
3057 	hash_open(sAreaHash, &iter);
3058 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
3059 		if ((id != 0 && area->address_space->id != id)
3060 			|| (name != NULL && strstr(area->name, name) == NULL))
3061 			continue;
3062 
3063 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id, (void *)area->base,
3064 			(void *)area->size, area->protection, area->wiring, area->name);
3065 	}
3066 	hash_close(sAreaHash, &iter, false);
3067 	return 0;
3068 }
3069 
3070 
3071 static int
3072 dump_available_memory(int argc, char **argv)
3073 {
3074 	kprintf("Available memory: %Ld/%lu bytes\n",
3075 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
3076 	return 0;
3077 }
3078 
3079 
3080 status_t
3081 vm_delete_areas(struct vm_address_space *addressSpace)
3082 {
3083 	vm_area *area;
3084 	vm_area *next, *last = NULL;
3085 
3086 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3087 		addressSpace->id));
3088 
3089 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
3090 
3091 	// remove all reserved areas in this address space
3092 
3093 	for (area = addressSpace->areas; area; area = next) {
3094 		next = area->address_space_next;
3095 
3096 		if (area->id == RESERVED_AREA_ID) {
3097 			// just remove it
3098 			if (last)
3099 				last->address_space_next = area->address_space_next;
3100 			else
3101 				addressSpace->areas = area->address_space_next;
3102 
3103 			vm_put_address_space(addressSpace);
3104 			free(area);
3105 			continue;
3106 		}
3107 
3108 		last = area;
3109 	}
3110 
3111 	// delete all the areas in this address space
3112 
3113 	for (area = addressSpace->areas; area; area = next) {
3114 		next = area->address_space_next;
3115 		delete_area(addressSpace, area);
3116 	}
3117 
3118 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
3119 	return B_OK;
3120 }
3121 
3122 
3123 static area_id
3124 vm_area_for(team_id team, addr_t address)
3125 {
3126 	AddressSpaceReadLocker locker(team);
3127 	if (!locker.IsLocked())
3128 		return B_BAD_TEAM_ID;
3129 
3130 	vm_area *area = vm_area_lookup(locker.AddressSpace(), address);
3131 	if (area != NULL)
3132 		return area->id;
3133 
3134 	return B_ERROR;
3135 }
3136 
3137 
3138 /*!
3139 	Frees physical pages that were used during the boot process.
3140 */
3141 static void
3142 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
3143 {
3144 	// free all physical pages in the specified range
3145 
3146 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3147 		addr_t physicalAddress;
3148 		uint32 flags;
3149 
3150 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
3151 			vm_page *page = vm_lookup_page(current / B_PAGE_SIZE);
3152 			if (page != NULL)
3153 				vm_page_set_state(page, PAGE_STATE_FREE);
3154 		}
3155 	}
3156 
3157 	// unmap the memory
3158 	map->ops->unmap(map, start, end - 1);
3159 }
3160 
3161 
3162 void
3163 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3164 {
3165 	vm_translation_map *map = &vm_kernel_address_space()->translation_map;
3166 	addr_t end = start + size;
3167 	addr_t lastEnd = start;
3168 	vm_area *area;
3169 
3170 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
3171 
3172 	// The areas are sorted in virtual address space order, so
3173 	// we just have to find the holes between them that fall
3174 	// into the area we should dispose
3175 
3176 	map->ops->lock(map);
3177 
3178 	for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) {
3179 		addr_t areaStart = area->base;
3180 		addr_t areaEnd = areaStart + area->size;
3181 
3182 		if (area->id == RESERVED_AREA_ID)
3183 			continue;
3184 
3185 		if (areaEnd >= end) {
3186 			// we are done, the areas are already beyond what we have to free
3187 			lastEnd = end;
3188 			break;
3189 		}
3190 
3191 		if (areaStart > lastEnd) {
3192 			// this is something we can free
3193 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
3194 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
3195 		}
3196 
3197 		lastEnd = areaEnd;
3198 	}
3199 
3200 	if (lastEnd < end) {
3201 		// we can also get rid of some space at the end of the area
3202 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
3203 		unmap_and_free_physical_pages(map, lastEnd, end);
3204 	}
3205 
3206 	map->ops->unlock(map);
3207 }
3208 
3209 
3210 static void
3211 create_preloaded_image_areas(struct preloaded_image *image)
3212 {
3213 	char name[B_OS_NAME_LENGTH];
3214 	void *address;
3215 	int32 length;
3216 
3217 	// use file name to create a good area name
3218 	char *fileName = strrchr(image->name, '/');
3219 	if (fileName == NULL)
3220 		fileName = image->name;
3221 	else
3222 		fileName++;
3223 
3224 	length = strlen(fileName);
3225 	// make sure there is enough space for the suffix
3226 	if (length > 25)
3227 		length = 25;
3228 
3229 	memcpy(name, fileName, length);
3230 	strcpy(name + length, "_text");
3231 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
3232 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3233 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3234 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3235 		// this will later be remapped read-only/executable by the
3236 		// ELF initialization code
3237 
3238 	strcpy(name + length, "_data");
3239 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
3240 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3241 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3242 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3243 }
3244 
3245 
3246 /**	Frees all previously allocated kernel args areas from the kernel_args
3247  *	structure. Any boot loader resources contained in these arguments must not
3248  *	be accessed anymore past this point.
3249  */
3250 
3251 void
3252 vm_free_kernel_args(kernel_args *args)
3253 {
3254 	uint32 i;
3255 
3256 	TRACE(("vm_free_kernel_args()\n"));
3257 
3258 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3259 		area_id area = area_for((void *)args->kernel_args_range[i].start);
3260 		if (area >= B_OK)
3261 			delete_area(area);
3262 	}
3263 }
3264 
3265 
3266 static void
3267 allocate_kernel_args(kernel_args *args)
3268 {
3269 	uint32 i;
3270 
3271 	TRACE(("allocate_kernel_args()\n"));
3272 
3273 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3274 		void *address = (void *)args->kernel_args_range[i].start;
3275 
3276 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
3277 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3278 	}
3279 }
3280 
3281 
3282 static void
3283 unreserve_boot_loader_ranges(kernel_args *args)
3284 {
3285 	uint32 i;
3286 
3287 	TRACE(("unreserve_boot_loader_ranges()\n"));
3288 
3289 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
3290 		vm_unreserve_address_range(vm_kernel_address_space_id(),
3291 			(void *)args->virtual_allocated_range[i].start,
3292 			args->virtual_allocated_range[i].size);
3293 	}
3294 }
3295 
3296 
3297 static void
3298 reserve_boot_loader_ranges(kernel_args *args)
3299 {
3300 	uint32 i;
3301 
3302 	TRACE(("reserve_boot_loader_ranges()\n"));
3303 
3304 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
3305 		void *address = (void *)args->virtual_allocated_range[i].start;
3306 
3307 		// If the address is not a kernel address, we just skip it. The
3308 		// architecture specific code has to deal with it.
3309 		if (!IS_KERNEL_ADDRESS(address)) {
3310 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3311 				address, args->virtual_allocated_range[i].size);
3312 			continue;
3313 		}
3314 
3315 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address,
3316 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3317 		if (status < B_OK)
3318 			panic("could not reserve boot loader ranges\n");
3319 	}
3320 }
3321 
3322 
3323 static addr_t
3324 allocate_early_virtual(kernel_args *args, size_t size)
3325 {
3326 	addr_t spot = 0;
3327 	uint32 i;
3328 	int last_valloc_entry = 0;
3329 
3330 	size = PAGE_ALIGN(size);
3331 	// find a slot in the virtual allocation addr range
3332 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
3333 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3334 			+ args->virtual_allocated_range[i - 1].size;
3335 		last_valloc_entry = i;
3336 		// check to see if the space between this one and the last is big enough
3337 		if (previousRangeEnd >= KERNEL_BASE
3338 			&& args->virtual_allocated_range[i].start
3339 				- previousRangeEnd >= size) {
3340 			spot = previousRangeEnd;
3341 			args->virtual_allocated_range[i - 1].size += size;
3342 			goto out;
3343 		}
3344 	}
3345 	if (spot == 0) {
3346 		// we didn't find a gap between the allocation ranges; this is OK.
3347 		// See if there's a gap after the last one
3348 		addr_t lastRangeEnd
3349 			= args->virtual_allocated_range[last_valloc_entry].start
3350 				+ args->virtual_allocated_range[last_valloc_entry].size;
3351 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
3352 			spot = lastRangeEnd;
3353 			args->virtual_allocated_range[last_valloc_entry].size += size;
3354 			goto out;
3355 		}
3356 		// see if there's a gap before the first one
3357 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
3358 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
3359 				args->virtual_allocated_range[0].start -= size;
3360 				spot = args->virtual_allocated_range[0].start;
3361 				goto out;
3362 			}
3363 		}
3364 	}
3365 
3366 out:
3367 	return spot;
3368 }
3369 
3370 
3371 static bool
3372 is_page_in_physical_memory_range(kernel_args *args, addr_t address)
3373 {
3374 	// TODO: horrible brute-force method of determining if the page can be allocated
3375 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3376 		if (address >= args->physical_memory_range[i].start
3377 			&& address < args->physical_memory_range[i].start
3378 				+ args->physical_memory_range[i].size)
3379 			return true;
3380 	}
3381 	return false;
3382 }
3383 
3384 
3385 static addr_t
3386 allocate_early_physical_page(kernel_args *args)
3387 {
3388 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3389 		addr_t nextPage;
3390 
3391 		nextPage = args->physical_allocated_range[i].start
3392 			+ args->physical_allocated_range[i].size;
3393 		// see if the page after the next allocated paddr run can be allocated
3394 		if (i + 1 < args->num_physical_allocated_ranges
3395 			&& args->physical_allocated_range[i + 1].size != 0) {
3396 			// see if the next page will collide with the next allocated range
3397 			if (nextPage >= args->physical_allocated_range[i+1].start)
3398 				continue;
3399 		}
3400 		// see if the next physical page fits in the memory block
3401 		if (is_page_in_physical_memory_range(args, nextPage)) {
3402 			// we got one!
3403 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3404 			return nextPage / B_PAGE_SIZE;
3405 		}
3406 	}
3407 
3408 	return 0;
3409 		// could not allocate a block
3410 }
3411 
3412 
3413 /*!
3414 	This one uses the kernel_args' physical and virtual memory ranges to
3415 	allocate some pages before the VM is completely up.
3416 */
3417 addr_t
3418 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize,
3419 	uint32 attributes)
3420 {
3421 	if (physicalSize > virtualSize)
3422 		physicalSize = virtualSize;
3423 
3424 	// find the vaddr to allocate at
3425 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
3426 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3427 
3428 	// map the pages
3429 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3430 		addr_t physicalAddress = allocate_early_physical_page(args);
3431 		if (physicalAddress == 0)
3432 			panic("error allocating early page!\n");
3433 
3434 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3435 
3436 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3437 			physicalAddress * B_PAGE_SIZE, attributes,
3438 			&allocate_early_physical_page);
3439 	}
3440 
3441 	return virtualBase;
3442 }
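
/*!	Annotation (not part of the original source): memory handed out here is
	only recorded in the kernel_args allocation ranges. Once the VM is up,
	vm_init() below retroactively wraps such early allocations in real areas
	(see the "kernel heap" and "initial slab space" create_area() calls with
	B_ALREADY_WIRED), so they appear in the area list like any other area.
*/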
3443 
3444 
3445 status_t
3446 vm_init(kernel_args *args)
3447 {
3448 	struct preloaded_image *image;
3449 	void *address;
3450 	status_t err = 0;
3451 	uint32 i;
3452 
3453 	TRACE(("vm_init: entry\n"));
3454 	err = arch_vm_translation_map_init(args);
3455 	err = arch_vm_init(args);
3456 
3457 	// initialize some globals
3458 	sNextAreaID = 1;
3459 	sAreaHashLock = -1;
3460 	sAvailableMemoryLock.sem = -1;
3461 
3462 	vm_page_init_num_pages(args);
3463 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3464 
3465 	// reduce the heap size if we don't have that much RAM
3466 	size_t heapSize = HEAP_SIZE;
3467 	if (sAvailableMemory < 100 * 1024 * 1024)
3468 		heapSize /= 4;
3469 	else if (sAvailableMemory < 200 * 1024 * 1024)
3470 		heapSize /= 2;
3471 
3472 	// map in the new heap and initialize it
3473 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3474 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3475 	TRACE(("heap at 0x%lx\n", heapBase));
3476 	heap_init(heapBase, heapSize);
3477 
3478 	vm_low_memory_init();
3479 
3480 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
3481 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
3482 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3483 	slab_init(args, slabInitialBase, slabInitialSize);
3484 
3485 	// initialize the free page list and physical page mapper
3486 	vm_page_init(args);
3487 
3488 	// initialize the hash table that stores the pages mapped to caches
3489 	vm_cache_init(args);
3490 
3491 	{
3492 		vm_area *area;
3493 		sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area,
3494 			&area_compare, &area_hash);
3495 		if (sAreaHash == NULL)
3496 			panic("vm_init: error creating area hash table\n");
3497 	}
3498 
3499 	vm_address_space_init();
3500 	reserve_boot_loader_ranges(args);
3501 
3502 	// do any further initialization that the architecture-dependent layers may need now
3503 	arch_vm_translation_map_init_post_area(args);
3504 	arch_vm_init_post_area(args);
3505 	vm_page_init_post_area(args);
3506 
3507 	// allocate areas to represent stuff that already exists
3508 
3509 	address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE);
3510 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3511 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3512 
3513 	address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
3514 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
3515 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
3516 		| B_KERNEL_WRITE_AREA);
3517 
3518 	allocate_kernel_args(args);
3519 
3520 	args->kernel_image.name = "kernel";
3521 		// the lazy boot loader currently doesn't set the kernel's name...
3522 	create_preloaded_image_areas(&args->kernel_image);
3523 
3524 	// allocate areas for preloaded images
3525 	for (image = args->preloaded_images; image != NULL; image = image->next) {
3526 		create_preloaded_image_areas(image);
3527 	}
3528 
3529 	// allocate kernel stacks
3530 	for (i = 0; i < args->num_cpus; i++) {
3531 		char name[64];
3532 
3533 		sprintf(name, "idle thread %lu kstack", i + 1);
3534 		address = (void *)args->cpu_kstack[i].start;
3535 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3536 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3537 	}
3538 
3539 	// add some debugger commands
3540 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3541 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
3542 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
3543 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
3544 #if DEBUG_CACHE_LIST
3545 	add_debugger_command("caches", &dump_caches, "List vm_cache structures");
3546 #endif
3547 	add_debugger_command("avail", &dump_available_memory, "Dump available memory");
3548 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3549 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3550 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3551 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3552 
3553 	TRACE(("vm_init: exit\n"));
3554 
3555 	return err;
3556 }
3557 
3558 
3559 status_t
3560 vm_init_post_sem(kernel_args *args)
3561 {
3562 	vm_area *area;
3563 
3564 	// This frees all unused boot loader resources and makes their space available again
3565 	arch_vm_init_end(args);
3566 	unreserve_boot_loader_ranges(args);
3567 
3568 	// fill in all of the semaphores that were not allocated before
3569 	// since we're still single threaded and only the kernel address space exists,
3570 	// it isn't that hard to find all of the ones we need to create
3571 
3572 	benaphore_init(&sAvailableMemoryLock, "available memory lock");
3573 	arch_vm_translation_map_init_post_sem(args);
3574 	vm_address_space_init_post_sem();
3575 
3576 	for (area = vm_kernel_address_space()->areas; area;
3577 			area = area->address_space_next) {
3578 		if (area->id == RESERVED_AREA_ID)
3579 			continue;
3580 
3581 		if (area->cache->lock.sem < 0)
3582 			mutex_init(&area->cache->lock, "vm_cache");
3583 	}
3584 
3585 	sAreaHashLock = create_sem(WRITE_COUNT, "area hash");
3586 	mutex_init(&sAreaCacheLock, "area->cache");
3587 	mutex_init(&sMappingLock, "page mappings");
3588 
3589 	slab_init_post_sem();
3590 
3591 	return heap_init_post_sem(args);
3592 }
3593 
3594 
3595 status_t
3596 vm_init_post_thread(kernel_args *args)
3597 {
3598 	vm_page_init_post_thread(args);
3599 	vm_daemon_init();
3600 	vm_low_memory_init_post_thread();
3601 
3602 	return heap_init_post_thread(args);
3603 }
3604 
3605 
3606 status_t
3607 vm_init_post_modules(kernel_args *args)
3608 {
3609 	return arch_vm_init_post_modules(args);
3610 }
3611 
3612 
3613 void
3614 permit_page_faults(void)
3615 {
3616 	struct thread *thread = thread_get_current_thread();
3617 	if (thread != NULL)
3618 		atomic_add(&thread->page_faults_allowed, 1);
3619 }
3620 
3621 
3622 void
3623 forbid_page_faults(void)
3624 {
3625 	struct thread *thread = thread_get_current_thread();
3626 	if (thread != NULL)
3627 		atomic_add(&thread->page_faults_allowed, -1);
3628 }
3629 
3630 
3631 status_t
3632 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3633 	addr_t *newIP)
3634 {
3635 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, faultAddress));
3636 
3637 	*newIP = 0;
3638 
3639 	status_t status = vm_soft_fault(address, isWrite, isUser);
3640 	if (status < B_OK) {
3641 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3642 			strerror(status), address, faultAddress, isWrite, isUser,
3643 			thread_get_current_thread_id());
3644 		if (!isUser) {
3645 			struct thread *thread = thread_get_current_thread();
3646 			if (thread != NULL && thread->fault_handler != 0) {
3647 				// this will cause the arch-dependent page fault handler to
3648 				// modify the IP on the interrupt frame or whatever to return
3649 				// to this address
3650 				*newIP = thread->fault_handler;
3651 			} else {
3652 				// unhandled page fault in the kernel
3653 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
3654 					address, faultAddress);
3655 			}
3656 		} else {
3657 #if 1
3658 			// ToDo: remove me once we have proper userland debugging support (and tools)
3659 			vm_address_space *addressSpace = vm_get_current_user_address_space();
3660 			vm_area *area;
3661 
3662 			acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3663 			area = vm_area_lookup(addressSpace, faultAddress);
3664 
3665 			dprintf("vm_page_fault: sending team \"%s\" 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n",
3666 				thread_get_current_thread()->team->name,
3667 				thread_get_current_thread()->team->id, faultAddress,
3668 				area ? area->name : "???", faultAddress - (area ? area->base : 0x0));
3669 
3670 			// We can print a stack trace of the userland thread here.
3671 #if 1
3672 			if (area) {
3673 				struct stack_frame {
3674 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
3675 						struct stack_frame*	previous;
3676 						void*				return_address;
3677 					#else
3678 						// ...
3679 					#warning writeme
3680 					#endif
3681 				} frame;
3682 #ifdef __INTEL__
3683 				struct iframe *iframe = i386_get_user_iframe();
3684 				if (iframe == NULL)
3685 					panic("iframe is NULL!");
3686 
3687 				status_t status = user_memcpy(&frame, (void *)iframe->ebp,
3688 					sizeof(struct stack_frame));
3689 #elif defined(__POWERPC__)
3690 				struct iframe *iframe = ppc_get_user_iframe();
3691 				if (iframe == NULL)
3692 					panic("iframe is NULL!");
3693 
3694 				status_t status = user_memcpy(&frame, (void *)iframe->r1,
3695 					sizeof(struct stack_frame));
3696 #else
3697 #	warning "vm_page_fault() stack trace won't work"
3698 				status = B_ERROR;
3699 #endif
3700 
3701 				dprintf("stack trace:\n");
3702 				while (status == B_OK && frame.return_address != NULL) {
3703 					dprintf("  %p", frame.return_address);
3704 					area = vm_area_lookup(addressSpace,
3705 						(addr_t)frame.return_address);
3706 					if (area) {
3707 						dprintf(" (%s + %#lx)", area->name,
3708 							(addr_t)frame.return_address - area->base);
3709 					}
3710 					dprintf("\n");
3711 
3712 					status = user_memcpy(&frame, frame.previous,
3713 						sizeof(struct stack_frame));
3714 				}
3715 			}
3716 #endif	// 1 (stack trace)
3717 
3718 			release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3719 			vm_put_address_space(addressSpace);
3720 #endif
3721 			if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV))
3722 				send_signal(team_get_current_team_id(), SIGSEGV);
3723 		}
3724 	}
3725 
3726 	return B_HANDLED_INTERRUPT;
3727 }
3728 
3729 
3730 static inline status_t
3731 fault_acquire_locked_source(vm_cache *cache, vm_cache **_source)
3732 {
3733 retry:
3734 	vm_cache *source = cache->source;
3735 	if (source == NULL)
3736 		return B_ERROR;
3737 	if (source->busy)
3738 		return B_BUSY;
3739 
3740 	vm_cache_acquire_ref(source);
3741 
3742 	mutex_lock(&source->lock);
3743 
3744 	if (source->busy) {
3745 		mutex_unlock(&source->lock);
3746 		vm_cache_release_ref(source);
3747 		goto retry;
3748 	}
3749 
3750 	*_source = source;
3751 	return B_OK;
3752 }
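
/*!	Annotation (not part of the original source): the busy flag is checked
	twice on purpose. The first, unlocked check lets the caller back off
	immediately with B_BUSY; the second check runs after the source's mutex
	has been acquired, because vm_cache_remove_consumer() may have marked the
	cache busy in the meantime -- in that case the reference is dropped and
	the (possibly new) cache->source is re-read via the retry label.
*/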
3753 
3754 
3755 /*!
3756 	Inserts a busy dummy page into a cache, and makes sure the cache won't go
3757 	away by grabbing a reference to it.
3758 */
3759 static inline void
3760 fault_insert_dummy_page(vm_cache *cache, vm_dummy_page &dummyPage,
3761 	off_t cacheOffset)
3762 {
3763 	dummyPage.state = PAGE_STATE_BUSY;
3764 	vm_cache_acquire_ref(cache);
3765 	vm_cache_insert_page(cache, &dummyPage, cacheOffset);
3766 	dummyPage.busy_condition.Publish(&dummyPage, "page");
3767 }
3768 
3769 
3770 /*!
3771 	Removes the busy dummy page from a cache, and releases its reference to
3772 	the cache.
3773 */
3774 static inline void
3775 fault_remove_dummy_page(vm_dummy_page &dummyPage, bool isLocked)
3776 {
3777 	vm_cache *cache = dummyPage.cache;
3778 	if (!isLocked)
3779 		mutex_lock(&cache->lock);
3780 
3781 	if (dummyPage.state == PAGE_STATE_BUSY) {
3782 		vm_cache_remove_page(cache, &dummyPage);
3783 		dummyPage.state = PAGE_STATE_INACTIVE;
3784 		dummyPage.busy_condition.Unpublish();
3785 	}
3786 
3787 	if (!isLocked)
3788 		mutex_unlock(&cache->lock);
3789 
3790 	vm_cache_release_ref(cache);
3791 }
3792 
3793 
3794 /*!
3795 	Finds a page at the specified \a cacheOffset in either the \a topCache
3796 	or in its source chain. Will also page in a missing page in case there is
3797 	a cache that has the page.
3798 	If it couldn't find a page, it will return the vm_cache that should get it;
3799 	otherwise, it will return the vm_cache that contains the page.
3800 	It always grabs a reference to the vm_cache that it returns, and also locks it.
3801 */
3802 static inline status_t
3803 fault_find_page(vm_translation_map *map, vm_cache *topCache,
3804 	off_t cacheOffset, bool isWrite, vm_dummy_page &dummyPage,
3805 	vm_cache **_pageCache, vm_page** _page, bool* _restart)
3806 {
3807 	*_restart = false;
3808 	vm_cache *cache = topCache;
3809 	vm_cache *lastCache = NULL;
3810 	vm_page *page = NULL;
3811 
3812 	vm_cache_acquire_ref(cache);
3813 	mutex_lock(&cache->lock);
3814 		// we release this later in the loop
3815 
3816 	while (cache != NULL) {
3817 		if (lastCache != NULL)
3818 			vm_cache_release_ref(lastCache);
3819 
3820 		// we hold the lock of the cache at this point
3821 
3822 		lastCache = cache;
3823 
3824 		for (;;) {
3825 			page = vm_cache_lookup_page(cache, cacheOffset);
3826 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
3827 				// we found the page
3828 				break;
3829 			}
3830 			if (page == NULL || page == &dummyPage)
3831 				break;
3832 
3833 			// page must be busy -- wait for it to become unbusy
3834 			{
3835 				ConditionVariableEntry<vm_page> entry;
3836 				entry.Add(page);
3837 				mutex_unlock(&cache->lock);
3838 				entry.Wait();
3839 				mutex_lock(&cache->lock);
3840 			}
3841 
3842 			if (cache->busy) {
3843 				// The cache became busy, which means it is about to be
3844 				// removed by vm_cache_remove_consumer(). We start again with
3845 				// the top cache.
3846 				ConditionVariableEntry<vm_cache> entry;
3847 				entry.Add(cache);
3848 				mutex_unlock(&cache->lock);
3849 				vm_cache_release_ref(cache);
3850 				entry.Wait();
3851 				*_restart = true;
3852 				return B_OK;
3853 			}
3854 		}
3855 
3856 		if (page != NULL && page != &dummyPage)
3857 			break;
3858 
3859 		// The current cache does not contain the page we're looking for
3860 
3861 		// see if the vm_store has it
3862 		vm_store *store = cache->store;
3863 		if (store->ops->has_page != NULL
3864 			&& store->ops->has_page(store, cacheOffset)) {
3865 			// insert a fresh page and mark it busy -- we're going to read it in
3866 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
3867 			vm_cache_insert_page(cache, page, cacheOffset);
3868 
3869 			ConditionVariable<vm_page> busyCondition;
3870 			busyCondition.Publish(page, "page");
3871 
3872 			mutex_unlock(&cache->lock);
3873 
3874 			// get a virtual address for the page
3875 			iovec vec;
3876 			map->ops->get_physical_page(
3877 				page->physical_page_number * B_PAGE_SIZE,
3878 				(addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT);
3879 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
3880 
3881 			// read it in
3882 			status_t status = store->ops->read(store, cacheOffset, &vec, 1,
3883 				&bytesRead, false);
3884 
3885 			map->ops->put_physical_page((addr_t)vec.iov_base);
3886 
3887 			mutex_lock(&cache->lock);
3888 
3889 			if (status < B_OK) {
3890 				// on error remove and free the page
3891 				dprintf("reading page from store %p (cache %p) returned: %s!\n",
3892 					store, cache, strerror(status));
3893 
3894 				busyCondition.Unpublish();
3895 				vm_cache_remove_page(cache, page);
3896 				vm_page_set_state(page, PAGE_STATE_FREE);
3897 
3898 				mutex_unlock(&cache->lock);
3899 				vm_cache_release_ref(cache);
3900 				return status;
3901 			}
3902 
3903 			// mark the page unbusy again
3904 			page->state = PAGE_STATE_ACTIVE;
3905 			busyCondition.Unpublish();
3906 			break;
3907 		}
3908 
3909 		// If we're at the top most cache, insert the dummy page here to keep
3910 		// other threads from faulting on the same address and chasing us up the
3911 		// cache chain
3912 		if (cache == topCache && dummyPage.state != PAGE_STATE_BUSY)
3913 			fault_insert_dummy_page(cache, dummyPage, cacheOffset);
3914 
3915 		vm_cache *nextCache;
3916 		status_t status = fault_acquire_locked_source(cache, &nextCache);
3917 		if (status == B_BUSY) {
3918 			// the source cache is currently in the process of being merged
3919 			// with its only consumer (our cache); since its pages are moved
3920 			// upwards, too, we try this cache again
3921 			mutex_unlock(&cache->lock);
3922 			thread_yield();
3923 			mutex_lock(&cache->lock);
3924 			if (cache->busy) {
3925 				// The cache became busy, which means it is about to be
3926 				// removed by vm_cache_remove_consumer(). We start again with
3927 				// the top cache.
3928 				ConditionVariableEntry<vm_cache> entry;
3929 				entry.Add(cache);
3930 				mutex_unlock(&cache->lock);
3931 				vm_cache_release_ref(cache);
3932 				entry.Wait();
3933 				*_restart = true;
3934 				return B_OK;
3935 			}
3936 			lastCache = NULL;
3937 			continue;
3938 		} else if (status < B_OK)
3939 			nextCache = NULL;
3940 
3941 		mutex_unlock(&cache->lock);
3942 			// at this point, we still hold a ref to this cache (through lastCache)
3943 
3944 		cache = nextCache;
3945 	}
3946 
3947 	if (page == NULL) {
3948 		// there was no adequate page, determine the cache for a clean one
3949 		if (cache == NULL) {
3950 			// We rolled off the end of the cache chain, so we need to decide which
3951 			// cache will get the new page we're about to create.
3952 			cache = isWrite ? topCache : lastCache;
3953 				// Read-only pages go into the deepest cache - only the
3954 				// top most cache may have direct write access.
3955 			vm_cache_acquire_ref(cache);
3956 			mutex_lock(&cache->lock);
3957 
3958 			if (cache->busy) {
3959 				// The cache became busy, which means it is about to be
3960 				// removed by vm_cache_remove_consumer(). We start again with
3961 				// the top cache.
3962 				ConditionVariableEntry<vm_cache> entry;
3963 				entry.Add(cache);
3964 				mutex_unlock(&cache->lock);
3965 				vm_cache_release_ref(cache);
3966 				entry.Wait();
3967 				*_restart = true;
3968 			} else {
3969 				vm_page* newPage = vm_cache_lookup_page(cache, cacheOffset);
3970 				if (newPage && newPage != &dummyPage) {
3971 					// A new page turned up. It could be the one we're looking
3972 					// for, but it could as well be a dummy page from someone
3973 					// else or an otherwise busy page. We can't really handle
3974 					// that here. Hence we completely restart this function.
3975 					mutex_unlock(&cache->lock);
3976 					vm_cache_release_ref(cache);
3977 					*_restart = true;
3978 				}
3979 			}
3980 		}
3981 
3982 		// release the reference of the last vm_cache we still have from the loop above
3983 		if (lastCache != NULL)
3984 			vm_cache_release_ref(lastCache);
3985 	} else {
3986 		// we still own a reference to the cache
3987 	}
3988 
3989 	*_pageCache = cache;
3990 	*_page = page;
3991 	return B_OK;
3992 }
3993 
3994 
3995 /*!
3996 	Returns the page that should be mapped into the area that got the fault.
3997 	It returns the owner of the page in \a sourceCache - it keeps a reference
3998 	to it, and has also locked it on exit.
3999 */
4000 static inline status_t
4001 fault_get_page(vm_translation_map *map, vm_cache *topCache, off_t cacheOffset,
4002 	bool isWrite, vm_dummy_page &dummyPage, vm_cache **_sourceCache,
4003 	vm_cache **_copiedSource, vm_page** _page)
4004 {
4005 	vm_cache *cache;
4006 	vm_page *page;
4007 	bool restart;
4008 	for (;;) {
4009 		status_t status = fault_find_page(map, topCache, cacheOffset, isWrite,
4010 			dummyPage, &cache, &page, &restart);
4011 		if (status != B_OK)
4012 			return status;
4013 
4014 		if (!restart)
4015 			break;
4016 
4017 		// Remove the dummy page, if it has been inserted.
4018 		mutex_lock(&topCache->lock);
4019 
4020 		if (dummyPage.state == PAGE_STATE_BUSY) {
4021 			ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4022 				&dummyPage);
4023 			fault_remove_dummy_page(dummyPage, true);
4024 		}
4025 
4026 		mutex_unlock(&topCache->lock);
4027 	}
4028 
4029 	if (page == NULL) {
4030 		// we still haven't found a page, so we allocate a clean one
4031 
4032 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4033 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number));
4034 
4035 		// Insert the new page into our cache, replacing the dummy page if necessary
4036 
4037 		// If we inserted a dummy page into this cache (i.e. if it is the top
4038 		// cache), we have to remove it now
4039 		if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cache) {
4040 #ifdef DEBUG_PAGE_CACHE_TRANSITIONS
4041 			page->debug_flags = dummyPage.debug_flags | 0x8;
4042 			if (dummyPage.collided_page != NULL) {
4043 				dummyPage.collided_page->collided_page = page;
4044 				page->collided_page = dummyPage.collided_page;
4045 			}
4046 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4047 
4048 			fault_remove_dummy_page(dummyPage, true);
4049 		}
4050 
4051 		vm_cache_insert_page(cache, page, cacheOffset);
4052 
4053 		if (dummyPage.state == PAGE_STATE_BUSY) {
4054 #ifdef DEBUG_PAGE_CACHE_TRANSITIONS
4055 			page->debug_flags = dummyPage.debug_flags | 0x10;
4056 			if (dummyPage.collided_page != NULL) {
4057 				dummyPage.collided_page->collided_page = page;
4058 				page->collided_page = dummyPage.collided_page;
4059 			}
4060 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4061 
4062 			// This is not the top cache into which we inserted the dummy page,
4063 			// let's remove it from there. We need to temporarily unlock our
4064 			// cache to comply with the cache locking policy.
4065 			mutex_unlock(&cache->lock);
4066 			fault_remove_dummy_page(dummyPage, false);
4067 			mutex_lock(&cache->lock);
4068 		}
4069 	}
4070 
4071 	// We now have the page and a cache it belongs to - we now need to make
4072 	// sure that the area's cache can access it, too, and sees the correct data
4073 
4074 	if (page->cache != topCache && isWrite) {
4075 		// Now we have a page that has the data we want, but in the wrong cache
4076 		// object so we need to copy it and stick it into the top cache.
4077 		// Note that this and the "if" before are mutually exclusive. If
4078 		// fault_find_page() didn't find the page, it would return the top cache
4079 		// for write faults.
4080 		vm_page *sourcePage = page;
4081 		void *source, *dest;
4082 
4083 		// ToDo: if memory is low, it might be a good idea to steal the page
4084 		//	from our source cache - if possible, that is
4085 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4086 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4087 #if 0
4088 if (cacheOffset == 0x12000)
4089 	dprintf("%ld: copy page %p to page %p from cache %p to cache %p\n", find_thread(NULL),
4090 		sourcePage, page, sourcePage->cache, topCache);
4091 #endif
4092 
4093 		// try to get a mapping for the src and dest page so we can copy it
4094 		for (;;) {
4095 			map->ops->get_physical_page(sourcePage->physical_page_number * B_PAGE_SIZE,
4096 				(addr_t *)&source, PHYSICAL_PAGE_CAN_WAIT);
4097 
4098 			if (map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE,
4099 					(addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT) == B_OK)
4100 				break;
4101 
4102 			// it couldn't map the second one, so sleep and retry;
4103 			// this keeps an extremely rare deadlock from occurring
4104 			map->ops->put_physical_page((addr_t)source);
4105 			snooze(5000);
4106 		}
4107 
4108 		memcpy(dest, source, B_PAGE_SIZE);
4109 		map->ops->put_physical_page((addr_t)source);
4110 		map->ops->put_physical_page((addr_t)dest);
4111 
4112 		if (sourcePage->state != PAGE_STATE_MODIFIED)
4113 			vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE);
4114 
4115 		mutex_unlock(&cache->lock);
4116 		mutex_lock(&topCache->lock);
4117 
4118 		// Since the top cache has been unlocked for a while, someone else
4119 		// (vm_cache_remove_consumer()) might have replaced our dummy page.
4120 		vm_page* newPage = NULL;
4121 		for (;;) {
4122 			newPage = vm_cache_lookup_page(topCache, cacheOffset);
4123 			if (newPage == NULL || newPage == &dummyPage) {
4124 				newPage = NULL;
4125 				break;
4126 			}
4127 
4128 			if (newPage->state != PAGE_STATE_BUSY)
4129 				break;
4130 
4131 			// The page is busy, wait till it becomes unbusy.
4132 			ConditionVariableEntry<vm_page> entry;
4133 			entry.Add(newPage);
4134 			mutex_unlock(&topCache->lock);
4135 			entry.Wait();
4136 			mutex_lock(&topCache->lock);
4137 		}
4138 
4139 		if (newPage) {
4140 			// Indeed someone else threw in a page. We free ours and are happy.
4141 			vm_page_set_state(page, PAGE_STATE_FREE);
4142 			page = newPage;
4143 		} else {
4144 			// Insert the new page into our cache and remove the dummy page, if
4145 			// necessary.
4146 
4147 			// if we inserted a dummy page into this cache, we have to remove it now
4148 			if (dummyPage.state == PAGE_STATE_BUSY) {
4149 				ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4150 					&dummyPage);
4151 				fault_remove_dummy_page(dummyPage, true);
4152 			}
4153 
4154 			vm_cache_insert_page(topCache, page, cacheOffset);
4155 		}
4156 
4157 		*_copiedSource = cache;
4158 
4159 		cache = topCache;
4160 		vm_cache_acquire_ref(cache);
4161 	}
4162 
4163 	*_sourceCache = cache;
4164 	*_page = page;
4165 	return B_OK;
4166 }
4167 
4168 
4169 static status_t
4170 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser)
4171 {
4172 	vm_address_space *addressSpace;
4173 
4174 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4175 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4176 
4177 	addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
4178 
4179 	if (IS_KERNEL_ADDRESS(address)) {
4180 		addressSpace = vm_get_kernel_address_space();
4181 	} else if (IS_USER_ADDRESS(address)) {
4182 		addressSpace = vm_get_current_user_address_space();
4183 		if (addressSpace == NULL) {
4184 			if (!isUser) {
4185 				dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n");
4186 				return B_BAD_ADDRESS;
4187 			} else {
4188 				// XXX weird state.
4189 				panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n");
4190 			}
4191 		}
4192 	} else {
4193 		// The hit was probably in the 64k DMZ between kernel and user space;
4194 		// this keeps a user space thread from passing a buffer that crosses
4195 		// into kernel space.
4196 		return B_BAD_ADDRESS;
4197 	}
4198 
4199 	AddressSpaceReadLocker locker(addressSpace);
4200 
4201 	atomic_add(&addressSpace->fault_count, 1);
4202 
4203 	// Get the area the fault was in
4204 
4205 	vm_area *area = vm_area_lookup(addressSpace, address);
4206 	if (area == NULL) {
4207 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n",
4208 			originalAddress);
4209 		return B_BAD_ADDRESS;
4210 	}
4211 
4212 	// check permissions
4213 	if (isUser && (area->protection & B_USER_PROTECTION) == 0) {
4214 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
4215 		return B_PERMISSION_DENIED;
4216 	}
4217 	if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4218 		dprintf("write access attempted on read-only area 0x%lx at %p\n",
4219 			area->id, (void *)originalAddress);
4220 		return B_PERMISSION_DENIED;
4221 	}
4222 
4223 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
4224 	// At first, the top most cache from the area is investigated
4225 
4226 	vm_cache *topCache = vm_area_get_locked_cache(area);
4227 	off_t cacheOffset = address - area->base + area->cache_offset;
4228 	int32 changeCount = addressSpace->change_count;
4229 
4230 	atomic_add(&area->no_cache_change, 1);
4231 		// make sure the area's cache isn't replaced during the page fault
4232 
4233 	// See if this cache has a fault handler - this will do all the work for us
4234 	{
4235 		vm_store *store = topCache->store;
4236 		if (store->ops->fault != NULL) {
4237 			// Note, since the page fault is resolved with interrupts enabled, the
4238 			// fault handler could be called more than once for the same reason -
4239 			// the store must take this into account
4240 			status_t status = store->ops->fault(store, addressSpace, cacheOffset);
4241 			if (status != B_BAD_HANDLER) {
4242 				vm_area_put_locked_cache(topCache);
4243 				return status;
4244 			}
4245 		}
4246 	}
4247 
4248 	mutex_unlock(&topCache->lock);
4249 
4250 	// The top most cache has no fault handler, so let's see if the cache or its sources
4251 	// already have the page we're searching for (we're going from top to bottom)
4252 
4253 	vm_translation_map *map = &addressSpace->translation_map;
4254 	size_t reservePages = 2 + map->ops->map_max_pages_need(map,
4255 		originalAddress, originalAddress);
4256 	vm_page_reserve_pages(reservePages);
4257 		// we may need up to 2 pages - reserving them upfront makes sure
4258 		// we don't have any cache locked, so that the page daemon/thief
4259 		// can do their job without problems
4260 
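	// The dummy page acts as a busy placeholder: fault_find_page() inserts
	// it into the top cache while walking the cache chain, so that other
	// threads faulting on the same cache offset wait on it instead of
	// racing us up the chain.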
4261 	vm_dummy_page dummyPage;
4262 	dummyPage.cache = NULL;
4263 	dummyPage.state = PAGE_STATE_INACTIVE;
4264 	dummyPage.type = PAGE_TYPE_DUMMY;
4265 	dummyPage.wired_count = 0;
4266 #ifdef DEBUG_PAGE_CACHE_TRANSITIONS
4267 	dummyPage.debug_flags = 0;
4268 	dummyPage.collided_page = NULL;
4269 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4270 
4271 	vm_cache *copiedPageSource = NULL;
4272 	vm_cache *pageSource;
4273 	vm_page *page;
4274 	// TODO: We keep the address space read lock during the whole operation
4275 	// which might be rather expensive depending on where the data has to
4276 	// be retrieved from.
4277 	status_t status = fault_get_page(map, topCache, cacheOffset, isWrite,
4278 		dummyPage, &pageSource, &copiedPageSource, &page);
4279 
4280 	if (status == B_OK) {
4281 		// All went fine; all that is left to do is to map the page into the address space
4282 
4283 		// In case this is a copy-on-write page, we need to unmap it from the area now
4284 		if (isWrite && page->cache == topCache)
4285 			vm_unmap_pages(area, address, B_PAGE_SIZE, true);
4286 
4287 		// TODO: there is currently no mechanism to prevent a page being mapped
4288 		//	more than once in case of a second page fault!
4289 
4290 		// If the page doesn't reside in the area's cache, we need to make sure it's
4291 		// mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write)
4292 		uint32 newProtection = area->protection;
4293 		if (page->cache != topCache && !isWrite)
4294 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4295 
4296 		vm_map_page(area, page, address, newProtection);
4297 
4298 		mutex_unlock(&pageSource->lock);
4299 		vm_cache_release_ref(pageSource);
4300 	}
4301 
4302 	atomic_add(&area->no_cache_change, -1);
4303 
4304 	if (copiedPageSource)
4305 		vm_cache_release_ref(copiedPageSource);
4306 
4307 	if (dummyPage.state == PAGE_STATE_BUSY) {
4308 		// We still have the dummy page in the cache - that happens if we didn't need
4309 		// to allocate a new page before, but could use one in another cache
4310 		fault_remove_dummy_page(dummyPage, false);
4311 	}
4312 
4313 	vm_cache_release_ref(topCache);
4314 	vm_page_unreserve_pages(reservePages);
4315 
4316 	return status;
4317 }
4318 
4319 
4320 /*! You must have the address space's sem held */
4321 vm_area *
4322 vm_area_lookup(vm_address_space *addressSpace, addr_t address)
4323 {
4324 	vm_area *area;
4325 
4326 	// check the areas list first
4327 	area = addressSpace->area_hint;
4328 	if (area && area->base <= address && area->base + (area->size - 1) >= address)
4329 		goto found;
4330 
4331 	for (area = addressSpace->areas; area != NULL; area = area->address_space_next) {
4332 		if (area->id == RESERVED_AREA_ID)
4333 			continue;
4334 
4335 		if (area->base <= address && area->base + (area->size - 1) >= address)
4336 			break;
4337 	}
4338 
4339 found:
4340 	if (area)
4341 		addressSpace->area_hint = area;
4342 
4343 	return area;
4344 }
4345 
4346 
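/*!	Maps the physical page containing \a paddr temporarily into the kernel
	address space and returns its virtual address in \a _vaddr. This is a
	thin wrapper around the kernel translation map's get_physical_page()
	hook; every call must be balanced by vm_put_physical_page().
*/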
4347 status_t
4348 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, uint32 flags)
4349 {
4350 	return (*vm_kernel_address_space()->translation_map.ops->get_physical_page)(paddr, _vaddr, flags);
4351 }
4352 
4353 
4354 status_t
4355 vm_put_physical_page(addr_t vaddr)
4356 {
4357 	return (*vm_kernel_address_space()->translation_map.ops->put_physical_page)(vaddr);
4358 }
4359 
4360 
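/*!	Returns \a amount bytes to the pool of available (committable) memory,
	undoing an earlier successful vm_try_reserve_memory() call.
*/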
4361 void
4362 vm_unreserve_memory(size_t amount)
4363 {
4364 	benaphore_lock(&sAvailableMemoryLock);
4365 
4366 	sAvailableMemory += amount;
4367 
4368 	benaphore_unlock(&sAvailableMemoryLock);
4369 }
4370 
4371 
4372 status_t
4373 vm_try_reserve_memory(size_t amount)
4374 {
4375 	status_t status;
4376 	benaphore_lock(&sAvailableMemoryLock);
4377 
4378 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4379 
4380 	if (sAvailableMemory > amount) {
4381 		sAvailableMemory -= amount;
4382 		status = B_OK;
4383 	} else
4384 		status = B_NO_MEMORY;
4385 
4386 	benaphore_unlock(&sAvailableMemoryLock);
4387 	return status;
4388 }
4389 
4390 
4391 status_t
4392 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
4393 {
4394 	AddressSpaceReadLocker locker;
4395 	vm_area *area;
4396 	status_t status = locker.SetFromArea(id, area);
4397 	if (status != B_OK)
4398 		return status;
4399 
4400 	return arch_vm_set_memory_type(area, physicalBase, type);
4401 }
4402 
4403 
4404 /**	This function enforces some protection properties:
4405  *	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4406  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4407  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4408  *	   and B_KERNEL_WRITE_AREA.
4409  */
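//	For illustration (derived from the rules above, not an exhaustive list):
//	  B_READ_AREA                -> also gets B_KERNEL_READ_AREA
//	  B_READ_AREA | B_WRITE_AREA -> also gets B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
//	  0 (nothing specified)      -> B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA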
4410 
4411 static void
4412 fix_protection(uint32 *protection)
4413 {
4414 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4415 		if ((*protection & B_USER_PROTECTION) == 0
4416 			|| (*protection & B_WRITE_AREA) != 0)
4417 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4418 		else
4419 			*protection |= B_KERNEL_READ_AREA;
4420 	}
4421 }
4422 
4423 
4424 static void
4425 fill_area_info(struct vm_area *area, area_info *info, size_t size)
4426 {
4427 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4428 	info->area = area->id;
4429 	info->address = (void *)area->base;
4430 	info->size = area->size;
4431 	info->protection = area->protection;
4432 	info->lock = B_FULL_LOCK;
4433 	info->team = area->address_space->id;
4434 	info->copy_count = 0;
4435 	info->in_count = 0;
4436 	info->out_count = 0;
4437 		// ToDo: retrieve real values here!
4438 
4439 	vm_cache *cache = vm_area_get_locked_cache(area);
4440 
4441 	// Note, this is a simplification; the cache could be larger than this area
4442 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4443 
4444 	vm_area_put_locked_cache(cache);
4445 }
4446 
4447 
4448 /*!
4449 	Tests whether or not the area that contains the specified address
4450 	needs any kind of locking, and actually exists.
4451 	Used by both lock_memory() and unlock_memory().
4452 */
4453 static status_t
4454 test_lock_memory(vm_address_space *addressSpace, addr_t address,
4455 	bool &needsLocking)
4456 {
4457 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
4458 
4459 	vm_area *area = vm_area_lookup(addressSpace, address);
4460 	if (area != NULL) {
4461 		// This determines if we need to lock the memory at all
4462 		needsLocking = area->cache_type != CACHE_TYPE_NULL
4463 			&& area->cache_type != CACHE_TYPE_DEVICE
4464 			&& area->wiring != B_FULL_LOCK
4465 			&& area->wiring != B_CONTIGUOUS;
4466 	}
4467 
4468 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
4469 
4470 	if (area == NULL)
4471 		return B_BAD_ADDRESS;
4472 
4473 	return B_OK;
4474 }
4475 
4476 
4477 //	#pragma mark - kernel public API
4478 
4479 
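/*!	Copies \a size bytes from \a from to \a to, where either pointer may
	refer to userland memory. Any fault while copying is caught via the
	current thread's fault handler, and B_BAD_ADDRESS is returned instead
	of crashing the kernel.
*/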
4480 status_t
4481 user_memcpy(void *to, const void *from, size_t size)
4482 {
4483 	if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK)
4484 		return B_BAD_ADDRESS;
4485 	return B_OK;
4486 }
4487 
4488 
4489 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
4490  *	the string in \a to, NULL-terminating the result.
4491  *
4492  *	\param to Pointer to the destination C-string.
4493  *	\param from Pointer to the source C-string.
4494  *	\param size Size in bytes of the string buffer pointed to by \a to.
4495  *
4496  *	\return strlen(\a from).
4497  */
4498 
4499 ssize_t
4500 user_strlcpy(char *to, const char *from, size_t size)
4501 {
4502 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
4503 }
4504 
4505 
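/*!	Fills \a count bytes at \a s (which may point into userland) with the
	byte value \a c, returning B_BAD_ADDRESS if the range is not accessible.
*/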
4506 status_t
4507 user_memset(void *s, char c, size_t count)
4508 {
4509 	if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK)
4510 		return B_BAD_ADDRESS;
4511 	return B_OK;
4512 }
4513 
4514 
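/*!	Makes sure the pages covering [\a address, \a address + \a numBytes)
	are resident and wires them by incrementing their wired_count, soft
	faulting them in first if necessary. If B_READ_DEVICE is set in
	\a flags, the caller intends to write to the memory, so the pages are
	faulted in writable. Each successful call must be balanced by an
	unlock_memory() call with the same range.

	A minimal usage sketch (illustrative only, not taken from a driver):

		if (lock_memory(buffer, length, B_READ_DEVICE) == B_OK) {
			// ... let the device write into buffer ...
			unlock_memory(buffer, length, B_READ_DEVICE);
		}
*/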
4515 long
4516 lock_memory(void *address, ulong numBytes, ulong flags)
4517 {
4518 	vm_address_space *addressSpace = NULL;
4519 	struct vm_translation_map *map;
4520 	addr_t unalignedBase = (addr_t)address;
4521 	addr_t end = unalignedBase + numBytes;
4522 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
4523 	bool isUser = IS_USER_ADDRESS(address);
4524 	bool needsLocking = true;
4525 
4526 	if (isUser)
4527 		addressSpace = vm_get_current_user_address_space();
4528 	else
4529 		addressSpace = vm_get_kernel_address_space();
4530 	if (addressSpace == NULL)
4531 		return B_ERROR;
4532 
4533 	// test if we're on an area that allows faults at all
4534 
4535 	map = &addressSpace->translation_map;
4536 
4537 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
4538 	if (status < B_OK)
4539 		goto out;
4540 	if (!needsLocking)
4541 		goto out;
4542 
4543 	for (; base < end; base += B_PAGE_SIZE) {
4544 		addr_t physicalAddress;
4545 		uint32 protection;
4546 		status_t status;
4547 
4548 		map->ops->lock(map);
4549 		status = map->ops->query(map, base, &physicalAddress, &protection);
4550 		map->ops->unlock(map);
4551 
4552 		if (status < B_OK)
4553 			goto out;
4554 
4555 		if ((protection & PAGE_PRESENT) != 0) {
4556 			// if B_READ_DEVICE is set, the caller intends to write to the locked
4557 			// memory, so if it hasn't been mapped writable, we'll try the soft
4558 			// fault anyway
4559 			if ((flags & B_READ_DEVICE) == 0
4560 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
4561 				// update wiring
4562 				vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4563 				if (page == NULL)
4564 					panic("couldn't lookup physical page just allocated\n");
4565 
4566 				page->wired_count++;
4567 					// TODO: needs to be atomic on all platforms!
4568 				continue;
4569 			}
4570 		}
4571 
4572 		status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser);
4573 		if (status != B_OK)	{
4574 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
4575 				(void *)unalignedBase, numBytes, flags, strerror(status));
4576 			goto out;
4577 		}
4578 
4579 		map->ops->lock(map);
4580 		status = map->ops->query(map, base, &physicalAddress, &protection);
4581 		map->ops->unlock(map);
4582 
4583 		if (status < B_OK)
4584 			goto out;
4585 
4586 		// update wiring
4587 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4588 		if (page == NULL)
4589 			panic("couldn't lookup physical page");
4590 
4591 		page->wired_count++;
4592 			// TODO: needs to be atomic on all platforms!
4593 	}
4594 
4595 out:
4596 	vm_put_address_space(addressSpace);
4597 	return status;
4598 }
4599 
4600 
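/*!	Undoes a previous lock_memory() call on the same range by decrementing
	the wired_count of every page in it. The pages must still be mapped;
	hitting an unmapped page here is treated as a fatal programming error.
*/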
4601 long
4602 unlock_memory(void *address, ulong numBytes, ulong flags)
4603 {
4604 	vm_address_space *addressSpace = NULL;
4605 	struct vm_translation_map *map;
4606 	addr_t unalignedBase = (addr_t)address;
4607 	addr_t end = unalignedBase + numBytes;
4608 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
4609 	bool needsLocking = true;
4610 
4611 	if (IS_USER_ADDRESS(address))
4612 		addressSpace = vm_get_current_user_address_space();
4613 	else
4614 		addressSpace = vm_get_kernel_address_space();
4615 	if (addressSpace == NULL)
4616 		return B_ERROR;
4617 
4618 	map = &addressSpace->translation_map;
4619 
4620 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
4621 	if (status < B_OK)
4622 		goto out;
4623 	if (!needsLocking)
4624 		goto out;
4625 
4626 	for (; base < end; base += B_PAGE_SIZE) {
4627 		map->ops->lock(map);
4628 
4629 		addr_t physicalAddress;
4630 		uint32 protection;
4631 		status = map->ops->query(map, base, &physicalAddress,
4632 			&protection);
4633 
4634 		map->ops->unlock(map);
4635 
4636 		if (status < B_OK)
4637 			goto out;
4638 		if ((protection & PAGE_PRESENT) == 0)
4639 			panic("calling unlock_memory() on unmapped memory!");
4640 
4641 		// update wiring
4642 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4643 		if (page == NULL)
4644 			panic("couldn't lookup physical page");
4645 
4646 		page->wired_count--;
4647 			// TODO: needs to be atomic on all platforms!
4648 	}
4649 
4650 out:
4651 	vm_put_address_space(addressSpace);
4652 	return status;
4653 }
4654 
4655 
4656 /** According to the BeBook, this function should always succeed.
4657  *	This is no longer the case.
4658  */
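/*	Illustrative use only (assumes the range was locked beforehand with
	lock_memory()):

		physical_entry table[8];
		if (get_memory_map(buffer, length, table, 8) == B_OK) {
			// table[] now describes the physical runs backing the buffer,
			// terminated by an entry with size == 0 (if numEntries > 1).
		}
*/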
4659 
4660 long
4661 get_memory_map(const void *address, ulong numBytes, physical_entry *table,
4662 	long numEntries)
4663 {
4664 	vm_address_space *addressSpace;
4665 	addr_t virtualAddress = (addr_t)address;
4666 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
4667 	addr_t physicalAddress;
4668 	status_t status = B_OK;
4669 	int32 index = -1;
4670 	addr_t offset = 0;
4671 	bool interrupts = are_interrupts_enabled();
4672 
4673 	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes,
4674 		numEntries));
4675 
4676 	if (numEntries == 0 || numBytes == 0)
4677 		return B_BAD_VALUE;
4678 
4679 	// in which address space is the address to be found?
4680 	if (IS_USER_ADDRESS(virtualAddress))
4681 		addressSpace = thread_get_current_thread()->team->address_space;
4682 	else
4683 		addressSpace = vm_kernel_address_space();
4684 
4685 	if (addressSpace == NULL)
4686 		return B_ERROR;
4687 
4688 	vm_translation_map *map = &addressSpace->translation_map;
4689 
4690 	if (interrupts)
4691 		map->ops->lock(map);
4692 
4693 	while (offset < numBytes) {
4694 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
4695 		uint32 flags;
4696 
4697 		if (interrupts) {
4698 			status = map->ops->query(map, (addr_t)address + offset,
4699 				&physicalAddress, &flags);
4700 		} else {
4701 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
4702 				&physicalAddress, &flags);
4703 		}
4704 		if (status < B_OK)
4705 			break;
4706 		if ((flags & PAGE_PRESENT) == 0) {
4707 			panic("get_memory_map() called on unmapped memory!");
4708 			return B_BAD_ADDRESS;
4709 		}
4710 
4711 		if (index < 0 && pageOffset > 0) {
4712 			physicalAddress += pageOffset;
4713 			if (bytes > B_PAGE_SIZE - pageOffset)
4714 				bytes = B_PAGE_SIZE - pageOffset;
4715 		}
4716 
4717 		// need to switch to the next physical_entry?
4718 		if (index < 0 || (addr_t)table[index].address
4719 				!= physicalAddress - table[index].size) {
4720 			if (++index + 1 > numEntries) {
4721 				// table too small
4722 				status = B_BUFFER_OVERFLOW;
4723 				break;
4724 			}
4725 			table[index].address = (void *)physicalAddress;
4726 			table[index].size = bytes;
4727 		} else {
4728 			// page does fit in current entry
4729 			table[index].size += bytes;
4730 		}
4731 
4732 		offset += bytes;
4733 	}
4734 
4735 	if (interrupts)
4736 		map->ops->unlock(map);
4737 
4738 	// close the entry list
4739 
4740 	if (status == B_OK) {
4741 		// if it's only one entry, we will silently accept the missing ending
4742 		if (numEntries == 1)
4743 			return B_OK;
4744 
4745 		if (++index + 1 > numEntries)
4746 			return B_BUFFER_OVERFLOW;
4747 
4748 		table[index].address = NULL;
4749 		table[index].size = 0;
4750 	}
4751 
4752 	return status;
4753 }
4754 
4755 
4756 area_id
4757 area_for(void *address)
4758 {
4759 	team_id space;
4760 
4761 	if (IS_USER_ADDRESS(address)) {
4762 		// we try the user team address space, if any
4763 		space = vm_current_user_address_space_id();
4764 		if (space < B_OK)
4765 			return space;
4766 	} else
4767 		space = vm_kernel_address_space_id();
4768 
4769 	return vm_area_for(space, (addr_t)address);
4770 }
4771 
4772 
4773 area_id
4774 find_area(const char *name)
4775 {
4776 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
4777 	struct hash_iterator iterator;
4778 	hash_open(sAreaHash, &iterator);
4779 
4780 	vm_area *area;
4781 	area_id id = B_NAME_NOT_FOUND;
4782 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
4783 		if (area->id == RESERVED_AREA_ID)
4784 			continue;
4785 
4786 		if (!strcmp(area->name, name)) {
4787 			id = area->id;
4788 			break;
4789 		}
4790 	}
4791 
4792 	hash_close(sAreaHash, &iterator, false);
4793 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
4794 
4795 	return id;
4796 }
4797 
4798 
4799 status_t
4800 _get_area_info(area_id id, area_info *info, size_t size)
4801 {
4802 	if (size != sizeof(area_info) || info == NULL)
4803 		return B_BAD_VALUE;
4804 
4805 	AddressSpaceReadLocker locker;
4806 	vm_area *area;
4807 	status_t status = locker.SetFromArea(id, area);
4808 	if (status != B_OK)
4809 		return status;
4810 
4811 	fill_area_info(area, info, size);
4812 	return B_OK;
4813 }
4814 
4815 
4816 status_t
4817 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
4818 {
4819 	addr_t nextBase = *(addr_t *)cookie;
4820 
4821 	// we're already through the list
4822 	if (nextBase == (addr_t)-1)
4823 		return B_ENTRY_NOT_FOUND;
4824 
4825 	if (team == B_CURRENT_TEAM)
4826 		team = team_get_current_team_id();
4827 
4828 	AddressSpaceReadLocker locker(team);
4829 	if (!locker.IsLocked())
4830 		return B_BAD_TEAM_ID;
4831 
4832 	vm_area *area;
4833 	for (area = locker.AddressSpace()->areas; area != NULL;
4834 			area = area->address_space_next) {
4835 		if (area->id == RESERVED_AREA_ID)
4836 			continue;
4837 
4838 		if (area->base > nextBase)
4839 			break;
4840 	}
4841 
4842 	if (area == NULL) {
4843 		nextBase = (addr_t)-1;
4844 		return B_ENTRY_NOT_FOUND;
4845 	}
4846 
4847 	fill_area_info(area, info, size);
4848 	*cookie = (int32)(area->base);
4849 
4850 	return B_OK;
4851 }
4852 
4853 
4854 status_t
4855 set_area_protection(area_id area, uint32 newProtection)
4856 {
4857 	fix_protection(&newProtection);
4858 
4859 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
4860 		newProtection);
4861 }
4862 
4863 
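/*!	Resizes the area \a areaID to \a newSize, which must be a multiple of
	B_PAGE_SIZE. All areas attached to the area's cache are resized
	together, so growing only succeeds if each of them has room (or an
	adjacent reserved range) behind it; shrinking also unmaps the pages
	beyond the new size and resizes the cache itself.
*/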
4864 status_t
4865 resize_area(area_id areaID, size_t newSize)
4866 {
4867 	// is newSize a multiple of B_PAGE_SIZE?
4868 	if (newSize & (B_PAGE_SIZE - 1))
4869 		return B_BAD_VALUE;
4870 
4871 	// lock all affected address spaces and the cache
4872 	vm_area* area;
4873 	vm_cache* cache;
4874 
4875 	MultiAddressSpaceLocker locker;
4876 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
4877 		&cache);
4878 	if (status != B_OK)
4879 		return status;
4880 	AreaCacheLocker cacheLocker(cache);	// already locked
4881 
4882 	size_t oldSize = area->size;
4883 	if (newSize == oldSize)
4884 		return B_OK;
4885 
4886 	// Resize all areas of this area's cache
4887 
4888 	if (cache->type != CACHE_TYPE_RAM)
4889 		return B_NOT_ALLOWED;
4890 
4891 	if (oldSize < newSize) {
4892 		// We need to check if all areas of this cache can be resized
4893 
4894 		for (vm_area* current = cache->areas; current != NULL;
4895 				current = current->cache_next) {
4896 			if (current->address_space_next
4897 				&& current->address_space_next->base <= (current->base
4898 					+ newSize)) {
4899 				// If the area was created inside a reserved area, it can
4900 				// also be resized in that area
4901 				// ToDo: if there is free space after the reserved area, it could be used as well...
4902 				vm_area *next = current->address_space_next;
4903 				if (next->id == RESERVED_AREA_ID
4904 					&& next->cache_offset <= current->base
4905 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
4906 					continue;
4907 
4908 				return B_ERROR;
4909 			}
4910 		}
4911 	}
4912 
4913 	// Okay, looks good so far, so let's do it
4914 
4915 	for (vm_area* current = cache->areas; current != NULL;
4916 			current = current->cache_next) {
4917 		if (current->address_space_next
4918 			&& current->address_space_next->base <= (current->base + newSize)) {
4919 			vm_area *next = current->address_space_next;
4920 			if (next->id == RESERVED_AREA_ID
4921 				&& next->cache_offset <= current->base
4922 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
4923 				// resize reserved area
4924 				addr_t offset = current->base + newSize - next->base;
4925 				if (next->size <= offset) {
4926 					current->address_space_next = next->address_space_next;
4927 					free(next);
4928 				} else {
4929 					next->size -= offset;
4930 					next->base += offset;
4931 				}
4932 			} else {
4933 				status = B_ERROR;
4934 				break;
4935 			}
4936 		}
4937 
4938 		current->size = newSize;
4939 
4940 		// we also need to unmap all pages beyond the new size, if the area has shrunk
4941 		if (newSize < oldSize) {
4942 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
4943 				false);
4944 		}
4945 	}
4946 
4947 	if (status == B_OK)
4948 		status = vm_cache_resize(cache, newSize);
4949 
4950 	if (status < B_OK) {
4951 		// This shouldn't really be possible, but hey, who knows
4952 		for (vm_area* current = cache->areas; current != NULL;
4953 				current = current->cache_next) {
4954 			current->size = oldSize;
4955 		}
4956 	}
4957 
4958 	// ToDo: we must honour the lock restrictions of this area
4959 	return status;
4960 }
4961 
4962 
4963 /**	Transfers the specified area to a new team. The caller must be the owner
4964  *	of the area (not yet enforced but probably should be).
4965  *	This function is currently not exported to the kernel namespace, but is
4966  *	only accessible using the _kern_transfer_area() syscall.
4967  */
4968 
4969 static status_t
4970 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
4971 {
4972 	// TODO: implement like clone_area(), just atomically (ie. hand out a new area ID)!
4973 	return B_ERROR;
4974 #if 0
4975 	vm_address_space *sourceAddressSpace;
4976 	vm_address_space *targetAddressSpace;
4977 	void *reservedAddress = NULL;
4978 	vm_area *reserved;
4979 	vm_area *area = vm_get_area(id);
4980 	if (area == NULL)
4981 		return B_BAD_VALUE;
4982 
4983 	// ToDo: check if the current team owns the area
4984 	status_t status = team_get_address_space(target, &targetAddressSpace);
4985 	if (status != B_OK)
4986 		goto err1;
4987 
4988 	// We will first remove the area, and then reserve its former
4989 	// address range so that we can later reclaim it if the
4990 	// transfer failed.
4991 
4992 	sourceAddressSpace = area->address_space;
4993 	reserved = create_reserved_area_struct(sourceAddressSpace, 0);
4994 	if (reserved == NULL) {
4995 		status = B_NO_MEMORY;
4996 		goto err2;
4997 	}
4998 
4999 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
5000 
5001 	// unmap the area in the source address space
5002 	vm_unmap_pages(area, area->base, area->size);
5003 
5004 	// TODO: there might be additional page faults at this point!
5005 
5006 	reservedAddress = (void *)area->base;
5007 	remove_area_from_address_space(sourceAddressSpace, area);
5008 	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
5009 		area->size, reserved);
5010 		// famous last words: this cannot fail :)
5011 
5012 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
5013 
5014 	if (status != B_OK)
5015 		goto err3;
5016 
5017 	// insert the area into the target address space
5018 
5019 	acquire_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0, 0);
5020 	// check to see if this address space has entered DELETE state
5021 	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
5022 		// okay, someone is trying to delete this address space now, so we can't
5023 		// insert the area, so back out
5024 		status = B_BAD_TEAM_ID;
5025 		goto err4;
5026 	}
5027 
5028 	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
5029 	if (status < B_OK)
5030 		goto err4;
5031 
5032 	// The area was successfully transferred to the new team when we got here
5033 	area->address_space = targetAddressSpace;
5034 
5035 	// TODO: take area lock/wiring into account!
5036 
5037 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
5038 
5039 	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress,
5040 		area->size);
5041 	vm_put_address_space(sourceAddressSpace);
5042 		// we keep the reference of the target address space for the
5043 		// area, so we only have to put the one from the source
5044 	vm_put_area(area);
5045 
5046 	return B_OK;
5047 
5048 err4:
5049 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
5050 err3:
5051 	// insert the area again into the source address space
5052 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
5053 	// check to see if this address space has entered DELETE state
5054 	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
5055 		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
5056 				area->size, area) != B_OK) {
5057 		// We can't insert the area anymore - we have to delete it manually
5058 		vm_cache *cache = vm_area_get_locked_cache(area);
5059 		atomic_add(&area->no_cache_change, 1);
5060 		vm_area_put_locked_cache(cache);
5061 
5062 		vm_cache_remove_area(cache, area);
5063 		vm_cache_release_ref(cache);
5064 		free(area->name);
5065 		free(area);
5066 		area = NULL;
5067 	}
5068 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
5069 err2:
5070 	vm_put_address_space(targetAddressSpace);
5071 err1:
5072 	if (area != NULL)
5073 		vm_put_area(area);
5074 	return status;
5075 #endif
5076 }
5077 
5078 
5079 area_id
5080 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
5081 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
5082 {
5083 	if (!arch_vm_supports_protection(protection))
5084 		return B_NOT_SUPPORTED;
5085 
5086 	fix_protection(&protection);
5087 
5088 	return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress,
5089 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
5090 }
5091 
5092 
5093 area_id
5094 clone_area(const char *name, void **_address, uint32 addressSpec,
5095 	uint32 protection, area_id source)
5096 {
5097 	if ((protection & B_KERNEL_PROTECTION) == 0)
5098 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5099 
5100 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
5101 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source);
5102 }
5103 
5104 
5105 area_id
5106 create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
5107 	uint32 size, uint32 lock, uint32 protection)
5108 {
5109 	fix_protection(&protection);
5110 
5111 	return vm_create_anonymous_area(team->id, (char *)name, address,
5112 		addressSpec, size, lock, protection);
5113 }
5114 
5115 
5116 area_id
5117 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
5118 	uint32 protection)
5119 {
5120 	fix_protection(&protection);
5121 
5122 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address,
5123 		addressSpec, size, lock, protection);
5124 }
5125 
5126 
5127 status_t
5128 delete_area_etc(struct team *team, area_id area)
5129 {
5130 	return vm_delete_area(team->id, area);
5131 }
5132 
5133 
5134 status_t
5135 delete_area(area_id area)
5136 {
5137 	return vm_delete_area(vm_kernel_address_space_id(), area);
5138 }
5139 
5140 
5141 //	#pragma mark - Userland syscalls
5142 
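// The syscall wrappers below share a common pattern: every userland pointer
// is checked with IS_USER_ADDRESS() and its contents are copied in/out via
// user_memcpy()/user_strlcpy() before the corresponding kernel function is
// called, so that the kernel never dereferences user memory directly.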
5143 
5144 status_t
5145 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size)
5146 {
5147 	// filter out some unavailable values (for userland)
5148 	switch (addressSpec) {
5149 		case B_ANY_KERNEL_ADDRESS:
5150 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5151 			return B_BAD_VALUE;
5152 	}
5153 
5154 	addr_t address;
5155 
5156 	if (!IS_USER_ADDRESS(userAddress)
5157 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5158 		return B_BAD_ADDRESS;
5159 
5160 	status_t status = vm_reserve_address_range(vm_current_user_address_space_id(),
5161 		(void **)&address, addressSpec, size, RESERVED_AVOID_BASE);
5162 	if (status < B_OK)
5163 		return status;
5164 
5165 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5166 		vm_unreserve_address_range(vm_current_user_address_space_id(),
5167 			(void *)address, size);
5168 		return B_BAD_ADDRESS;
5169 	}
5170 
5171 	return B_OK;
5172 }
5173 
5174 
5175 area_id
5176 _user_area_for(void *address)
5177 {
5178 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
5179 }
5180 
5181 
5182 area_id
5183 _user_find_area(const char *userName)
5184 {
5185 	char name[B_OS_NAME_LENGTH];
5186 
5187 	if (!IS_USER_ADDRESS(userName)
5188 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5189 		return B_BAD_ADDRESS;
5190 
5191 	return find_area(name);
5192 }
5193 
5194 
5195 status_t
5196 _user_get_area_info(area_id area, area_info *userInfo)
5197 {
5198 	if (!IS_USER_ADDRESS(userInfo))
5199 		return B_BAD_ADDRESS;
5200 
5201 	area_info info;
5202 	status_t status = get_area_info(area, &info);
5203 	if (status < B_OK)
5204 		return status;
5205 
5206 	// TODO: do we want to prevent userland from seeing kernel protections?
5207 	//info.protection &= B_USER_PROTECTION;
5208 
5209 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5210 		return B_BAD_ADDRESS;
5211 
5212 	return status;
5213 }
5214 
5215 
5216 status_t
5217 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
5218 {
5219 	int32 cookie;
5220 
5221 	if (!IS_USER_ADDRESS(userCookie)
5222 		|| !IS_USER_ADDRESS(userInfo)
5223 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5224 		return B_BAD_ADDRESS;
5225 
5226 	area_info info;
5227 	status_t status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
5228 	if (status != B_OK)
5229 		return status;
5230 
5231 	//info.protection &= B_USER_PROTECTION;
5232 
5233 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5234 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5235 		return B_BAD_ADDRESS;
5236 
5237 	return status;
5238 }
5239 
5240 
5241 status_t
5242 _user_set_area_protection(area_id area, uint32 newProtection)
5243 {
5244 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5245 		return B_BAD_VALUE;
5246 
5247 	fix_protection(&newProtection);
5248 
5249 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
5250 		newProtection);
5251 }
5252 
5253 
5254 status_t
5255 _user_resize_area(area_id area, size_t newSize)
5256 {
5257 	// ToDo: Since we restrict deleting of areas to those owned by the team,
5258 	// we should also do that for resizing (check other functions, too).
5259 	return resize_area(area, newSize);
5260 }
5261 
5262 
5263 status_t
5264 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
5265 {
5266 	// filter out some unavailable values (for userland)
5267 	switch (addressSpec) {
5268 		case B_ANY_KERNEL_ADDRESS:
5269 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5270 			return B_BAD_VALUE;
5271 	}
5272 
5273 	void *address;
5274 	if (!IS_USER_ADDRESS(userAddress)
5275 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5276 		return B_BAD_ADDRESS;
5277 
5278 	status_t status = transfer_area(area, &address, addressSpec, target);
5279 	if (status < B_OK)
5280 		return status;
5281 
5282 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5283 		return B_BAD_ADDRESS;
5284 
5285 	return status;
5286 }
5287 
5288 
5289 area_id
5290 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
5291 	uint32 protection, area_id sourceArea)
5292 {
5293 	char name[B_OS_NAME_LENGTH];
5294 	void *address;
5295 
5296 	// filter out some unavailable values (for userland)
5297 	switch (addressSpec) {
5298 		case B_ANY_KERNEL_ADDRESS:
5299 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5300 			return B_BAD_VALUE;
5301 	}
5302 	if ((protection & ~B_USER_PROTECTION) != 0)
5303 		return B_BAD_VALUE;
5304 
5305 	if (!IS_USER_ADDRESS(userName)
5306 		|| !IS_USER_ADDRESS(userAddress)
5307 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5308 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5309 		return B_BAD_ADDRESS;
5310 
5311 	fix_protection(&protection);
5312 
5313 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address,
5314 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
5315 	if (clonedArea < B_OK)
5316 		return clonedArea;
5317 
5318 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5319 		delete_area(clonedArea);
5320 		return B_BAD_ADDRESS;
5321 	}
5322 
5323 	return clonedArea;
5324 }
5325 
5326 
5327 area_id
5328 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
5329 	size_t size, uint32 lock, uint32 protection)
5330 {
5331 	char name[B_OS_NAME_LENGTH];
5332 	void *address;
5333 
5334 	// filter out some unavailable values (for userland)
5335 	switch (addressSpec) {
5336 		case B_ANY_KERNEL_ADDRESS:
5337 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5338 			return B_BAD_VALUE;
5339 	}
5340 	if ((protection & ~B_USER_PROTECTION) != 0)
5341 		return B_BAD_VALUE;
5342 
5343 	if (!IS_USER_ADDRESS(userName)
5344 		|| !IS_USER_ADDRESS(userAddress)
5345 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5346 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5347 		return B_BAD_ADDRESS;
5348 
5349 	if (addressSpec == B_EXACT_ADDRESS
5350 		&& IS_KERNEL_ADDRESS(address))
5351 		return B_BAD_VALUE;
5352 
5353 	fix_protection(&protection);
5354 
5355 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
5356 		(char *)name, &address, addressSpec, size, lock, protection);
5357 
5358 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5359 		delete_area(area);
5360 		return B_BAD_ADDRESS;
5361 	}
5362 
5363 	return area;
5364 }
5365 
5366 
5367 status_t
5368 _user_delete_area(area_id area)
5369 {
5370 	// Unlike the BeOS implementation, you can now only delete areas
5371 	// that you have created yourself from userland.
5372 	// The documentation for delete_area() explicitly states that this
5373 	// will be restricted in the future, and so it will.
5374 	return vm_delete_area(vm_current_user_address_space_id(), area);
5375 }
5376 
5377 
5378 // ToDo: create a BeOS style call for this!
5379 
5380 area_id
5381 _user_vm_map_file(const char *userName, void **userAddress, int addressSpec,
5382 	addr_t size, int protection, int mapping, const char *userPath, off_t offset)
5383 {
5384 	char name[B_OS_NAME_LENGTH];
5385 	char path[B_PATH_NAME_LENGTH];
5386 	void *address;
5387 	area_id area;
5388 
5389 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
5390 		|| !IS_USER_ADDRESS(userPath)
5391 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
5392 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
5393 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5394 		return B_BAD_ADDRESS;
5395 
5396 	// userland created areas can always be accessed by the kernel
5397 	protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
5398 
5399 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
5400 		addressSpec, size, protection, mapping, path, offset, false);
5401 	if (area < B_OK)
5402 		return area;
5403 
5404 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5405 		return B_BAD_ADDRESS;
5406 
5407 	return area;
5408 }
5409 
5410