xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4f2fd49bdc6078128b1391191e4edac647044c3d)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <vm.h>
11 
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 
18 #include <OS.h>
19 #include <KernelExport.h>
20 
21 #include <AutoDeleter.h>
22 
23 #include <arch/cpu.h>
24 #include <arch/vm.h>
25 #include <boot/elf.h>
26 #include <boot/stage2.h>
27 #include <condition_variable.h>
28 #include <console.h>
29 #include <debug.h>
30 #include <file_cache.h>
31 #include <fs/fd.h>
32 #include <heap.h>
33 #include <int.h>
34 #include <lock.h>
35 #include <low_resource_manager.h>
36 #include <smp.h>
37 #include <system_info.h>
38 #include <thread.h>
39 #include <team.h>
40 #include <tracing.h>
41 #include <util/AutoLock.h>
42 #include <util/khash.h>
43 #include <vm_address_space.h>
44 #include <vm_cache.h>
45 #include <vm_page.h>
46 #include <vm_priv.h>
47 
48 #include "VMAnonymousCache.h"
49 #include "io_requests.h"
50 
51 
52 //#define TRACE_VM
53 //#define TRACE_FAULTS
54 #ifdef TRACE_VM
55 #	define TRACE(x) dprintf x
56 #else
57 #	define TRACE(x) ;
58 #endif
59 #ifdef TRACE_FAULTS
60 #	define FTRACE(x) dprintf x
61 #else
62 #	define FTRACE(x) ;
63 #endif
64 
65 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
66 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
67 
68 
69 class AddressSpaceReadLocker {
70 public:
71 	AddressSpaceReadLocker(team_id team);
72 	AddressSpaceReadLocker(vm_address_space* space, bool getNewReference);
73 	AddressSpaceReadLocker();
74 	~AddressSpaceReadLocker();
75 
76 	status_t SetTo(team_id team);
77 	void SetTo(vm_address_space* space, bool getNewReference);
78 	status_t SetFromArea(area_id areaID, vm_area*& area);
79 
80 	bool IsLocked() const { return fLocked; }
81 	void Unlock();
82 
83 	void Unset();
84 
85 	vm_address_space* AddressSpace() { return fSpace; }
86 
87 private:
88 	vm_address_space* fSpace;
89 	bool	fLocked;
90 };
91 
92 class AddressSpaceWriteLocker {
93 public:
94 	AddressSpaceWriteLocker(team_id team);
95 	AddressSpaceWriteLocker();
96 	~AddressSpaceWriteLocker();
97 
98 	status_t SetTo(team_id team);
99 	status_t SetFromArea(area_id areaID, vm_area*& area);
100 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
101 		vm_area*& area);
102 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
103 
104 	bool IsLocked() const { return fLocked; }
105 	void Unlock();
106 
107 	void DegradeToReadLock();
108 	void Unset();
109 
110 	vm_address_space* AddressSpace() { return fSpace; }
111 
112 private:
113 	vm_address_space* fSpace;
114 	bool	fLocked;
115 	bool	fDegraded;
116 };
117 
118 class MultiAddressSpaceLocker {
119 public:
120 	MultiAddressSpaceLocker();
121 	~MultiAddressSpaceLocker();
122 
123 	inline status_t AddTeam(team_id team, bool writeLock,
124 		vm_address_space** _space = NULL);
125 	inline status_t AddArea(area_id area, bool writeLock,
126 		vm_address_space** _space = NULL);
127 
128 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
129 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL,
130 		bool checkNoCacheChange = false);
131 
132 	status_t Lock();
133 	void Unlock();
134 	bool IsLocked() const { return fLocked; }
135 
136 	void Unset();
137 
138 private:
139 	struct lock_item {
140 		vm_address_space*	space;
141 		bool				write_lock;
142 	};
143 
144 	bool _ResizeIfNeeded();
145 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
146 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
147 		vm_address_space** _space);
148 
149 	static int _CompareItems(const void* _a, const void* _b);
150 
151 	lock_item*	fItems;
152 	int32		fCapacity;
153 	int32		fCount;
154 	bool		fLocked;
155 };
156 
157 
158 class AreaCacheLocking {
159 public:
160 	inline bool Lock(vm_cache* lockable)
161 	{
162 		return false;
163 	}
164 
165 	inline void Unlock(vm_cache* lockable)
166 	{
167 		vm_area_put_locked_cache(lockable);
168 	}
169 };
170 
171 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
172 public:
173 	inline AreaCacheLocker(vm_cache* cache = NULL)
174 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
175 	{
176 	}
177 
178 	inline AreaCacheLocker(vm_area* area)
179 		: AutoLocker<vm_cache, AreaCacheLocking>()
180 	{
181 		SetTo(area);
182 	}
183 
184 	inline void SetTo(vm_area* area)
185 	{
186 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
187 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
188 	}
189 };
190 
191 
192 #define AREA_HASH_TABLE_SIZE 1024
193 static area_id sNextAreaID = 1;
194 static hash_table *sAreaHash;
195 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash");
196 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings");
197 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache");
198 
199 static off_t sAvailableMemory;
200 static off_t sNeededMemory;
201 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
202 static uint32 sPageFaults;
203 
204 #if DEBUG_CACHE_LIST
205 
206 struct cache_info {
207 	vm_cache*	cache;
208 	addr_t		page_count;
209 	addr_t		committed;
210 };
211 
212 static const int kCacheInfoTableCount = 100 * 1024;
213 static cache_info* sCacheInfoTable;
214 
215 #endif	// DEBUG_CACHE_LIST
216 
217 
218 // function declarations
219 static void delete_area(vm_address_space *addressSpace, vm_area *area);
220 static vm_address_space *get_address_space_by_area_id(area_id id);
221 static status_t vm_soft_fault(vm_address_space *addressSpace, addr_t address,
222 	bool isWrite, bool isUser);
223 static status_t map_backing_store(vm_address_space *addressSpace,
224 	vm_cache *cache, void **_virtualAddress, off_t offset, addr_t size,
225 	uint32 addressSpec, int wiring, int protection, int mapping,
226 	vm_area **_area, const char *areaName, bool unmapAddressRange, bool kernel);
227 
228 
229 //	#pragma mark -
230 
231 
232 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
233 	:
234 	fSpace(NULL),
235 	fLocked(false)
236 {
237 	SetTo(team);
238 }
239 
240 
241 /*! Takes over the reference of the address space, if \a getNewReference is
242 	\c false.
243 */
244 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space,
245 		bool getNewReference)
246 	:
247 	fSpace(NULL),
248 	fLocked(false)
249 {
250 	SetTo(space, getNewReference);
251 }
252 
253 
254 AddressSpaceReadLocker::AddressSpaceReadLocker()
255 	:
256 	fSpace(NULL),
257 	fLocked(false)
258 {
259 }
260 
261 
262 AddressSpaceReadLocker::~AddressSpaceReadLocker()
263 {
264 	Unset();
265 }
266 
267 
268 void
269 AddressSpaceReadLocker::Unset()
270 {
271 	Unlock();
272 	if (fSpace != NULL)
273 		vm_put_address_space(fSpace);
274 }
275 
276 
277 status_t
278 AddressSpaceReadLocker::SetTo(team_id team)
279 {
280 	fSpace = vm_get_address_space(team);
281 	if (fSpace == NULL)
282 		return B_BAD_TEAM_ID;
283 
284 	rw_lock_read_lock(&fSpace->lock);
285 	fLocked = true;
286 	return B_OK;
287 }
288 
289 
290 /*! Takes over the reference of the address space, if \a getNewReference is
291 	\c false.
292 */
293 void
294 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference)
295 {
296 	fSpace = space;
297 
298 	if (getNewReference)
299 		atomic_add(&fSpace->ref_count, 1);
300 
301 	rw_lock_read_lock(&fSpace->lock);
302 	fLocked = true;
303 }
304 
305 
306 status_t
307 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
308 {
309 	fSpace = get_address_space_by_area_id(areaID);
310 	if (fSpace == NULL)
311 		return B_BAD_TEAM_ID;
312 
313 	rw_lock_read_lock(&fSpace->lock);
314 
315 	rw_lock_read_lock(&sAreaHashLock);
316 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
317 	rw_lock_read_unlock(&sAreaHashLock);
318 
319 	if (area == NULL || area->address_space != fSpace) {
320 		rw_lock_read_unlock(&fSpace->lock);
321 		return B_BAD_VALUE;
322 	}
323 
324 	fLocked = true;
325 	return B_OK;
326 }
327 
328 
329 void
330 AddressSpaceReadLocker::Unlock()
331 {
332 	if (fLocked) {
333 		rw_lock_read_unlock(&fSpace->lock);
334 		fLocked = false;
335 	}
336 }
337 
338 
339 //	#pragma mark -
340 
341 
342 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
343 	:
344 	fSpace(NULL),
345 	fLocked(false),
346 	fDegraded(false)
347 {
348 	SetTo(team);
349 }
350 
351 
352 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
353 	:
354 	fSpace(NULL),
355 	fLocked(false),
356 	fDegraded(false)
357 {
358 }
359 
360 
361 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
362 {
363 	Unset();
364 }
365 
366 
367 void
368 AddressSpaceWriteLocker::Unset()
369 {
370 	Unlock();
371 	if (fSpace != NULL)
372 		vm_put_address_space(fSpace);
373 }
374 
375 
376 status_t
377 AddressSpaceWriteLocker::SetTo(team_id team)
378 {
379 	fSpace = vm_get_address_space(team);
380 	if (fSpace == NULL)
381 		return B_BAD_TEAM_ID;
382 
383 	rw_lock_write_lock(&fSpace->lock);
384 	fLocked = true;
385 	return B_OK;
386 }
387 
388 
389 status_t
390 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
391 {
392 	fSpace = get_address_space_by_area_id(areaID);
393 	if (fSpace == NULL)
394 		return B_BAD_VALUE;
395 
396 	rw_lock_write_lock(&fSpace->lock);
397 
398 	rw_lock_read_lock(&sAreaHashLock);
399 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
400 	rw_lock_read_unlock(&sAreaHashLock);
401 
402 	if (area == NULL || area->address_space != fSpace) {
403 		rw_lock_write_unlock(&fSpace->lock);
404 		return B_BAD_VALUE;
405 	}
406 
407 	fLocked = true;
408 	return B_OK;
409 }
410 
411 
412 status_t
413 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
414 	bool allowKernel, vm_area*& area)
415 {
416 	rw_lock_read_lock(&sAreaHashLock);
417 
418 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
419 	if (area != NULL
420 		&& (area->address_space->id == team
421 			|| allowKernel && team == vm_kernel_address_space_id())) {
422 		fSpace = area->address_space;
423 		atomic_add(&fSpace->ref_count, 1);
424 	}
425 
426 	rw_lock_read_unlock(&sAreaHashLock);
427 
428 	if (fSpace == NULL)
429 		return B_BAD_VALUE;
430 
431 	// Second try to get the area -- this time with the address space
432 	// write lock held
433 
434 	rw_lock_write_lock(&fSpace->lock);
435 
436 	rw_lock_read_lock(&sAreaHashLock);
437 	area = (vm_area *)hash_lookup(sAreaHash, &areaID);
438 	rw_lock_read_unlock(&sAreaHashLock);
439 
440 	if (area == NULL) {
441 		rw_lock_write_unlock(&fSpace->lock);
442 		return B_BAD_VALUE;
443 	}
444 
445 	fLocked = true;
446 	return B_OK;
447 }
448 
449 
450 status_t
451 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
452 	vm_area*& area)
453 {
454 	return SetFromArea(team, areaID, false, area);
455 }
456 
457 
458 void
459 AddressSpaceWriteLocker::Unlock()
460 {
461 	if (fLocked) {
462 		if (fDegraded)
463 			rw_lock_read_unlock(&fSpace->lock);
464 		else
465 			rw_lock_write_unlock(&fSpace->lock);
466 		fLocked = false;
467 		fDegraded = false;
468 	}
469 }
470 
471 
472 void
473 AddressSpaceWriteLocker::DegradeToReadLock()
474 {
475 	// TODO: the current R/W lock implementation just keeps the write lock here
476 	rw_lock_read_lock(&fSpace->lock);
477 	rw_lock_write_unlock(&fSpace->lock);
478 	fDegraded = true;
479 }
480 
481 
482 //	#pragma mark -
483 
484 
485 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
486 	:
487 	fItems(NULL),
488 	fCapacity(0),
489 	fCount(0),
490 	fLocked(false)
491 {
492 }
493 
494 
495 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
496 {
497 	Unset();
498 	free(fItems);
499 }
500 
501 
502 /*static*/ int
503 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
504 {
505 	lock_item* a = (lock_item*)_a;
506 	lock_item* b = (lock_item*)_b;
507 	return a->space->id - b->space->id;
508 }
509 
510 
511 bool
512 MultiAddressSpaceLocker::_ResizeIfNeeded()
513 {
514 	if (fCount == fCapacity) {
515 		lock_item* items = (lock_item*)realloc(fItems,
516 			(fCapacity + 4) * sizeof(lock_item));
517 		if (items == NULL)
518 			return false;
519 
520 		fCapacity += 4;
521 		fItems = items;
522 	}
523 
524 	return true;
525 }
526 
527 
528 int32
529 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
530 {
531 	for (int32 i = 0; i < fCount; i++) {
532 		if (fItems[i].space == space)
533 			return i;
534 	}
535 
536 	return -1;
537 }
538 
539 
540 status_t
541 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
542 	bool writeLock, vm_address_space** _space)
543 {
544 	if (!space)
545 		return B_BAD_VALUE;
546 
547 	int32 index = _IndexOfAddressSpace(space);
548 	if (index < 0) {
549 		if (!_ResizeIfNeeded()) {
550 			vm_put_address_space(space);
551 			return B_NO_MEMORY;
552 		}
553 
554 		lock_item& item = fItems[fCount++];
555 		item.space = space;
556 		item.write_lock = writeLock;
557 	} else {
558 
559 		// one reference is enough
560 		vm_put_address_space(space);
561 
562 		fItems[index].write_lock |= writeLock;
563 	}
564 
565 	if (_space != NULL)
566 		*_space = space;
567 
568 	return B_OK;
569 }
570 
571 
572 inline status_t
573 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
574 	vm_address_space** _space)
575 {
576 	return _AddAddressSpace(vm_get_address_space(team), writeLock,
577 		_space);
578 }
579 
580 
581 inline status_t
582 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
583 	vm_address_space** _space)
584 {
585 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
586 		_space);
587 }
588 
589 
590 void
591 MultiAddressSpaceLocker::Unset()
592 {
593 	Unlock();
594 
595 	for (int32 i = 0; i < fCount; i++)
596 		vm_put_address_space(fItems[i].space);
597 
598 	fCount = 0;
599 }
600 
601 
602 status_t
603 MultiAddressSpaceLocker::Lock()
604 {
605 	ASSERT(!fLocked);
606 
607 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
608 
609 	for (int32 i = 0; i < fCount; i++) {
610 		status_t status;
611 		if (fItems[i].write_lock)
612 			status = rw_lock_write_lock(&fItems[i].space->lock);
613 		else
614 			status = rw_lock_read_lock(&fItems[i].space->lock);
615 
616 		if (status < B_OK) {
617 			while (--i >= 0) {
618 				if (fItems[i].write_lock)
619 					rw_lock_write_unlock(&fItems[i].space->lock);
620 				else
621 					rw_lock_read_unlock(&fItems[i].space->lock);
622 			}
623 			return status;
624 		}
625 	}
626 
627 	fLocked = true;
628 	return B_OK;
629 }
630 
631 
632 void
633 MultiAddressSpaceLocker::Unlock()
634 {
635 	if (!fLocked)
636 		return;
637 
638 	for (int32 i = 0; i < fCount; i++) {
639 		if (fItems[i].write_lock)
640 			rw_lock_write_unlock(&fItems[i].space->lock);
641 		else
642 			rw_lock_read_unlock(&fItems[i].space->lock);
643 	}
644 
645 	fLocked = false;
646 }
647 
648 
649 /*!	Adds all address spaces of the areas associated with the given area's cache,
650 	locks them, and locks the cache (including a reference to it). It retries
651 	until the situation is stable (i.e. the neither cache nor cache's areas
652 	changed) or an error occurs. If \c checkNoCacheChange ist \c true it does
653 	not return until all areas' \c no_cache_change flags is clear.
654 */
655 status_t
656 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
657 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
658 	vm_cache** _cache, bool checkNoCacheChange)
659 {
660 	// remember the original state
661 	int originalCount = fCount;
662 	lock_item* originalItems = NULL;
663 	if (fCount > 0) {
664 		originalItems = new(nothrow) lock_item[fCount];
665 		if (originalItems == NULL)
666 			return B_NO_MEMORY;
667 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
668 	}
669 	ArrayDeleter<lock_item> _(originalItems);
670 
671 	// get the cache
672 	vm_cache* cache;
673 	vm_area* area;
674 	status_t error;
675 	{
676 		AddressSpaceReadLocker locker;
677 		error = locker.SetFromArea(areaID, area);
678 		if (error != B_OK)
679 			return error;
680 
681 		cache = vm_area_get_locked_cache(area);
682 	}
683 
684 	while (true) {
685 		// add all areas
686 		vm_area* firstArea = cache->areas;
687 		for (vm_area* current = firstArea; current;
688 				current = current->cache_next) {
689 			error = AddArea(current->id,
690 				current == area ? writeLockThisOne : writeLockOthers);
691 			if (error != B_OK) {
692 				vm_area_put_locked_cache(cache);
693 				return error;
694 			}
695 		}
696 
697 		// unlock the cache and attempt to lock the address spaces
698 		vm_area_put_locked_cache(cache);
699 
700 		error = Lock();
701 		if (error != B_OK)
702 			return error;
703 
704 		// lock the cache again and check whether anything has changed
705 
706 		// check whether the area is gone in the meantime
707 		rw_lock_read_lock(&sAreaHashLock);
708 		area = (vm_area *)hash_lookup(sAreaHash, &areaID);
709 		rw_lock_read_unlock(&sAreaHashLock);
710 
711 		if (area == NULL) {
712 			Unlock();
713 			return B_BAD_VALUE;
714 		}
715 
716 		// lock the cache
717 		vm_cache* oldCache = cache;
718 		cache = vm_area_get_locked_cache(area);
719 
720 		// If neither the area's cache has changed nor its area list we're
721 		// done...
722 		bool done = (cache == oldCache || firstArea == cache->areas);
723 
724 		// ... unless we're supposed to check the areas' "no_cache_change" flag
725 		bool yield = false;
726 		if (done && checkNoCacheChange) {
727 			for (vm_area *tempArea = cache->areas; tempArea != NULL;
728 					tempArea = tempArea->cache_next) {
729 				if (tempArea->no_cache_change) {
730 					done = false;
731 					yield = true;
732 					break;
733 				}
734 			}
735 		}
736 
737 		// If everything looks dandy, return the values.
738 		if (done) {
739 			_area = area;
740 			if (_cache != NULL)
741 				*_cache = cache;
742 			return B_OK;
743 		}
744 
745 		// Restore the original state and try again.
746 
747 		// Unlock the address spaces, but keep the cache locked for the next
748 		// iteration.
749 		Unlock();
750 
751 		// Get an additional reference to the original address spaces.
752 		for (int32 i = 0; i < originalCount; i++)
753 			atomic_add(&originalItems[i].space->ref_count, 1);
754 
755 		// Release all references to the current address spaces.
756 		for (int32 i = 0; i < fCount; i++)
757 			vm_put_address_space(fItems[i].space);
758 
759 		// Copy over the original state.
760 		fCount = originalCount;
761 		if (originalItems != NULL)
762 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
763 
764 		if (yield)
765 			thread_yield(true);
766 	}
767 }
768 
769 
770 //	#pragma mark -
771 
772 
773 #if VM_PAGE_FAULT_TRACING
774 
775 namespace VMPageFaultTracing {
776 
777 class PageFaultStart : public AbstractTraceEntry {
778 public:
779 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
780 		:
781 		fAddress(address),
782 		fPC(pc),
783 		fWrite(write),
784 		fUser(user)
785 	{
786 		Initialized();
787 	}
788 
789 	virtual void AddDump(TraceOutput& out)
790 	{
791 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
792 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
793 	}
794 
795 private:
796 	addr_t	fAddress;
797 	addr_t	fPC;
798 	bool	fWrite;
799 	bool	fUser;
800 };
801 
802 
803 // page fault errors
804 enum {
805 	PAGE_FAULT_ERROR_NO_AREA		= 0,
806 	PAGE_FAULT_ERROR_KERNEL_ONLY,
807 	PAGE_FAULT_ERROR_READ_ONLY,
808 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
809 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
810 };
811 
812 
813 class PageFaultError : public AbstractTraceEntry {
814 public:
815 	PageFaultError(area_id area, status_t error)
816 		:
817 		fArea(area),
818 		fError(error)
819 	{
820 		Initialized();
821 	}
822 
823 	virtual void AddDump(TraceOutput& out)
824 	{
825 		switch (fError) {
826 			case PAGE_FAULT_ERROR_NO_AREA:
827 				out.Print("page fault error: no area");
828 				break;
829 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
830 				out.Print("page fault error: area: %ld, kernel only", fArea);
831 				break;
832 			case PAGE_FAULT_ERROR_READ_ONLY:
833 				out.Print("page fault error: area: %ld, read only", fArea);
834 				break;
835 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
836 				out.Print("page fault error: kernel touching bad user memory");
837 				break;
838 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
839 				out.Print("page fault error: no address space");
840 				break;
841 			default:
842 				out.Print("page fault error: area: %ld, error: %s", fArea,
843 					strerror(fError));
844 				break;
845 		}
846 	}
847 
848 private:
849 	area_id		fArea;
850 	status_t	fError;
851 };
852 
853 
854 class PageFaultDone : public AbstractTraceEntry {
855 public:
856 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
857 			vm_page* page)
858 		:
859 		fArea(area),
860 		fTopCache(topCache),
861 		fCache(cache),
862 		fPage(page)
863 	{
864 		Initialized();
865 	}
866 
867 	virtual void AddDump(TraceOutput& out)
868 	{
869 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
870 			"page: %p", fArea, fTopCache, fCache, fPage);
871 	}
872 
873 private:
874 	area_id		fArea;
875 	VMCache*	fTopCache;
876 	VMCache*	fCache;
877 	vm_page*	fPage;
878 };
879 
880 }	// namespace VMPageFaultTracing
881 
882 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
883 #else
884 #	define TPF(x) ;
885 #endif	// VM_PAGE_FAULT_TRACING
886 
887 
888 //	#pragma mark -
889 
890 
891 static int
892 area_compare(void *_area, const void *key)
893 {
894 	vm_area *area = (vm_area *)_area;
895 	const area_id *id = (const area_id *)key;
896 
897 	if (area->id == *id)
898 		return 0;
899 
900 	return -1;
901 }
902 
903 
904 static uint32
905 area_hash(void *_area, const void *key, uint32 range)
906 {
907 	vm_area *area = (vm_area *)_area;
908 	const area_id *id = (const area_id *)key;
909 
910 	if (area != NULL)
911 		return area->id % range;
912 
913 	return (uint32)*id % range;
914 }
915 
916 
917 static vm_address_space *
918 get_address_space_by_area_id(area_id id)
919 {
920 	vm_address_space* addressSpace = NULL;
921 
922 	rw_lock_read_lock(&sAreaHashLock);
923 
924 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
925 	if (area != NULL) {
926 		addressSpace = area->address_space;
927 		atomic_add(&addressSpace->ref_count, 1);
928 	}
929 
930 	rw_lock_read_unlock(&sAreaHashLock);
931 
932 	return addressSpace;
933 }
934 
935 
936 //! You need to have the address space locked when calling this function
937 static vm_area *
938 lookup_area(vm_address_space* addressSpace, area_id id)
939 {
940 	rw_lock_read_lock(&sAreaHashLock);
941 
942 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
943 	if (area != NULL && area->address_space != addressSpace)
944 		area = NULL;
945 
946 	rw_lock_read_unlock(&sAreaHashLock);
947 
948 	return area;
949 }
950 
951 
952 static vm_area *
953 create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags)
954 {
955 	vm_area *reserved = (vm_area *)malloc_nogrow(sizeof(vm_area));
956 	if (reserved == NULL)
957 		return NULL;
958 
959 	memset(reserved, 0, sizeof(vm_area));
960 	reserved->id = RESERVED_AREA_ID;
961 		// this marks it as reserved space
962 	reserved->protection = flags;
963 	reserved->address_space = addressSpace;
964 
965 	return reserved;
966 }
967 
968 
969 static vm_area *
970 create_area_struct(vm_address_space *addressSpace, const char *name,
971 	uint32 wiring, uint32 protection)
972 {
973 	// restrict the area name to B_OS_NAME_LENGTH
974 	size_t length = strlen(name) + 1;
975 	if (length > B_OS_NAME_LENGTH)
976 		length = B_OS_NAME_LENGTH;
977 
978 	vm_area *area = (vm_area *)malloc_nogrow(sizeof(vm_area));
979 	if (area == NULL)
980 		return NULL;
981 
982 	area->name = (char *)malloc_nogrow(length);
983 	if (area->name == NULL) {
984 		free(area);
985 		return NULL;
986 	}
987 	strlcpy(area->name, name, length);
988 
989 	area->id = atomic_add(&sNextAreaID, 1);
990 	area->base = 0;
991 	area->size = 0;
992 	area->protection = protection;
993 	area->wiring = wiring;
994 	area->memory_type = 0;
995 
996 	area->cache = NULL;
997 	area->no_cache_change = 0;
998 	area->cache_offset = 0;
999 
1000 	area->address_space = addressSpace;
1001 	area->address_space_next = NULL;
1002 	area->cache_next = area->cache_prev = NULL;
1003 	area->hash_next = NULL;
1004 	new (&area->mappings) vm_area_mappings;
1005 	area->page_protections = NULL;
1006 
1007 	return area;
1008 }
1009 
1010 
1011 /**	Finds a reserved area that covers the region spanned by \a start and
1012  *	\a size, inserts the \a area into that region and makes sure that
1013  *	there are reserved regions for the remaining parts.
1014  */
1015 
1016 static status_t
1017 find_reserved_area(vm_address_space *addressSpace, addr_t start,
1018 	addr_t size, vm_area *area)
1019 {
1020 	vm_area *next, *last = NULL;
1021 
1022 	next = addressSpace->areas;
1023 	while (next) {
1024 		if (next->base <= start && next->base + next->size >= start + size) {
1025 			// this area covers the requested range
1026 			if (next->id != RESERVED_AREA_ID) {
1027 				// but it's not reserved space, it's a real area
1028 				return B_BAD_VALUE;
1029 			}
1030 
1031 			break;
1032 		}
1033 		last = next;
1034 		next = next->address_space_next;
1035 	}
1036 	if (next == NULL)
1037 		return B_ENTRY_NOT_FOUND;
1038 
1039 	// now we have to transfer the requested part of the reserved
1040 	// range to the new area - and remove, resize or split the old
1041 	// reserved area.
1042 
1043 	if (start == next->base) {
1044 		// the area starts at the beginning of the reserved range
1045 		if (last)
1046 			last->address_space_next = area;
1047 		else
1048 			addressSpace->areas = area;
1049 
1050 		if (size == next->size) {
1051 			// the new area fully covers the reversed range
1052 			area->address_space_next = next->address_space_next;
1053 			vm_put_address_space(addressSpace);
1054 			free(next);
1055 		} else {
1056 			// resize the reserved range behind the area
1057 			area->address_space_next = next;
1058 			next->base += size;
1059 			next->size -= size;
1060 		}
1061 	} else if (start + size == next->base + next->size) {
1062 		// the area is at the end of the reserved range
1063 		area->address_space_next = next->address_space_next;
1064 		next->address_space_next = area;
1065 
1066 		// resize the reserved range before the area
1067 		next->size = start - next->base;
1068 	} else {
1069 		// the area splits the reserved range into two separate ones
1070 		// we need a new reserved area to cover this space
1071 		vm_area *reserved = create_reserved_area_struct(addressSpace,
1072 			next->protection);
1073 		if (reserved == NULL)
1074 			return B_NO_MEMORY;
1075 
1076 		atomic_add(&addressSpace->ref_count, 1);
1077 		reserved->address_space_next = next->address_space_next;
1078 		area->address_space_next = reserved;
1079 		next->address_space_next = area;
1080 
1081 		// resize regions
1082 		reserved->size = next->base + next->size - start - size;
1083 		next->size = start - next->base;
1084 		reserved->base = start + size;
1085 		reserved->cache_offset = next->cache_offset;
1086 	}
1087 
1088 	area->base = start;
1089 	area->size = size;
1090 	addressSpace->change_count++;
1091 
1092 	return B_OK;
1093 }
1094 
1095 
1096 /*!	Must be called with this address space's sem held */
1097 static status_t
1098 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start,
1099 	addr_t size, addr_t end, uint32 addressSpec, vm_area *area)
1100 {
1101 	vm_area *last = NULL;
1102 	vm_area *next;
1103 	bool foundSpot = false;
1104 
1105 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
1106 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
1107 		size, end, addressSpec, area));
1108 
1109 	// do some sanity checking
1110 	if (start < addressSpace->base || size == 0
1111 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
1112 		|| start + size > end)
1113 		return B_BAD_ADDRESS;
1114 
1115 	if (addressSpec == B_EXACT_ADDRESS) {
1116 		// search for a reserved area
1117 		status_t status = find_reserved_area(addressSpace, start, size, area);
1118 		if (status == B_OK || status == B_BAD_VALUE)
1119 			return status;
1120 
1121 		// There was no reserved area, and the slot doesn't seem to be used
1122 		// already
1123 		// ToDo: this could be further optimized.
1124 	}
1125 
1126 	size_t alignment = B_PAGE_SIZE;
1127 	if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) {
1128 		// align the memory to the next power of two of the size
1129 		while (alignment < size)
1130 			alignment <<= 1;
1131 	}
1132 
1133 	start = ROUNDUP(start, alignment);
1134 
1135 	// walk up to the spot where we should start searching
1136 second_chance:
1137 	next = addressSpace->areas;
1138 	while (next) {
1139 		if (next->base >= start + size) {
1140 			// we have a winner
1141 			break;
1142 		}
1143 		last = next;
1144 		next = next->address_space_next;
1145 	}
1146 
1147 	// find the right spot depending on the address specification - the area
1148 	// will be inserted directly after "last" ("next" is not referenced anymore)
1149 
1150 	switch (addressSpec) {
1151 		case B_ANY_ADDRESS:
1152 		case B_ANY_KERNEL_ADDRESS:
1153 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1154 			// find a hole big enough for a new area
1155 			if (!last) {
1156 				// see if we can build it at the beginning of the virtual map
1157 				if (!next || (next->base >= ROUNDUP(addressSpace->base,
1158 						alignment) + size)) {
1159 					foundSpot = true;
1160 					area->base = ROUNDUP(addressSpace->base, alignment);
1161 					break;
1162 				}
1163 				last = next;
1164 				next = next->address_space_next;
1165 			}
1166 			// keep walking
1167 			while (next) {
1168 				if (next->base >= ROUNDUP(last->base + last->size, alignment)
1169 						+ size) {
1170 					// we found a spot (it'll be filled up below)
1171 					break;
1172 				}
1173 				last = next;
1174 				next = next->address_space_next;
1175 			}
1176 
1177 			if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP(
1178 					last->base + last->size, alignment) + (size - 1))) {
1179 				// got a spot
1180 				foundSpot = true;
1181 				area->base = ROUNDUP(last->base + last->size, alignment);
1182 				break;
1183 			} else {
1184 				// We didn't find a free spot - if there were any reserved areas
1185 				// with the RESERVED_AVOID_BASE flag set, we can now test those
1186 				// for free space
1187 				// ToDo: it would make sense to start with the biggest of them
1188 				next = addressSpace->areas;
1189 				last = NULL;
1190 				for (last = NULL; next; next = next->address_space_next,
1191 						last = next) {
1192 					// ToDo: take free space after the reserved area into account!
1193 					if (next->base == ROUNDUP(next->base, alignment)
1194 						&& next->size == size) {
1195 						// The reserved area is entirely covered, and thus,
1196 						// removed
1197 						if (last)
1198 							last->address_space_next = next->address_space_next;
1199 						else
1200 							addressSpace->areas = next->address_space_next;
1201 
1202 						foundSpot = true;
1203 						area->base = next->base;
1204 						free(next);
1205 						break;
1206 					}
1207 					if (next->size - (ROUNDUP(next->base, alignment)
1208 							- next->base) >= size) {
1209 						// The new area will be placed at the end of the
1210 						// reserved area, and the reserved area will be resized
1211 						// to make space
1212 						foundSpot = true;
1213 						next->size -= size;
1214 						last = next;
1215 						area->base = next->base + next->size;
1216 						break;
1217 					}
1218 				}
1219 			}
1220 			break;
1221 
1222 		case B_BASE_ADDRESS:
1223 			// find a hole big enough for a new area beginning with "start"
1224 			if (!last) {
1225 				// see if we can build it at the beginning of the specified start
1226 				if (!next || (next->base >= start + size)) {
1227 					foundSpot = true;
1228 					area->base = start;
1229 					break;
1230 				}
1231 				last = next;
1232 				next = next->address_space_next;
1233 			}
1234 			// keep walking
1235 			while (next) {
1236 				if (next->base >= last->base + last->size + size) {
1237 					// we found a spot (it'll be filled up below)
1238 					break;
1239 				}
1240 				last = next;
1241 				next = next->address_space_next;
1242 			}
1243 
1244 			if ((addressSpace->base + (addressSpace->size - 1))
1245 					>= (last->base + last->size + (size - 1))) {
1246 				// got a spot
1247 				foundSpot = true;
1248 				if (last->base + last->size <= start)
1249 					area->base = start;
1250 				else
1251 					area->base = last->base + last->size;
1252 				break;
1253 			}
1254 			// we didn't find a free spot in the requested range, so we'll
1255 			// try again without any restrictions
1256 			start = addressSpace->base;
1257 			addressSpec = B_ANY_ADDRESS;
1258 			last = NULL;
1259 			goto second_chance;
1260 
1261 		case B_EXACT_ADDRESS:
1262 			// see if we can create it exactly here
1263 			if (!last) {
1264 				if (!next || (next->base >= start + size)) {
1265 					foundSpot = true;
1266 					area->base = start;
1267 					break;
1268 				}
1269 			} else {
1270 				if (next) {
1271 					if (last->base + last->size <= start && next->base >= start + size) {
1272 						foundSpot = true;
1273 						area->base = start;
1274 						break;
1275 					}
1276 				} else {
1277 					if ((last->base + (last->size - 1)) <= start - 1) {
1278 						foundSpot = true;
1279 						area->base = start;
1280 					}
1281 				}
1282 			}
1283 			break;
1284 		default:
1285 			return B_BAD_VALUE;
1286 	}
1287 
1288 	if (!foundSpot)
1289 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1290 
1291 	area->size = size;
1292 	if (last) {
1293 		area->address_space_next = last->address_space_next;
1294 		last->address_space_next = area;
1295 	} else {
1296 		area->address_space_next = addressSpace->areas;
1297 		addressSpace->areas = area;
1298 	}
1299 	addressSpace->change_count++;
1300 	return B_OK;
1301 }
1302 
1303 
1304 /*!	This inserts the area you pass into the specified address space.
1305 	It will also set the "_address" argument to its base address when
1306 	the call succeeds.
1307 	You need to hold the vm_address_space semaphore.
1308 */
1309 static status_t
1310 insert_area(vm_address_space *addressSpace, void **_address,
1311 	uint32 addressSpec, addr_t size, vm_area *area)
1312 {
1313 	addr_t searchBase, searchEnd;
1314 	status_t status;
1315 
1316 	switch (addressSpec) {
1317 		case B_EXACT_ADDRESS:
1318 			searchBase = (addr_t)*_address;
1319 			searchEnd = (addr_t)*_address + size;
1320 			break;
1321 
1322 		case B_BASE_ADDRESS:
1323 			searchBase = (addr_t)*_address;
1324 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1325 			break;
1326 
1327 		case B_ANY_ADDRESS:
1328 		case B_ANY_KERNEL_ADDRESS:
1329 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1330 			searchBase = addressSpace->base;
1331 			// TODO: remove this again when vm86 mode is moved into the kernel
1332 			// completely (currently needs a userland address space!)
1333 			if (searchBase == USER_BASE)
1334 				searchBase = USER_BASE_ANY;
1335 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1336 			break;
1337 
1338 		default:
1339 			return B_BAD_VALUE;
1340 	}
1341 
1342 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1343 		searchEnd, addressSpec, area);
1344 	if (status == B_OK) {
1345 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
1346 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
1347 		*_address = (void *)area->base;
1348 	}
1349 
1350 	return status;
1351 }
1352 
1353 
1354 static inline void
1355 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection)
1356 {
1357 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
1358 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1359 	uint8& entry = area->page_protections[pageIndex / 2];
1360 	if (pageIndex % 2 == 0)
1361 		entry = entry & 0xf0 | protection;
1362 	else
1363 		entry = entry & 0x0f | (protection << 4);
1364 }
1365 
1366 
1367 static inline uint32
1368 get_area_page_protection(vm_area* area, addr_t pageAddress)
1369 {
1370 	if (area->page_protections == NULL)
1371 		return area->protection;
1372 
1373 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1374 	uint32 protection = area->page_protections[pageIndex / 2];
1375 	if (pageIndex % 2 == 0)
1376 		protection &= 0x0f;
1377 	else
1378 		protection >>= 4;
1379 
1380 	return protection | B_KERNEL_READ_AREA
1381 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1382 }
1383 
1384 
1385 /*!	Cuts a piece out of an area. If the given cut range covers the complete
1386 	area, it is deleted. If it covers the beginning or the end, the area is
1387 	resized accordingly. If the range covers some part in the middle of the
1388 	area, it is split in two; in this case the second area is returned via
1389 	\a _secondArea (the variable is left untouched in the other cases).
1390 	The address space must be write locked.
1391 */
1392 static status_t
1393 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address,
1394 	addr_t lastAddress, vm_area** _secondArea, bool kernel)
1395 {
1396 	// Does the cut range intersect with the area at all?
1397 	addr_t areaLast = area->base + (area->size - 1);
1398 	if (area->base > lastAddress || areaLast < address)
1399 		return B_OK;
1400 
1401 	// Is the area fully covered?
1402 	if (area->base >= address && areaLast <= lastAddress) {
1403 		delete_area(addressSpace, area);
1404 		return B_OK;
1405 	}
1406 
1407 	AreaCacheLocker cacheLocker(area);
1408 	vm_cache* cache = area->cache;
1409 
1410 	// Cut the end only?
1411 	if (areaLast <= lastAddress) {
1412 		addr_t newSize = address - area->base;
1413 
1414 		// unmap pages
1415 		vm_unmap_pages(area, address, area->size - newSize, false);
1416 
1417 		// If no one else uses the area's cache, we can resize it, too.
1418 		if (cache->areas == area && area->cache_next == NULL
1419 			&& list_is_empty(&cache->consumers)) {
1420 			status_t error = cache->Resize(cache->virtual_base + newSize);
1421 			if (error != B_OK)
1422 				return error;
1423 		}
1424 
1425 		area->size = newSize;
1426 
1427 		return B_OK;
1428 	}
1429 
1430 	// Cut the beginning only?
1431 	if (area->base >= address) {
1432 		addr_t newBase = lastAddress + 1;
1433 		addr_t newSize = areaLast - lastAddress;
1434 
1435 		// unmap pages
1436 		vm_unmap_pages(area, area->base, newBase - area->base, false);
1437 
1438 		// TODO: If no one else uses the area's cache, we should resize it, too!
1439 
1440 		area->cache_offset += newBase - area->base;
1441 		area->base = newBase;
1442 		area->size = newSize;
1443 
1444 		return B_OK;
1445 	}
1446 
1447 	// The tough part -- cut a piece out of the middle of the area.
1448 	// We do that by shrinking the area to the begin section and creating a
1449 	// new area for the end section.
1450 
1451 	addr_t firstNewSize = address - area->base;
1452 	addr_t secondBase = lastAddress + 1;
1453 	addr_t secondSize = areaLast - lastAddress;
1454 
1455 	// unmap pages
1456 	vm_unmap_pages(area, address, area->size - firstNewSize, false);
1457 
1458 	// resize the area
1459 	addr_t oldSize = area->size;
1460 	area->size = firstNewSize;
1461 
1462 	// TODO: If no one else uses the area's cache, we might want to create a
1463 	// new cache for the second area, transfer the concerned pages from the
1464 	// first cache to it and resize the first cache.
1465 
1466 	// map the second area
1467 	vm_area* secondArea;
1468 	void* secondBaseAddress = (void*)secondBase;
1469 	status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress,
1470 		area->cache_offset + (secondBase - area->base), secondSize,
1471 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
1472 		&secondArea, area->name, false, kernel);
1473 	if (error != B_OK) {
1474 		area->size = oldSize;
1475 		return error;
1476 	}
1477 
1478 	// We need a cache reference for the new area.
1479 	cache->AcquireRefLocked();
1480 
1481 	if (_secondArea != NULL)
1482 		*_secondArea = secondArea;
1483 
1484 	return B_OK;
1485 }
1486 
1487 
1488 static inline void
1489 increment_page_wired_count(vm_page* page)
1490 {
1491 	// TODO: needs to be atomic on all platforms!
1492 	// ... but at least the check isn't. Consequently we should hold
1493 	// sMappingLock, which would allows us to even avoid atomic_add() on
1494 	// gMappedPagesCount.
1495 	if (page->wired_count++ == 0) {
1496 		if (page->mappings.IsEmpty())
1497 			atomic_add(&gMappedPagesCount, 1);
1498 	}
1499 }
1500 
1501 
1502 static inline void
1503 decrement_page_wired_count(vm_page* page)
1504 {
1505 	if (--page->wired_count == 0) {
1506 		// TODO: needs to be atomic on all platforms!
1507 		// See above!
1508 		if (page->mappings.IsEmpty())
1509 			atomic_add(&gMappedPagesCount, -1);
1510 	}
1511 }
1512 
1513 
1514 /*!	Deletes all areas in the given address range.
1515 	The address space must be write-locked.
1516 */
1517 static status_t
1518 unmap_address_range(vm_address_space *addressSpace, addr_t address, addr_t size,
1519 	bool kernel)
1520 {
1521 	size = PAGE_ALIGN(size);
1522 	addr_t lastAddress = address + (size - 1);
1523 
1524 	// Check, whether the caller is allowed to modify the concerned areas.
1525 	vm_area* area;
1526 	if (!kernel) {
1527 		area = addressSpace->areas;
1528 		while (area != NULL) {
1529 			vm_area* nextArea = area->address_space_next;
1530 
1531 			if (area->id != RESERVED_AREA_ID) {
1532 				addr_t areaLast = area->base + (area->size - 1);
1533 				if (area->base < lastAddress && address < areaLast) {
1534 					if ((area->protection & B_KERNEL_AREA) != 0)
1535 						return B_NOT_ALLOWED;
1536 				}
1537 			}
1538 
1539 			area = nextArea;
1540 		}
1541 	}
1542 
1543 	area = addressSpace->areas;
1544 	while (area != NULL) {
1545 		vm_area* nextArea = area->address_space_next;
1546 
1547 		if (area->id != RESERVED_AREA_ID) {
1548 			addr_t areaLast = area->base + (area->size - 1);
1549 			if (area->base < lastAddress && address < areaLast) {
1550 				status_t error = cut_area(addressSpace, area, address,
1551 					lastAddress, NULL, kernel);
1552 				if (error != B_OK)
1553 					return error;
1554 					// Failing after already messing with areas is ugly, but we
1555 					// can't do anything about it.
1556 			}
1557 		}
1558 
1559 		area = nextArea;
1560 	}
1561 
1562 	return B_OK;
1563 }
1564 
1565 
1566 /*! You need to hold the lock of the cache and the write lock of the address
1567 	space when calling this function.
1568 	Note, that in case of error your cache will be temporarily unlocked.
1569 */
1570 static status_t
1571 map_backing_store(vm_address_space *addressSpace, vm_cache *cache,
1572 	void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1573 	int wiring, int protection, int mapping, vm_area **_area,
1574 	const char *areaName, bool unmapAddressRange, bool kernel)
1575 {
1576 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
1577 		addressSpace, cache, *_virtualAddress, offset, size, addressSpec,
1578 		wiring, protection, _area, areaName));
1579 	cache->AssertLocked();
1580 
1581 	vm_area *area = create_area_struct(addressSpace, areaName, wiring,
1582 		protection);
1583 	if (area == NULL)
1584 		return B_NO_MEMORY;
1585 
1586 	status_t status;
1587 
1588 	// if this is a private map, we need to create a new cache
1589 	// to handle the private copies of pages as they are written to
1590 	vm_cache* sourceCache = cache;
1591 	if (mapping == REGION_PRIVATE_MAP) {
1592 		vm_cache *newCache;
1593 
1594 		// create an anonymous cache
1595 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1596 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true);
1597 		if (status != B_OK)
1598 			goto err1;
1599 
1600 		newCache->Lock();
1601 		newCache->temporary = 1;
1602 		newCache->scan_skip = cache->scan_skip;
1603 		newCache->virtual_base = offset;
1604 		newCache->virtual_end = offset + size;
1605 
1606 		cache->AddConsumer(newCache);
1607 
1608 		cache = newCache;
1609 	}
1610 
1611 	status = cache->SetMinimalCommitment(size);
1612 	if (status != B_OK)
1613 		goto err2;
1614 
1615 	// check to see if this address space has entered DELETE state
1616 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1617 		// okay, someone is trying to delete this address space now, so we can't
1618 		// insert the area, so back out
1619 		status = B_BAD_TEAM_ID;
1620 		goto err2;
1621 	}
1622 
1623 	if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) {
1624 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
1625 			size, kernel);
1626 		if (status != B_OK)
1627 			goto err2;
1628 	}
1629 
1630 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1631 	if (status < B_OK)
1632 		goto err2;
1633 
1634 	// attach the cache to the area
1635 	area->cache = cache;
1636 	area->cache_offset = offset;
1637 
1638 	// point the cache back to the area
1639 	cache->InsertAreaLocked(area);
1640 	if (mapping == REGION_PRIVATE_MAP)
1641 		cache->Unlock();
1642 
1643 	// insert the area in the global area hash table
1644 	rw_lock_write_lock(&sAreaHashLock);
1645 	hash_insert(sAreaHash, area);
1646 	rw_lock_write_unlock(&sAreaHashLock);
1647 
1648 	// grab a ref to the address space (the area holds this)
1649 	atomic_add(&addressSpace->ref_count, 1);
1650 
1651 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1652 //		cache, sourceCache, areaName, area);
1653 
1654 	*_area = area;
1655 	return B_OK;
1656 
1657 err2:
1658 	if (mapping == REGION_PRIVATE_MAP) {
1659 		// We created this cache, so we must delete it again. Note, that we
1660 		// need to temporarily unlock the source cache or we'll otherwise
1661 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1662 		sourceCache->Unlock();
1663 		cache->ReleaseRefAndUnlock();
1664 		sourceCache->Lock();
1665 	}
1666 err1:
1667 	free(area->name);
1668 	free(area);
1669 	return status;
1670 }
1671 
1672 
1673 status_t
1674 vm_unreserve_address_range(team_id team, void *address, addr_t size)
1675 {
1676 	AddressSpaceWriteLocker locker(team);
1677 	if (!locker.IsLocked())
1678 		return B_BAD_TEAM_ID;
1679 
1680 	// check to see if this address space has entered DELETE state
1681 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1682 		// okay, someone is trying to delete this address space now, so we can't
1683 		// insert the area, so back out
1684 		return B_BAD_TEAM_ID;
1685 	}
1686 
1687 	// search area list and remove any matching reserved ranges
1688 
1689 	vm_area* area = locker.AddressSpace()->areas;
1690 	vm_area* last = NULL;
1691 	while (area) {
1692 		// the area must be completely part of the reserved range
1693 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1694 			&& area->base + area->size <= (addr_t)address + size) {
1695 			// remove reserved range
1696 			vm_area *reserved = area;
1697 			if (last)
1698 				last->address_space_next = reserved->address_space_next;
1699 			else
1700 				locker.AddressSpace()->areas = reserved->address_space_next;
1701 
1702 			area = reserved->address_space_next;
1703 			vm_put_address_space(locker.AddressSpace());
1704 			free(reserved);
1705 			continue;
1706 		}
1707 
1708 		last = area;
1709 		area = area->address_space_next;
1710 	}
1711 
1712 	return B_OK;
1713 }
1714 
1715 
1716 status_t
1717 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec,
1718 	addr_t size, uint32 flags)
1719 {
1720 	if (size == 0)
1721 		return B_BAD_VALUE;
1722 
1723 	AddressSpaceWriteLocker locker(team);
1724 	if (!locker.IsLocked())
1725 		return B_BAD_TEAM_ID;
1726 
1727 	// check to see if this address space has entered DELETE state
1728 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1729 		// okay, someone is trying to delete this address space now, so we
1730 		// can't insert the area, let's back out
1731 		return B_BAD_TEAM_ID;
1732 	}
1733 
1734 	vm_area *area = create_reserved_area_struct(locker.AddressSpace(), flags);
1735 	if (area == NULL)
1736 		return B_NO_MEMORY;
1737 
1738 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1739 		size, area);
1740 	if (status < B_OK) {
1741 		free(area);
1742 		return status;
1743 	}
1744 
1745 	// the area is now reserved!
1746 
1747 	area->cache_offset = area->base;
1748 		// we cache the original base address here
1749 
1750 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1751 	return B_OK;
1752 }
1753 
1754 
1755 area_id
1756 vm_create_anonymous_area(team_id team, const char *name, void **address,
1757 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
1758 	uint32 flags, bool kernel)
1759 {
1760 	vm_area *area;
1761 	vm_cache *cache;
1762 	vm_page *page = NULL;
1763 	bool isStack = (protection & B_STACK_AREA) != 0;
1764 	page_num_t guardPages;
1765 	bool canOvercommit = false;
1766 	addr_t physicalBase = 0;
1767 
1768 	TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size));
1769 
1770 	size = PAGE_ALIGN(size);
1771 
1772 	if (size == 0)
1773 		return B_BAD_VALUE;
1774 	if (!arch_vm_supports_protection(protection))
1775 		return B_NOT_SUPPORTED;
1776 
1777 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1778 		canOvercommit = true;
1779 
1780 #ifdef DEBUG_KERNEL_STACKS
1781 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1782 		isStack = true;
1783 #endif
1784 
1785 	/* check parameters */
1786 	switch (addressSpec) {
1787 		case B_ANY_ADDRESS:
1788 		case B_EXACT_ADDRESS:
1789 		case B_BASE_ADDRESS:
1790 		case B_ANY_KERNEL_ADDRESS:
1791 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1792 			break;
1793 		case B_PHYSICAL_BASE_ADDRESS:
1794 			physicalBase = (addr_t)*address;
1795 			addressSpec = B_ANY_KERNEL_ADDRESS;
1796 			break;
1797 
1798 		default:
1799 			return B_BAD_VALUE;
1800 	}
1801 
1802 	bool doReserveMemory = false;
1803 	switch (wiring) {
1804 		case B_NO_LOCK:
1805 			break;
1806 		case B_FULL_LOCK:
1807 		case B_LAZY_LOCK:
1808 		case B_CONTIGUOUS:
1809 			doReserveMemory = true;
1810 			break;
1811 		case B_ALREADY_WIRED:
1812 			break;
1813 		case B_LOMEM:
1814 		//case B_SLOWMEM:
1815 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1816 			wiring = B_FULL_LOCK;
1817 			doReserveMemory = true;
1818 			break;
1819 		default:
1820 			return B_BAD_VALUE;
1821 	}
1822 
1823 	// For full lock or contiguous areas we're also going to map the pages and
1824 	// thus need to reserve pages for the mapping backend upfront.
1825 	addr_t reservedMapPages = 0;
1826 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1827 		AddressSpaceWriteLocker locker;
1828 		status_t status = locker.SetTo(team);
1829 		if (status != B_OK)
1830 			return status;
1831 
1832 		vm_translation_map *map = &locker.AddressSpace()->translation_map;
1833 		reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
1834 	}
1835 
1836 	// Reserve memory before acquiring the address space lock. This reduces the
1837 	// chances of failure, since while holding the write lock to the address
1838 	// space (if it is the kernel address space that is), the low memory handler
1839 	// won't be able to free anything for us.
1840 	addr_t reservedMemory = 0;
1841 	if (doReserveMemory) {
1842 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1843 		if (vm_try_reserve_memory(size, timeout) != B_OK)
1844 			return B_NO_MEMORY;
1845 		reservedMemory = size;
1846 		// TODO: We don't reserve the memory for the pages for the page
1847 		// directories/tables. We actually need to do since we currently don't
1848 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1849 		// there are actually less physical pages than there should be, which
1850 		// can get the VM into trouble in low memory situations.
1851 	}
1852 
1853 	AddressSpaceWriteLocker locker;
1854 	vm_address_space *addressSpace;
1855 	status_t status;
1856 
1857 	// For full lock areas reserve the pages before locking the address
1858 	// space. E.g. block caches can't release their memory while we hold the
1859 	// address space lock.
1860 	page_num_t reservedPages = reservedMapPages;
1861 	if (wiring == B_FULL_LOCK)
1862 		reservedPages += size / B_PAGE_SIZE;
1863 	if (reservedPages > 0) {
1864 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1865 			if (!vm_page_try_reserve_pages(reservedPages)) {
1866 				reservedPages = 0;
1867 				status = B_WOULD_BLOCK;
1868 				goto err0;
1869 			}
1870 		} else
1871 			vm_page_reserve_pages(reservedPages);
1872 	}
1873 
1874 	status = locker.SetTo(team);
1875 	if (status != B_OK)
1876 		goto err0;
1877 
1878 	addressSpace = locker.AddressSpace();
1879 
1880 	if (wiring == B_CONTIGUOUS) {
1881 		// we try to allocate the page run here upfront as this may easily
1882 		// fail for obvious reasons
1883 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, physicalBase,
1884 			size / B_PAGE_SIZE);
1885 		if (page == NULL) {
1886 			status = B_NO_MEMORY;
1887 			goto err0;
1888 		}
1889 	}
1890 
1891 	// create an anonymous cache
1892 	// if it's a stack, make sure that two pages are available at least
1893 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1894 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1895 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1896 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1897 		wiring == B_NO_LOCK);
1898 	if (status != B_OK)
1899 		goto err1;
1900 
1901 	cache->temporary = 1;
1902 	cache->virtual_end = size;
1903 	cache->committed_size = reservedMemory;
1904 		// TODO: This should be done via a method.
1905 	reservedMemory = 0;
1906 
1907 	switch (wiring) {
1908 		case B_LAZY_LOCK:
1909 		case B_FULL_LOCK:
1910 		case B_CONTIGUOUS:
1911 		case B_ALREADY_WIRED:
1912 			cache->scan_skip = 1;
1913 			break;
1914 		case B_NO_LOCK:
1915 			cache->scan_skip = 0;
1916 			break;
1917 	}
1918 
1919 	cache->Lock();
1920 
1921 	status = map_backing_store(addressSpace, cache, address, 0, size,
1922 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
1923 		(flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel);
1924 
1925 	if (status < B_OK) {
1926 		cache->ReleaseRefAndUnlock();
1927 		goto err1;
1928 	}
1929 
1930 	locker.DegradeToReadLock();
1931 
1932 	switch (wiring) {
1933 		case B_NO_LOCK:
1934 		case B_LAZY_LOCK:
1935 			// do nothing - the pages are mapped in as needed
1936 			break;
1937 
1938 		case B_FULL_LOCK:
1939 		{
1940 			// Allocate and map all pages for this area
1941 
1942 			off_t offset = 0;
1943 			for (addr_t address = area->base; address < area->base + (area->size - 1);
1944 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1945 #ifdef DEBUG_KERNEL_STACKS
1946 #	ifdef STACK_GROWS_DOWNWARDS
1947 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
1948 						* B_PAGE_SIZE)
1949 #	else
1950 				if (isStack && address >= area->base + area->size
1951 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1952 #	endif
1953 					continue;
1954 #endif
1955 				vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
1956 				cache->InsertPage(page, offset);
1957 				vm_map_page(area, page, address, protection);
1958 
1959 				// Periodically unreserve pages we've already allocated, so that
1960 				// we don't unnecessarily increase the pressure on the VM.
1961 				if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) {
1962 					page_num_t toUnreserve = 128;
1963 					vm_page_unreserve_pages(toUnreserve);
1964 					reservedPages -= toUnreserve;
1965 				}
1966 			}
1967 
1968 			break;
1969 		}
1970 
1971 		case B_ALREADY_WIRED:
1972 		{
1973 			// the pages should already be mapped. This is only really useful during
1974 			// boot time. Find the appropriate vm_page objects and stick them in
1975 			// the cache object.
1976 			vm_translation_map *map = &addressSpace->translation_map;
1977 			off_t offset = 0;
1978 
1979 			if (!gKernelStartup)
1980 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1981 
1982 			map->ops->lock(map);
1983 
1984 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
1985 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1986 					offset += B_PAGE_SIZE) {
1987 				addr_t physicalAddress;
1988 				uint32 flags;
1989 				status = map->ops->query(map, virtualAddress,
1990 					&physicalAddress, &flags);
1991 				if (status < B_OK) {
1992 					panic("looking up mapping failed for va 0x%lx\n",
1993 						virtualAddress);
1994 				}
1995 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1996 				if (page == NULL) {
1997 					panic("looking up page failed for pa 0x%lx\n",
1998 						physicalAddress);
1999 				}
2000 
2001 				increment_page_wired_count(page);
2002 				vm_page_set_state(page, PAGE_STATE_WIRED);
2003 				cache->InsertPage(page, offset);
2004 			}
2005 
2006 			map->ops->unlock(map);
2007 			break;
2008 		}
2009 
2010 		case B_CONTIGUOUS:
2011 		{
2012 			// We have already allocated our continuous pages run, so we can now just
2013 			// map them in the address space
2014 			vm_translation_map *map = &addressSpace->translation_map;
2015 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
2016 			addr_t virtualAddress = area->base;
2017 			off_t offset = 0;
2018 
2019 			map->ops->lock(map);
2020 
2021 			for (virtualAddress = area->base; virtualAddress < area->base
2022 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2023 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
2024 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2025 				if (page == NULL)
2026 					panic("couldn't lookup physical page just allocated\n");
2027 
2028 				status = map->ops->map(map, virtualAddress, physicalAddress,
2029 					protection);
2030 				if (status < B_OK)
2031 					panic("couldn't map physical page in page run\n");
2032 
2033 				increment_page_wired_count(page);
2034 				vm_page_set_state(page, PAGE_STATE_WIRED);
2035 				cache->InsertPage(page, offset);
2036 			}
2037 
2038 			map->ops->unlock(map);
2039 			break;
2040 		}
2041 
2042 		default:
2043 			break;
2044 	}
2045 
2046 	cache->Unlock();
2047 
2048 	if (reservedPages > 0)
2049 		vm_page_unreserve_pages(reservedPages);
2050 
2051 	TRACE(("vm_create_anonymous_area: done\n"));
2052 
2053 	area->cache_type = CACHE_TYPE_RAM;
2054 	return area->id;
2055 
2056 err1:
2057 	if (wiring == B_CONTIGUOUS) {
2058 		// we had reserved the area space upfront...
2059 		addr_t pageNumber = page->physical_page_number;
2060 		int32 i;
2061 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
2062 			page = vm_lookup_page(pageNumber);
2063 			if (page == NULL)
2064 				panic("couldn't lookup physical page just allocated\n");
2065 
2066 			vm_page_set_state(page, PAGE_STATE_FREE);
2067 		}
2068 	}
2069 
2070 err0:
2071 	if (reservedPages > 0)
2072 		vm_page_unreserve_pages(reservedPages);
2073 	if (reservedMemory > 0)
2074 		vm_unreserve_memory(reservedMemory);
2075 
2076 	return status;
2077 }
2078 
2079 
2080 area_id
2081 vm_map_physical_memory(team_id team, const char *name, void **_address,
2082 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
2083 {
2084 	vm_area *area;
2085 	vm_cache *cache;
2086 	addr_t mapOffset;
2087 
2088 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
2089 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
2090 		name, _address, addressSpec, size, protection, physicalAddress));
2091 
2092 	if (!arch_vm_supports_protection(protection))
2093 		return B_NOT_SUPPORTED;
2094 
2095 	AddressSpaceWriteLocker locker(team);
2096 	if (!locker.IsLocked())
2097 		return B_BAD_TEAM_ID;
2098 
2099 	// if the physical address is somewhat inside a page,
2100 	// move the actual area down to align on a page boundary
2101 	mapOffset = physicalAddress % B_PAGE_SIZE;
2102 	size += mapOffset;
2103 	physicalAddress -= mapOffset;
2104 
2105 	size = PAGE_ALIGN(size);
2106 
2107 	// create an device cache
2108 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
2109 	if (status != B_OK)
2110 		return status;
2111 
2112 	// tell the page scanner to skip over this area, it's pages are special
2113 	cache->scan_skip = 1;
2114 	cache->virtual_end = size;
2115 
2116 	cache->Lock();
2117 
2118 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2119 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2120 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2121 
2122 	if (status < B_OK)
2123 		cache->ReleaseRefLocked();
2124 
2125 	cache->Unlock();
2126 
2127 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
2128 		// set requested memory type
2129 		status = arch_vm_set_memory_type(area, physicalAddress,
2130 			addressSpec & B_MTR_MASK);
2131 		if (status < B_OK)
2132 			delete_area(locker.AddressSpace(), area);
2133 	}
2134 
2135 	if (status >= B_OK) {
2136 		// make sure our area is mapped in completely
2137 
2138 		vm_translation_map *map = &locker.AddressSpace()->translation_map;
2139 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2140 			area->base + (size - 1));
2141 
2142 		vm_page_reserve_pages(reservePages);
2143 		map->ops->lock(map);
2144 
2145 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2146 			map->ops->map(map, area->base + offset, physicalAddress + offset,
2147 				protection);
2148 		}
2149 
2150 		map->ops->unlock(map);
2151 		vm_page_unreserve_pages(reservePages);
2152 	}
2153 
2154 	if (status < B_OK)
2155 		return status;
2156 
2157 	// modify the pointer returned to be offset back into the new area
2158 	// the same way the physical address in was offset
2159 	*_address = (void *)((addr_t)*_address + mapOffset);
2160 
2161 	area->cache_type = CACHE_TYPE_DEVICE;
2162 	return area->id;
2163 }
2164 
2165 
2166 area_id
2167 vm_create_null_area(team_id team, const char *name, void **address,
2168 	uint32 addressSpec, addr_t size)
2169 {
2170 	vm_area *area;
2171 	vm_cache *cache;
2172 	status_t status;
2173 
2174 	AddressSpaceWriteLocker locker(team);
2175 	if (!locker.IsLocked())
2176 		return B_BAD_TEAM_ID;
2177 
2178 	size = PAGE_ALIGN(size);
2179 
2180 	// create an null cache
2181 	status = VMCacheFactory::CreateNullCache(cache);
2182 	if (status != B_OK)
2183 		return status;
2184 
2185 	// tell the page scanner to skip over this area, no pages will be mapped here
2186 	cache->scan_skip = 1;
2187 	cache->virtual_end = size;
2188 
2189 	cache->Lock();
2190 
2191 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
2192 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name,
2193 		false, true);
2194 
2195 	if (status < B_OK) {
2196 		cache->ReleaseRefAndUnlock();
2197 		return status;
2198 	}
2199 
2200 	cache->Unlock();
2201 
2202 	area->cache_type = CACHE_TYPE_NULL;
2203 	return area->id;
2204 }
2205 
2206 
2207 /*!	Creates the vnode cache for the specified \a vnode.
2208 	The vnode has to be marked busy when calling this function.
2209 */
2210 status_t
2211 vm_create_vnode_cache(struct vnode *vnode, struct VMCache **cache)
2212 {
2213 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2214 }
2215 
2216 
2217 /*!	\a cache must be locked. The area's address space must be read-locked.
2218 */
2219 static void
2220 pre_map_area_pages(vm_area* area, VMCache* cache)
2221 {
2222 	addr_t baseAddress = area->base;
2223 	addr_t cacheOffset = area->cache_offset;
2224 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2225 	page_num_t endPage = firstPage + area->size / B_PAGE_SIZE;
2226 
2227 	for (VMCachePagesTree::Iterator it
2228 				= cache->pages.GetIterator(firstPage, true, true);
2229 			vm_page *page = it.Next();) {
2230 		if (page->cache_offset >= endPage)
2231 			break;
2232 
2233 		// skip inactive pages
2234 		if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0)
2235 			continue;
2236 
2237 		vm_map_page(area, page,
2238 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2239 			B_READ_AREA | B_KERNEL_READ_AREA);
2240 	}
2241 }
2242 
2243 
2244 /*!	Will map the file specified by \a fd to an area in memory.
2245 	The file will be mirrored beginning at the specified \a offset. The
2246 	\a offset and \a size arguments have to be page aligned.
2247 */
2248 static area_id
2249 _vm_map_file(team_id team, const char *name, void **_address, uint32 addressSpec,
2250 	size_t size, uint32 protection, uint32 mapping, int fd, off_t offset,
2251 	bool kernel)
2252 {
2253 	// TODO: for binary files, we want to make sure that they get the
2254 	//	copy of a file at a given time, ie. later changes should not
2255 	//	make it into the mapped copy -- this will need quite some changes
2256 	//	to be done in a nice way
2257 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
2258 		fd, offset, size, mapping));
2259 
2260 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
2261 	size = PAGE_ALIGN(size);
2262 
2263 	if (mapping == REGION_NO_PRIVATE_MAP)
2264 		protection |= B_SHARED_AREA;
2265 
2266 	if (fd < 0) {
2267 		uint32 flags = addressSpec == B_EXACT_ADDRESS
2268 			? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2269 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
2270 			B_NO_LOCK, protection, flags, kernel);
2271 	}
2272 
2273 	// get the open flags of the FD
2274 	file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2275 	if (descriptor == NULL)
2276 		return EBADF;
2277 	int32 openMode = descriptor->open_mode;
2278 	put_fd(descriptor);
2279 
2280 	// The FD must open for reading at any rate. For shared mapping with write
2281 	// access, additionally the FD must be open for writing.
2282 	if ((openMode & O_ACCMODE) == O_WRONLY
2283 		|| mapping == REGION_NO_PRIVATE_MAP
2284 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2285 			&& (openMode & O_ACCMODE) == O_RDONLY) {
2286 		return EACCES;
2287 	}
2288 
2289 	// get the vnode for the object, this also grabs a ref to it
2290 	struct vnode *vnode = NULL;
2291 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2292 	if (status < B_OK)
2293 		return status;
2294 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2295 
2296 	// If we're going to pre-map pages, we need to reserve the pages needed by
2297 	// the mapping backend upfront.
2298 	page_num_t reservedPreMapPages = 0;
2299 	if ((protection & B_READ_AREA) != 0) {
2300 		AddressSpaceWriteLocker locker;
2301 		status = locker.SetTo(team);
2302 		if (status != B_OK)
2303 			return status;
2304 
2305 		vm_translation_map *map = &locker.AddressSpace()->translation_map;
2306 		reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
2307 
2308 		locker.Unlock();
2309 
2310 		vm_page_reserve_pages(reservedPreMapPages);
2311 	}
2312 
2313 	struct PageUnreserver {
2314 		PageUnreserver(page_num_t count)
2315 			: fCount(count)
2316 		{
2317 		}
2318 
2319 		~PageUnreserver()
2320 		{
2321 			if (fCount > 0)
2322 				vm_page_unreserve_pages(fCount);
2323 		}
2324 
2325 		page_num_t	fCount;
2326 	} pageUnreserver(reservedPreMapPages);
2327 
2328 	AddressSpaceWriteLocker locker(team);
2329 	if (!locker.IsLocked())
2330 		return B_BAD_TEAM_ID;
2331 
2332 	// TODO: this only works for file systems that use the file cache
2333 	vm_cache *cache;
2334 	status = vfs_get_vnode_cache(vnode, &cache, false);
2335 	if (status < B_OK)
2336 		return status;
2337 
2338 	cache->Lock();
2339 
2340 	vm_area *area;
2341 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2342 		offset, size, addressSpec, 0, protection, mapping, &area, name,
2343 		addressSpec == B_EXACT_ADDRESS, kernel);
2344 
2345 	if (status < B_OK || mapping == REGION_PRIVATE_MAP) {
2346 		// map_backing_store() cannot know we no longer need the ref
2347 		cache->ReleaseRefLocked();
2348 	}
2349 
2350 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2351 		pre_map_area_pages(area, cache);
2352 
2353 	cache->Unlock();
2354 
2355 	if (status < B_OK)
2356 		return status;
2357 
2358 	area->cache_type = CACHE_TYPE_VNODE;
2359 	return area->id;
2360 }
2361 
2362 
2363 area_id
2364 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec,
2365 	addr_t size, uint32 protection, uint32 mapping, int fd, off_t offset)
2366 {
2367 	if (!arch_vm_supports_protection(protection))
2368 		return B_NOT_SUPPORTED;
2369 
2370 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2371 		mapping, fd, offset, true);
2372 }
2373 
2374 
2375 vm_cache *
2376 vm_area_get_locked_cache(vm_area *area)
2377 {
2378 	mutex_lock(&sAreaCacheLock);
2379 
2380 	while (true) {
2381 		vm_cache* cache = area->cache;
2382 
2383 		if (!cache->SwitchLock(&sAreaCacheLock)) {
2384 			// cache has been deleted
2385 			mutex_lock(&sAreaCacheLock);
2386 			continue;
2387 		}
2388 
2389 		mutex_lock(&sAreaCacheLock);
2390 
2391 		if (cache == area->cache) {
2392 			cache->AcquireRefLocked();
2393 			mutex_unlock(&sAreaCacheLock);
2394 			return cache;
2395 		}
2396 
2397 		// the cache changed in the meantime
2398 		cache->Unlock();
2399 	}
2400 }
2401 
2402 
2403 void
2404 vm_area_put_locked_cache(vm_cache *cache)
2405 {
2406 	cache->ReleaseRefAndUnlock();
2407 }
2408 
2409 
2410 area_id
2411 vm_clone_area(team_id team, const char *name, void **address,
2412 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2413 	bool kernel)
2414 {
2415 	vm_area *newArea = NULL;
2416 	vm_area *sourceArea;
2417 
2418 	// Check whether the source area exists and is cloneable. If so, mark it
2419 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2420 	{
2421 		AddressSpaceWriteLocker locker;
2422 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2423 		if (status != B_OK)
2424 			return status;
2425 
2426 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2427 			return B_NOT_ALLOWED;
2428 
2429 		sourceArea->protection |= B_SHARED_AREA;
2430 		protection |= B_SHARED_AREA;
2431 	}
2432 
2433 	// Now lock both address spaces and actually do the cloning.
2434 
2435 	MultiAddressSpaceLocker locker;
2436 	vm_address_space *sourceAddressSpace;
2437 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2438 	if (status != B_OK)
2439 		return status;
2440 
2441 	vm_address_space *targetAddressSpace;
2442 	status = locker.AddTeam(team, true, &targetAddressSpace);
2443 	if (status != B_OK)
2444 		return status;
2445 
2446 	status = locker.Lock();
2447 	if (status != B_OK)
2448 		return status;
2449 
2450 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2451 	if (sourceArea == NULL)
2452 		return B_BAD_VALUE;
2453 
2454 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2455 		return B_NOT_ALLOWED;
2456 
2457 	vm_cache *cache = vm_area_get_locked_cache(sourceArea);
2458 
2459 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
2460 	//	have been adapted. Maybe it should be part of the kernel settings,
2461 	//	anyway (so that old drivers can always work).
2462 #if 0
2463 	if (sourceArea->aspace == vm_kernel_address_space() && addressSpace != vm_kernel_address_space()
2464 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2465 		// kernel areas must not be cloned in userland, unless explicitly
2466 		// declared user-cloneable upon construction
2467 		status = B_NOT_ALLOWED;
2468 	} else
2469 #endif
2470 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2471 		status = B_NOT_ALLOWED;
2472 	else {
2473 		status = map_backing_store(targetAddressSpace, cache, address,
2474 			sourceArea->cache_offset, sourceArea->size, addressSpec,
2475 			sourceArea->wiring, protection, mapping, &newArea, name, false,
2476 			kernel);
2477 	}
2478 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2479 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2480 		// to create a new cache, and has therefore already acquired a reference
2481 		// to the source cache - but otherwise it has no idea that we need
2482 		// one.
2483 		cache->AcquireRefLocked();
2484 	}
2485 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2486 		// we need to map in everything at this point
2487 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2488 			// we don't have actual pages to map but a physical area
2489 			vm_translation_map *map = &sourceArea->address_space->translation_map;
2490 			map->ops->lock(map);
2491 
2492 			addr_t physicalAddress;
2493 			uint32 oldProtection;
2494 			map->ops->query(map, sourceArea->base, &physicalAddress,
2495 				&oldProtection);
2496 
2497 			map->ops->unlock(map);
2498 
2499 			map = &targetAddressSpace->translation_map;
2500 			size_t reservePages = map->ops->map_max_pages_need(map,
2501 				newArea->base, newArea->base + (newArea->size - 1));
2502 
2503 			vm_page_reserve_pages(reservePages);
2504 			map->ops->lock(map);
2505 
2506 			for (addr_t offset = 0; offset < newArea->size;
2507 					offset += B_PAGE_SIZE) {
2508 				map->ops->map(map, newArea->base + offset,
2509 					physicalAddress + offset, protection);
2510 			}
2511 
2512 			map->ops->unlock(map);
2513 			vm_page_unreserve_pages(reservePages);
2514 		} else {
2515 			vm_translation_map *map = &targetAddressSpace->translation_map;
2516 			size_t reservePages = map->ops->map_max_pages_need(map,
2517 				newArea->base, newArea->base + (newArea->size - 1));
2518 			vm_page_reserve_pages(reservePages);
2519 
2520 			// map in all pages from source
2521 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2522 					vm_page* page  = it.Next();) {
2523 				vm_map_page(newArea, page, newArea->base
2524 					+ ((page->cache_offset << PAGE_SHIFT)
2525 					- newArea->cache_offset), protection);
2526 			}
2527 
2528 			vm_page_unreserve_pages(reservePages);
2529 		}
2530 	}
2531 	if (status == B_OK)
2532 		newArea->cache_type = sourceArea->cache_type;
2533 
2534 	vm_area_put_locked_cache(cache);
2535 
2536 	if (status < B_OK)
2537 		return status;
2538 
2539 	return newArea->id;
2540 }
2541 
2542 
2543 //! The address space must be write locked at this point
2544 static void
2545 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area)
2546 {
2547 	vm_area *temp, *last = NULL;
2548 
2549 	temp = addressSpace->areas;
2550 	while (temp != NULL) {
2551 		if (area == temp) {
2552 			if (last != NULL) {
2553 				last->address_space_next = temp->address_space_next;
2554 			} else {
2555 				addressSpace->areas = temp->address_space_next;
2556 			}
2557 			addressSpace->change_count++;
2558 			break;
2559 		}
2560 		last = temp;
2561 		temp = temp->address_space_next;
2562 	}
2563 	if (area == addressSpace->area_hint)
2564 		addressSpace->area_hint = NULL;
2565 
2566 	if (temp == NULL)
2567 		panic("vm_area_release_ref: area not found in aspace's area list\n");
2568 }
2569 
2570 
2571 static void
2572 delete_area(vm_address_space *addressSpace, vm_area *area)
2573 {
2574 	rw_lock_write_lock(&sAreaHashLock);
2575 	hash_remove(sAreaHash, area);
2576 	rw_lock_write_unlock(&sAreaHashLock);
2577 
2578 	// At this point the area is removed from the global hash table, but
2579 	// still exists in the area list.
2580 
2581 	// Unmap the virtual address space the area occupied
2582 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2583 
2584 	if (!area->cache->temporary)
2585 		area->cache->WriteModified();
2586 
2587 	arch_vm_unset_memory_type(area);
2588 	remove_area_from_address_space(addressSpace, area);
2589 	vm_put_address_space(addressSpace);
2590 
2591 	area->cache->RemoveArea(area);
2592 	area->cache->ReleaseRef();
2593 
2594 	free(area->page_protections);
2595 	free(area->name);
2596 	free(area);
2597 }
2598 
2599 
2600 status_t
2601 vm_delete_area(team_id team, area_id id, bool kernel)
2602 {
2603 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2604 
2605 	AddressSpaceWriteLocker locker;
2606 	vm_area *area;
2607 	status_t status = locker.SetFromArea(team, id, area);
2608 	if (status < B_OK)
2609 		return status;
2610 
2611 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2612 		return B_NOT_ALLOWED;
2613 
2614 	delete_area(locker.AddressSpace(), area);
2615 	return B_OK;
2616 }
2617 
2618 
2619 /*!	Creates a new cache on top of given cache, moves all areas from
2620 	the old cache to the new one, and changes the protection of all affected
2621 	areas' pages to read-only.
2622 	Preconditions:
2623 	- The given cache must be locked.
2624 	- All of the cache's areas' address spaces must be read locked.
2625 	- All of the cache's areas must have a clear \c no_cache_change flags.
2626 */
2627 static status_t
2628 vm_copy_on_write_area(vm_cache* lowerCache)
2629 {
2630 	vm_cache *upperCache;
2631 
2632 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2633 
2634 	// We need to separate the cache from its areas. The cache goes one level
2635 	// deeper and we create a new cache inbetween.
2636 
2637 	// create an anonymous cache
2638 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2639 		0, true);
2640 	if (status != B_OK)
2641 		return status;
2642 
2643 	upperCache->Lock();
2644 
2645 	upperCache->temporary = 1;
2646 	upperCache->scan_skip = lowerCache->scan_skip;
2647 	upperCache->virtual_base = lowerCache->virtual_base;
2648 	upperCache->virtual_end = lowerCache->virtual_end;
2649 
2650 	// transfer the lower cache areas to the upper cache
2651 	mutex_lock(&sAreaCacheLock);
2652 
2653 	upperCache->areas = lowerCache->areas;
2654 	lowerCache->areas = NULL;
2655 
2656 	for (vm_area *tempArea = upperCache->areas; tempArea != NULL;
2657 			tempArea = tempArea->cache_next) {
2658 		ASSERT(!tempArea->no_cache_change);
2659 
2660 		tempArea->cache = upperCache;
2661 		upperCache->AcquireRefLocked();
2662 		lowerCache->ReleaseRefLocked();
2663 	}
2664 
2665 	mutex_unlock(&sAreaCacheLock);
2666 
2667 	lowerCache->AddConsumer(upperCache);
2668 
2669 	// We now need to remap all pages from all of the cache's areas read-only, so that
2670 	// a copy will be created on next write access
2671 
2672 	for (vm_area *tempArea = upperCache->areas; tempArea != NULL;
2673 			tempArea = tempArea->cache_next) {
2674 		// The area must be readable in the same way it was previously writable
2675 		uint32 protection = B_KERNEL_READ_AREA;
2676 		if (tempArea->protection & B_READ_AREA)
2677 			protection |= B_READ_AREA;
2678 
2679 		vm_translation_map *map = &tempArea->address_space->translation_map;
2680 		map->ops->lock(map);
2681 		map->ops->protect(map, tempArea->base, tempArea->base - 1 + tempArea->size, protection);
2682 		map->ops->unlock(map);
2683 	}
2684 
2685 	vm_area_put_locked_cache(upperCache);
2686 
2687 	return B_OK;
2688 }
2689 
2690 
2691 area_id
2692 vm_copy_area(team_id team, const char *name, void **_address,
2693 	uint32 addressSpec, uint32 protection, area_id sourceID)
2694 {
2695 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2696 
2697 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2698 		// set the same protection for the kernel as for userland
2699 		protection |= B_KERNEL_READ_AREA;
2700 		if (writableCopy)
2701 			protection |= B_KERNEL_WRITE_AREA;
2702 	}
2703 
2704 	// Do the locking: target address space, all address spaces associated with
2705 	// the source cache, and the cache itself.
2706 	MultiAddressSpaceLocker locker;
2707 	vm_address_space *targetAddressSpace;
2708 	vm_cache *cache;
2709 	vm_area* source;
2710 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2711 	if (status == B_OK) {
2712 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2713 			&cache, true);
2714 	}
2715 	if (status != B_OK)
2716 		return status;
2717 
2718 	AreaCacheLocker cacheLocker(cache);	// already locked
2719 
2720 	if (addressSpec == B_CLONE_ADDRESS) {
2721 		addressSpec = B_EXACT_ADDRESS;
2722 		*_address = (void *)source->base;
2723 	}
2724 
2725 	bool sharedArea = (source->protection & B_SHARED_AREA) != 0;
2726 
2727 	// First, create a cache on top of the source area, respectively use the
2728 	// existing one, if this is a shared area.
2729 
2730 	vm_area *target;
2731 	status = map_backing_store(targetAddressSpace, cache, _address,
2732 		source->cache_offset, source->size, addressSpec, source->wiring,
2733 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2734 		&target, name, false, true);
2735 	if (status < B_OK)
2736 		return status;
2737 
2738 	if (sharedArea) {
2739 		// The new area uses the old area's cache, but map_backing_store()
2740 		// hasn't acquired a ref. So we have to do that now.
2741 		cache->AcquireRefLocked();
2742 	}
2743 
2744 	// If the source area is writable, we need to move it one layer up as well
2745 
2746 	if (!sharedArea) {
2747 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2748 			// TODO: do something more useful if this fails!
2749 			if (vm_copy_on_write_area(cache) < B_OK)
2750 				panic("vm_copy_on_write_area() failed!\n");
2751 		}
2752 	}
2753 
2754 	// we return the ID of the newly created area
2755 	return target->id;
2756 }
2757 
2758 
2759 //! You need to hold the cache lock when calling this function
2760 static int32
2761 count_writable_areas(vm_cache *cache, vm_area *ignoreArea)
2762 {
2763 	struct vm_area *area = cache->areas;
2764 	uint32 count = 0;
2765 
2766 	for (; area != NULL; area = area->cache_next) {
2767 		if (area != ignoreArea
2768 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2769 			count++;
2770 	}
2771 
2772 	return count;
2773 }
2774 
2775 
2776 static status_t
2777 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2778 	bool kernel)
2779 {
2780 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = %#lx)\n",
2781 		team, areaID, newProtection));
2782 
2783 	if (!arch_vm_supports_protection(newProtection))
2784 		return B_NOT_SUPPORTED;
2785 
2786 	// lock address spaces and cache
2787 	MultiAddressSpaceLocker locker;
2788 	vm_cache *cache;
2789 	vm_area* area;
2790 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2791 		&cache, true);
2792 	AreaCacheLocker cacheLocker(cache);	// already locked
2793 
2794 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2795 		return B_NOT_ALLOWED;
2796 
2797 	if (area->protection == newProtection)
2798 		return B_OK;
2799 
2800 	if (team != vm_kernel_address_space_id()
2801 		&& area->address_space->id != team) {
2802 		// unless you're the kernel, you are only allowed to set
2803 		// the protection of your own areas
2804 		return B_NOT_ALLOWED;
2805 	}
2806 
2807 	bool changePageProtection = true;
2808 
2809 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2810 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2811 		// writable -> !writable
2812 
2813 		if (cache->source != NULL && cache->temporary) {
2814 			if (count_writable_areas(cache, area) == 0) {
2815 				// Since this cache now lives from the pages in its source cache,
2816 				// we can change the cache's commitment to take only those pages
2817 				// into account that really are in this cache.
2818 
2819 				status = cache->Commit(cache->page_count * B_PAGE_SIZE);
2820 
2821 				// ToDo: we may be able to join with our source cache, if count == 0
2822 			}
2823 		}
2824 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
2825 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
2826 		// !writable -> writable
2827 
2828 		if (!list_is_empty(&cache->consumers)) {
2829 			// There are consumers -- we have to insert a new cache. Fortunately
2830 			// vm_copy_on_write_area() does everything that's needed.
2831 			changePageProtection = false;
2832 			status = vm_copy_on_write_area(cache);
2833 		} else {
2834 			// No consumers, so we don't need to insert a new one.
2835 			if (cache->source != NULL && cache->temporary) {
2836 				// the cache's commitment must contain all possible pages
2837 				status = cache->Commit(cache->virtual_end
2838 					- cache->virtual_base);
2839 			}
2840 
2841 			if (status == B_OK && cache->source != NULL) {
2842 				// There's a source cache, hence we can't just change all pages'
2843 				// protection or we might allow writing into pages belonging to
2844 				// a lower cache.
2845 				changePageProtection = false;
2846 
2847 				struct vm_translation_map *map
2848 					= &area->address_space->translation_map;
2849 				map->ops->lock(map);
2850 
2851 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2852 						vm_page* page = it.Next();) {
2853 					addr_t address = area->base
2854 						+ (page->cache_offset << PAGE_SHIFT);
2855 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
2856 						newProtection);
2857 				}
2858 
2859 				map->ops->unlock(map);
2860 			}
2861 		}
2862 	} else {
2863 		// we don't have anything special to do in all other cases
2864 	}
2865 
2866 	if (status == B_OK) {
2867 		// remap existing pages in this cache
2868 		struct vm_translation_map *map = &area->address_space->translation_map;
2869 
2870 		if (changePageProtection) {
2871 			map->ops->lock(map);
2872 			map->ops->protect(map, area->base, area->base + area->size,
2873 				newProtection);
2874 			map->ops->unlock(map);
2875 		}
2876 
2877 		area->protection = newProtection;
2878 	}
2879 
2880 	return status;
2881 }
2882 
2883 
2884 status_t
2885 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t *paddr)
2886 {
2887 	vm_address_space *addressSpace = vm_get_address_space(team);
2888 	if (addressSpace == NULL)
2889 		return B_BAD_TEAM_ID;
2890 
2891 	uint32 dummyFlags;
2892 	status_t status = addressSpace->translation_map.ops->query(
2893 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
2894 
2895 	vm_put_address_space(addressSpace);
2896 	return status;
2897 }
2898 
2899 
2900 static inline addr_t
2901 virtual_page_address(vm_area *area, vm_page *page)
2902 {
2903 	return area->base
2904 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
2905 }
2906 
2907 
2908 bool
2909 vm_test_map_modification(vm_page *page)
2910 {
2911 	MutexLocker locker(sMappingLock);
2912 
2913 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2914 	vm_page_mapping *mapping;
2915 	while ((mapping = iterator.Next()) != NULL) {
2916 		vm_area *area = mapping->area;
2917 		vm_translation_map *map = &area->address_space->translation_map;
2918 
2919 		addr_t physicalAddress;
2920 		uint32 flags;
2921 		map->ops->lock(map);
2922 		map->ops->query(map, virtual_page_address(area, page),
2923 			&physicalAddress, &flags);
2924 		map->ops->unlock(map);
2925 
2926 		if (flags & PAGE_MODIFIED)
2927 			return true;
2928 	}
2929 
2930 	return false;
2931 }
2932 
2933 
2934 int32
2935 vm_test_map_activation(vm_page *page, bool *_modified)
2936 {
2937 	int32 activation = 0;
2938 	bool modified = false;
2939 
2940 	MutexLocker locker(sMappingLock);
2941 
2942 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2943 	vm_page_mapping *mapping;
2944 	while ((mapping = iterator.Next()) != NULL) {
2945 		vm_area *area = mapping->area;
2946 		vm_translation_map *map = &area->address_space->translation_map;
2947 
2948 		addr_t physicalAddress;
2949 		uint32 flags;
2950 		map->ops->lock(map);
2951 		map->ops->query(map, virtual_page_address(area, page),
2952 			&physicalAddress, &flags);
2953 		map->ops->unlock(map);
2954 
2955 		if (flags & PAGE_ACCESSED)
2956 			activation++;
2957 		if (flags & PAGE_MODIFIED)
2958 			modified = true;
2959 	}
2960 
2961 	if (_modified != NULL)
2962 		*_modified = modified;
2963 
2964 	return activation;
2965 }
2966 
2967 
2968 void
2969 vm_clear_map_flags(vm_page *page, uint32 flags)
2970 {
2971 	MutexLocker locker(sMappingLock);
2972 
2973 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2974 	vm_page_mapping *mapping;
2975 	while ((mapping = iterator.Next()) != NULL) {
2976 		vm_area *area = mapping->area;
2977 		vm_translation_map *map = &area->address_space->translation_map;
2978 
2979 		map->ops->lock(map);
2980 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
2981 		map->ops->unlock(map);
2982 	}
2983 }
2984 
2985 
2986 /*!	Removes all mappings from a page.
2987 	After you've called this function, the page is unmapped from memory.
2988 	The accumulated page flags of all mappings can be found in \a _flags.
2989 */
2990 void
2991 vm_remove_all_page_mappings(vm_page *page, uint32 *_flags)
2992 {
2993 	uint32 accumulatedFlags = 0;
2994 	MutexLocker locker(sMappingLock);
2995 
2996 	vm_page_mappings queue;
2997 	queue.MoveFrom(&page->mappings);
2998 
2999 	vm_page_mappings::Iterator iterator = queue.GetIterator();
3000 	vm_page_mapping *mapping;
3001 	while ((mapping = iterator.Next()) != NULL) {
3002 		vm_area *area = mapping->area;
3003 		vm_translation_map *map = &area->address_space->translation_map;
3004 		addr_t physicalAddress;
3005 		uint32 flags;
3006 
3007 		map->ops->lock(map);
3008 		addr_t address = virtual_page_address(area, page);
3009 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
3010 		map->ops->flush(map);
3011 		map->ops->query(map, address, &physicalAddress, &flags);
3012 		map->ops->unlock(map);
3013 
3014 		area->mappings.Remove(mapping);
3015 
3016 		accumulatedFlags |= flags;
3017 	}
3018 
3019 	if (page->wired_count == 0 && !queue.IsEmpty())
3020 		atomic_add(&gMappedPagesCount, -1);
3021 
3022 	locker.Unlock();
3023 
3024 	// free now unused mappings
3025 
3026 	while ((mapping = queue.RemoveHead()) != NULL) {
3027 		free(mapping);
3028 	}
3029 
3030 	if (_flags != NULL)
3031 		*_flags = accumulatedFlags;
3032 }
3033 
3034 
3035 status_t
3036 vm_unmap_pages(vm_area *area, addr_t base, size_t size, bool preserveModified)
3037 {
3038 	vm_translation_map *map = &area->address_space->translation_map;
3039 	addr_t end = base + (size - 1);
3040 
3041 	map->ops->lock(map);
3042 
3043 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
3044 		// iterate through all pages and decrease their wired count
3045 		for (addr_t virtualAddress = base; virtualAddress < end;
3046 				virtualAddress += B_PAGE_SIZE) {
3047 			addr_t physicalAddress;
3048 			uint32 flags;
3049 			status_t status = map->ops->query(map, virtualAddress,
3050 				&physicalAddress, &flags);
3051 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3052 				continue;
3053 
3054 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3055 			if (page == NULL) {
3056 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3057 					physicalAddress);
3058 			}
3059 
3060 			decrement_page_wired_count(page);
3061 		}
3062 	}
3063 
3064 	map->ops->unmap(map, base, end);
3065 	if (preserveModified) {
3066 		map->ops->flush(map);
3067 
3068 		for (addr_t virtualAddress = base; virtualAddress < end;
3069 				virtualAddress += B_PAGE_SIZE) {
3070 			addr_t physicalAddress;
3071 			uint32 flags;
3072 			status_t status = map->ops->query(map, virtualAddress,
3073 				&physicalAddress, &flags);
3074 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3075 				continue;
3076 
3077 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3078 			if (page == NULL) {
3079 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3080 					physicalAddress);
3081 			}
3082 
3083 			if ((flags & PAGE_MODIFIED) != 0
3084 				&& page->state != PAGE_STATE_MODIFIED)
3085 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
3086 		}
3087 	}
3088 	map->ops->unlock(map);
3089 
3090 	if (area->wiring == B_NO_LOCK) {
3091 		uint32 startOffset = (area->cache_offset + base - area->base)
3092 			>> PAGE_SHIFT;
3093 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
3094 		vm_page_mapping *mapping;
3095 		vm_area_mappings queue;
3096 
3097 		mutex_lock(&sMappingLock);
3098 		map->ops->lock(map);
3099 
3100 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3101 		while (iterator.HasNext()) {
3102 			mapping = iterator.Next();
3103 
3104 			vm_page *page = mapping->page;
3105 			if (page->cache_offset < startOffset
3106 				|| page->cache_offset >= endOffset)
3107 				continue;
3108 
3109 			page->mappings.Remove(mapping);
3110 			iterator.Remove();
3111 
3112 			if (page->mappings.IsEmpty() && page->wired_count == 0)
3113 				atomic_add(&gMappedPagesCount, -1);
3114 
3115 			queue.Add(mapping);
3116 		}
3117 
3118 		map->ops->unlock(map);
3119 		mutex_unlock(&sMappingLock);
3120 
3121 		while ((mapping = queue.RemoveHead()) != NULL) {
3122 			free(mapping);
3123 		}
3124 	}
3125 
3126 	return B_OK;
3127 }
3128 
3129 
3130 /*!	When calling this function, you need to have pages reserved! */
3131 status_t
3132 vm_map_page(vm_area *area, vm_page *page, addr_t address, uint32 protection)
3133 {
3134 	vm_translation_map *map = &area->address_space->translation_map;
3135 	vm_page_mapping *mapping = NULL;
3136 
3137 	if (area->wiring == B_NO_LOCK) {
3138 		mapping = (vm_page_mapping *)malloc_nogrow(sizeof(vm_page_mapping));
3139 		if (mapping == NULL)
3140 			return B_NO_MEMORY;
3141 
3142 		mapping->page = page;
3143 		mapping->area = area;
3144 	}
3145 
3146 	map->ops->lock(map);
3147 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
3148 		protection);
3149 	map->ops->unlock(map);
3150 
3151 	if (area->wiring != B_NO_LOCK) {
3152 		increment_page_wired_count(page);
3153 	} else {
3154 		// insert mapping into lists
3155 		MutexLocker locker(sMappingLock);
3156 
3157 		if (page->mappings.IsEmpty() && page->wired_count == 0)
3158 			atomic_add(&gMappedPagesCount, 1);
3159 
3160 		page->mappings.Add(mapping);
3161 		area->mappings.Add(mapping);
3162 	}
3163 
3164 	if (page->usage_count < 0)
3165 		page->usage_count = 1;
3166 
3167 	if (page->state != PAGE_STATE_MODIFIED)
3168 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
3169 
3170 	return B_OK;
3171 }
3172 
3173 
3174 static int
3175 display_mem(int argc, char **argv)
3176 {
3177 	bool physical = false;
3178 	addr_t copyAddress;
3179 	int32 displayWidth;
3180 	int32 itemSize;
3181 	int32 num = -1;
3182 	addr_t address;
3183 	int i = 1, j;
3184 
3185 	if (argc > 1 && argv[1][0] == '-') {
3186 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3187 			physical = true;
3188 			i++;
3189 		} else
3190 			i = 99;
3191 	}
3192 
3193 	if (argc < i + 1 || argc > i + 2) {
3194 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3195 			"\tdl - 8 bytes\n"
3196 			"\tdw - 4 bytes\n"
3197 			"\tds - 2 bytes\n"
3198 			"\tdb - 1 byte\n"
3199 			"\tstring - a whole string\n"
3200 			"  -p or --physical only allows memory from a single page to be "
3201 			"displayed.\n");
3202 		return 0;
3203 	}
3204 
3205 	address = parse_expression(argv[i]);
3206 
3207 	if (argc > i + 1)
3208 		num = parse_expression(argv[i + 1]);
3209 
3210 	// build the format string
3211 	if (strcmp(argv[0], "db") == 0) {
3212 		itemSize = 1;
3213 		displayWidth = 16;
3214 	} else if (strcmp(argv[0], "ds") == 0) {
3215 		itemSize = 2;
3216 		displayWidth = 8;
3217 	} else if (strcmp(argv[0], "dw") == 0) {
3218 		itemSize = 4;
3219 		displayWidth = 4;
3220 	} else if (strcmp(argv[0], "dl") == 0) {
3221 		itemSize = 8;
3222 		displayWidth = 2;
3223 	} else if (strcmp(argv[0], "string") == 0) {
3224 		itemSize = 1;
3225 		displayWidth = -1;
3226 	} else {
3227 		kprintf("display_mem called in an invalid way!\n");
3228 		return 0;
3229 	}
3230 
3231 	if (num <= 0)
3232 		num = displayWidth;
3233 
3234 	void* physicalPageHandle = NULL;
3235 
3236 	if (physical) {
3237 		int32 offset = address & (B_PAGE_SIZE - 1);
3238 		if (num * itemSize + offset > B_PAGE_SIZE) {
3239 			num = (B_PAGE_SIZE - offset) / itemSize;
3240 			kprintf("NOTE: number of bytes has been cut to page size\n");
3241 		}
3242 
3243 		address = ROUNDOWN(address, B_PAGE_SIZE);
3244 
3245 		if (vm_get_physical_page_debug(address, &copyAddress,
3246 				&physicalPageHandle) != B_OK) {
3247 			kprintf("getting the hardware page failed.");
3248 			return 0;
3249 		}
3250 
3251 		address += offset;
3252 		copyAddress += offset;
3253 	} else
3254 		copyAddress = address;
3255 
3256 	if (!strcmp(argv[0], "string")) {
3257 		kprintf("%p \"", (char*)copyAddress);
3258 
3259 		// string mode
3260 		for (i = 0; true; i++) {
3261 			char c;
3262 			if (user_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
3263 				|| c == '\0')
3264 				break;
3265 
3266 			if (c == '\n')
3267 				kprintf("\\n");
3268 			else if (c == '\t')
3269 				kprintf("\\t");
3270 			else {
3271 				if (!isprint(c))
3272 					c = '.';
3273 
3274 				kprintf("%c", c);
3275 			}
3276 		}
3277 
3278 		kprintf("\"\n");
3279 	} else {
3280 		// number mode
3281 		for (i = 0; i < num; i++) {
3282 			uint32 value;
3283 
3284 			if ((i % displayWidth) == 0) {
3285 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3286 				if (i != 0)
3287 					kprintf("\n");
3288 
3289 				kprintf("[0x%lx]  ", address + i * itemSize);
3290 
3291 				for (j = 0; j < displayed; j++) {
3292 					char c;
3293 					if (user_memcpy(&c, (char*)copyAddress + i * itemSize + j,
3294 							1) != B_OK) {
3295 						displayed = j;
3296 						break;
3297 					}
3298 					if (!isprint(c))
3299 						c = '.';
3300 
3301 					kprintf("%c", c);
3302 				}
3303 				if (num > displayWidth) {
3304 					// make sure the spacing in the last line is correct
3305 					for (j = displayed; j < displayWidth * itemSize; j++)
3306 						kprintf(" ");
3307 				}
3308 				kprintf("  ");
3309 			}
3310 
3311 			if (user_memcpy(&value, (uint8*)copyAddress + i * itemSize,
3312 					itemSize) != B_OK) {
3313 				kprintf("read fault");
3314 				break;
3315 			}
3316 
3317 			switch (itemSize) {
3318 				case 1:
3319 					kprintf(" %02x", *(uint8 *)&value);
3320 					break;
3321 				case 2:
3322 					kprintf(" %04x", *(uint16 *)&value);
3323 					break;
3324 				case 4:
3325 					kprintf(" %08lx", *(uint32 *)&value);
3326 					break;
3327 				case 8:
3328 					kprintf(" %016Lx", *(uint64 *)&value);
3329 					break;
3330 			}
3331 		}
3332 
3333 		kprintf("\n");
3334 	}
3335 
3336 	if (physical) {
3337 		copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE);
3338 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3339 	}
3340 	return 0;
3341 }
3342 
3343 
3344 static void
3345 dump_cache_tree_recursively(vm_cache* cache, int level,
3346 	vm_cache* highlightCache)
3347 {
3348 	// print this cache
3349 	for (int i = 0; i < level; i++)
3350 		kprintf("  ");
3351 	if (cache == highlightCache)
3352 		kprintf("%p <--\n", cache);
3353 	else
3354 		kprintf("%p\n", cache);
3355 
3356 	// recursively print its consumers
3357 	vm_cache* consumer = NULL;
3358 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers,
3359 			consumer)) != NULL) {
3360 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3361 	}
3362 }
3363 
3364 
3365 static int
3366 dump_cache_tree(int argc, char **argv)
3367 {
3368 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3369 		kprintf("usage: %s <address>\n", argv[0]);
3370 		return 0;
3371 	}
3372 
3373 	addr_t address = parse_expression(argv[1]);
3374 	if (address == 0)
3375 		return 0;
3376 
3377 	vm_cache *cache = (vm_cache *)address;
3378 	vm_cache *root = cache;
3379 
3380 	// find the root cache (the transitive source)
3381 	while (root->source != NULL)
3382 		root = root->source;
3383 
3384 	dump_cache_tree_recursively(root, 0, cache);
3385 
3386 	return 0;
3387 }
3388 
3389 
3390 static const char *
3391 cache_type_to_string(int32 type)
3392 {
3393 	switch (type) {
3394 		case CACHE_TYPE_RAM:
3395 			return "RAM";
3396 		case CACHE_TYPE_DEVICE:
3397 			return "device";
3398 		case CACHE_TYPE_VNODE:
3399 			return "vnode";
3400 		case CACHE_TYPE_NULL:
3401 			return "null";
3402 
3403 		default:
3404 			return "unknown";
3405 	}
3406 }
3407 
3408 
3409 #if DEBUG_CACHE_LIST
3410 
3411 static void
3412 update_cache_info_recursively(vm_cache* cache, cache_info& info)
3413 {
3414 	info.page_count += cache->page_count;
3415 	if (cache->type == CACHE_TYPE_RAM)
3416 		info.committed += cache->committed_size;
3417 
3418 	// recurse
3419 	vm_cache* consumer = NULL;
3420 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers,
3421 			consumer)) != NULL) {
3422 		update_cache_info_recursively(consumer, info);
3423 	}
3424 }
3425 
3426 
3427 static int
3428 cache_info_compare_page_count(const void* _a, const void* _b)
3429 {
3430 	const cache_info* a = (const cache_info*)_a;
3431 	const cache_info* b = (const cache_info*)_b;
3432 	if (a->page_count == b->page_count)
3433 		return 0;
3434 	return a->page_count < b->page_count ? 1 : -1;
3435 }
3436 
3437 
3438 static int
3439 cache_info_compare_committed(const void* _a, const void* _b)
3440 {
3441 	const cache_info* a = (const cache_info*)_a;
3442 	const cache_info* b = (const cache_info*)_b;
3443 	if (a->committed == b->committed)
3444 		return 0;
3445 	return a->committed < b->committed ? 1 : -1;
3446 }
3447 
3448 
3449 static void
3450 dump_caches_recursively(vm_cache* cache, cache_info& info, int level)
3451 {
3452 	for (int i = 0; i < level; i++)
3453 		kprintf("  ");
3454 
3455 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3456 		cache_type_to_string(cache->type), cache->virtual_base,
3457 		cache->virtual_end, cache->page_count);
3458 
3459 	if (level == 0)
3460 		kprintf("/%lu", info.page_count);
3461 
3462 	if (cache->type == CACHE_TYPE_RAM || level == 0 && info.committed > 0) {
3463 		kprintf(", committed: %lld", cache->committed_size);
3464 
3465 		if (level == 0)
3466 			kprintf("/%lu", info.committed);
3467 	}
3468 
3469 	// areas
3470 	if (cache->areas != NULL) {
3471 		vm_area* area = cache->areas;
3472 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3473 			area->address_space->id);
3474 
3475 		while (area->cache_next != NULL) {
3476 			area = area->cache_next;
3477 			kprintf(", %ld", area->id);
3478 		}
3479 	}
3480 
3481 	kputs("\n");
3482 
3483 	// recurse
3484 	vm_cache* consumer = NULL;
3485 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers,
3486 			consumer)) != NULL) {
3487 		dump_caches_recursively(consumer, info, level + 1);
3488 	}
3489 }
3490 
3491 
3492 static int
3493 dump_caches(int argc, char **argv)
3494 {
3495 	if (sCacheInfoTable == NULL) {
3496 		kprintf("No cache info table!\n");
3497 		return 0;
3498 	}
3499 
3500 	bool sortByPageCount = true;
3501 
3502 	for (int32 i = 1; i < argc; i++) {
3503 		if (strcmp(argv[i], "-c") == 0) {
3504 			sortByPageCount = false;
3505 		} else {
3506 			print_debugger_command_usage(argv[0]);
3507 			return 0;
3508 		}
3509 	}
3510 
3511 	uint32 totalCount = 0;
3512 	uint32 rootCount = 0;
3513 	off_t totalCommitted = 0;
3514 	page_num_t totalPages = 0;
3515 
3516 	vm_cache* cache = gDebugCacheList;
3517 	while (cache) {
3518 		totalCount++;
3519 		if (cache->source == NULL) {
3520 			cache_info stackInfo;
3521 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3522 				? sCacheInfoTable[rootCount] : stackInfo;
3523 			rootCount++;
3524 			info.cache = cache;
3525 			info.page_count = 0;
3526 			info.committed = 0;
3527 			update_cache_info_recursively(cache, info);
3528 			totalCommitted += info.committed;
3529 			totalPages += info.page_count;
3530 		}
3531 
3532 		cache = cache->debug_next;
3533 	}
3534 
3535 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3536 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3537 			sortByPageCount
3538 				? &cache_info_compare_page_count
3539 				: &cache_info_compare_committed);
3540 	}
3541 
3542 	kprintf("total committed memory: %lld, total used pages: %lu\n",
3543 		totalCommitted, totalPages);
3544 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3545 		"tree...\n\n", totalCount, rootCount,
3546 		sortByPageCount ? "page count" : "committed size");
3547 
3548 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3549 		for (uint32 i = 0; i < rootCount; i++) {
3550 			cache_info& info = sCacheInfoTable[i];
3551 			dump_caches_recursively(info.cache, info, 0);
3552 		}
3553 	} else
3554 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3555 
3556 	return 0;
3557 }
3558 
3559 #endif	// DEBUG_CACHE_LIST
3560 
3561 
3562 static int
3563 dump_cache(int argc, char **argv)
3564 {
3565 	vm_cache *cache;
3566 	bool showPages = false;
3567 	int i = 1;
3568 
3569 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3570 		kprintf("usage: %s [-ps] <address>\n"
3571 			"  if -p is specified, all pages are shown, if -s is used\n"
3572 			"  only the cache info is shown respectively.\n", argv[0]);
3573 		return 0;
3574 	}
3575 	while (argv[i][0] == '-') {
3576 		char *arg = argv[i] + 1;
3577 		while (arg[0]) {
3578 			if (arg[0] == 'p')
3579 				showPages = true;
3580 			arg++;
3581 		}
3582 		i++;
3583 	}
3584 	if (argv[i] == NULL) {
3585 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3586 		return 0;
3587 	}
3588 
3589 	addr_t address = parse_expression(argv[i]);
3590 	if (address == 0)
3591 		return 0;
3592 
3593 	cache = (vm_cache *)address;
3594 
3595 	kprintf("CACHE %p:\n", cache);
3596 	kprintf("  ref_count:    %ld\n", cache->RefCount());
3597 	kprintf("  source:       %p\n", cache->source);
3598 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
3599 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
3600 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
3601 	kprintf("  temporary:    %ld\n", cache->temporary);
3602 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
3603 	kprintf("  lock:         %p\n", cache->GetLock());
3604 #if KDEBUG
3605 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
3606 #endif
3607 	kprintf("  areas:\n");
3608 
3609 	for (vm_area *area = cache->areas; area != NULL; area = area->cache_next) {
3610 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
3611 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
3612 		kprintf("\tprotection: 0x%lx\n", area->protection);
3613 		kprintf("\towner:      0x%lx\n", area->address_space->id);
3614 	}
3615 
3616 	kprintf("  consumers:\n");
3617 	vm_cache *consumer = NULL;
3618 	while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) {
3619 		kprintf("\t%p\n", consumer);
3620 	}
3621 
3622 	kprintf("  pages:\n");
3623 	if (showPages) {
3624 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3625 				vm_page *page = it.Next();) {
3626 			if (page->type == PAGE_TYPE_PHYSICAL) {
3627 				kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) "
3628 					"wired_count %u\n", page, page->physical_page_number,
3629 					page->cache_offset, page->type, page->state,
3630 					page_state_to_string(page->state), page->wired_count);
3631 			} else if(page->type == PAGE_TYPE_DUMMY) {
3632 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3633 					page, page->state, page_state_to_string(page->state));
3634 			} else
3635 				kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
3636 		}
3637 	} else
3638 		kprintf("\t%ld in cache\n", cache->page_count);
3639 
3640 	return 0;
3641 }
3642 
3643 
3644 static void
3645 dump_area_struct(vm_area *area, bool mappings)
3646 {
3647 	kprintf("AREA: %p\n", area);
3648 	kprintf("name:\t\t'%s'\n", area->name);
3649 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
3650 	kprintf("id:\t\t0x%lx\n", area->id);
3651 	kprintf("base:\t\t0x%lx\n", area->base);
3652 	kprintf("size:\t\t0x%lx\n", area->size);
3653 	kprintf("protection:\t0x%lx\n", area->protection);
3654 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3655 	kprintf("memory_type:\t0x%x\n", area->memory_type);
3656 	kprintf("cache:\t\t%p\n", area->cache);
3657 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3658 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3659 	kprintf("cache_next:\t%p\n", area->cache_next);
3660 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3661 
3662 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3663 	if (mappings) {
3664 		kprintf("page mappings:\n");
3665 		while (iterator.HasNext()) {
3666 			vm_page_mapping *mapping = iterator.Next();
3667 			kprintf("  %p", mapping->page);
3668 		}
3669 		kprintf("\n");
3670 	} else {
3671 		uint32 count = 0;
3672 		while (iterator.Next() != NULL) {
3673 			count++;
3674 		}
3675 		kprintf("page mappings:\t%lu\n", count);
3676 	}
3677 }
3678 
3679 
3680 static int
3681 dump_area(int argc, char **argv)
3682 {
3683 	bool mappings = false;
3684 	bool found = false;
3685 	int32 index = 1;
3686 	vm_area *area;
3687 	addr_t num;
3688 
3689 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3690 		kprintf("usage: area [-m] <id|address|name>\n");
3691 		return 0;
3692 	}
3693 
3694 	if (!strcmp(argv[1], "-m")) {
3695 		mappings = true;
3696 		index++;
3697 	}
3698 
3699 	num = parse_expression(argv[index]);
3700 
3701 	// walk through the area list, looking for the arguments as a name
3702 	struct hash_iterator iter;
3703 
3704 	hash_open(sAreaHash, &iter);
3705 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
3706 		if ((area->name != NULL && !strcmp(argv[index], area->name))
3707 			|| num != 0
3708 				&& ((addr_t)area->id == num
3709 					|| area->base <= num && area->base + area->size > num)) {
3710 			dump_area_struct(area, mappings);
3711 			found = true;
3712 		}
3713 	}
3714 
3715 	if (!found)
3716 		kprintf("could not find area %s (%ld)\n", argv[index], num);
3717 	return 0;
3718 }
3719 
3720 
3721 static int
3722 dump_area_list(int argc, char **argv)
3723 {
3724 	vm_area *area;
3725 	struct hash_iterator iter;
3726 	const char *name = NULL;
3727 	int32 id = 0;
3728 
3729 	if (argc > 1) {
3730 		id = parse_expression(argv[1]);
3731 		if (id == 0)
3732 			name = argv[1];
3733 	}
3734 
3735 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3736 
3737 	hash_open(sAreaHash, &iter);
3738 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
3739 		if (id != 0 && area->address_space->id != id
3740 			|| name != NULL && strstr(area->name, name) == NULL)
3741 			continue;
3742 
3743 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id, (void *)area->base,
3744 			(void *)area->size, area->protection, area->wiring, area->name);
3745 	}
3746 	hash_close(sAreaHash, &iter, false);
3747 	return 0;
3748 }
3749 
3750 
3751 static int
3752 dump_available_memory(int argc, char **argv)
3753 {
3754 	kprintf("Available memory: %Ld/%lu bytes\n",
3755 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
3756 	return 0;
3757 }
3758 
3759 
3760 status_t
3761 vm_delete_areas(struct vm_address_space *addressSpace)
3762 {
3763 	vm_area *area;
3764 	vm_area *next, *last = NULL;
3765 
3766 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3767 		addressSpace->id));
3768 
3769 	rw_lock_write_lock(&addressSpace->lock);
3770 
3771 	// remove all reserved areas in this address space
3772 
3773 	for (area = addressSpace->areas; area; area = next) {
3774 		next = area->address_space_next;
3775 
3776 		if (area->id == RESERVED_AREA_ID) {
3777 			// just remove it
3778 			if (last)
3779 				last->address_space_next = area->address_space_next;
3780 			else
3781 				addressSpace->areas = area->address_space_next;
3782 
3783 			vm_put_address_space(addressSpace);
3784 			free(area);
3785 			continue;
3786 		}
3787 
3788 		last = area;
3789 	}
3790 
3791 	// delete all the areas in this address space
3792 
3793 	for (area = addressSpace->areas; area; area = next) {
3794 		next = area->address_space_next;
3795 		delete_area(addressSpace, area);
3796 	}
3797 
3798 	rw_lock_write_unlock(&addressSpace->lock);
3799 	return B_OK;
3800 }
3801 
3802 
3803 static area_id
3804 vm_area_for(team_id team, addr_t address)
3805 {
3806 	AddressSpaceReadLocker locker(team);
3807 	if (!locker.IsLocked())
3808 		return B_BAD_TEAM_ID;
3809 
3810 	vm_area *area = vm_area_lookup(locker.AddressSpace(), address);
3811 	if (area != NULL)
3812 		return area->id;
3813 
3814 	return B_ERROR;
3815 }
3816 
3817 
3818 /*!
3819 	Frees physical pages that were used during the boot process.
3820 */
3821 static void
3822 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
3823 {
3824 	// free all physical pages in the specified range
3825 
3826 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3827 		addr_t physicalAddress;
3828 		uint32 flags;
3829 
3830 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
3831 			vm_page *page = vm_lookup_page(current / B_PAGE_SIZE);
3832 			if (page != NULL)
3833 				vm_page_set_state(page, PAGE_STATE_FREE);
3834 		}
3835 	}
3836 
3837 	// unmap the memory
3838 	map->ops->unmap(map, start, end - 1);
3839 }
3840 
3841 
3842 void
3843 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3844 {
3845 	vm_translation_map *map = &vm_kernel_address_space()->translation_map;
3846 	addr_t end = start + size;
3847 	addr_t lastEnd = start;
3848 	vm_area *area;
3849 
3850 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
3851 
3852 	// The areas are sorted in virtual address space order, so
3853 	// we just have to find the holes between them that fall
3854 	// into the area we should dispose
3855 
3856 	map->ops->lock(map);
3857 
3858 	for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) {
3859 		addr_t areaStart = area->base;
3860 		addr_t areaEnd = areaStart + area->size;
3861 
3862 		if (area->id == RESERVED_AREA_ID)
3863 			continue;
3864 
3865 		if (areaEnd >= end) {
3866 			// we are done, the areas are already beyond of what we have to free
3867 			lastEnd = end;
3868 			break;
3869 		}
3870 
3871 		if (areaStart > lastEnd) {
3872 			// this is something we can free
3873 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
3874 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
3875 		}
3876 
3877 		lastEnd = areaEnd;
3878 	}
3879 
3880 	if (lastEnd < end) {
3881 		// we can also get rid of some space at the end of the area
3882 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
3883 		unmap_and_free_physical_pages(map, lastEnd, end);
3884 	}
3885 
3886 	map->ops->unlock(map);
3887 }
3888 
3889 
3890 static void
3891 create_preloaded_image_areas(struct preloaded_image *image)
3892 {
3893 	char name[B_OS_NAME_LENGTH];
3894 	void *address;
3895 	int32 length;
3896 
3897 	// use file name to create a good area name
3898 	char *fileName = strrchr(image->name, '/');
3899 	if (fileName == NULL)
3900 		fileName = image->name;
3901 	else
3902 		fileName++;
3903 
3904 	length = strlen(fileName);
3905 	// make sure there is enough space for the suffix
3906 	if (length > 25)
3907 		length = 25;
3908 
3909 	memcpy(name, fileName, length);
3910 	strcpy(name + length, "_text");
3911 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
3912 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3913 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3914 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3915 		// this will later be remapped read-only/executable by the
3916 		// ELF initialization code
3917 
3918 	strcpy(name + length, "_data");
3919 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
3920 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3921 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3922 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3923 }
3924 
3925 
3926 /**	Frees all previously kernel arguments areas from the kernel_args structure.
3927  *	Any boot loader resources contained in that arguments must not be accessed
3928  *	anymore past this point.
3929  */
3930 
3931 void
3932 vm_free_kernel_args(kernel_args *args)
3933 {
3934 	uint32 i;
3935 
3936 	TRACE(("vm_free_kernel_args()\n"));
3937 
3938 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3939 		area_id area = area_for((void *)args->kernel_args_range[i].start);
3940 		if (area >= B_OK)
3941 			delete_area(area);
3942 	}
3943 }
3944 
3945 
3946 static void
3947 allocate_kernel_args(kernel_args *args)
3948 {
3949 	uint32 i;
3950 
3951 	TRACE(("allocate_kernel_args()\n"));
3952 
3953 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3954 		void *address = (void *)args->kernel_args_range[i].start;
3955 
3956 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
3957 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3958 	}
3959 }
3960 
3961 
3962 static void
3963 unreserve_boot_loader_ranges(kernel_args *args)
3964 {
3965 	uint32 i;
3966 
3967 	TRACE(("unreserve_boot_loader_ranges()\n"));
3968 
3969 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
3970 		vm_unreserve_address_range(vm_kernel_address_space_id(),
3971 			(void *)args->virtual_allocated_range[i].start,
3972 			args->virtual_allocated_range[i].size);
3973 	}
3974 }
3975 
3976 
3977 static void
3978 reserve_boot_loader_ranges(kernel_args *args)
3979 {
3980 	uint32 i;
3981 
3982 	TRACE(("reserve_boot_loader_ranges()\n"));
3983 
3984 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
3985 		void *address = (void *)args->virtual_allocated_range[i].start;
3986 
3987 		// If the address is no kernel address, we just skip it. The
3988 		// architecture specific code has to deal with it.
3989 		if (!IS_KERNEL_ADDRESS(address)) {
3990 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3991 				address, args->virtual_allocated_range[i].size);
3992 			continue;
3993 		}
3994 
3995 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address,
3996 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3997 		if (status < B_OK)
3998 			panic("could not reserve boot loader ranges\n");
3999 	}
4000 }
4001 
4002 
4003 static addr_t
4004 allocate_early_virtual(kernel_args *args, size_t size)
4005 {
4006 	addr_t spot = 0;
4007 	uint32 i;
4008 	int last_valloc_entry = 0;
4009 
4010 	size = PAGE_ALIGN(size);
4011 	// find a slot in the virtual allocation addr range
4012 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
4013 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4014 			+ args->virtual_allocated_range[i - 1].size;
4015 		last_valloc_entry = i;
4016 		// check to see if the space between this one and the last is big enough
4017 		if (previousRangeEnd >= KERNEL_BASE
4018 			&& args->virtual_allocated_range[i].start
4019 				- previousRangeEnd >= size) {
4020 			spot = previousRangeEnd;
4021 			args->virtual_allocated_range[i - 1].size += size;
4022 			goto out;
4023 		}
4024 	}
4025 	if (spot == 0) {
4026 		// we hadn't found one between allocation ranges. this is ok.
4027 		// see if there's a gap after the last one
4028 		addr_t lastRangeEnd
4029 			= args->virtual_allocated_range[last_valloc_entry].start
4030 				+ args->virtual_allocated_range[last_valloc_entry].size;
4031 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
4032 			spot = lastRangeEnd;
4033 			args->virtual_allocated_range[last_valloc_entry].size += size;
4034 			goto out;
4035 		}
4036 		// see if there's a gap before the first one
4037 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
4038 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
4039 				args->virtual_allocated_range[0].start -= size;
4040 				spot = args->virtual_allocated_range[0].start;
4041 				goto out;
4042 			}
4043 		}
4044 	}
4045 
4046 out:
4047 	return spot;
4048 }
4049 
4050 
4051 static bool
4052 is_page_in_physical_memory_range(kernel_args *args, addr_t address)
4053 {
4054 	// TODO: horrible brute-force method of determining if the page can be allocated
4055 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4056 		if (address >= args->physical_memory_range[i].start
4057 			&& address < args->physical_memory_range[i].start
4058 				+ args->physical_memory_range[i].size)
4059 			return true;
4060 	}
4061 	return false;
4062 }
4063 
4064 
4065 static addr_t
4066 allocate_early_physical_page(kernel_args *args)
4067 {
4068 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4069 		addr_t nextPage;
4070 
4071 		nextPage = args->physical_allocated_range[i].start
4072 			+ args->physical_allocated_range[i].size;
4073 		// see if the page after the next allocated paddr run can be allocated
4074 		if (i + 1 < args->num_physical_allocated_ranges
4075 			&& args->physical_allocated_range[i + 1].size != 0) {
4076 			// see if the next page will collide with the next allocated range
4077 			if (nextPage >= args->physical_allocated_range[i+1].start)
4078 				continue;
4079 		}
4080 		// see if the next physical page fits in the memory block
4081 		if (is_page_in_physical_memory_range(args, nextPage)) {
4082 			// we got one!
4083 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4084 			return nextPage / B_PAGE_SIZE;
4085 		}
4086 	}
4087 
4088 	return 0;
4089 		// could not allocate a block
4090 }
4091 
4092 
4093 /*!
4094 	This one uses the kernel_args' physical and virtual memory ranges to
4095 	allocate some pages before the VM is completely up.
4096 */
4097 addr_t
4098 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize,
4099 	uint32 attributes)
4100 {
4101 	if (physicalSize > virtualSize)
4102 		physicalSize = virtualSize;
4103 
4104 	// find the vaddr to allocate at
4105 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
4106 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
4107 
4108 	// map the pages
4109 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4110 		addr_t physicalAddress = allocate_early_physical_page(args);
4111 		if (physicalAddress == 0)
4112 			panic("error allocating early page!\n");
4113 
4114 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4115 
4116 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4117 			physicalAddress * B_PAGE_SIZE, attributes,
4118 			&allocate_early_physical_page);
4119 	}
4120 
4121 	return virtualBase;
4122 }
4123 
4124 
4125 status_t
4126 vm_init(kernel_args *args)
4127 {
4128 	struct preloaded_image *image;
4129 	void *address;
4130 	status_t err = 0;
4131 	uint32 i;
4132 
4133 	TRACE(("vm_init: entry\n"));
4134 	err = arch_vm_translation_map_init(args);
4135 	err = arch_vm_init(args);
4136 
4137 	// initialize some globals
4138 	sNextAreaID = 1;
4139 
4140 	vm_page_init_num_pages(args);
4141 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4142 
4143 	size_t heapSize = INITIAL_HEAP_SIZE;
4144 	// try to accomodate low memory systems
4145 	while (heapSize > sAvailableMemory / 8)
4146 		heapSize /= 2;
4147 	if (heapSize < 1024 * 1024)
4148 		panic("vm_init: go buy some RAM please.");
4149 
4150 	// map in the new heap and initialize it
4151 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4152 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4153 	TRACE(("heap at 0x%lx\n", heapBase));
4154 	heap_init(heapBase, heapSize);
4155 
4156 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
4157 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
4158 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4159 	slab_init(args, slabInitialBase, slabInitialSize);
4160 
4161 	// initialize the free page list and physical page mapper
4162 	vm_page_init(args);
4163 
4164 	// initialize the hash table that stores the pages mapped to caches
4165 	vm_cache_init(args);
4166 
4167 	{
4168 		vm_area *area;
4169 		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
4170 			(addr_t)&area->hash_next - (addr_t)area,
4171 			&area_compare, &area_hash);
4172 		if (sAreaHash == NULL)
4173 			panic("vm_init: error creating aspace hash table\n");
4174 	}
4175 
4176 	vm_address_space_init();
4177 	reserve_boot_loader_ranges(args);
4178 
4179 	// do any further initialization that the architecture dependant layers may need now
4180 	arch_vm_translation_map_init_post_area(args);
4181 	arch_vm_init_post_area(args);
4182 	vm_page_init_post_area(args);
4183 
4184 	// allocate areas to represent stuff that already exists
4185 
4186 	address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE);
4187 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4188 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4189 
4190 	address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
4191 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
4192 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
4193 		| B_KERNEL_WRITE_AREA);
4194 
4195 	allocate_kernel_args(args);
4196 
4197 	create_preloaded_image_areas(&args->kernel_image);
4198 
4199 	// allocate areas for preloaded images
4200 	for (image = args->preloaded_images; image != NULL; image = image->next) {
4201 		create_preloaded_image_areas(image);
4202 	}
4203 
4204 	// allocate kernel stacks
4205 	for (i = 0; i < args->num_cpus; i++) {
4206 		char name[64];
4207 
4208 		sprintf(name, "idle thread %lu kstack", i + 1);
4209 		address = (void *)args->cpu_kstack[i].start;
4210 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4211 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4212 	}
4213 
4214 #if DEBUG_CACHE_LIST
4215 	create_area("cache info table", (void**)&sCacheInfoTable,
4216 		B_ANY_KERNEL_ADDRESS,
4217 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4218 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4219 #endif	// DEBUG_CACHE_LIST
4220 
4221 	// add some debugger commands
4222 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4223 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
4224 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
4225 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
4226 #if DEBUG_CACHE_LIST
4227 	add_debugger_command_etc("caches", &dump_caches,
4228 		"List all vm_cache trees",
4229 		"[ \"-c\" ]\n"
4230 		"All cache trees are listed sorted in decreasing order by number of\n"
4231 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
4232 		0);
4233 #endif
4234 	add_debugger_command("avail", &dump_available_memory, "Dump available memory");
4235 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4236 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4237 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4238 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4239 	add_debugger_command("string", &display_mem, "dump strings");
4240 
4241 	TRACE(("vm_init: exit\n"));
4242 
4243 	return err;
4244 }
4245 
4246 
4247 status_t
4248 vm_init_post_sem(kernel_args *args)
4249 {
4250 	// This frees all unused boot loader resources and makes its space available again
4251 	arch_vm_init_end(args);
4252 	unreserve_boot_loader_ranges(args);
4253 
4254 	// fill in all of the semaphores that were not allocated before
4255 	// since we're still single threaded and only the kernel address space exists,
4256 	// it isn't that hard to find all of the ones we need to create
4257 
4258 	arch_vm_translation_map_init_post_sem(args);
4259 	vm_address_space_init_post_sem();
4260 
4261 	slab_init_post_sem();
4262 	return heap_init_post_sem();
4263 }
4264 
4265 
4266 status_t
4267 vm_init_post_thread(kernel_args *args)
4268 {
4269 	vm_page_init_post_thread(args);
4270 	vm_daemon_init();
4271 	slab_init_post_thread();
4272 	return heap_init_post_thread();
4273 }
4274 
4275 
4276 status_t
4277 vm_init_post_modules(kernel_args *args)
4278 {
4279 	return arch_vm_init_post_modules(args);
4280 }
4281 
4282 
4283 void
4284 permit_page_faults(void)
4285 {
4286 	struct thread *thread = thread_get_current_thread();
4287 	if (thread != NULL)
4288 		atomic_add(&thread->page_faults_allowed, 1);
4289 }
4290 
4291 
4292 void
4293 forbid_page_faults(void)
4294 {
4295 	struct thread *thread = thread_get_current_thread();
4296 	if (thread != NULL)
4297 		atomic_add(&thread->page_faults_allowed, -1);
4298 }
4299 
4300 
4301 status_t
4302 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
4303 	addr_t *newIP)
4304 {
4305 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4306 		faultAddress));
4307 
4308 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4309 
4310 	addr_t pageAddress = ROUNDOWN(address, B_PAGE_SIZE);
4311 	vm_address_space *addressSpace = NULL;
4312 
4313 	status_t status = B_OK;
4314 	*newIP = 0;
4315 	atomic_add((int32*)&sPageFaults, 1);
4316 
4317 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4318 		addressSpace = vm_get_kernel_address_space();
4319 	} else if (IS_USER_ADDRESS(pageAddress)) {
4320 		addressSpace = vm_get_current_user_address_space();
4321 		if (addressSpace == NULL) {
4322 			if (!isUser) {
4323 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4324 					"memory!\n");
4325 				status = B_BAD_ADDRESS;
4326 				TPF(PageFaultError(-1,
4327 					VMPageFaultTracing
4328 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4329 			} else {
4330 				// XXX weird state.
4331 				panic("vm_page_fault: non kernel thread accessing user memory "
4332 					"that doesn't exist!\n");
4333 				status = B_BAD_ADDRESS;
4334 			}
4335 		}
4336 	} else {
4337 		// the hit was probably in the 64k DMZ between kernel and user space
4338 		// this keeps a user space thread from passing a buffer that crosses
4339 		// into kernel space
4340 		status = B_BAD_ADDRESS;
4341 		TPF(PageFaultError(-1,
4342 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4343 	}
4344 
4345 	if (status == B_OK)
4346 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser);
4347 
4348 	if (status < B_OK) {
4349 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4350 			strerror(status), address, faultAddress, isWrite, isUser,
4351 			thread_get_current_thread_id());
4352 		if (!isUser) {
4353 			struct thread *thread = thread_get_current_thread();
4354 			if (thread != NULL && thread->fault_handler != 0) {
4355 				// this will cause the arch dependant page fault handler to
4356 				// modify the IP on the interrupt frame or whatever to return
4357 				// to this address
4358 				*newIP = thread->fault_handler;
4359 			} else {
4360 				// unhandled page fault in the kernel
4361 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
4362 					address, faultAddress);
4363 			}
4364 		} else {
4365 #if 1
4366 			rw_lock_read_lock(&addressSpace->lock);
4367 
4368 			// TODO: remove me once we have proper userland debugging support
4369 			// (and tools)
4370 			vm_area *area = vm_area_lookup(addressSpace, faultAddress);
4371 
4372 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4373 // fault and someone is already waiting for a write lock on the same address
4374 // space. This thread will then try to acquire the semaphore again and will
4375 // be queued after the writer.
4376 			struct thread *thread = thread_get_current_thread();
4377 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4378 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4379 				thread->name, thread->id, thread->team->name, thread->team->id,
4380 				isWrite ? "write" : "read", address, faultAddress,
4381 				area ? area->name : "???",
4382 				faultAddress - (area ? area->base : 0x0));
4383 
4384 			// We can print a stack trace of the userland thread here.
4385 #if 1
4386 			if (area) {
4387 				struct stack_frame {
4388 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4389 						struct stack_frame*	previous;
4390 						void*				return_address;
4391 					#else
4392 						// ...
4393 					#warning writeme
4394 					#endif
4395 				} frame;
4396 #ifdef __INTEL__
4397 				struct iframe *iframe = i386_get_user_iframe();
4398 				if (iframe == NULL)
4399 					panic("iframe is NULL!");
4400 
4401 				status_t status = user_memcpy(&frame, (void *)iframe->ebp,
4402 					sizeof(struct stack_frame));
4403 #elif defined(__POWERPC__)
4404 				struct iframe *iframe = ppc_get_user_iframe();
4405 				if (iframe == NULL)
4406 					panic("iframe is NULL!");
4407 
4408 				status_t status = user_memcpy(&frame, (void *)iframe->r1,
4409 					sizeof(struct stack_frame));
4410 #else
4411 #	warning "vm_page_fault() stack trace won't work"
4412 				status = B_ERROR;
4413 #endif
4414 
4415 				dprintf("stack trace:\n");
4416 				int32 maxFrames = 50;
4417 				while (status == B_OK && --maxFrames >= 0
4418 						&& frame.return_address != NULL) {
4419 					dprintf("  %p", frame.return_address);
4420 					area = vm_area_lookup(addressSpace,
4421 						(addr_t)frame.return_address);
4422 					if (area) {
4423 						dprintf(" (%s + %#lx)", area->name,
4424 							(addr_t)frame.return_address - area->base);
4425 					}
4426 					dprintf("\n");
4427 
4428 					status = user_memcpy(&frame, frame.previous,
4429 						sizeof(struct stack_frame));
4430 				}
4431 			}
4432 #endif	// 0 (stack trace)
4433 
4434 			rw_lock_read_unlock(&addressSpace->lock);
4435 #endif
4436 
4437 			// TODO: the fault_callback is a temporary solution for vm86
4438 			if (thread->fault_callback == NULL
4439 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4440 				// If the thread has a signal handler for SIGSEGV, we simply
4441 				// send it the signal. Otherwise we notify the user debugger
4442 				// first.
4443 				struct sigaction action;
4444 				if (sigaction(SIGSEGV, NULL, &action) == 0
4445 					&& action.sa_handler != SIG_DFL
4446 					&& action.sa_handler != SIG_IGN) {
4447 					send_signal(thread->id, SIGSEGV);
4448 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4449 						SIGSEGV)) {
4450 					send_signal(thread->id, SIGSEGV);
4451 				}
4452 			}
4453 		}
4454 	}
4455 
4456 	if (addressSpace != NULL)
4457 		vm_put_address_space(addressSpace);
4458 
4459 	return B_HANDLED_INTERRUPT;
4460 }
4461 
4462 
4463 static inline status_t
4464 fault_acquire_locked_source(vm_cache *cache, vm_cache **_source)
4465 {
4466 	vm_cache *source = cache->source;
4467 	if (source == NULL)
4468 		return B_ERROR;
4469 
4470 	source->Lock();
4471 	source->AcquireRefLocked();
4472 
4473 	*_source = source;
4474 	return B_OK;
4475 }
4476 
4477 
4478 /*!
4479 	Inserts a busy dummy page into a cache, and makes sure the cache won't go
4480 	away by grabbing a reference to it.
4481 */
4482 static inline void
4483 fault_insert_dummy_page(vm_cache *cache, vm_dummy_page &dummyPage,
4484 	off_t cacheOffset)
4485 {
4486 	dummyPage.state = PAGE_STATE_BUSY;
4487 	cache->AcquireRefLocked();
4488 	cache->InsertPage(&dummyPage, cacheOffset);
4489 	dummyPage.busy_condition.Publish(&dummyPage, "page");
4490 }
4491 
4492 
4493 /*!
4494 	Removes the busy dummy page from a cache, and releases its reference to
4495 	the cache.
4496 */
4497 static inline void
4498 fault_remove_dummy_page(vm_dummy_page &dummyPage, bool isLocked)
4499 {
4500 	vm_cache *cache = dummyPage.cache;
4501 	if (!isLocked)
4502 		cache->Lock();
4503 
4504 	if (dummyPage.state == PAGE_STATE_BUSY) {
4505 		cache->RemovePage(&dummyPage);
4506 		dummyPage.state = PAGE_STATE_INACTIVE;
4507 		dummyPage.busy_condition.Unpublish();
4508 	}
4509 
4510 	cache->ReleaseRefLocked();
4511 
4512 	if (!isLocked)
4513 		cache->Unlock();
4514 }
4515 
4516 
4517 /*!
4518 	Finds a page at the specified \a cacheOffset in either the \a topCacheRef
4519 	or in its source chain. Will also page in a missing page in case there is
4520 	a cache that has the page.
4521 	If it couldn't find a page, it will return the vm_cache that should get it,
4522 	otherwise, it will return the vm_cache that contains the cache.
4523 	It always grabs a reference to the vm_cache that it returns, and also locks it.
4524 */
4525 static inline status_t
4526 fault_find_page(vm_translation_map *map, vm_cache *topCache,
4527 	off_t cacheOffset, bool isWrite, vm_dummy_page &dummyPage,
4528 	vm_cache **_pageCache, vm_page** _page, bool* _restart)
4529 {
4530 	*_restart = false;
4531 	vm_cache *cache = topCache;
4532 	vm_cache *lastCache = NULL;
4533 	vm_page *page = NULL;
4534 
4535 	cache->Lock();
4536 	cache->AcquireRefLocked();
4537 		// we release this later in the loop
4538 
4539 	while (cache != NULL) {
4540 		if (lastCache != NULL)
4541 			lastCache->ReleaseRefAndUnlock();
4542 
4543 		// we hold the lock of the cache at this point
4544 
4545 		lastCache = cache;
4546 
4547 		for (;;) {
4548 			page = cache->LookupPage(cacheOffset);
4549 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
4550 				// we found the page
4551 				break;
4552 			}
4553 			if (page == NULL || page == &dummyPage)
4554 				break;
4555 
4556 			// page must be busy -- wait for it to become unbusy
4557 			{
4558 				ConditionVariableEntry entry;
4559 				entry.Add(page);
4560 				cache->Unlock();
4561 				entry.Wait();
4562 				cache->Lock();
4563 			}
4564 		}
4565 
4566 		if (page != NULL && page != &dummyPage)
4567 			break;
4568 
4569 		// The current cache does not contain the page we're looking for
4570 
4571 		// see if the backing store has it
4572 		if (cache->HasPage(cacheOffset)) {
4573 			// insert a fresh page and mark it busy -- we're going to read it in
4574 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4575 			cache->InsertPage(page, cacheOffset);
4576 
4577 			ConditionVariable busyCondition;
4578 			busyCondition.Publish(page, "page");
4579 
4580 			cache->Unlock();
4581 
4582 			// get a virtual address for the page
4583 			iovec vec;
4584 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
4585 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
4586 
4587 			// read it in
4588 			status_t status = cache->Read(cacheOffset, &vec, 1,
4589 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4590 
4591 			cache->Lock();
4592 
4593 			if (status < B_OK) {
4594 				// on error remove and free the page
4595 				dprintf("reading page from cache %p returned: %s!\n",
4596 					cache, strerror(status));
4597 
4598 				busyCondition.Unpublish();
4599 				cache->RemovePage(page);
4600 				vm_page_set_state(page, PAGE_STATE_FREE);
4601 
4602 				cache->ReleaseRefAndUnlock();
4603 				return status;
4604 			}
4605 
4606 			// mark the page unbusy again
4607 			page->state = PAGE_STATE_ACTIVE;
4608 			busyCondition.Unpublish();
4609 			break;
4610 		}
4611 
4612 		// If we're at the top most cache, insert the dummy page here to keep
4613 		// other threads from faulting on the same address and chasing us up the
4614 		// cache chain
4615 		if (cache == topCache && dummyPage.state != PAGE_STATE_BUSY)
4616 			fault_insert_dummy_page(cache, dummyPage, cacheOffset);
4617 
4618 		vm_cache *nextCache;
4619 		status_t status = fault_acquire_locked_source(cache, &nextCache);
4620 		if (status < B_OK)
4621 			nextCache = NULL;
4622 
4623 		// at this point, we still hold a ref to this cache (through lastCacheRef)
4624 
4625 		cache = nextCache;
4626 	}
4627 
4628 	if (page == &dummyPage)
4629 		page = NULL;
4630 
4631 	if (page == NULL) {
4632 		// there was no adequate page, determine the cache for a clean one
4633 
4634 		ASSERT(cache == NULL);
4635 
4636 		// We rolled off the end of the cache chain, so we need to decide which
4637 		// cache will get the new page we're about to create.
4638 		cache = isWrite ? topCache : lastCache;
4639 			// Read-only pages come in the deepest cache - only the
4640 			// top most cache may have direct write access.
4641 		if (cache != lastCache) {
4642 			lastCache->ReleaseRefAndUnlock();
4643 			cache->Lock();
4644 			cache->AcquireRefLocked();
4645 		}
4646 
4647 		vm_page* newPage = cache->LookupPage(cacheOffset);
4648 		if (newPage && newPage != &dummyPage) {
4649 			// A new page turned up. It could be the one we're looking
4650 			// for, but it could as well be a dummy page from someone
4651 			// else or an otherwise busy page. We can't really handle
4652 			// that here. Hence we completely restart this functions.
4653 			cache->ReleaseRefAndUnlock();
4654 			*_restart = true;
4655 		}
4656 	} else {
4657 		// we still own reference and lock to the cache
4658 	}
4659 
4660 	*_pageCache = cache;
4661 	*_page = page;
4662 	return B_OK;
4663 }
4664 
4665 
4666 /*!
4667 	Returns the page that should be mapped into the area that got the fault.
4668 	It returns the owner of the page in \a sourceCache - it keeps a reference
4669 	to it, and has also locked it on exit.
4670 */
4671 static inline status_t
4672 fault_get_page(vm_translation_map *map, vm_cache *topCache, off_t cacheOffset,
4673 	bool isWrite, vm_dummy_page &dummyPage, vm_cache **_sourceCache,
4674 	vm_cache **_copiedSource, vm_page** _page)
4675 {
4676 	vm_cache *cache;
4677 	vm_page *page;
4678 	bool restart;
4679 	for (;;) {
4680 		status_t status = fault_find_page(map, topCache, cacheOffset, isWrite,
4681 			dummyPage, &cache, &page, &restart);
4682 		if (status != B_OK)
4683 			return status;
4684 
4685 		if (!restart)
4686 			break;
4687 
4688 		// Remove the dummy page, if it has been inserted.
4689 		topCache->Lock();
4690 
4691 		if (dummyPage.state == PAGE_STATE_BUSY) {
4692 			ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4693 				&dummyPage);
4694 			fault_remove_dummy_page(dummyPage, true);
4695 		}
4696 
4697 		topCache->Unlock();
4698 	}
4699 
4700 	if (page == NULL) {
4701 		// we still haven't found a page, so we allocate a clean one
4702 
4703 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4704 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number));
4705 
4706 		// Insert the new page into our cache, and replace it with the dummy page if necessary
4707 
4708 		// If we inserted a dummy page into this cache (i.e. if it is the top
4709 		// cache), we have to remove it now
4710 		if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cache) {
4711 #if DEBUG_PAGE_CACHE_TRANSITIONS
4712 			page->debug_flags = dummyPage.debug_flags | 0x8;
4713 			if (dummyPage.collided_page != NULL) {
4714 				dummyPage.collided_page->collided_page = page;
4715 				page->collided_page = dummyPage.collided_page;
4716 			}
4717 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4718 
4719 			fault_remove_dummy_page(dummyPage, true);
4720 		}
4721 
4722 		cache->InsertPage(page, cacheOffset);
4723 
4724 		if (dummyPage.state == PAGE_STATE_BUSY) {
4725 #if DEBUG_PAGE_CACHE_TRANSITIONS
4726 			page->debug_flags = dummyPage.debug_flags | 0x10;
4727 			if (dummyPage.collided_page != NULL) {
4728 				dummyPage.collided_page->collided_page = page;
4729 				page->collided_page = dummyPage.collided_page;
4730 			}
4731 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4732 
4733 			// This is not the top cache into which we inserted the dummy page,
4734 			// let's remove it from there. We need to temporarily unlock our
4735 			// cache to comply with the cache locking policy.
4736 			cache->Unlock();
4737 			fault_remove_dummy_page(dummyPage, false);
4738 			cache->Lock();
4739 		}
4740 	}
4741 
4742 	// We now have the page and a cache it belongs to - we now need to make
4743 	// sure that the area's cache can access it, too, and sees the correct data
4744 
4745 	if (page->cache != topCache && isWrite) {
4746 		// Now we have a page that has the data we want, but in the wrong cache
4747 		// object so we need to copy it and stick it into the top cache.
4748 		// Note that this and the "if" before are mutual exclusive. If
4749 		// fault_find_page() didn't find the page, it would return the top cache
4750 		// for write faults.
4751 		vm_page *sourcePage = page;
4752 
4753 		// ToDo: if memory is low, it might be a good idea to steal the page
4754 		//	from our source cache - if possible, that is
4755 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4756 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4757 #if 0
4758 if (cacheOffset == 0x12000)
4759 	dprintf("%ld: copy page %p to page %p from cache %p to cache %p\n", find_thread(NULL),
4760 		sourcePage, page, sourcePage->cache, topCacheRef->cache);
4761 #endif
4762 
4763 		// copy the page
4764 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4765 			sourcePage->physical_page_number * B_PAGE_SIZE);
4766 
4767 		if (sourcePage->state != PAGE_STATE_MODIFIED)
4768 			vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE);
4769 
4770 		cache->Unlock();
4771 		topCache->Lock();
4772 
4773 		// Since the top cache has been unlocked for a while, someone else
4774 		// (RemoveConsumer()) might have replaced our dummy page.
4775 		vm_page* newPage = NULL;
4776 		for (;;) {
4777 			newPage = topCache->LookupPage(cacheOffset);
4778 			if (newPage == NULL || newPage == &dummyPage) {
4779 				newPage = NULL;
4780 				break;
4781 			}
4782 
4783 			if (newPage->state != PAGE_STATE_BUSY)
4784 				break;
4785 
4786 			// The page is busy, wait till it becomes unbusy.
4787 			ConditionVariableEntry entry;
4788 			entry.Add(newPage);
4789 			topCache->Unlock();
4790 			entry.Wait();
4791 			topCache->Lock();
4792 		}
4793 
4794 		if (newPage) {
4795 			// Indeed someone else threw in a page. We free ours and are happy.
4796 			vm_page_set_state(page, PAGE_STATE_FREE);
4797 			page = newPage;
4798 		} else {
4799 			// Insert the new page into our cache and remove the dummy page, if
4800 			// necessary.
4801 
4802 			// if we inserted a dummy page into this cache, we have to remove it now
4803 			if (dummyPage.state == PAGE_STATE_BUSY) {
4804 				ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4805 					&dummyPage);
4806 				fault_remove_dummy_page(dummyPage, true);
4807 			}
4808 
4809 			topCache->InsertPage(page, cacheOffset);
4810 		}
4811 
4812 		*_copiedSource = cache;
4813 
4814 		cache = topCache;
4815 		cache->AcquireRefLocked();
4816 	}
4817 
4818 	*_sourceCache = cache;
4819 	*_page = page;
4820 	return B_OK;
4821 }
4822 
4823 
4824 static status_t
4825 vm_soft_fault(vm_address_space *addressSpace, addr_t originalAddress,
4826 	bool isWrite, bool isUser)
4827 {
4828 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4829 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4830 
4831 	AddressSpaceReadLocker locker(addressSpace, true);
4832 
4833 	atomic_add(&addressSpace->fault_count, 1);
4834 
4835 	// Get the area the fault was in
4836 
4837 	addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
4838 
4839 	vm_area *area = vm_area_lookup(addressSpace, address);
4840 	if (area == NULL) {
4841 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n",
4842 			originalAddress);
4843 		TPF(PageFaultError(-1, VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4844 		return B_BAD_ADDRESS;
4845 	}
4846 
4847 	// check permissions
4848 	uint32 protection = get_area_page_protection(area, address);
4849 	if (isUser && (protection & B_USER_PROTECTION) == 0) {
4850 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
4851 		TPF(PageFaultError(area->id,
4852 			VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4853 		return B_PERMISSION_DENIED;
4854 	}
4855 	if (isWrite && (protection
4856 			& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4857 		dprintf("write access attempted on read-only area 0x%lx at %p\n",
4858 			area->id, (void *)originalAddress);
4859 		TPF(PageFaultError(area->id,
4860 			VMPageFaultTracing::PAGE_FAULT_ERROR_READ_ONLY));
4861 		return B_PERMISSION_DENIED;
4862 	}
4863 
4864 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
4865 	// At first, the top most cache from the area is investigated
4866 
4867 	vm_cache *topCache = vm_area_get_locked_cache(area);
4868 	off_t cacheOffset = address - area->base + area->cache_offset;
4869 
4870 	atomic_add(&area->no_cache_change, 1);
4871 		// make sure the area's cache isn't replaced during the page fault
4872 
4873 	// See if this cache has a fault handler - this will do all the work for us
4874 	{
4875 		// Note, since the page fault is resolved with interrupts enabled, the
4876 		// fault handler could be called more than once for the same reason -
4877 		// the store must take this into account
4878 		status_t status = topCache->Fault(addressSpace, cacheOffset);
4879 		if (status != B_BAD_HANDLER) {
4880 			vm_area_put_locked_cache(topCache);
4881 			return status;
4882 		}
4883 	}
4884 
4885 	topCache->Unlock();
4886 
4887 	// The top most cache has no fault handler, so let's see if the cache or its sources
4888 	// already have the page we're searching for (we're going from top to bottom)
4889 
4890 	vm_translation_map *map = &addressSpace->translation_map;
4891 	size_t reservePages = 2 + map->ops->map_max_pages_need(map,
4892 		originalAddress, originalAddress);
4893 	vm_page_reserve_pages(reservePages);
4894 		// we may need up to 2 pages - reserving them upfront makes sure
4895 		// we don't have any cache locked, so that the page daemon/thief
4896 		// can do their job without problems
4897 
4898 	vm_dummy_page dummyPage;
4899 	dummyPage.cache = NULL;
4900 	dummyPage.state = PAGE_STATE_INACTIVE;
4901 	dummyPage.type = PAGE_TYPE_DUMMY;
4902 	dummyPage.wired_count = 0;
4903 #if DEBUG_PAGE_CACHE_TRANSITIONS
4904 	dummyPage.debug_flags = 0;
4905 	dummyPage.collided_page = NULL;
4906 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4907 
4908 	vm_cache *copiedPageSource = NULL;
4909 	vm_cache *pageSource;
4910 	vm_page *page;
4911 	// TODO: We keep the address space read lock during the whole operation
4912 	// which might be rather expensive depending on where the data has to
4913 	// be retrieved from.
4914 	status_t status = fault_get_page(map, topCache, cacheOffset, isWrite,
4915 		dummyPage, &pageSource, &copiedPageSource, &page);
4916 
4917 	if (status == B_OK) {
4918 		// All went fine, all there is left to do is to map the page into the address space
4919 		TPF(PageFaultDone(area->id, topCache, page->cache, page));
4920 
4921 		// In case this is a copy-on-write page, we need to unmap it from the area now
4922 		if (isWrite && page->cache == topCache)
4923 			vm_unmap_pages(area, address, B_PAGE_SIZE, true);
4924 
4925 		// TODO: there is currently no mechanism to prevent a page being mapped
4926 		//	more than once in case of a second page fault!
4927 
4928 		// If the page doesn't reside in the area's cache, we need to make sure it's
4929 		// mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write)
4930 		uint32 newProtection = protection;
4931 		if (page->cache != topCache && !isWrite)
4932 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4933 
4934 		vm_map_page(area, page, address, newProtection);
4935 
4936 		pageSource->ReleaseRefAndUnlock();
4937 	} else
4938 		TPF(PageFaultError(area->id, status));
4939 
4940 	atomic_add(&area->no_cache_change, -1);
4941 
4942 	if (copiedPageSource)
4943 		copiedPageSource->ReleaseRef();
4944 
4945 	if (dummyPage.state == PAGE_STATE_BUSY) {
4946 		// We still have the dummy page in the cache - that happens if we didn't need
4947 		// to allocate a new page before, but could use one in another cache
4948 		fault_remove_dummy_page(dummyPage, false);
4949 	}
4950 
4951 	topCache->ReleaseRef();
4952 	vm_page_unreserve_pages(reservePages);
4953 
4954 	return status;
4955 }
4956 
4957 
4958 /*! You must have the address space's sem held */
4959 vm_area *
4960 vm_area_lookup(vm_address_space *addressSpace, addr_t address)
4961 {
4962 	vm_area *area;
4963 
4964 	// check the areas list first
4965 	area = addressSpace->area_hint;
4966 	if (area && area->base <= address && area->base + (area->size - 1) >= address)
4967 		goto found;
4968 
4969 	for (area = addressSpace->areas; area != NULL; area = area->address_space_next) {
4970 		if (area->id == RESERVED_AREA_ID)
4971 			continue;
4972 
4973 		if (area->base <= address && area->base + (area->size - 1) >= address)
4974 			break;
4975 	}
4976 
4977 found:
4978 	if (area)
4979 		addressSpace->area_hint = area;
4980 
4981 	return area;
4982 }
4983 
4984 
4985 status_t
4986 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
4987 {
4988 	return vm_kernel_address_space()->translation_map.ops->get_physical_page(
4989 		paddr, _vaddr, _handle);
4990 }
4991 
4992 status_t
4993 vm_put_physical_page(addr_t vaddr, void* handle)
4994 {
4995 	return vm_kernel_address_space()->translation_map.ops->put_physical_page(
4996 		vaddr, handle);
4997 }
4998 
4999 
5000 status_t
5001 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
5002 {
5003 	return vm_kernel_address_space()->translation_map.ops
5004 		->get_physical_page_current_cpu(paddr, _vaddr, _handle);
5005 }
5006 
5007 status_t
5008 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5009 {
5010 	return vm_kernel_address_space()->translation_map.ops
5011 		->put_physical_page_current_cpu(vaddr, handle);
5012 }
5013 
5014 
5015 status_t
5016 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
5017 {
5018 	return vm_kernel_address_space()->translation_map.ops
5019 		->get_physical_page_debug(paddr, _vaddr, _handle);
5020 }
5021 
5022 status_t
5023 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5024 {
5025 	return vm_kernel_address_space()->translation_map.ops
5026 		->put_physical_page_debug(vaddr, handle);
5027 }
5028 
5029 
5030 void
5031 vm_get_info(system_memory_info* info)
5032 {
5033 	swap_get_info(info);
5034 
5035 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
5036 	info->page_faults = sPageFaults;
5037 
5038 	MutexLocker locker(sAvailableMemoryLock);
5039 	info->free_memory = sAvailableMemory;
5040 	info->needed_memory = sNeededMemory;
5041 }
5042 
5043 
5044 uint32
5045 vm_num_page_faults(void)
5046 {
5047 	return sPageFaults;
5048 }
5049 
5050 
5051 off_t
5052 vm_available_memory(void)
5053 {
5054 	MutexLocker locker(sAvailableMemoryLock);
5055 	return sAvailableMemory;
5056 }
5057 
5058 
5059 off_t
5060 vm_available_not_needed_memory(void)
5061 {
5062 	MutexLocker locker(sAvailableMemoryLock);
5063 	return sAvailableMemory - sNeededMemory;
5064 }
5065 
5066 
5067 void
5068 vm_unreserve_memory(size_t amount)
5069 {
5070 	mutex_lock(&sAvailableMemoryLock);
5071 
5072 	sAvailableMemory += amount;
5073 
5074 	mutex_unlock(&sAvailableMemoryLock);
5075 }
5076 
5077 
5078 status_t
5079 vm_try_reserve_memory(size_t amount, bigtime_t timeout)
5080 {
5081 	MutexLocker locker(sAvailableMemoryLock);
5082 
5083 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5084 
5085 	if (sAvailableMemory >= amount) {
5086 		sAvailableMemory -= amount;
5087 		return B_OK;
5088 	}
5089 
5090 	if (timeout <= 0)
5091 		return B_NO_MEMORY;
5092 
5093 	// turn timeout into an absolute timeout
5094 	timeout += system_time();
5095 
5096 	// loop until we've got the memory or the timeout occurs
5097 	do {
5098 		sNeededMemory += amount;
5099 
5100 		// call the low resource manager
5101 		locker.Unlock();
5102 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5103 			B_ABSOLUTE_TIMEOUT, timeout);
5104 		locker.Lock();
5105 
5106 		sNeededMemory -= amount;
5107 
5108 		if (sAvailableMemory >= amount) {
5109 			sAvailableMemory -= amount;
5110 			return B_OK;
5111 		}
5112 	} while (timeout > system_time());
5113 
5114 	return B_NO_MEMORY;
5115 }
5116 
5117 
5118 status_t
5119 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
5120 {
5121 	AddressSpaceReadLocker locker;
5122 	vm_area *area;
5123 	status_t status = locker.SetFromArea(id, area);
5124 	if (status != B_OK)
5125 		return status;
5126 
5127 	return arch_vm_set_memory_type(area, physicalBase, type);
5128 }
5129 
5130 
5131 /**	This function enforces some protection properties:
5132  *	 - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well
5133  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5134  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5135  *	   and B_KERNEL_WRITE_AREA.
5136  */
5137 
5138 static void
5139 fix_protection(uint32 *protection)
5140 {
5141 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5142 		if ((*protection & B_USER_PROTECTION) == 0
5143 			|| (*protection & B_WRITE_AREA) != 0)
5144 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5145 		else
5146 			*protection |= B_KERNEL_READ_AREA;
5147 	}
5148 }
5149 
5150 
5151 static void
5152 fill_area_info(struct vm_area *area, area_info *info, size_t size)
5153 {
5154 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5155 	info->area = area->id;
5156 	info->address = (void *)area->base;
5157 	info->size = area->size;
5158 	info->protection = area->protection;
5159 	info->lock = B_FULL_LOCK;
5160 	info->team = area->address_space->id;
5161 	info->copy_count = 0;
5162 	info->in_count = 0;
5163 	info->out_count = 0;
5164 		// ToDo: retrieve real values here!
5165 
5166 	vm_cache *cache = vm_area_get_locked_cache(area);
5167 
5168 	// Note, this is a simplification; the cache could be larger than this area
5169 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5170 
5171 	vm_area_put_locked_cache(cache);
5172 }
5173 
5174 
5175 /*!
5176 	Tests whether or not the area that contains the specified address
5177 	needs any kind of locking, and actually exists.
5178 	Used by both lock_memory() and unlock_memory().
5179 */
5180 static status_t
5181 test_lock_memory(vm_address_space *addressSpace, addr_t address,
5182 	bool &needsLocking)
5183 {
5184 	rw_lock_read_lock(&addressSpace->lock);
5185 
5186 	vm_area *area = vm_area_lookup(addressSpace, address);
5187 	if (area != NULL) {
5188 		// This determines if we need to lock the memory at all
5189 		needsLocking = area->cache_type != CACHE_TYPE_NULL
5190 			&& area->cache_type != CACHE_TYPE_DEVICE
5191 			&& area->wiring != B_FULL_LOCK
5192 			&& area->wiring != B_CONTIGUOUS;
5193 	}
5194 
5195 	rw_lock_read_unlock(&addressSpace->lock);
5196 
5197 	if (area == NULL)
5198 		return B_BAD_ADDRESS;
5199 
5200 	return B_OK;
5201 }
5202 
5203 
5204 static status_t
5205 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5206 {
5207 	// is newSize a multiple of B_PAGE_SIZE?
5208 	if (newSize & (B_PAGE_SIZE - 1))
5209 		return B_BAD_VALUE;
5210 
5211 	// lock all affected address spaces and the cache
5212 	vm_area* area;
5213 	vm_cache* cache;
5214 
5215 	MultiAddressSpaceLocker locker;
5216 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
5217 		&cache);
5218 	if (status != B_OK)
5219 		return status;
5220 	AreaCacheLocker cacheLocker(cache);	// already locked
5221 
5222 	// enforce restrictions
5223 	if (!kernel) {
5224 		if ((area->protection & B_KERNEL_AREA) != 0)
5225 			return B_NOT_ALLOWED;
5226 		// TODO: Enforce all restrictions (team, etc.)!
5227 	}
5228 
5229 	size_t oldSize = area->size;
5230 	if (newSize == oldSize)
5231 		return B_OK;
5232 
5233 	// Resize all areas of this area's cache
5234 
5235 	if (cache->type != CACHE_TYPE_RAM)
5236 		return B_NOT_ALLOWED;
5237 
5238 	if (oldSize < newSize) {
5239 		// We need to check if all areas of this cache can be resized
5240 
5241 		for (vm_area* current = cache->areas; current != NULL;
5242 				current = current->cache_next) {
5243 			vm_area *next = current->address_space_next;
5244 			if (next != NULL && next->base <= (current->base + newSize)) {
5245 				// If the area was created inside a reserved area, it can
5246 				// also be resized in that area
5247 				// ToDo: if there is free space after the reserved area, it could be used as well...
5248 				if (next->id == RESERVED_AREA_ID
5249 					&& next->cache_offset <= current->base
5250 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
5251 					continue;
5252 
5253 				return B_ERROR;
5254 			}
5255 		}
5256 	}
5257 
5258 	// Okay, looks good so far, so let's do it
5259 
5260 	if (oldSize < newSize) {
5261 		// Growing the cache can fail, so we do it first.
5262 		status = cache->Resize(cache->virtual_base + newSize);
5263 		if (status != B_OK)
5264 			return status;
5265 	}
5266 
5267 	for (vm_area* current = cache->areas; current != NULL;
5268 			current = current->cache_next) {
5269 		vm_area *next = current->address_space_next;
5270 		if (next != NULL && next->base <= (current->base + newSize)) {
5271 			if (next->id == RESERVED_AREA_ID
5272 				&& next->cache_offset <= current->base
5273 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
5274 				// resize reserved area
5275 				addr_t offset = current->base + newSize - next->base;
5276 				if (next->size <= offset) {
5277 					current->address_space_next = next->address_space_next;
5278 					free(next);
5279 				} else {
5280 					next->size -= offset;
5281 					next->base += offset;
5282 				}
5283 			} else {
5284 				panic("resize situation for area %p has changed although we "
5285 					"should have the address space lock", current);
5286 				status = B_ERROR;
5287 				break;
5288 			}
5289 		}
5290 
5291 		current->size = newSize;
5292 
5293 		// we also need to unmap all pages beyond the new size, if the area has shrinked
5294 		if (newSize < oldSize) {
5295 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
5296 				false);
5297 		}
5298 	}
5299 
5300 	// shrinking the cache can't fail, so we do it now
5301 	if (status == B_OK && newSize < oldSize)
5302 		status = cache->Resize(cache->virtual_base + newSize);
5303 
5304 	if (status < B_OK) {
5305 		// This shouldn't really be possible, but hey, who knows
5306 		for (vm_area* current = cache->areas; current != NULL;
5307 				current = current->cache_next) {
5308 			current->size = oldSize;
5309 		}
5310 
5311 		cache->Resize(cache->virtual_base + oldSize);
5312 	}
5313 
5314 	// TODO: we must honour the lock restrictions of this area
5315 	return status;
5316 }
5317 
5318 
5319 status_t
5320 vm_memset_physical(addr_t address, int value, size_t length)
5321 {
5322 	return vm_kernel_address_space()->translation_map.ops->memset_physical(
5323 		address, value, length);
5324 }
5325 
5326 
5327 status_t
5328 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
5329 {
5330 	return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical(
5331 		to, from, length, user);
5332 }
5333 
5334 
5335 status_t
5336 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
5337 {
5338 	return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical(
5339 		to, _from, length, user);
5340 }
5341 
5342 
5343 void
5344 vm_memcpy_physical_page(addr_t to, addr_t from)
5345 {
5346 	return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page(
5347 		to, from);
5348 }
5349 
5350 
5351 //	#pragma mark - kernel public API
5352 
5353 
5354 status_t
5355 user_memcpy(void *to, const void *from, size_t size)
5356 {
5357 	if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK)
5358 		return B_BAD_ADDRESS;
5359 	return B_OK;
5360 }
5361 
5362 
5363 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
5364  *	the string in \a to, NULL-terminating the result.
5365  *
5366  *	\param to Pointer to the destination C-string.
5367  *	\param from Pointer to the source C-string.
5368  *	\param size Size in bytes of the string buffer pointed to by \a to.
5369  *
5370  *	\return strlen(\a from).
5371  */
5372 
5373 ssize_t
5374 user_strlcpy(char *to, const char *from, size_t size)
5375 {
5376 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
5377 }
5378 
5379 
5380 status_t
5381 user_memset(void *s, char c, size_t count)
5382 {
5383 	if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK)
5384 		return B_BAD_ADDRESS;
5385 	return B_OK;
5386 }
5387 
5388 
5389 status_t
5390 lock_memory_etc(team_id team, void *address, size_t numBytes, uint32 flags)
5391 {
5392 	vm_address_space *addressSpace = NULL;
5393 	struct vm_translation_map *map;
5394 	addr_t unalignedBase = (addr_t)address;
5395 	addr_t end = unalignedBase + numBytes;
5396 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
5397 	bool isUser = IS_USER_ADDRESS(address);
5398 	bool needsLocking = true;
5399 
5400 	if (isUser) {
5401 		if (team == B_CURRENT_TEAM)
5402 			addressSpace = vm_get_current_user_address_space();
5403 		else
5404 			addressSpace = vm_get_address_space(team);
5405 	} else
5406 		addressSpace = vm_get_kernel_address_space();
5407 	if (addressSpace == NULL)
5408 		return B_ERROR;
5409 
5410 	// test if we're on an area that allows faults at all
5411 
5412 	map = &addressSpace->translation_map;
5413 
5414 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5415 	if (status < B_OK)
5416 		goto out;
5417 	if (!needsLocking)
5418 		goto out;
5419 
5420 	for (; base < end; base += B_PAGE_SIZE) {
5421 		addr_t physicalAddress;
5422 		uint32 protection;
5423 		status_t status;
5424 
5425 		map->ops->lock(map);
5426 		status = map->ops->query(map, base, &physicalAddress, &protection);
5427 		map->ops->unlock(map);
5428 
5429 		if (status < B_OK)
5430 			goto out;
5431 
5432 		if ((protection & PAGE_PRESENT) != 0) {
5433 			// if B_READ_DEVICE is set, the caller intents to write to the locked
5434 			// memory, so if it hasn't been mapped writable, we'll try the soft
5435 			// fault anyway
5436 			if ((flags & B_READ_DEVICE) == 0
5437 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
5438 				// update wiring
5439 				vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5440 				if (page == NULL)
5441 					panic("couldn't lookup physical page just allocated\n");
5442 
5443 				increment_page_wired_count(page);
5444 				continue;
5445 			}
5446 		}
5447 
5448 		status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
5449 			isUser);
5450 		if (status != B_OK)	{
5451 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
5452 				(void *)unalignedBase, numBytes, flags, strerror(status));
5453 			goto out;
5454 		}
5455 
5456 		// TODO: Here's a race condition. We should probably add a parameter
5457 		// to vm_soft_fault() that would cause the page's wired count to be
5458 		// incremented immediately.
5459 		// TODO: After memory has been locked in an area, we need to prevent the
5460 		// area from being deleted, resized, cut, etc. That could be done using
5461 		// a "locked pages" count in vm_area, and maybe a condition variable, if
5462 		// we want to allow waiting for the area to become eligible for these
5463 		// operations again.
5464 
5465 		map->ops->lock(map);
5466 		status = map->ops->query(map, base, &physicalAddress, &protection);
5467 		map->ops->unlock(map);
5468 
5469 		if (status < B_OK)
5470 			goto out;
5471 
5472 		// update wiring
5473 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5474 		if (page == NULL)
5475 			panic("couldn't lookup physical page");
5476 
5477 		increment_page_wired_count(page);
5478 			// TODO: needs to be atomic on all platforms!
5479 	}
5480 
5481 out:
5482 	vm_put_address_space(addressSpace);
5483 	return status;
5484 }
5485 
5486 
5487 status_t
5488 lock_memory(void *address, size_t numBytes, uint32 flags)
5489 {
5490 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5491 }
5492 
5493 
5494 status_t
5495 unlock_memory_etc(team_id team, void *address, size_t numBytes, uint32 flags)
5496 {
5497 	vm_address_space *addressSpace = NULL;
5498 	struct vm_translation_map *map;
5499 	addr_t unalignedBase = (addr_t)address;
5500 	addr_t end = unalignedBase + numBytes;
5501 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
5502 	bool needsLocking = true;
5503 
5504 	if (IS_USER_ADDRESS(address)) {
5505 		if (team == B_CURRENT_TEAM)
5506 			addressSpace = vm_get_current_user_address_space();
5507 		else
5508 			addressSpace = vm_get_address_space(team);
5509 	} else
5510 		addressSpace = vm_get_kernel_address_space();
5511 	if (addressSpace == NULL)
5512 		return B_ERROR;
5513 
5514 	map = &addressSpace->translation_map;
5515 
5516 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5517 	if (status < B_OK)
5518 		goto out;
5519 	if (!needsLocking)
5520 		goto out;
5521 
5522 	for (; base < end; base += B_PAGE_SIZE) {
5523 		map->ops->lock(map);
5524 
5525 		addr_t physicalAddress;
5526 		uint32 protection;
5527 		status = map->ops->query(map, base, &physicalAddress,
5528 			&protection);
5529 
5530 		map->ops->unlock(map);
5531 
5532 		if (status < B_OK)
5533 			goto out;
5534 		if ((protection & PAGE_PRESENT) == 0)
5535 			panic("calling unlock_memory() on unmapped memory!");
5536 
5537 		// update wiring
5538 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5539 		if (page == NULL)
5540 			panic("couldn't lookup physical page");
5541 
5542 		decrement_page_wired_count(page);
5543 	}
5544 
5545 out:
5546 	vm_put_address_space(addressSpace);
5547 	return status;
5548 }
5549 
5550 
5551 status_t
5552 unlock_memory(void *address, size_t numBytes, uint32 flags)
5553 {
5554 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5555 }
5556 
5557 
5558 /*!	Similar to get_memory_map(), but also allows to specify the address space
5559 	for the memory in question and has a saner semantics.
5560 	Returns \c B_OK when the complete range could be translated or
5561 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5562 	case the actual number of entries is written to \c *_numEntries. Any other
5563 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5564 	in this case.
5565 */
5566 status_t
5567 get_memory_map_etc(team_id team, const void *address, size_t numBytes,
5568 	physical_entry *table, uint32* _numEntries)
5569 {
5570 	uint32 numEntries = *_numEntries;
5571 	*_numEntries = 0;
5572 
5573 	vm_address_space *addressSpace;
5574 	addr_t virtualAddress = (addr_t)address;
5575 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5576 	addr_t physicalAddress;
5577 	status_t status = B_OK;
5578 	int32 index = -1;
5579 	addr_t offset = 0;
5580 	bool interrupts = are_interrupts_enabled();
5581 
5582 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5583 		address, numBytes, numEntries));
5584 
5585 	if (numEntries == 0 || numBytes == 0)
5586 		return B_BAD_VALUE;
5587 
5588 	// in which address space is the address to be found?
5589 	if (IS_USER_ADDRESS(virtualAddress)) {
5590 		if (team == B_CURRENT_TEAM)
5591 			addressSpace = vm_get_current_user_address_space();
5592 		else
5593 			addressSpace = vm_get_address_space(team);
5594 	} else
5595 		addressSpace = vm_get_kernel_address_space();
5596 
5597 	if (addressSpace == NULL)
5598 		return B_ERROR;
5599 
5600 	vm_translation_map *map = &addressSpace->translation_map;
5601 
5602 	if (interrupts)
5603 		map->ops->lock(map);
5604 
5605 	while (offset < numBytes) {
5606 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5607 		uint32 flags;
5608 
5609 		if (interrupts) {
5610 			status = map->ops->query(map, (addr_t)address + offset,
5611 				&physicalAddress, &flags);
5612 		} else {
5613 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
5614 				&physicalAddress, &flags);
5615 		}
5616 		if (status < B_OK)
5617 			break;
5618 		if ((flags & PAGE_PRESENT) == 0) {
5619 			panic("get_memory_map() called on unmapped memory!");
5620 			return B_BAD_ADDRESS;
5621 		}
5622 
5623 		if (index < 0 && pageOffset > 0) {
5624 			physicalAddress += pageOffset;
5625 			if (bytes > B_PAGE_SIZE - pageOffset)
5626 				bytes = B_PAGE_SIZE - pageOffset;
5627 		}
5628 
5629 		// need to switch to the next physical_entry?
5630 		if (index < 0 || (addr_t)table[index].address
5631 				!= physicalAddress - table[index].size) {
5632 			if ((uint32)++index + 1 > numEntries) {
5633 				// table to small
5634 				status = B_BUFFER_OVERFLOW;
5635 				break;
5636 			}
5637 			table[index].address = (void *)physicalAddress;
5638 			table[index].size = bytes;
5639 		} else {
5640 			// page does fit in current entry
5641 			table[index].size += bytes;
5642 		}
5643 
5644 		offset += bytes;
5645 	}
5646 
5647 	if (interrupts)
5648 		map->ops->unlock(map);
5649 
5650 	if (status != B_OK)
5651 		return status;
5652 
5653 	if ((uint32)index + 1 > numEntries) {
5654 		*_numEntries = index;
5655 		return B_BUFFER_OVERFLOW;
5656 	}
5657 
5658 	*_numEntries = index + 1;
5659 	return B_OK;
5660 }
5661 
5662 
5663 /*!	According to the BeBook, this function should always succeed.
5664 	This is no longer the case.
5665 */
5666 long
5667 get_memory_map(const void *address, ulong numBytes, physical_entry *table,
5668 	long numEntries)
5669 {
5670 	uint32 entriesRead = numEntries;
5671 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5672 		table, &entriesRead);
5673 	if (error != B_OK)
5674 		return error;
5675 
5676 	// close the entry list
5677 
5678 	// if it's only one entry, we will silently accept the missing ending
5679 	if (numEntries == 1)
5680 		return B_OK;
5681 
5682 	if (entriesRead + 1 > (uint32)numEntries)
5683 		return B_BUFFER_OVERFLOW;
5684 
5685 	table[entriesRead].address = NULL;
5686 	table[entriesRead].size = 0;
5687 
5688 	return B_OK;
5689 }
5690 
5691 
5692 area_id
5693 area_for(void *address)
5694 {
5695 	team_id space;
5696 
5697 	if (IS_USER_ADDRESS(address)) {
5698 		// we try the user team address space, if any
5699 		space = vm_current_user_address_space_id();
5700 		if (space < B_OK)
5701 			return space;
5702 	} else
5703 		space = vm_kernel_address_space_id();
5704 
5705 	return vm_area_for(space, (addr_t)address);
5706 }
5707 
5708 
5709 area_id
5710 find_area(const char *name)
5711 {
5712 	rw_lock_read_lock(&sAreaHashLock);
5713 	struct hash_iterator iterator;
5714 	hash_open(sAreaHash, &iterator);
5715 
5716 	vm_area *area;
5717 	area_id id = B_NAME_NOT_FOUND;
5718 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
5719 		if (area->id == RESERVED_AREA_ID)
5720 			continue;
5721 
5722 		if (!strcmp(area->name, name)) {
5723 			id = area->id;
5724 			break;
5725 		}
5726 	}
5727 
5728 	hash_close(sAreaHash, &iterator, false);
5729 	rw_lock_read_unlock(&sAreaHashLock);
5730 
5731 	return id;
5732 }
5733 
5734 
5735 status_t
5736 _get_area_info(area_id id, area_info *info, size_t size)
5737 {
5738 	if (size != sizeof(area_info) || info == NULL)
5739 		return B_BAD_VALUE;
5740 
5741 	AddressSpaceReadLocker locker;
5742 	vm_area *area;
5743 	status_t status = locker.SetFromArea(id, area);
5744 	if (status != B_OK)
5745 		return status;
5746 
5747 	fill_area_info(area, info, size);
5748 	return B_OK;
5749 }
5750 
5751 
5752 status_t
5753 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
5754 {
5755 	addr_t nextBase = *(addr_t *)cookie;
5756 
5757 	// we're already through the list
5758 	if (nextBase == (addr_t)-1)
5759 		return B_ENTRY_NOT_FOUND;
5760 
5761 	if (team == B_CURRENT_TEAM)
5762 		team = team_get_current_team_id();
5763 
5764 	AddressSpaceReadLocker locker(team);
5765 	if (!locker.IsLocked())
5766 		return B_BAD_TEAM_ID;
5767 
5768 	vm_area *area;
5769 	for (area = locker.AddressSpace()->areas; area != NULL;
5770 			area = area->address_space_next) {
5771 		if (area->id == RESERVED_AREA_ID)
5772 			continue;
5773 
5774 		if (area->base > nextBase)
5775 			break;
5776 	}
5777 
5778 	if (area == NULL) {
5779 		nextBase = (addr_t)-1;
5780 		return B_ENTRY_NOT_FOUND;
5781 	}
5782 
5783 	fill_area_info(area, info, size);
5784 	*cookie = (int32)(area->base);
5785 
5786 	return B_OK;
5787 }
5788 
5789 
5790 status_t
5791 set_area_protection(area_id area, uint32 newProtection)
5792 {
5793 	fix_protection(&newProtection);
5794 
5795 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
5796 		newProtection, true);
5797 }
5798 
5799 
5800 status_t
5801 resize_area(area_id areaID, size_t newSize)
5802 {
5803 	return vm_resize_area(areaID, newSize, true);
5804 }
5805 
5806 
5807 /**	Transfers the specified area to a new team. The caller must be the owner
5808  *	of the area (not yet enforced but probably should be).
5809  *	This function is currently not exported to the kernel namespace, but is
5810  *	only accessible using the _kern_transfer_area() syscall.
5811  */
5812 
5813 static area_id
5814 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target,
5815 	bool kernel)
5816 {
5817 	area_info info;
5818 	status_t status = get_area_info(id, &info);
5819 	if (status < B_OK)
5820 		return status;
5821 
5822 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5823 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5824 	if (clonedArea < B_OK)
5825 		return clonedArea;
5826 
5827 	status = vm_delete_area(info.team, id, kernel);
5828 	if (status < B_OK) {
5829 		vm_delete_area(target, clonedArea, kernel);
5830 		return status;
5831 	}
5832 
5833 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5834 
5835 	return clonedArea;
5836 }
5837 
5838 
5839 area_id
5840 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
5841 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
5842 {
5843 	if (!arch_vm_supports_protection(protection))
5844 		return B_NOT_SUPPORTED;
5845 
5846 	fix_protection(&protection);
5847 
5848 	return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress,
5849 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
5850 }
5851 
5852 
5853 area_id
5854 clone_area(const char *name, void **_address, uint32 addressSpec,
5855 	uint32 protection, area_id source)
5856 {
5857 	if ((protection & B_KERNEL_PROTECTION) == 0)
5858 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5859 
5860 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
5861 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5862 }
5863 
5864 
5865 area_id
5866 create_area_etc(team_id team, const char *name, void **address,
5867 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
5868 	uint32 flags)
5869 {
5870 	fix_protection(&protection);
5871 
5872 	return vm_create_anonymous_area(team, (char *)name, address, addressSpec,
5873 		size, lock, protection, flags, true);
5874 }
5875 
5876 
5877 area_id
5878 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
5879 	uint32 protection)
5880 {
5881 	fix_protection(&protection);
5882 
5883 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address,
5884 		addressSpec, size, lock, protection, 0, true);
5885 }
5886 
5887 
5888 status_t
5889 delete_area(area_id area)
5890 {
5891 	return vm_delete_area(vm_kernel_address_space_id(), area, true);
5892 }
5893 
5894 
5895 //	#pragma mark - Userland syscalls
5896 
5897 
5898 status_t
5899 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size)
5900 {
5901 	// filter out some unavailable values (for userland)
5902 	switch (addressSpec) {
5903 		case B_ANY_KERNEL_ADDRESS:
5904 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5905 			return B_BAD_VALUE;
5906 	}
5907 
5908 	addr_t address;
5909 
5910 	if (!IS_USER_ADDRESS(userAddress)
5911 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5912 		return B_BAD_ADDRESS;
5913 
5914 	status_t status = vm_reserve_address_range(vm_current_user_address_space_id(),
5915 		(void **)&address, addressSpec, size, RESERVED_AVOID_BASE);
5916 	if (status < B_OK)
5917 		return status;
5918 
5919 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5920 		vm_unreserve_address_range(vm_current_user_address_space_id(),
5921 			(void *)address, size);
5922 		return B_BAD_ADDRESS;
5923 	}
5924 
5925 	return B_OK;
5926 }
5927 
5928 
5929 area_id
5930 _user_area_for(void *address)
5931 {
5932 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
5933 }
5934 
5935 
5936 area_id
5937 _user_find_area(const char *userName)
5938 {
5939 	char name[B_OS_NAME_LENGTH];
5940 
5941 	if (!IS_USER_ADDRESS(userName)
5942 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5943 		return B_BAD_ADDRESS;
5944 
5945 	return find_area(name);
5946 }
5947 
5948 
5949 status_t
5950 _user_get_area_info(area_id area, area_info *userInfo)
5951 {
5952 	if (!IS_USER_ADDRESS(userInfo))
5953 		return B_BAD_ADDRESS;
5954 
5955 	area_info info;
5956 	status_t status = get_area_info(area, &info);
5957 	if (status < B_OK)
5958 		return status;
5959 
5960 	// TODO: do we want to prevent userland from seeing kernel protections?
5961 	//info.protection &= B_USER_PROTECTION;
5962 
5963 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5964 		return B_BAD_ADDRESS;
5965 
5966 	return status;
5967 }
5968 
5969 
5970 status_t
5971 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
5972 {
5973 	int32 cookie;
5974 
5975 	if (!IS_USER_ADDRESS(userCookie)
5976 		|| !IS_USER_ADDRESS(userInfo)
5977 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5978 		return B_BAD_ADDRESS;
5979 
5980 	area_info info;
5981 	status_t status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
5982 	if (status != B_OK)
5983 		return status;
5984 
5985 	//info.protection &= B_USER_PROTECTION;
5986 
5987 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5988 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5989 		return B_BAD_ADDRESS;
5990 
5991 	return status;
5992 }
5993 
5994 
5995 status_t
5996 _user_set_area_protection(area_id area, uint32 newProtection)
5997 {
5998 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5999 		return B_BAD_VALUE;
6000 
6001 	fix_protection(&newProtection);
6002 
6003 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
6004 		newProtection, false);
6005 }
6006 
6007 
6008 status_t
6009 _user_resize_area(area_id area, size_t newSize)
6010 {
6011 	// ToDo: Since we restrict deleting of areas to those owned by the team,
6012 	// we should also do that for resizing (check other functions, too).
6013 	return vm_resize_area(area, newSize, false);
6014 }
6015 
6016 
6017 area_id
6018 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
6019 {
6020 	// filter out some unavailable values (for userland)
6021 	switch (addressSpec) {
6022 		case B_ANY_KERNEL_ADDRESS:
6023 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6024 			return B_BAD_VALUE;
6025 	}
6026 
6027 	void *address;
6028 	if (!IS_USER_ADDRESS(userAddress)
6029 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6030 		return B_BAD_ADDRESS;
6031 
6032 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6033 	if (newArea < B_OK)
6034 		return newArea;
6035 
6036 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6037 		return B_BAD_ADDRESS;
6038 
6039 	return newArea;
6040 }
6041 
6042 
6043 area_id
6044 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
6045 	uint32 protection, area_id sourceArea)
6046 {
6047 	char name[B_OS_NAME_LENGTH];
6048 	void *address;
6049 
6050 	// filter out some unavailable values (for userland)
6051 	switch (addressSpec) {
6052 		case B_ANY_KERNEL_ADDRESS:
6053 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6054 			return B_BAD_VALUE;
6055 	}
6056 	if ((protection & ~B_USER_PROTECTION) != 0)
6057 		return B_BAD_VALUE;
6058 
6059 	if (!IS_USER_ADDRESS(userName)
6060 		|| !IS_USER_ADDRESS(userAddress)
6061 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6062 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6063 		return B_BAD_ADDRESS;
6064 
6065 	fix_protection(&protection);
6066 
6067 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address,
6068 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, false);
6069 	if (clonedArea < B_OK)
6070 		return clonedArea;
6071 
6072 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6073 		delete_area(clonedArea);
6074 		return B_BAD_ADDRESS;
6075 	}
6076 
6077 	return clonedArea;
6078 }
6079 
6080 
6081 area_id
6082 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
6083 	size_t size, uint32 lock, uint32 protection)
6084 {
6085 	char name[B_OS_NAME_LENGTH];
6086 	void *address;
6087 
6088 	// filter out some unavailable values (for userland)
6089 	switch (addressSpec) {
6090 		case B_ANY_KERNEL_ADDRESS:
6091 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6092 			return B_BAD_VALUE;
6093 	}
6094 	if ((protection & ~B_USER_PROTECTION) != 0)
6095 		return B_BAD_VALUE;
6096 
6097 	if (!IS_USER_ADDRESS(userName)
6098 		|| !IS_USER_ADDRESS(userAddress)
6099 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6100 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6101 		return B_BAD_ADDRESS;
6102 
6103 	if (addressSpec == B_EXACT_ADDRESS
6104 		&& IS_KERNEL_ADDRESS(address))
6105 		return B_BAD_VALUE;
6106 
6107 	fix_protection(&protection);
6108 
6109 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
6110 		(char *)name, &address, addressSpec, size, lock, protection, 0, false);
6111 
6112 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6113 		delete_area(area);
6114 		return B_BAD_ADDRESS;
6115 	}
6116 
6117 	return area;
6118 }
6119 
6120 
6121 status_t
6122 _user_delete_area(area_id area)
6123 {
6124 	// Unlike the BeOS implementation, you can now only delete areas
6125 	// that you have created yourself from userland.
6126 	// The documentation to delete_area() explicetly states that this
6127 	// will be restricted in the future, and so it will.
6128 	return vm_delete_area(vm_current_user_address_space_id(), area, false);
6129 }
6130 
6131 
6132 // ToDo: create a BeOS style call for this!
6133 
6134 area_id
6135 _user_map_file(const char *userName, void **userAddress, int addressSpec,
6136 	size_t size, int protection, int mapping, int fd, off_t offset)
6137 {
6138 	char name[B_OS_NAME_LENGTH];
6139 	void *address;
6140 	area_id area;
6141 
6142 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6143 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6144 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6145 		return B_BAD_ADDRESS;
6146 
6147 	if (addressSpec == B_EXACT_ADDRESS) {
6148 		if ((addr_t)address + size < (addr_t)address)
6149 			return B_BAD_VALUE;
6150 		if (!IS_USER_ADDRESS(address)
6151 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6152 			return B_BAD_ADDRESS;
6153 		}
6154 	}
6155 
6156 	// userland created areas can always be accessed by the kernel
6157 	protection |= B_KERNEL_READ_AREA
6158 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6159 
6160 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
6161 		addressSpec, size, protection, mapping, fd, offset, false);
6162 	if (area < B_OK)
6163 		return area;
6164 
6165 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6166 		return B_BAD_ADDRESS;
6167 
6168 	return area;
6169 }
6170 
6171 
6172 status_t
6173 _user_unmap_memory(void *_address, size_t size)
6174 {
6175 	addr_t address = (addr_t)_address;
6176 
6177 	// check params
6178 	if (size == 0 || (addr_t)address + size < (addr_t)address)
6179 		return B_BAD_VALUE;
6180 
6181 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6182 		return B_BAD_ADDRESS;
6183 
6184 	// write lock the address space
6185 	AddressSpaceWriteLocker locker;
6186 	status_t status = locker.SetTo(team_get_current_team_id());
6187 	if (status != B_OK)
6188 		return status;
6189 
6190 	// unmap
6191 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6192 }
6193 
6194 
6195 status_t
6196 _user_set_memory_protection(void* _address, size_t size, int protection)
6197 {
6198 	// check address range
6199 	addr_t address = (addr_t)_address;
6200 	size = PAGE_ALIGN(size);
6201 
6202 	if ((address % B_PAGE_SIZE) != 0)
6203 		return B_BAD_VALUE;
6204 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6205 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6206 		// weird error code required by POSIX
6207 		return ENOMEM;
6208 	}
6209 
6210 	// extend and check protection
6211 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
6212 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
6213 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6214 
6215 	if (!arch_vm_supports_protection(actualProtection))
6216 		return B_NOT_SUPPORTED;
6217 
6218 	// We need to write lock the address space, since we're going to play with
6219 	// the areas.
6220 	AddressSpaceWriteLocker locker;
6221 	status_t status = locker.SetTo(team_get_current_team_id());
6222 	if (status != B_OK)
6223 		return status;
6224 
6225 	// First round: Check whether the whole range is covered by areas and we are
6226 	// allowed to modify them.
6227 	addr_t currentAddress = address;
6228 	size_t sizeLeft = size;
6229 	while (sizeLeft > 0) {
6230 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6231 		if (area == NULL)
6232 			return B_NO_MEMORY;
6233 
6234 		if ((area->protection & B_KERNEL_AREA) != 0)
6235 			return B_NOT_ALLOWED;
6236 
6237 		// TODO: For (shared) mapped files we should check whether the new
6238 		// protections are compatible with the file permissions. We don't have
6239 		// a way to do that yet, though.
6240 
6241 		addr_t offset = currentAddress - area->base;
6242 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6243 
6244 		currentAddress += rangeSize;
6245 		sizeLeft -= rangeSize;
6246 	}
6247 
6248 	// Second round: If the protections differ from that of the area, create a
6249 	// page protection array and re-map mapped pages.
6250 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
6251 	currentAddress = address;
6252 	sizeLeft = size;
6253 	while (sizeLeft > 0) {
6254 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6255 		if (area == NULL)
6256 			return B_NO_MEMORY;
6257 
6258 		addr_t offset = currentAddress - area->base;
6259 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6260 
6261 		currentAddress += rangeSize;
6262 		sizeLeft -= rangeSize;
6263 
6264 		if (area->page_protections == NULL) {
6265 			if (area->protection == actualProtection)
6266 				continue;
6267 
6268 			// In the page protections we store only the three user protections,
6269 			// so we use 4 bits per page.
6270 			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
6271 			area->page_protections = (uint8*)malloc(bytes);
6272 			if (area->page_protections == NULL)
6273 				return B_NO_MEMORY;
6274 
6275 			// init the page protections for all pages to that of the area
6276 			uint32 areaProtection = area->protection
6277 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6278 			memset(area->page_protections,
6279 				areaProtection | (areaProtection << 4), bytes);
6280 		}
6281 
6282 		for (addr_t pageAddress = area->base + offset;
6283 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6284 			map->ops->lock(map);
6285 
6286 			set_area_page_protection(area, pageAddress, protection);
6287 
6288 			addr_t physicalAddress;
6289 			uint32 flags;
6290 
6291 			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
6292 				&flags);
6293 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6294 				map->ops->unlock(map);
6295 				continue;
6296 			}
6297 
6298 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6299 			if (page == NULL) {
6300 				panic("area %p looking up page failed for pa 0x%lx\n", area,
6301 					physicalAddress);
6302 				map->ops->unlock(map);
6303 				return B_ERROR;;
6304 			}
6305 
6306 			// If the page is not in the topmost cache and write access is
6307 			// requested, we have to unmap it. Otherwise we can re-map it with
6308 			// the new protection.
6309 			bool unmapPage = page->cache != area->cache
6310 				&& (protection & B_WRITE_AREA) != 0;
6311 
6312 			if (!unmapPage) {
6313 				map->ops->unmap(map, pageAddress,
6314 					pageAddress + B_PAGE_SIZE - 1);
6315 				map->ops->map(map, pageAddress, physicalAddress,
6316 					actualProtection);
6317 			}
6318 
6319 			map->ops->unlock(map);
6320 
6321 			if (unmapPage)
6322 				vm_unmap_pages(area, pageAddress, B_PAGE_SIZE, true);
6323 		}
6324 	}
6325 
6326 	return B_OK;
6327 }
6328 
6329 
6330 status_t
6331 _user_sync_memory(void *_address, size_t size, int flags)
6332 {
6333 	addr_t address = (addr_t)_address;
6334 	size = PAGE_ALIGN(size);
6335 
6336 	// check params
6337 	if ((address % B_PAGE_SIZE) != 0)
6338 		return B_BAD_VALUE;
6339 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6340 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6341 		// weird error code required by POSIX
6342 		return ENOMEM;
6343 	}
6344 
6345 	bool writeSync = (flags & MS_SYNC) != 0;
6346 	bool writeAsync = (flags & MS_ASYNC) != 0;
6347 	if (writeSync && writeAsync)
6348 		return B_BAD_VALUE;
6349 
6350 	if (size == 0 || !writeSync && !writeAsync)
6351 		return B_OK;
6352 
6353 	// iterate through the range and sync all concerned areas
6354 	while (size > 0) {
6355 		// read lock the address space
6356 		AddressSpaceReadLocker locker;
6357 		status_t error = locker.SetTo(team_get_current_team_id());
6358 		if (error != B_OK)
6359 			return error;
6360 
6361 		// get the first area
6362 		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
6363 		if (area == NULL)
6364 			return B_NO_MEMORY;
6365 
6366 		uint32 offset = address - area->base;
6367 		size_t rangeSize = min_c(area->size - offset, size);
6368 		offset += area->cache_offset;
6369 
6370 		// lock the cache
6371 		AreaCacheLocker cacheLocker(area);
6372 		if (!cacheLocker)
6373 			return B_BAD_VALUE;
6374 		vm_cache* cache = area->cache;
6375 
6376 		locker.Unlock();
6377 
6378 		uint32 firstPage = offset >> PAGE_SHIFT;
6379 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6380 
6381 		// write the pages
6382 		if (cache->type == CACHE_TYPE_VNODE) {
6383 			if (writeSync) {
6384 				// synchronous
6385 				error = vm_page_write_modified_page_range(cache, firstPage,
6386 					endPage);
6387 				if (error != B_OK)
6388 					return error;
6389 			} else {
6390 				// asynchronous
6391 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6392 				// TODO: This is probably not quite what is supposed to happen.
6393 				// Especially when a lot has to be written, it might take ages
6394 				// until it really hits the disk.
6395 			}
6396 		}
6397 
6398 		address += rangeSize;
6399 		size -= rangeSize;
6400 	}
6401 
6402 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6403 	// synchronize multiple mappings of the same file. In our VM they never get
6404 	// out of sync, though, so we don't have to do anything.
6405 
6406 	return B_OK;
6407 }
6408 
6409 
6410 status_t
6411 _user_memory_advice(void* address, size_t size, int advice)
6412 {
6413 	// TODO: Implement!
6414 	return B_OK;
6415 }
6416