xref: /haiku/src/system/kernel/vm/vm.cpp (revision ddac407426cd3b3d0b4589d7a161b300b3539a2a)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <OS.h>
20 #include <KernelExport.h>
21 
22 #include <AutoDeleter.h>
23 
24 #include <arch/cpu.h>
25 #include <arch/vm.h>
26 #include <boot/elf.h>
27 #include <boot/stage2.h>
28 #include <condition_variable.h>
29 #include <console.h>
30 #include <debug.h>
31 #include <file_cache.h>
32 #include <fs/fd.h>
33 #include <heap.h>
34 #include <int.h>
35 #include <lock.h>
36 #include <low_resource_manager.h>
37 #include <smp.h>
38 #include <system_info.h>
39 #include <thread.h>
40 #include <team.h>
41 #include <tracing.h>
42 #include <util/AutoLock.h>
43 #include <util/khash.h>
44 #include <vm_address_space.h>
45 #include <vm_cache.h>
46 #include <vm_page.h>
47 #include <vm_priv.h>
48 
49 #include "VMAnonymousCache.h"
50 #include "IORequest.h"
51 
52 
53 //#define TRACE_VM
54 //#define TRACE_FAULTS
55 #ifdef TRACE_VM
56 #	define TRACE(x) dprintf x
57 #else
58 #	define TRACE(x) ;
59 #endif
60 #ifdef TRACE_FAULTS
61 #	define FTRACE(x) dprintf x
62 #else
63 #	define FTRACE(x) ;
64 #endif
65 
66 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
67 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
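// Note: ROUNDUP relies on bit masking and therefore only produces correct
// results when "b" is a power of two (e.g. ROUNDUP(5, 4) == 8), whereas
// ROUNDOWN uses integer division and works for any non-zero "b"
// (e.g. ROUNDOWN(5, 4) == 4).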
68 
69 
70 class AddressSpaceReadLocker {
71 public:
72 	AddressSpaceReadLocker(team_id team);
73 	AddressSpaceReadLocker(vm_address_space* space, bool getNewReference);
74 	AddressSpaceReadLocker();
75 	~AddressSpaceReadLocker();
76 
77 	status_t SetTo(team_id team);
78 	void SetTo(vm_address_space* space, bool getNewReference);
79 	status_t SetFromArea(area_id areaID, vm_area*& area);
80 
81 	bool IsLocked() const { return fLocked; }
82 	bool Lock();
83 	void Unlock();
84 
85 	void Unset();
86 
87 	vm_address_space* AddressSpace() { return fSpace; }
88 
89 private:
90 	vm_address_space* fSpace;
91 	bool	fLocked;
92 };
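
// Illustrative usage sketch (hypothetical caller, not part of the file
// itself): the team_id constructor references the team's address space and
// read-locks it; the destructor releases both again.
//
//	AddressSpaceReadLocker locker(team);
//	if (!locker.IsLocked())
//		return B_BAD_TEAM_ID;
//	vm_address_space* space = locker.AddressSpace();
//	// ... inspect "space" and its areas under the read lock ...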
93 
94 class AddressSpaceWriteLocker {
95 public:
96 	AddressSpaceWriteLocker(team_id team);
97 	AddressSpaceWriteLocker();
98 	~AddressSpaceWriteLocker();
99 
100 	status_t SetTo(team_id team);
101 	status_t SetFromArea(area_id areaID, vm_area*& area);
102 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
103 		vm_area*& area);
104 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
105 
106 	bool IsLocked() const { return fLocked; }
107 	void Unlock();
108 
109 	void DegradeToReadLock();
110 	void Unset();
111 
112 	vm_address_space* AddressSpace() { return fSpace; }
113 
114 private:
115 	vm_address_space* fSpace;
116 	bool	fLocked;
117 	bool	fDegraded;
118 };
119 
120 class MultiAddressSpaceLocker {
121 public:
122 	MultiAddressSpaceLocker();
123 	~MultiAddressSpaceLocker();
124 
125 	inline status_t AddTeam(team_id team, bool writeLock,
126 		vm_address_space** _space = NULL);
127 	inline status_t AddArea(area_id area, bool writeLock,
128 		vm_address_space** _space = NULL);
129 
130 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
131 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL);
132 
133 	status_t Lock();
134 	void Unlock();
135 	bool IsLocked() const { return fLocked; }
136 
137 	void Unset();
138 
139 private:
140 	struct lock_item {
141 		vm_address_space*	space;
142 		bool				write_lock;
143 	};
144 
145 	bool _ResizeIfNeeded();
146 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
147 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
148 		vm_address_space** _space);
149 
150 	static int _CompareItems(const void* _a, const void* _b);
151 
152 	lock_item*	fItems;
153 	int32		fCapacity;
154 	int32		fCount;
155 	bool		fLocked;
156 };
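
// Note: Lock() sorts the collected address spaces by ID (see _CompareItems())
// before locking them, so every MultiAddressSpaceLocker acquires multiple
// address space locks in the same ascending order. That keeps concurrent
// lockers working on overlapping sets of address spaces from deadlocking.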
157 
158 
159 class AreaCacheLocking {
160 public:
161 	inline bool Lock(vm_cache* lockable)
162 	{
163 		return false;
164 	}
165 
166 	inline void Unlock(vm_cache* lockable)
167 	{
168 		vm_area_put_locked_cache(lockable);
169 	}
170 };
171 
172 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
173 public:
174 	inline AreaCacheLocker(vm_cache* cache = NULL)
175 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
176 	{
177 	}
178 
179 	inline AreaCacheLocker(vm_area* area)
180 		: AutoLocker<vm_cache, AreaCacheLocking>()
181 	{
182 		SetTo(area);
183 	}
184 
185 	inline void SetTo(vm_area* area)
186 	{
187 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
188 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
189 	}
190 };
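
// Note: AreaCacheLocking::Lock() just returns false, since the cache handed
// to an AreaCacheLocker has already been locked (and referenced) by
// vm_area_get_locked_cache(); the locker's only job is to call
// vm_area_put_locked_cache() when it goes out of scope.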
191 
192 
193 #define AREA_HASH_TABLE_SIZE 1024
194 static area_id sNextAreaID = 1;
195 static hash_table* sAreaHash;
196 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash");
197 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings");
198 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache");
199 
200 static off_t sAvailableMemory;
201 static off_t sNeededMemory;
202 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
203 static uint32 sPageFaults;
204 
205 #if DEBUG_CACHE_LIST
206 
207 struct cache_info {
208 	vm_cache*	cache;
209 	addr_t		page_count;
210 	addr_t		committed;
211 };
212 
213 static const int kCacheInfoTableCount = 100 * 1024;
214 static cache_info* sCacheInfoTable;
215 
216 #endif	// DEBUG_CACHE_LIST
217 
218 
219 // function declarations
220 static void delete_area(vm_address_space* addressSpace, vm_area* area);
221 static vm_address_space* get_address_space_by_area_id(area_id id);
222 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address,
223 	bool isWrite, bool isUser);
224 static status_t map_backing_store(vm_address_space* addressSpace,
225 	vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size,
226 	uint32 addressSpec, int wiring, int protection, int mapping,
227 	vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel);
228 
229 
230 //	#pragma mark -
231 
232 
233 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
234 	:
235 	fSpace(NULL),
236 	fLocked(false)
237 {
238 	SetTo(team);
239 }
240 
241 
242 /*! Takes over the reference of the address space, if \a getNewReference is
243 	\c false.
244 */
245 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space,
246 		bool getNewReference)
247 	:
248 	fSpace(NULL),
249 	fLocked(false)
250 {
251 	SetTo(space, getNewReference);
252 }
253 
254 
255 AddressSpaceReadLocker::AddressSpaceReadLocker()
256 	:
257 	fSpace(NULL),
258 	fLocked(false)
259 {
260 }
261 
262 
263 AddressSpaceReadLocker::~AddressSpaceReadLocker()
264 {
265 	Unset();
266 }
267 
268 
269 void
270 AddressSpaceReadLocker::Unset()
271 {
272 	Unlock();
273 	if (fSpace != NULL)
274 		vm_put_address_space(fSpace);
275 }
276 
277 
278 status_t
279 AddressSpaceReadLocker::SetTo(team_id team)
280 {
281 	fSpace = vm_get_address_space(team);
282 	if (fSpace == NULL)
283 		return B_BAD_TEAM_ID;
284 
285 	rw_lock_read_lock(&fSpace->lock);
286 	fLocked = true;
287 	return B_OK;
288 }
289 
290 
291 /*! Takes over the reference of the address space, if \a getNewReference is
292 	\c false.
293 */
294 void
295 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference)
296 {
297 	fSpace = space;
298 
299 	if (getNewReference)
300 		atomic_add(&fSpace->ref_count, 1);
301 
302 	rw_lock_read_lock(&fSpace->lock);
303 	fLocked = true;
304 }
305 
306 
307 status_t
308 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
309 {
310 	fSpace = get_address_space_by_area_id(areaID);
311 	if (fSpace == NULL)
312 		return B_BAD_TEAM_ID;
313 
314 	rw_lock_read_lock(&fSpace->lock);
315 
316 	rw_lock_read_lock(&sAreaHashLock);
317 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
318 	rw_lock_read_unlock(&sAreaHashLock);
319 
320 	if (area == NULL || area->address_space != fSpace) {
321 		rw_lock_read_unlock(&fSpace->lock);
322 		return B_BAD_VALUE;
323 	}
324 
325 	fLocked = true;
326 	return B_OK;
327 }
328 
329 
330 bool
331 AddressSpaceReadLocker::Lock()
332 {
333 	if (fLocked)
334 		return true;
335 	if (fSpace == NULL)
336 		return false;
337 
338 	rw_lock_read_lock(&fSpace->lock);
339 	fLocked = true;
340 
341 	return true;
342 }
343 
344 
345 void
346 AddressSpaceReadLocker::Unlock()
347 {
348 	if (fLocked) {
349 		rw_lock_read_unlock(&fSpace->lock);
350 		fLocked = false;
351 	}
352 }
353 
354 
355 //	#pragma mark -
356 
357 
358 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
359 	:
360 	fSpace(NULL),
361 	fLocked(false),
362 	fDegraded(false)
363 {
364 	SetTo(team);
365 }
366 
367 
368 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
369 	:
370 	fSpace(NULL),
371 	fLocked(false),
372 	fDegraded(false)
373 {
374 }
375 
376 
377 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
378 {
379 	Unset();
380 }
381 
382 
383 void
384 AddressSpaceWriteLocker::Unset()
385 {
386 	Unlock();
387 	if (fSpace != NULL)
388 		vm_put_address_space(fSpace);
389 }
390 
391 
392 status_t
393 AddressSpaceWriteLocker::SetTo(team_id team)
394 {
395 	fSpace = vm_get_address_space(team);
396 	if (fSpace == NULL)
397 		return B_BAD_TEAM_ID;
398 
399 	rw_lock_write_lock(&fSpace->lock);
400 	fLocked = true;
401 	return B_OK;
402 }
403 
404 
405 status_t
406 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
407 {
408 	fSpace = get_address_space_by_area_id(areaID);
409 	if (fSpace == NULL)
410 		return B_BAD_VALUE;
411 
412 	rw_lock_write_lock(&fSpace->lock);
413 
414 	rw_lock_read_lock(&sAreaHashLock);
415 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
416 	rw_lock_read_unlock(&sAreaHashLock);
417 
418 	if (area == NULL || area->address_space != fSpace) {
419 		rw_lock_write_unlock(&fSpace->lock);
420 		return B_BAD_VALUE;
421 	}
422 
423 	fLocked = true;
424 	return B_OK;
425 }
426 
427 
428 status_t
429 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
430 	bool allowKernel, vm_area*& area)
431 {
432 	rw_lock_read_lock(&sAreaHashLock);
433 
434 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
435 	if (area != NULL
436 		&& (area->address_space->id == team
437 			|| (allowKernel && team == vm_kernel_address_space_id()))) {
438 		fSpace = area->address_space;
439 		atomic_add(&fSpace->ref_count, 1);
440 	}
441 
442 	rw_lock_read_unlock(&sAreaHashLock);
443 
444 	if (fSpace == NULL)
445 		return B_BAD_VALUE;
446 
447 	// Second try to get the area -- this time with the address space
448 	// write lock held
449 
450 	rw_lock_write_lock(&fSpace->lock);
451 
452 	rw_lock_read_lock(&sAreaHashLock);
453 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
454 	rw_lock_read_unlock(&sAreaHashLock);
455 
456 	if (area == NULL) {
457 		rw_lock_write_unlock(&fSpace->lock);
458 		return B_BAD_VALUE;
459 	}
460 
461 	fLocked = true;
462 	return B_OK;
463 }
464 
465 
466 status_t
467 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
468 	vm_area*& area)
469 {
470 	return SetFromArea(team, areaID, false, area);
471 }
472 
473 
474 void
475 AddressSpaceWriteLocker::Unlock()
476 {
477 	if (fLocked) {
478 		if (fDegraded)
479 			rw_lock_read_unlock(&fSpace->lock);
480 		else
481 			rw_lock_write_unlock(&fSpace->lock);
482 		fLocked = false;
483 		fDegraded = false;
484 	}
485 }
486 
487 
488 void
489 AddressSpaceWriteLocker::DegradeToReadLock()
490 {
491 	// TODO: the current R/W lock implementation just keeps the write lock here
492 	rw_lock_read_lock(&fSpace->lock);
493 	rw_lock_write_unlock(&fSpace->lock);
494 	fDegraded = true;
495 }
496 
497 
498 //	#pragma mark -
499 
500 
501 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
502 	:
503 	fItems(NULL),
504 	fCapacity(0),
505 	fCount(0),
506 	fLocked(false)
507 {
508 }
509 
510 
511 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
512 {
513 	Unset();
514 	free(fItems);
515 }
516 
517 
518 /*static*/ int
519 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
520 {
521 	lock_item* a = (lock_item*)_a;
522 	lock_item* b = (lock_item*)_b;
523 	return a->space->id - b->space->id;
524 }
525 
526 
527 bool
528 MultiAddressSpaceLocker::_ResizeIfNeeded()
529 {
530 	if (fCount == fCapacity) {
531 		lock_item* items = (lock_item*)realloc(fItems,
532 			(fCapacity + 4) * sizeof(lock_item));
533 		if (items == NULL)
534 			return false;
535 
536 		fCapacity += 4;
537 		fItems = items;
538 	}
539 
540 	return true;
541 }
542 
543 
544 int32
545 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
546 {
547 	for (int32 i = 0; i < fCount; i++) {
548 		if (fItems[i].space == space)
549 			return i;
550 	}
551 
552 	return -1;
553 }
554 
555 
556 status_t
557 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
558 	bool writeLock, vm_address_space** _space)
559 {
560 	if (!space)
561 		return B_BAD_VALUE;
562 
563 	int32 index = _IndexOfAddressSpace(space);
564 	if (index < 0) {
565 		if (!_ResizeIfNeeded()) {
566 			vm_put_address_space(space);
567 			return B_NO_MEMORY;
568 		}
569 
570 		lock_item& item = fItems[fCount++];
571 		item.space = space;
572 		item.write_lock = writeLock;
573 	} else {
574 
575 		// one reference is enough
576 		vm_put_address_space(space);
577 
578 		fItems[index].write_lock |= writeLock;
579 	}
580 
581 	if (_space != NULL)
582 		*_space = space;
583 
584 	return B_OK;
585 }
586 
587 
588 inline status_t
589 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
590 	vm_address_space** _space)
591 {
592 	return _AddAddressSpace(vm_get_address_space(team), writeLock,
593 		_space);
594 }
595 
596 
597 inline status_t
598 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
599 	vm_address_space** _space)
600 {
601 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
602 		_space);
603 }
604 
605 
606 void
607 MultiAddressSpaceLocker::Unset()
608 {
609 	Unlock();
610 
611 	for (int32 i = 0; i < fCount; i++)
612 		vm_put_address_space(fItems[i].space);
613 
614 	fCount = 0;
615 }
616 
617 
618 status_t
619 MultiAddressSpaceLocker::Lock()
620 {
621 	ASSERT(!fLocked);
622 
623 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
624 
625 	for (int32 i = 0; i < fCount; i++) {
626 		status_t status;
627 		if (fItems[i].write_lock)
628 			status = rw_lock_write_lock(&fItems[i].space->lock);
629 		else
630 			status = rw_lock_read_lock(&fItems[i].space->lock);
631 
632 		if (status < B_OK) {
633 			while (--i >= 0) {
634 				if (fItems[i].write_lock)
635 					rw_lock_write_unlock(&fItems[i].space->lock);
636 				else
637 					rw_lock_read_unlock(&fItems[i].space->lock);
638 			}
639 			return status;
640 		}
641 	}
642 
643 	fLocked = true;
644 	return B_OK;
645 }
646 
647 
648 void
649 MultiAddressSpaceLocker::Unlock()
650 {
651 	if (!fLocked)
652 		return;
653 
654 	for (int32 i = 0; i < fCount; i++) {
655 		if (fItems[i].write_lock)
656 			rw_lock_write_unlock(&fItems[i].space->lock);
657 		else
658 			rw_lock_read_unlock(&fItems[i].space->lock);
659 	}
660 
661 	fLocked = false;
662 }
663 
664 
665 /*!	Adds all address spaces of the areas associated with the given area's cache,
666 	locks them, and locks the cache (acquiring a reference to it). It retries
667 	until the situation is stable (i.e. neither the cache nor the cache's areas
668 	changed) or an error occurs.
669 */
670 status_t
671 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
672 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
673 	vm_cache** _cache)
674 {
675 	// remember the original state
676 	int originalCount = fCount;
677 	lock_item* originalItems = NULL;
678 	if (fCount > 0) {
679 		originalItems = new(nothrow) lock_item[fCount];
680 		if (originalItems == NULL)
681 			return B_NO_MEMORY;
682 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
683 	}
684 	ArrayDeleter<lock_item> _(originalItems);
685 
686 	// get the cache
687 	vm_cache* cache;
688 	vm_area* area;
689 	status_t error;
690 	{
691 		AddressSpaceReadLocker locker;
692 		error = locker.SetFromArea(areaID, area);
693 		if (error != B_OK)
694 			return error;
695 
696 		cache = vm_area_get_locked_cache(area);
697 	}
698 
699 	while (true) {
700 		// add all areas
701 		vm_area* firstArea = cache->areas;
702 		for (vm_area* current = firstArea; current;
703 				current = current->cache_next) {
704 			error = AddArea(current->id,
705 				current == area ? writeLockThisOne : writeLockOthers);
706 			if (error != B_OK) {
707 				vm_area_put_locked_cache(cache);
708 				return error;
709 			}
710 		}
711 
712 		// unlock the cache and attempt to lock the address spaces
713 		vm_area_put_locked_cache(cache);
714 
715 		error = Lock();
716 		if (error != B_OK)
717 			return error;
718 
719 		// lock the cache again and check whether anything has changed
720 
721 		// check whether the area is gone in the meantime
722 		rw_lock_read_lock(&sAreaHashLock);
723 		area = (vm_area*)hash_lookup(sAreaHash, &areaID);
724 		rw_lock_read_unlock(&sAreaHashLock);
725 
726 		if (area == NULL) {
727 			Unlock();
728 			return B_BAD_VALUE;
729 		}
730 
731 		// lock the cache
732 		vm_cache* oldCache = cache;
733 		cache = vm_area_get_locked_cache(area);
734 
735 		// If neither the area's cache nor its area list has changed, we're
736 		// done.
737 		if (cache == oldCache && firstArea == cache->areas) {
738 			_area = area;
739 			if (_cache != NULL)
740 				*_cache = cache;
741 			return B_OK;
742 		}
743 
744 		// Restore the original state and try again.
745 
746 		// Unlock the address spaces, but keep the cache locked for the next
747 		// iteration.
748 		Unlock();
749 
750 		// Get an additional reference to the original address spaces.
751 		for (int32 i = 0; i < originalCount; i++)
752 			atomic_add(&originalItems[i].space->ref_count, 1);
753 
754 		// Release all references to the current address spaces.
755 		for (int32 i = 0; i < fCount; i++)
756 			vm_put_address_space(fItems[i].space);
757 
758 		// Copy over the original state.
759 		fCount = originalCount;
760 		if (originalItems != NULL)
761 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
762 	}
763 }
764 
765 
766 //	#pragma mark -
767 
768 
769 #if VM_PAGE_FAULT_TRACING
770 
771 namespace VMPageFaultTracing {
772 
773 class PageFaultStart : public AbstractTraceEntry {
774 public:
775 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
776 		:
777 		fAddress(address),
778 		fPC(pc),
779 		fWrite(write),
780 		fUser(user)
781 	{
782 		Initialized();
783 	}
784 
785 	virtual void AddDump(TraceOutput& out)
786 	{
787 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
788 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
789 	}
790 
791 private:
792 	addr_t	fAddress;
793 	addr_t	fPC;
794 	bool	fWrite;
795 	bool	fUser;
796 };
797 
798 
799 // page fault errors
800 enum {
801 	PAGE_FAULT_ERROR_NO_AREA		= 0,
802 	PAGE_FAULT_ERROR_KERNEL_ONLY,
803 	PAGE_FAULT_ERROR_READ_ONLY,
804 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
805 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
806 };
807 
808 
809 class PageFaultError : public AbstractTraceEntry {
810 public:
811 	PageFaultError(area_id area, status_t error)
812 		:
813 		fArea(area),
814 		fError(error)
815 	{
816 		Initialized();
817 	}
818 
819 	virtual void AddDump(TraceOutput& out)
820 	{
821 		switch (fError) {
822 			case PAGE_FAULT_ERROR_NO_AREA:
823 				out.Print("page fault error: no area");
824 				break;
825 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
826 				out.Print("page fault error: area: %ld, kernel only", fArea);
827 				break;
828 			case PAGE_FAULT_ERROR_READ_ONLY:
829 				out.Print("page fault error: area: %ld, read only", fArea);
830 				break;
831 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
832 				out.Print("page fault error: kernel touching bad user memory");
833 				break;
834 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
835 				out.Print("page fault error: no address space");
836 				break;
837 			default:
838 				out.Print("page fault error: area: %ld, error: %s", fArea,
839 					strerror(fError));
840 				break;
841 		}
842 	}
843 
844 private:
845 	area_id		fArea;
846 	status_t	fError;
847 };
848 
849 
850 class PageFaultDone : public AbstractTraceEntry {
851 public:
852 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
853 			vm_page* page)
854 		:
855 		fArea(area),
856 		fTopCache(topCache),
857 		fCache(cache),
858 		fPage(page)
859 	{
860 		Initialized();
861 	}
862 
863 	virtual void AddDump(TraceOutput& out)
864 	{
865 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
866 			"page: %p", fArea, fTopCache, fCache, fPage);
867 	}
868 
869 private:
870 	area_id		fArea;
871 	VMCache*	fTopCache;
872 	VMCache*	fCache;
873 	vm_page*	fPage;
874 };
875 
876 }	// namespace VMPageFaultTracing
877 
878 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
879 #else
880 #	define TPF(x) ;
881 #endif	// VM_PAGE_FAULT_TRACING
882 
883 
884 //	#pragma mark -
885 
886 
887 static int
888 area_compare(void* _area, const void* key)
889 {
890 	vm_area* area = (vm_area*)_area;
891 	const area_id* id = (const area_id*)key;
892 
893 	if (area->id == *id)
894 		return 0;
895 
896 	return -1;
897 }
898 
899 
900 static uint32
901 area_hash(void* _area, const void* key, uint32 range)
902 {
903 	vm_area* area = (vm_area*)_area;
904 	const area_id* id = (const area_id*)key;
905 
906 	if (area != NULL)
907 		return area->id % range;
908 
909 	return (uint32)*id % range;
910 }
911 
912 
913 static vm_address_space*
914 get_address_space_by_area_id(area_id id)
915 {
916 	vm_address_space* addressSpace = NULL;
917 
918 	rw_lock_read_lock(&sAreaHashLock);
919 
920 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
921 	if (area != NULL) {
922 		addressSpace = area->address_space;
923 		atomic_add(&addressSpace->ref_count, 1);
924 	}
925 
926 	rw_lock_read_unlock(&sAreaHashLock);
927 
928 	return addressSpace;
929 }
930 
931 
932 //! You need to have the address space locked when calling this function
933 static vm_area*
934 lookup_area(vm_address_space* addressSpace, area_id id)
935 {
936 	rw_lock_read_lock(&sAreaHashLock);
937 
938 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
939 	if (area != NULL && area->address_space != addressSpace)
940 		area = NULL;
941 
942 	rw_lock_read_unlock(&sAreaHashLock);
943 
944 	return area;
945 }
946 
947 
948 static vm_area*
949 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags)
950 {
951 	vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area));
952 	if (reserved == NULL)
953 		return NULL;
954 
955 	memset(reserved, 0, sizeof(vm_area));
956 	reserved->id = RESERVED_AREA_ID;
957 		// this marks it as reserved space
958 	reserved->protection = flags;
959 	reserved->address_space = addressSpace;
960 
961 	return reserved;
962 }
963 
964 
965 static vm_area*
966 create_area_struct(vm_address_space* addressSpace, const char* name,
967 	uint32 wiring, uint32 protection)
968 {
969 	// restrict the area name to B_OS_NAME_LENGTH
970 	size_t length = strlen(name) + 1;
971 	if (length > B_OS_NAME_LENGTH)
972 		length = B_OS_NAME_LENGTH;
973 
974 	vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area));
975 	if (area == NULL)
976 		return NULL;
977 
978 	area->name = (char*)malloc_nogrow(length);
979 	if (area->name == NULL) {
980 		free(area);
981 		return NULL;
982 	}
983 	strlcpy(area->name, name, length);
984 
985 	area->id = atomic_add(&sNextAreaID, 1);
986 	area->base = 0;
987 	area->size = 0;
988 	area->protection = protection;
989 	area->wiring = wiring;
990 	area->memory_type = 0;
991 
992 	area->cache = NULL;
993 	area->cache_offset = 0;
994 
995 	area->address_space = addressSpace;
996 	area->address_space_next = NULL;
997 	area->cache_next = area->cache_prev = NULL;
998 	area->hash_next = NULL;
999 	new (&area->mappings) vm_area_mappings;
1000 	area->page_protections = NULL;
1001 
1002 	return area;
1003 }
1004 
1005 
1006 /*!	Finds a reserved area that covers the region spanned by \a start and
1007 	\a size, inserts the \a area into that region and makes sure that
1008 	there are reserved regions for the remaining parts.
1009 */
1010 static status_t
1011 find_reserved_area(vm_address_space* addressSpace, addr_t start,
1012 	addr_t size, vm_area* area)
1013 {
1014 	vm_area* last = NULL;
1015 	vm_area* next;
1016 
1017 	next = addressSpace->areas;
1018 	while (next) {
1019 		if (next->base <= start && next->base + next->size >= start + size) {
1020 			// this area covers the requested range
1021 			if (next->id != RESERVED_AREA_ID) {
1022 				// but it's not reserved space, it's a real area
1023 				return B_BAD_VALUE;
1024 			}
1025 
1026 			break;
1027 		}
1028 		last = next;
1029 		next = next->address_space_next;
1030 	}
1031 	if (next == NULL)
1032 		return B_ENTRY_NOT_FOUND;
1033 
1034 	// now we have to transfer the requested part of the reserved
1035 	// range to the new area - and remove, resize or split the old
1036 	// reserved area.
1037 
1038 	if (start == next->base) {
1039 		// the area starts at the beginning of the reserved range
1040 		if (last)
1041 			last->address_space_next = area;
1042 		else
1043 			addressSpace->areas = area;
1044 
1045 		if (size == next->size) {
1046 			// the new area fully covers the reserved range
1047 			area->address_space_next = next->address_space_next;
1048 			vm_put_address_space(addressSpace);
1049 			free(next);
1050 		} else {
1051 			// resize the reserved range behind the area
1052 			area->address_space_next = next;
1053 			next->base += size;
1054 			next->size -= size;
1055 		}
1056 	} else if (start + size == next->base + next->size) {
1057 		// the area is at the end of the reserved range
1058 		area->address_space_next = next->address_space_next;
1059 		next->address_space_next = area;
1060 
1061 		// resize the reserved range before the area
1062 		next->size = start - next->base;
1063 	} else {
1064 		// the area splits the reserved range into two separate ones
1065 		// we need a new reserved area to cover this space
1066 		vm_area* reserved = create_reserved_area_struct(addressSpace,
1067 			next->protection);
1068 		if (reserved == NULL)
1069 			return B_NO_MEMORY;
1070 
1071 		atomic_add(&addressSpace->ref_count, 1);
1072 		reserved->address_space_next = next->address_space_next;
1073 		area->address_space_next = reserved;
1074 		next->address_space_next = area;
1075 
1076 		// resize regions
1077 		reserved->size = next->base + next->size - start - size;
1078 		next->size = start - next->base;
1079 		reserved->base = start + size;
1080 		reserved->cache_offset = next->cache_offset;
1081 	}
1082 
1083 	area->base = start;
1084 	area->size = size;
1085 	addressSpace->change_count++;
1086 
1087 	return B_OK;
1088 }
1089 
1090 
1091 /*!	Must be called with this address space's write lock held */
1092 static status_t
1093 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start,
1094 	addr_t size, addr_t end, uint32 addressSpec, vm_area* area)
1095 {
1096 	vm_area* last = NULL;
1097 	vm_area* next;
1098 	bool foundSpot = false;
1099 
1100 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
1101 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
1102 		size, end, addressSpec, area));
1103 
1104 	// do some sanity checking
1105 	if (start < addressSpace->base || size == 0
1106 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
1107 		|| start + size > end)
1108 		return B_BAD_ADDRESS;
1109 
1110 	if (addressSpec == B_EXACT_ADDRESS) {
1111 		// search for a reserved area
1112 		status_t status = find_reserved_area(addressSpace, start, size, area);
1113 		if (status == B_OK || status == B_BAD_VALUE)
1114 			return status;
1115 
1116 		// There was no reserved area, and the slot doesn't seem to be used
1117 		// already
1118 		// TODO: this could be further optimized.
1119 	}
1120 
1121 	size_t alignment = B_PAGE_SIZE;
1122 	if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) {
1123 		// align the memory to the next power of two of the size
1124 		while (alignment < size)
1125 			alignment <<= 1;
1126 	}
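	// e.g. for a 0x3000 byte B_ANY_KERNEL_BLOCK_ADDRESS request the alignment
	// computed above becomes 0x4000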
1127 
1128 	start = ROUNDUP(start, alignment);
1129 
1130 	// walk up to the spot where we should start searching
1131 second_chance:
1132 	next = addressSpace->areas;
1133 	while (next) {
1134 		if (next->base >= start + size) {
1135 			// we have a winner
1136 			break;
1137 		}
1138 		last = next;
1139 		next = next->address_space_next;
1140 	}
1141 
1142 	// find the right spot depending on the address specification - the area
1143 	// will be inserted directly after "last" ("next" is not referenced anymore)
1144 
1145 	switch (addressSpec) {
1146 		case B_ANY_ADDRESS:
1147 		case B_ANY_KERNEL_ADDRESS:
1148 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1149 			// find a hole big enough for a new area
1150 			if (!last) {
1151 				// see if we can build it at the beginning of the virtual map
1152 				if (!next || (next->base >= ROUNDUP(addressSpace->base,
1153 						alignment) + size)) {
1154 					foundSpot = true;
1155 					area->base = ROUNDUP(addressSpace->base, alignment);
1156 					break;
1157 				}
1158 				last = next;
1159 				next = next->address_space_next;
1160 			}
1161 			// keep walking
1162 			while (next) {
1163 				if (next->base >= ROUNDUP(last->base + last->size, alignment)
1164 						+ size) {
1165 					// we found a spot (it'll be filled up below)
1166 					break;
1167 				}
1168 				last = next;
1169 				next = next->address_space_next;
1170 			}
1171 
1172 			if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP(
1173 					last->base + last->size, alignment) + (size - 1))) {
1174 				// got a spot
1175 				foundSpot = true;
1176 				area->base = ROUNDUP(last->base + last->size, alignment);
1177 				break;
1178 			} else {
1179 				// We didn't find a free spot - if there were any reserved areas
1180 				// with the RESERVED_AVOID_BASE flag set, we can now test those
1181 				// for free space
1182 				// TODO: it would make sense to start with the biggest of them
1183 				next = addressSpace->areas;
1184 				last = NULL;
1185 				for (last = NULL; next; next = next->address_space_next) {
1186 					if (next->id != RESERVED_AREA_ID) {
1187 						last = next;
1188 						continue;
1189 					}
1190 
1191 					// TODO: take free space after the reserved area into
1192 					// account!
1193 					if (next->base == ROUNDUP(next->base, alignment)
1194 						&& next->size == size) {
1195 						// The reserved area is entirely covered, and thus,
1196 						// removed
1197 						if (last)
1198 							last->address_space_next = next->address_space_next;
1199 						else
1200 							addressSpace->areas = next->address_space_next;
1201 
1202 						foundSpot = true;
1203 						area->base = next->base;
1204 						free(next);
1205 						break;
1206 					}
1207 					if (next->size - (ROUNDUP(next->base, alignment)
1208 							- next->base) >= size) {
1209 						// The new area will be placed at the end of the
1210 						// reserved area, and the reserved area will be resized
1211 						// to make space
1212 						foundSpot = true;
1213 						next->size -= size;
1214 						last = next;
1215 						area->base = next->base + next->size;
1216 						break;
1217 					}
1218 
1219 					last = next;
1220 				}
1221 			}
1222 			break;
1223 
1224 		case B_BASE_ADDRESS:
1225 			// find a hole big enough for a new area beginning with "start"
1226 			if (!last) {
1227 				// see if we can build it at the specified start address
1228 				if (!next || (next->base >= start + size)) {
1229 					foundSpot = true;
1230 					area->base = start;
1231 					break;
1232 				}
1233 				last = next;
1234 				next = next->address_space_next;
1235 			}
1236 			// keep walking
1237 			while (next) {
1238 				if (next->base >= last->base + last->size + size) {
1239 					// we found a spot (it'll be filled up below)
1240 					break;
1241 				}
1242 				last = next;
1243 				next = next->address_space_next;
1244 			}
1245 
1246 			if ((addressSpace->base + (addressSpace->size - 1))
1247 					>= (last->base + last->size + (size - 1))) {
1248 				// got a spot
1249 				foundSpot = true;
1250 				if (last->base + last->size <= start)
1251 					area->base = start;
1252 				else
1253 					area->base = last->base + last->size;
1254 				break;
1255 			}
1256 			// we didn't find a free spot in the requested range, so we'll
1257 			// try again without any restrictions
1258 			start = addressSpace->base;
1259 			addressSpec = B_ANY_ADDRESS;
1260 			last = NULL;
1261 			goto second_chance;
1262 
1263 		case B_EXACT_ADDRESS:
1264 			// see if we can create it exactly here
1265 			if (!last) {
1266 				if (!next || (next->base >= start + size)) {
1267 					foundSpot = true;
1268 					area->base = start;
1269 					break;
1270 				}
1271 			} else {
1272 				if (next) {
1273 					if (last->base + last->size <= start
1274 						&& next->base >= start + size) {
1275 						foundSpot = true;
1276 						area->base = start;
1277 						break;
1278 					}
1279 				} else {
1280 					if ((last->base + (last->size - 1)) <= start - 1) {
1281 						foundSpot = true;
1282 						area->base = start;
1283 					}
1284 				}
1285 			}
1286 			break;
1287 		default:
1288 			return B_BAD_VALUE;
1289 	}
1290 
1291 	if (!foundSpot)
1292 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1293 
1294 	area->size = size;
1295 	if (last) {
1296 		area->address_space_next = last->address_space_next;
1297 		last->address_space_next = area;
1298 	} else {
1299 		area->address_space_next = addressSpace->areas;
1300 		addressSpace->areas = area;
1301 	}
1302 	addressSpace->change_count++;
1303 	return B_OK;
1304 }
1305 
1306 
1307 /*!	This inserts the area you pass into the specified address space.
1308 	It will also set the "_address" argument to its base address when
1309 	the call succeeds.
1310 	You need to hold the vm_address_space write lock.
1311 */
1312 static status_t
1313 insert_area(vm_address_space* addressSpace, void** _address,
1314 	uint32 addressSpec, addr_t size, vm_area* area)
1315 {
1316 	addr_t searchBase, searchEnd;
1317 	status_t status;
1318 
1319 	switch (addressSpec) {
1320 		case B_EXACT_ADDRESS:
1321 			searchBase = (addr_t)*_address;
1322 			searchEnd = (addr_t)*_address + size;
1323 			break;
1324 
1325 		case B_BASE_ADDRESS:
1326 			searchBase = (addr_t)*_address;
1327 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1328 			break;
1329 
1330 		case B_ANY_ADDRESS:
1331 		case B_ANY_KERNEL_ADDRESS:
1332 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1333 			searchBase = addressSpace->base;
1334 			// TODO: remove this again when vm86 mode is moved into the kernel
1335 			// completely (currently needs a userland address space!)
1336 			if (searchBase == USER_BASE)
1337 				searchBase = USER_BASE_ANY;
1338 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1339 			break;
1340 
1341 		default:
1342 			return B_BAD_VALUE;
1343 	}
1344 
1345 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1346 		searchEnd, addressSpec, area);
1347 	if (status == B_OK) {
1348 		// TODO: do we have to do anything about B_ANY_KERNEL_ADDRESS
1349 		// vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
1350 		*_address = (void*)area->base;
1351 	}
1352 
1353 	return status;
1354 }
1355 
1356 
1357 static inline void
1358 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection)
1359 {
1360 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
1361 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1362 	uint8& entry = area->page_protections[pageIndex / 2];
1363 	if (pageIndex % 2 == 0)
1364 		entry = (entry & 0xf0) | protection;
1365 	else
1366 		entry = (entry & 0x0f) | (protection << 4);
1367 }
1368 
1369 
1370 static inline uint32
1371 get_area_page_protection(vm_area* area, addr_t pageAddress)
1372 {
1373 	if (area->page_protections == NULL)
1374 		return area->protection;
1375 
1376 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1377 	uint32 protection = area->page_protections[pageIndex / 2];
1378 	if (pageIndex % 2 == 0)
1379 		protection &= 0x0f;
1380 	else
1381 		protection >>= 4;
1382 
1383 	return protection | B_KERNEL_READ_AREA
1384 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1385 }
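
// Note on the page_protections encoding used above: each byte stores the
// protection nibbles of two consecutive pages -- the even-indexed page in the
// low nibble, the odd-indexed page in the high nibble (e.g. page index 5 lives
// in the high nibble of byte 2). Only B_READ_AREA, B_WRITE_AREA, and
// B_EXECUTE_AREA are stored; get_area_page_protection() additionally grants
// B_KERNEL_READ_AREA always and B_KERNEL_WRITE_AREA whenever the page is
// user-writable.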
1386 
1387 
1388 /*!	Cuts a piece out of an area. If the given cut range covers the complete
1389 	area, it is deleted. If it covers the beginning or the end, the area is
1390 	resized accordingly. If the range covers some part in the middle of the
1391 	area, it is split in two; in this case the second area is returned via
1392 	\a _secondArea (the variable is left untouched in the other cases).
1393 	The address space must be write locked.
1394 */
1395 static status_t
1396 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address,
1397 	addr_t lastAddress, vm_area** _secondArea, bool kernel)
1398 {
1399 	// Does the cut range intersect with the area at all?
1400 	addr_t areaLast = area->base + (area->size - 1);
1401 	if (area->base > lastAddress || areaLast < address)
1402 		return B_OK;
1403 
1404 	// Is the area fully covered?
1405 	if (area->base >= address && areaLast <= lastAddress) {
1406 		delete_area(addressSpace, area);
1407 		return B_OK;
1408 	}
1409 
1410 	AreaCacheLocker cacheLocker(area);
1411 	vm_cache* cache = area->cache;
1412 
1413 	// Cut the end only?
1414 	if (areaLast <= lastAddress) {
1415 		addr_t newSize = address - area->base;
1416 
1417 		// unmap pages
1418 		vm_unmap_pages(area, address, area->size - newSize, false);
1419 
1420 		// If no one else uses the area's cache, we can resize it, too.
1421 		if (cache->areas == area && area->cache_next == NULL
1422 			&& list_is_empty(&cache->consumers)) {
1423 			status_t error = cache->Resize(cache->virtual_base + newSize);
1424 			if (error != B_OK)
1425 				return error;
1426 		}
1427 
1428 		area->size = newSize;
1429 
1430 		return B_OK;
1431 	}
1432 
1433 	// Cut the beginning only?
1434 	if (area->base >= address) {
1435 		addr_t newBase = lastAddress + 1;
1436 		addr_t newSize = areaLast - lastAddress;
1437 
1438 		// unmap pages
1439 		vm_unmap_pages(area, area->base, newBase - area->base, false);
1440 
1441 		// TODO: If no one else uses the area's cache, we should resize it, too!
1442 
1443 		area->cache_offset += newBase - area->base;
1444 		area->base = newBase;
1445 		area->size = newSize;
1446 
1447 		return B_OK;
1448 	}
1449 
1450 	// The tough part -- cut a piece out of the middle of the area.
1451 	// We do that by shrinking the area to the begin section and creating a
1452 	// new area for the end section.
1453 
1454 	addr_t firstNewSize = address - area->base;
1455 	addr_t secondBase = lastAddress + 1;
1456 	addr_t secondSize = areaLast - lastAddress;
1457 
1458 	// unmap pages
1459 	vm_unmap_pages(area, address, area->size - firstNewSize, false);
1460 
1461 	// resize the area
1462 	addr_t oldSize = area->size;
1463 	area->size = firstNewSize;
1464 
1465 	// TODO: If no one else uses the area's cache, we might want to create a
1466 	// new cache for the second area, transfer the concerned pages from the
1467 	// first cache to it and resize the first cache.
1468 
1469 	// map the second area
1470 	vm_area* secondArea;
1471 	void* secondBaseAddress = (void*)secondBase;
1472 	status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress,
1473 		area->cache_offset + (secondBase - area->base), secondSize,
1474 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
1475 		&secondArea, area->name, false, kernel);
1476 	if (error != B_OK) {
1477 		area->size = oldSize;
1478 		return error;
1479 	}
1480 
1481 	// We need a cache reference for the new area.
1482 	cache->AcquireRefLocked();
1483 
1484 	if (_secondArea != NULL)
1485 		*_secondArea = secondArea;
1486 
1487 	return B_OK;
1488 }
1489 
1490 
1491 static inline void
1492 increment_page_wired_count(vm_page* page)
1493 {
1494 	// TODO: needs to be atomic on all platforms!
1495 	// ... but at least the check isn't. Consequently we should hold
1496 	// sMappingLock, which would allow us to even avoid atomic_add() on
1497 	// gMappedPagesCount.
1498 	if (page->wired_count++ == 0) {
1499 		if (page->mappings.IsEmpty())
1500 			atomic_add(&gMappedPagesCount, 1);
1501 	}
1502 }
1503 
1504 
1505 static inline void
1506 decrement_page_wired_count(vm_page* page)
1507 {
1508 	if (--page->wired_count == 0) {
1509 		// TODO: needs to be atomic on all platforms!
1510 		// See above!
1511 		if (page->mappings.IsEmpty())
1512 			atomic_add(&gMappedPagesCount, -1);
1513 	}
1514 }
1515 
1516 
1517 /*!	Deletes all areas in the given address range.
1518 	The address space must be write-locked.
1519 */
1520 static status_t
1521 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size,
1522 	bool kernel)
1523 {
1524 	size = PAGE_ALIGN(size);
1525 	addr_t lastAddress = address + (size - 1);
1526 
1527 	// Check, whether the caller is allowed to modify the concerned areas.
1528 	vm_area* area;
1529 	if (!kernel) {
1530 		area = addressSpace->areas;
1531 		while (area != NULL) {
1532 			vm_area* nextArea = area->address_space_next;
1533 
1534 			if (area->id != RESERVED_AREA_ID) {
1535 				addr_t areaLast = area->base + (area->size - 1);
1536 				if (area->base < lastAddress && address < areaLast) {
1537 					if ((area->protection & B_KERNEL_AREA) != 0)
1538 						return B_NOT_ALLOWED;
1539 				}
1540 			}
1541 
1542 			area = nextArea;
1543 		}
1544 	}
1545 
1546 	area = addressSpace->areas;
1547 	while (area != NULL) {
1548 		vm_area* nextArea = area->address_space_next;
1549 
1550 		if (area->id != RESERVED_AREA_ID) {
1551 			addr_t areaLast = area->base + (area->size - 1);
1552 			if (area->base < lastAddress && address < areaLast) {
1553 				status_t error = cut_area(addressSpace, area, address,
1554 					lastAddress, NULL, kernel);
1555 				if (error != B_OK)
1556 					return error;
1557 					// Failing after already messing with areas is ugly, but we
1558 					// can't do anything about it.
1559 			}
1560 		}
1561 
1562 		area = nextArea;
1563 	}
1564 
1565 	return B_OK;
1566 }
1567 
1568 
1569 /*! You need to hold the lock of the cache and the write lock of the address
1570 	space when calling this function.
1571 	Note that in case of error your cache will be temporarily unlocked.
1572 */
1573 static status_t
1574 map_backing_store(vm_address_space* addressSpace, vm_cache* cache,
1575 	void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1576 	int wiring, int protection, int mapping, vm_area** _area,
1577 	const char* areaName, bool unmapAddressRange, bool kernel)
1578 {
1579 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, "
1580 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
1581 		"'%s'\n", addressSpace, cache, *_virtualAddress, offset, size,
1582 		addressSpec, wiring, protection, _area, areaName));
1583 	cache->AssertLocked();
1584 
1585 	vm_area* area = create_area_struct(addressSpace, areaName, wiring,
1586 		protection);
1587 	if (area == NULL)
1588 		return B_NO_MEMORY;
1589 
1590 	status_t status;
1591 
1592 	// if this is a private map, we need to create a new cache
1593 	// to handle the private copies of pages as they are written to
1594 	vm_cache* sourceCache = cache;
1595 	if (mapping == REGION_PRIVATE_MAP) {
1596 		vm_cache* newCache;
1597 
1598 		// create an anonymous cache
1599 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1600 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true);
1601 		if (status != B_OK)
1602 			goto err1;
1603 
1604 		newCache->Lock();
1605 		newCache->temporary = 1;
1606 		newCache->scan_skip = cache->scan_skip;
1607 		newCache->virtual_base = offset;
1608 		newCache->virtual_end = offset + size;
1609 
1610 		cache->AddConsumer(newCache);
1611 
1612 		cache = newCache;
1613 	}
1614 
1615 	status = cache->SetMinimalCommitment(size);
1616 	if (status != B_OK)
1617 		goto err2;
1618 
1619 	// check to see if this address space has entered DELETE state
1620 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1621 		// okay, someone is trying to delete this address space now, so we can't
1622 		// insert the area; back out
1623 		status = B_BAD_TEAM_ID;
1624 		goto err2;
1625 	}
1626 
1627 	if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) {
1628 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
1629 			size, kernel);
1630 		if (status != B_OK)
1631 			goto err2;
1632 	}
1633 
1634 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1635 	if (status != B_OK)
1636 		goto err2;
1637 
1638 	// attach the cache to the area
1639 	area->cache = cache;
1640 	area->cache_offset = offset;
1641 
1642 	// point the cache back to the area
1643 	cache->InsertAreaLocked(area);
1644 	if (mapping == REGION_PRIVATE_MAP)
1645 		cache->Unlock();
1646 
1647 	// insert the area in the global area hash table
1648 	rw_lock_write_lock(&sAreaHashLock);
1649 	hash_insert(sAreaHash, area);
1650 	rw_lock_write_unlock(&sAreaHashLock);
1651 
1652 	// grab a ref to the address space (the area holds this)
1653 	atomic_add(&addressSpace->ref_count, 1);
1654 
1655 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1656 //		cache, sourceCache, areaName, area);
1657 
1658 	*_area = area;
1659 	return B_OK;
1660 
1661 err2:
1662 	if (mapping == REGION_PRIVATE_MAP) {
1663 		// We created this cache, so we must delete it again. Note, that we
1664 		// need to temporarily unlock the source cache or we'll otherwise
1665 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1666 		sourceCache->Unlock();
1667 		cache->ReleaseRefAndUnlock();
1668 		sourceCache->Lock();
1669 	}
1670 err1:
1671 	free(area->name);
1672 	free(area);
1673 	return status;
1674 }
1675 
1676 
1677 status_t
1678 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1679 {
1680 	AddressSpaceWriteLocker locker(team);
1681 	if (!locker.IsLocked())
1682 		return B_BAD_TEAM_ID;
1683 
1684 	// check to see if this address space has entered DELETE state
1685 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1686 		// okay, someone is trying to delete this address space now, so we can't
1687 		// insert the area; back out
1688 		return B_BAD_TEAM_ID;
1689 	}
1690 
1691 	// search area list and remove any matching reserved ranges
1692 
1693 	vm_area* area = locker.AddressSpace()->areas;
1694 	vm_area* last = NULL;
1695 	while (area) {
1696 		// the area must be completely part of the reserved range
1697 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1698 			&& area->base + area->size <= (addr_t)address + size) {
1699 			// remove reserved range
1700 			vm_area* reserved = area;
1701 			if (last)
1702 				last->address_space_next = reserved->address_space_next;
1703 			else
1704 				locker.AddressSpace()->areas = reserved->address_space_next;
1705 
1706 			area = reserved->address_space_next;
1707 			vm_put_address_space(locker.AddressSpace());
1708 			free(reserved);
1709 			continue;
1710 		}
1711 
1712 		last = area;
1713 		area = area->address_space_next;
1714 	}
1715 
1716 	return B_OK;
1717 }
1718 
1719 
1720 status_t
1721 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1722 	addr_t size, uint32 flags)
1723 {
1724 	if (size == 0)
1725 		return B_BAD_VALUE;
1726 
1727 	AddressSpaceWriteLocker locker(team);
1728 	if (!locker.IsLocked())
1729 		return B_BAD_TEAM_ID;
1730 
1731 	// check to see if this address space has entered DELETE state
1732 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1733 		// okay, someone is trying to delete this address space now, so we
1734 		// can't insert the area, let's back out
1735 		return B_BAD_TEAM_ID;
1736 	}
1737 
1738 	vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags);
1739 	if (area == NULL)
1740 		return B_NO_MEMORY;
1741 
1742 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1743 		size, area);
1744 	if (status != B_OK) {
1745 		free(area);
1746 		return status;
1747 	}
1748 
1749 	// the area is now reserved!
1750 
1751 	area->cache_offset = area->base;
1752 		// we cache the original base address here
1753 
1754 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1755 	return B_OK;
1756 }
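
// Illustrative sketch (hypothetical addresses and sizes): a reserved range is
// typically carved up later with B_EXACT_ADDRESS requests, which are matched
// against the reservation by find_reserved_area().
//
//	void* base = (void*)0x60000000;
//	if (vm_reserve_address_range(team, &base, B_EXACT_ADDRESS,
//			16 * B_PAGE_SIZE, 0) == B_OK) {
//		// later: create a real area inside the reserved range
//		vm_create_anonymous_area(team, "chunk", &base, B_EXACT_ADDRESS,
//			4 * B_PAGE_SIZE, B_NO_LOCK, B_READ_AREA | B_WRITE_AREA, 0, false);
//	}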
1757 
1758 
1759 area_id
1760 vm_create_anonymous_area(team_id team, const char* name, void** address,
1761 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
1762 	uint32 flags, bool kernel)
1763 {
1764 	vm_area* area;
1765 	vm_cache* cache;
1766 	vm_page* page = NULL;
1767 	bool isStack = (protection & B_STACK_AREA) != 0;
1768 	page_num_t guardPages;
1769 	bool canOvercommit = false;
1770 	addr_t physicalBase = 0;
1771 
1772 	TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size));
1773 
1774 	size = PAGE_ALIGN(size);
1775 
1776 	if (size == 0)
1777 		return B_BAD_VALUE;
1778 	if (!arch_vm_supports_protection(protection))
1779 		return B_NOT_SUPPORTED;
1780 
1781 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1782 		canOvercommit = true;
1783 
1784 #ifdef DEBUG_KERNEL_STACKS
1785 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1786 		isStack = true;
1787 #endif
1788 
1789 	// check parameters
1790 	switch (addressSpec) {
1791 		case B_ANY_ADDRESS:
1792 		case B_EXACT_ADDRESS:
1793 		case B_BASE_ADDRESS:
1794 		case B_ANY_KERNEL_ADDRESS:
1795 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1796 			break;
1797 		case B_PHYSICAL_BASE_ADDRESS:
1798 			physicalBase = (addr_t)*address;
1799 			addressSpec = B_ANY_KERNEL_ADDRESS;
1800 			break;
1801 
1802 		default:
1803 			return B_BAD_VALUE;
1804 	}
1805 
1806 	bool doReserveMemory = false;
1807 	switch (wiring) {
1808 		case B_NO_LOCK:
1809 			break;
1810 		case B_FULL_LOCK:
1811 		case B_LAZY_LOCK:
1812 		case B_CONTIGUOUS:
1813 			doReserveMemory = true;
1814 			break;
1815 		case B_ALREADY_WIRED:
1816 			break;
1817 		case B_LOMEM:
1818 		//case B_SLOWMEM:
1819 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1820 			wiring = B_FULL_LOCK;
1821 			doReserveMemory = true;
1822 			break;
1823 		default:
1824 			return B_BAD_VALUE;
1825 	}
1826 
1827 	// For full lock or contiguous areas we're also going to map the pages and
1828 	// thus need to reserve pages for the mapping backend upfront.
1829 	addr_t reservedMapPages = 0;
1830 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1831 		AddressSpaceWriteLocker locker;
1832 		status_t status = locker.SetTo(team);
1833 		if (status != B_OK)
1834 			return status;
1835 
1836 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
1837 		reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
1838 	}
1839 
1840 	// Reserve memory before acquiring the address space lock. This reduces the
1841 	// chances of failure, since while holding the write lock to the address
1842 	// space (if it is the kernel address space that is), the low memory handler
1843 	// won't be able to free anything for us.
1844 	addr_t reservedMemory = 0;
1845 	if (doReserveMemory) {
1846 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1847 		if (vm_try_reserve_memory(size, timeout) != B_OK)
1848 			return B_NO_MEMORY;
1849 		reservedMemory = size;
1850 		// TODO: We don't reserve the memory for the pages for the page
1851 		// directories/tables. We actually need to do so, since we currently don't
1852 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1853 		// there are actually fewer physical pages than there should be, which
1854 		// can get the VM into trouble in low memory situations.
1855 	}
1856 
1857 	AddressSpaceWriteLocker locker;
1858 	vm_address_space* addressSpace;
1859 	status_t status;
1860 
1861 	// For full lock areas reserve the pages before locking the address
1862 	// space. E.g. block caches can't release their memory while we hold the
1863 	// address space lock.
1864 	page_num_t reservedPages = reservedMapPages;
1865 	if (wiring == B_FULL_LOCK)
1866 		reservedPages += size / B_PAGE_SIZE;
1867 	if (reservedPages > 0) {
1868 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1869 			if (!vm_page_try_reserve_pages(reservedPages)) {
1870 				reservedPages = 0;
1871 				status = B_WOULD_BLOCK;
1872 				goto err0;
1873 			}
1874 		} else
1875 			vm_page_reserve_pages(reservedPages);
1876 	}
1877 
1878 	status = locker.SetTo(team);
1879 	if (status != B_OK)
1880 		goto err0;
1881 
1882 	addressSpace = locker.AddressSpace();
1883 
1884 	if (wiring == B_CONTIGUOUS) {
1885 		// we try to allocate the page run here upfront as this may easily
1886 		// fail for obvious reasons
1887 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, physicalBase,
1888 			size / B_PAGE_SIZE);
1889 		if (page == NULL) {
1890 			status = B_NO_MEMORY;
1891 			goto err0;
1892 		}
1893 	}
1894 
1895 	// create an anonymous cache
1896 	// if it's a stack, make sure that two pages are available at least
1897 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1898 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1899 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1900 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1901 		wiring == B_NO_LOCK);
1902 	if (status != B_OK)
1903 		goto err1;
1904 
1905 	cache->temporary = 1;
1906 	cache->virtual_end = size;
1907 	cache->committed_size = reservedMemory;
1908 		// TODO: This should be done via a method.
1909 	reservedMemory = 0;
1910 
1911 	switch (wiring) {
1912 		case B_LAZY_LOCK:
1913 		case B_FULL_LOCK:
1914 		case B_CONTIGUOUS:
1915 		case B_ALREADY_WIRED:
1916 			cache->scan_skip = 1;
1917 			break;
1918 		case B_NO_LOCK:
1919 			cache->scan_skip = 0;
1920 			break;
1921 	}
1922 
1923 	cache->Lock();
1924 
1925 	status = map_backing_store(addressSpace, cache, address, 0, size,
1926 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
1927 		(flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel);
1928 
1929 	if (status < B_OK) {
1930 		cache->ReleaseRefAndUnlock();
1931 		goto err1;
1932 	}
1933 
1934 	locker.DegradeToReadLock();
1935 
1936 	switch (wiring) {
1937 		case B_NO_LOCK:
1938 		case B_LAZY_LOCK:
1939 			// do nothing - the pages are mapped in as needed
1940 			break;
1941 
1942 		case B_FULL_LOCK:
1943 		{
1944 			// Allocate and map all pages for this area
1945 
1946 			off_t offset = 0;
1947 			for (addr_t address = area->base;
1948 					address < area->base + (area->size - 1);
1949 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1950 #ifdef DEBUG_KERNEL_STACKS
1951 #	ifdef STACK_GROWS_DOWNWARDS
1952 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
1953 						* B_PAGE_SIZE)
1954 #	else
1955 				if (isStack && address >= area->base + area->size
1956 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1957 #	endif
1958 					continue;
1959 #endif
1960 				vm_page* page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
1961 				cache->InsertPage(page, offset);
1962 				vm_map_page(area, page, address, protection);
1963 
1964 				// Periodically unreserve pages we've already allocated, so that
1965 				// we don't unnecessarily increase the pressure on the VM.
1966 				if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) {
1967 					page_num_t toUnreserve = 128;
1968 					vm_page_unreserve_pages(toUnreserve);
1969 					reservedPages -= toUnreserve;
1970 				}
1971 			}
1972 
1973 			break;
1974 		}
1975 
1976 		case B_ALREADY_WIRED:
1977 		{
1978 			// The pages should already be mapped. This is only really useful
1979 			// during boot time. Find the appropriate vm_page objects and stick
1980 			// them in the cache object.
1981 			vm_translation_map* map = &addressSpace->translation_map;
1982 			off_t offset = 0;
1983 
1984 			if (!gKernelStartup)
1985 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1986 
1987 			map->ops->lock(map);
1988 
1989 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
1990 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1991 					offset += B_PAGE_SIZE) {
1992 				addr_t physicalAddress;
1993 				uint32 flags;
1994 				status = map->ops->query(map, virtualAddress,
1995 					&physicalAddress, &flags);
1996 				if (status < B_OK) {
1997 					panic("looking up mapping failed for va 0x%lx\n",
1998 						virtualAddress);
1999 				}
2000 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2001 				if (page == NULL) {
2002 					panic("looking up page failed for pa 0x%lx\n",
2003 						physicalAddress);
2004 				}
2005 
2006 				increment_page_wired_count(page);
2007 				vm_page_set_state(page, PAGE_STATE_WIRED);
2008 				cache->InsertPage(page, offset);
2009 			}
2010 
2011 			map->ops->unlock(map);
2012 			break;
2013 		}
2014 
2015 		case B_CONTIGUOUS:
2016 		{
2017 			// We have already allocated our contiguous page run, so we can now
2018 			// just map the pages into the address space
2019 			vm_translation_map* map = &addressSpace->translation_map;
2020 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
2021 			addr_t virtualAddress = area->base;
2022 			off_t offset = 0;
2023 
2024 			map->ops->lock(map);
2025 
2026 			for (virtualAddress = area->base; virtualAddress < area->base
2027 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2028 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
2029 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2030 				if (page == NULL)
2031 					panic("couldn't lookup physical page just allocated\n");
2032 
2033 				status = map->ops->map(map, virtualAddress, physicalAddress,
2034 					protection);
2035 				if (status < B_OK)
2036 					panic("couldn't map physical page in page run\n");
2037 
2038 				increment_page_wired_count(page);
2039 				vm_page_set_state(page, PAGE_STATE_WIRED);
2040 				cache->InsertPage(page, offset);
2041 			}
2042 
2043 			map->ops->unlock(map);
2044 			break;
2045 		}
2046 
2047 		default:
2048 			break;
2049 	}
2050 
2051 	cache->Unlock();
2052 
2053 	if (reservedPages > 0)
2054 		vm_page_unreserve_pages(reservedPages);
2055 
2056 	TRACE(("vm_create_anonymous_area: done\n"));
2057 
2058 	area->cache_type = CACHE_TYPE_RAM;
2059 	return area->id;
2060 
2061 err1:
2062 	if (wiring == B_CONTIGUOUS) {
2063 		// we had allocated the contiguous page run upfront, free it again
2064 		addr_t pageNumber = page->physical_page_number;
2065 		int32 i;
2066 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
2067 			page = vm_lookup_page(pageNumber);
2068 			if (page == NULL)
2069 				panic("couldn't lookup physical page just allocated\n");
2070 
2071 			vm_page_set_state(page, PAGE_STATE_FREE);
2072 		}
2073 	}
2074 
2075 err0:
2076 	if (reservedPages > 0)
2077 		vm_page_unreserve_pages(reservedPages);
2078 	if (reservedMemory > 0)
2079 		vm_unreserve_memory(reservedMemory);
2080 
2081 	return status;
2082 }
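
// Illustrative sketch, not part of the original source: a minimal kernel-side
// use of vm_create_anonymous_area() above, following the argument order of
// the call site in _vm_map_file() further below. The area name and size are
// made up for the example.
//
//	void* base = NULL;
//	area_id id = vm_create_anonymous_area(vm_kernel_address_space_id(),
//		"example buffer", &base, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
//		B_NO_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, true);
//	if (id < B_OK)
//		dprintf("example buffer: area creation failed\n");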
2083 
2084 
2085 area_id
2086 vm_map_physical_memory(team_id team, const char* name, void** _address,
2087 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
2088 {
2089 	vm_area* area;
2090 	vm_cache* cache;
2091 	addr_t mapOffset;
2092 
2093 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
2094 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
2095 		name, _address, addressSpec, size, protection, physicalAddress));
2096 
2097 	if (!arch_vm_supports_protection(protection))
2098 		return B_NOT_SUPPORTED;
2099 
2100 	AddressSpaceWriteLocker locker(team);
2101 	if (!locker.IsLocked())
2102 		return B_BAD_TEAM_ID;
2103 
2104 	// if the physical address is not page aligned,
2105 	// move the area down so that it starts on a page boundary
2106 	mapOffset = physicalAddress % B_PAGE_SIZE;
2107 	size += mapOffset;
2108 	physicalAddress -= mapOffset;
2109 
2110 	size = PAGE_ALIGN(size);
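	// For example (numbers chosen for illustration): with a 4 KB page size,
	// physicalAddress 0x12345 and size 0x100, mapOffset becomes 0x345,
	// physicalAddress is rounded down to 0x12000, size grows to 0x445, and
	// PAGE_ALIGN() rounds it up to 0x1000 -- one full page covering the
	// requested range.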
2111 
2112 	// create a device cache
2113 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
2114 	if (status != B_OK)
2115 		return status;
2116 
2117 	// tell the page scanner to skip over this area; its pages are special
2118 	cache->scan_skip = 1;
2119 	cache->virtual_end = size;
2120 
2121 	cache->Lock();
2122 
2123 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2124 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2125 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2126 
2127 	if (status < B_OK)
2128 		cache->ReleaseRefLocked();
2129 
2130 	cache->Unlock();
2131 
2132 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
2133 		// set requested memory type
2134 		status = arch_vm_set_memory_type(area, physicalAddress,
2135 			addressSpec & B_MTR_MASK);
2136 		if (status < B_OK)
2137 			delete_area(locker.AddressSpace(), area);
2138 	}
2139 
2140 	if (status >= B_OK) {
2141 		// make sure our area is mapped in completely
2142 
2143 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2144 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2145 			area->base + (size - 1));
2146 
2147 		vm_page_reserve_pages(reservePages);
2148 		map->ops->lock(map);
2149 
2150 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2151 			map->ops->map(map, area->base + offset, physicalAddress + offset,
2152 				protection);
2153 		}
2154 
2155 		map->ops->unlock(map);
2156 		vm_page_unreserve_pages(reservePages);
2157 	}
2158 
2159 	if (status < B_OK)
2160 		return status;
2161 
2162 	// offset the returned pointer into the new area by the same amount
2163 	// the incoming physical address was offset
2164 	*_address = (void*)((addr_t)*_address + mapOffset);
2165 
2166 	area->cache_type = CACHE_TYPE_DEVICE;
2167 	return area->id;
2168 }
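
// Illustrative sketch, not part of the original source: mapping one page of a
// hypothetical device's registers into the kernel. The physical address is
// made up, and OR'ing B_MTR_UC into addressSpec assumes it is one of the
// B_MTR_MASK memory type flags handled above.
//
//	void* regs = NULL;
//	area_id id = vm_map_physical_memory(vm_kernel_address_space_id(),
//		"example device regs", &regs, B_ANY_KERNEL_ADDRESS | B_MTR_UC,
//		B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfee00000);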
2169 
2170 
2171 area_id
2172 vm_create_null_area(team_id team, const char* name, void** address,
2173 	uint32 addressSpec, addr_t size)
2174 {
2175 	vm_area* area;
2176 	vm_cache* cache;
2177 	status_t status;
2178 
2179 	AddressSpaceWriteLocker locker(team);
2180 	if (!locker.IsLocked())
2181 		return B_BAD_TEAM_ID;
2182 
2183 	size = PAGE_ALIGN(size);
2184 
2185 	// create a null cache
2186 	status = VMCacheFactory::CreateNullCache(cache);
2187 	if (status != B_OK)
2188 		return status;
2189 
2190 	// tell the page scanner to skip over this area, no pages will be mapped here
2191 	cache->scan_skip = 1;
2192 	cache->virtual_end = size;
2193 
2194 	cache->Lock();
2195 
2196 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
2197 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name,
2198 		false, true);
2199 
2200 	if (status < B_OK) {
2201 		cache->ReleaseRefAndUnlock();
2202 		return status;
2203 	}
2204 
2205 	cache->Unlock();
2206 
2207 	area->cache_type = CACHE_TYPE_NULL;
2208 	return area->id;
2209 }
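
// Illustrative sketch, not part of the original source: reserving a range of
// kernel address space with vm_create_null_area(); accesses to it are
// expected to fault, since the null cache never provides pages. Name and
// size are made up.
//
//	void* gap = NULL;
//	area_id id = vm_create_null_area(vm_kernel_address_space_id(),
//		"example guard range", &gap, B_ANY_KERNEL_ADDRESS, 64 * B_PAGE_SIZE);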
2210 
2211 
2212 /*!	Creates the vnode cache for the specified \a vnode.
2213 	The vnode has to be marked busy when calling this function.
2214 */
2215 status_t
2216 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2217 {
2218 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2219 }
2220 
2221 
2222 /*!	\a cache must be locked. The area's address space must be read-locked.
2223 */
2224 static void
2225 pre_map_area_pages(vm_area* area, VMCache* cache)
2226 {
2227 	addr_t baseAddress = area->base;
2228 	addr_t cacheOffset = area->cache_offset;
2229 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2230 	page_num_t endPage = firstPage + area->size / B_PAGE_SIZE;
2231 
2232 	for (VMCachePagesTree::Iterator it
2233 				= cache->pages.GetIterator(firstPage, true, true);
2234 			vm_page* page = it.Next();) {
2235 		if (page->cache_offset >= endPage)
2236 			break;
2237 
2238 		// skip inactive pages
2239 		if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0)
2240 			continue;
2241 
2242 		vm_map_page(area, page,
2243 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2244 			B_READ_AREA | B_KERNEL_READ_AREA);
2245 	}
2246 }
2247 
2248 
2249 /*!	Will map the file specified by \a fd to an area in memory.
2250 	The file will be mirrored beginning at the specified \a offset. The
2251 	\a offset and \a size arguments have to be page aligned.
2252 */
2253 static area_id
2254 _vm_map_file(team_id team, const char* name, void** _address,
2255 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2256 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2257 {
2258 	// TODO: for binary files, we want to make sure that they map the file's
2259 	//	contents as of a given point in time, i.e. later changes should not
2260 	//	make it into the mapped copy -- doing this nicely will need quite
2261 	//	some changes
2262 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
2263 		fd, offset, size, mapping));
2264 
2265 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
2266 	size = PAGE_ALIGN(size);
2267 
2268 	if (mapping == REGION_NO_PRIVATE_MAP)
2269 		protection |= B_SHARED_AREA;
2270 	if (addressSpec != B_EXACT_ADDRESS)
2271 		unmapAddressRange = false;
2272 
2273 	if (fd < 0) {
2274 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2275 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
2276 			B_NO_LOCK, protection, flags, kernel);
2277 	}
2278 
2279 	// get the open flags of the FD
2280 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2281 	if (descriptor == NULL)
2282 		return EBADF;
2283 	int32 openMode = descriptor->open_mode;
2284 	put_fd(descriptor);
2285 
2286 	// The FD must be open for reading in any case. For a shared mapping with
2287 	// write access, the FD must additionally be open for writing.
2288 	if ((openMode & O_ACCMODE) == O_WRONLY
2289 		|| (mapping == REGION_NO_PRIVATE_MAP
2290 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2291 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2292 		return EACCES;
2293 	}
2294 
2295 	// get the vnode for the object, this also grabs a ref to it
2296 	struct vnode* vnode = NULL;
2297 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2298 	if (status < B_OK)
2299 		return status;
2300 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2301 
2302 	// If we're going to pre-map pages, we need to reserve the pages needed by
2303 	// the mapping backend upfront.
2304 	page_num_t reservedPreMapPages = 0;
2305 	if ((protection & B_READ_AREA) != 0) {
2306 		AddressSpaceWriteLocker locker;
2307 		status = locker.SetTo(team);
2308 		if (status != B_OK)
2309 			return status;
2310 
2311 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2312 		reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
2313 
2314 		locker.Unlock();
2315 
2316 		vm_page_reserve_pages(reservedPreMapPages);
2317 	}
2318 
2319 	struct PageUnreserver {
2320 		PageUnreserver(page_num_t count)
2321 			: fCount(count)
2322 		{
2323 		}
2324 
2325 		~PageUnreserver()
2326 		{
2327 			if (fCount > 0)
2328 				vm_page_unreserve_pages(fCount);
2329 		}
2330 
2331 		page_num_t	fCount;
2332 	} pageUnreserver(reservedPreMapPages);
2333 
2334 	AddressSpaceWriteLocker locker(team);
2335 	if (!locker.IsLocked())
2336 		return B_BAD_TEAM_ID;
2337 
2338 	// TODO: this only works for file systems that use the file cache
2339 	vm_cache* cache;
2340 	status = vfs_get_vnode_cache(vnode, &cache, false);
2341 	if (status < B_OK)
2342 		return status;
2343 
2344 	cache->Lock();
2345 
2346 	vm_area* area;
2347 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2348 		offset, size, addressSpec, 0, protection, mapping, &area, name,
2349 		unmapAddressRange, kernel);
2350 
2351 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2352 		// map_backing_store() cannot know we no longer need the ref
2353 		cache->ReleaseRefLocked();
2354 	}
2355 
2356 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2357 		pre_map_area_pages(area, cache);
2358 
2359 	cache->Unlock();
2360 
2361 	if (status == B_OK) {
2362 		// TODO: this probably deserves a smarter solution, i.e. don't always
2363 		// prefetch stuff, and also, probably don't trigger it at this place.
2364 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2365 			// prefetches at max 10 MB starting from "offset"
2366 	}
2367 
2368 	if (status != B_OK)
2369 		return status;
2370 
2371 	area->cache_type = CACHE_TYPE_VNODE;
2372 	return area->id;
2373 }
2374 
2375 
2376 area_id
2377 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2378 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2379 	int fd, off_t offset)
2380 {
2381 	if (!arch_vm_supports_protection(protection))
2382 		return B_NOT_SUPPORTED;
2383 
2384 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2385 		mapping, unmapAddressRange, fd, offset, true);
2386 }
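
// Illustrative sketch, not part of the original source: mapping the first
// 128 KB of an already opened file read-only into the kernel. 'fd' stands
// for a valid, readable file descriptor; name and size are made up.
//
//	void* mapped = NULL;
//	area_id id = vm_map_file(vm_kernel_address_space_id(), "example mapping",
//		&mapped, B_ANY_KERNEL_ADDRESS, 128 * 1024, B_KERNEL_READ_AREA,
//		REGION_PRIVATE_MAP, false, fd, 0);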
2387 
2388 
2389 vm_cache*
2390 vm_area_get_locked_cache(vm_area* area)
2391 {
2392 	mutex_lock(&sAreaCacheLock);
2393 
2394 	while (true) {
2395 		vm_cache* cache = area->cache;
2396 
2397 		if (!cache->SwitchLock(&sAreaCacheLock)) {
2398 			// cache has been deleted
2399 			mutex_lock(&sAreaCacheLock);
2400 			continue;
2401 		}
2402 
2403 		mutex_lock(&sAreaCacheLock);
2404 
2405 		if (cache == area->cache) {
2406 			cache->AcquireRefLocked();
2407 			mutex_unlock(&sAreaCacheLock);
2408 			return cache;
2409 		}
2410 
2411 		// the cache changed in the meantime
2412 		cache->Unlock();
2413 	}
2414 }
2415 
2416 
2417 void
2418 vm_area_put_locked_cache(vm_cache* cache)
2419 {
2420 	cache->ReleaseRefAndUnlock();
2421 }
2422 
2423 
2424 area_id
2425 vm_clone_area(team_id team, const char* name, void** address,
2426 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2427 	bool kernel)
2428 {
2429 	vm_area* newArea = NULL;
2430 	vm_area* sourceArea;
2431 
2432 	// Check whether the source area exists and is cloneable. If so, mark it
2433 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2434 	{
2435 		AddressSpaceWriteLocker locker;
2436 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2437 		if (status != B_OK)
2438 			return status;
2439 
2440 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2441 			return B_NOT_ALLOWED;
2442 
2443 		sourceArea->protection |= B_SHARED_AREA;
2444 		protection |= B_SHARED_AREA;
2445 	}
2446 
2447 	// Now lock both address spaces and actually do the cloning.
2448 
2449 	MultiAddressSpaceLocker locker;
2450 	vm_address_space* sourceAddressSpace;
2451 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2452 	if (status != B_OK)
2453 		return status;
2454 
2455 	vm_address_space* targetAddressSpace;
2456 	status = locker.AddTeam(team, true, &targetAddressSpace);
2457 	if (status != B_OK)
2458 		return status;
2459 
2460 	status = locker.Lock();
2461 	if (status != B_OK)
2462 		return status;
2463 
2464 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2465 	if (sourceArea == NULL)
2466 		return B_BAD_VALUE;
2467 
2468 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2469 		return B_NOT_ALLOWED;
2470 
2471 	vm_cache* cache = vm_area_get_locked_cache(sourceArea);
2472 
2473 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2474 	//	have been adapted. Maybe it should be part of the kernel settings,
2475 	//	anyway (so that old drivers can always work).
2476 #if 0
2477 	if (sourceArea->aspace == vm_kernel_address_space()
2478 		&& addressSpace != vm_kernel_address_space()
2479 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2480 		// kernel areas must not be cloned in userland, unless explicitly
2481 		// declared user-cloneable upon construction
2482 		status = B_NOT_ALLOWED;
2483 	} else
2484 #endif
2485 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2486 		status = B_NOT_ALLOWED;
2487 	else {
2488 		status = map_backing_store(targetAddressSpace, cache, address,
2489 			sourceArea->cache_offset, sourceArea->size, addressSpec,
2490 			sourceArea->wiring, protection, mapping, &newArea, name, false,
2491 			kernel);
2492 	}
2493 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2494 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2495 		// to create a new cache, and has therefore already acquired a reference
2496 		// to the source cache - but otherwise it has no idea that we need
2497 		// one.
2498 		cache->AcquireRefLocked();
2499 	}
2500 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2501 		// we need to map in everything at this point
2502 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2503 			// we don't have actual pages to map but a physical area
2504 			vm_translation_map* map
2505 				= &sourceArea->address_space->translation_map;
2506 			map->ops->lock(map);
2507 
2508 			addr_t physicalAddress;
2509 			uint32 oldProtection;
2510 			map->ops->query(map, sourceArea->base, &physicalAddress,
2511 				&oldProtection);
2512 
2513 			map->ops->unlock(map);
2514 
2515 			map = &targetAddressSpace->translation_map;
2516 			size_t reservePages = map->ops->map_max_pages_need(map,
2517 				newArea->base, newArea->base + (newArea->size - 1));
2518 
2519 			vm_page_reserve_pages(reservePages);
2520 			map->ops->lock(map);
2521 
2522 			for (addr_t offset = 0; offset < newArea->size;
2523 					offset += B_PAGE_SIZE) {
2524 				map->ops->map(map, newArea->base + offset,
2525 					physicalAddress + offset, protection);
2526 			}
2527 
2528 			map->ops->unlock(map);
2529 			vm_page_unreserve_pages(reservePages);
2530 		} else {
2531 			vm_translation_map* map = &targetAddressSpace->translation_map;
2532 			size_t reservePages = map->ops->map_max_pages_need(map,
2533 				newArea->base, newArea->base + (newArea->size - 1));
2534 			vm_page_reserve_pages(reservePages);
2535 
2536 			// map in all pages from source
2537 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2538 					vm_page* page  = it.Next();) {
2539 				vm_map_page(newArea, page, newArea->base
2540 					+ ((page->cache_offset << PAGE_SHIFT)
2541 					- newArea->cache_offset), protection);
2542 			}
2543 
2544 			vm_page_unreserve_pages(reservePages);
2545 		}
2546 	}
2547 	if (status == B_OK)
2548 		newArea->cache_type = sourceArea->cache_type;
2549 
2550 	vm_area_put_locked_cache(cache);
2551 
2552 	if (status < B_OK)
2553 		return status;
2554 
2555 	return newArea->id;
2556 }
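
// Illustrative sketch, not part of the original source: cloning an existing
// area into the kernel address space with shared semantics, e.g. to access a
// buffer another team has created. 'sourceArea' stands for a valid area_id.
//
//	void* address = NULL;
//	area_id clone = vm_clone_area(vm_kernel_address_space_id(),
//		"example clone", &address, B_ANY_KERNEL_ADDRESS,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceArea, true);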
2557 
2558 
2559 //! The address space must be write locked at this point
2560 static void
2561 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area)
2562 {
2563 	vm_area* temp = addressSpace->areas;
2564 	vm_area* last = NULL;
2565 
2566 	while (temp != NULL) {
2567 		if (area == temp) {
2568 			if (last != NULL) {
2569 				last->address_space_next = temp->address_space_next;
2570 			} else {
2571 				addressSpace->areas = temp->address_space_next;
2572 			}
2573 			addressSpace->change_count++;
2574 			break;
2575 		}
2576 		last = temp;
2577 		temp = temp->address_space_next;
2578 	}
2579 	if (area == addressSpace->area_hint)
2580 		addressSpace->area_hint = NULL;
2581 
2582 	if (temp == NULL)
2583 		panic("vm_area_release_ref: area not found in aspace's area list\n");
2584 }
2585 
2586 
2587 static void
2588 delete_area(vm_address_space* addressSpace, vm_area* area)
2589 {
2590 	rw_lock_write_lock(&sAreaHashLock);
2591 	hash_remove(sAreaHash, area);
2592 	rw_lock_write_unlock(&sAreaHashLock);
2593 
2594 	// At this point the area is removed from the global hash table, but
2595 	// still exists in the area list.
2596 
2597 	// Unmap the virtual address space the area occupied
2598 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2599 
2600 	if (!area->cache->temporary)
2601 		area->cache->WriteModified();
2602 
2603 	arch_vm_unset_memory_type(area);
2604 	remove_area_from_address_space(addressSpace, area);
2605 	vm_put_address_space(addressSpace);
2606 
2607 	area->cache->RemoveArea(area);
2608 	area->cache->ReleaseRef();
2609 
2610 	free(area->page_protections);
2611 	free(area->name);
2612 	free(area);
2613 }
2614 
2615 
2616 status_t
2617 vm_delete_area(team_id team, area_id id, bool kernel)
2618 {
2619 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2620 
2621 	AddressSpaceWriteLocker locker;
2622 	vm_area* area;
2623 	status_t status = locker.SetFromArea(team, id, area);
2624 	if (status < B_OK)
2625 		return status;
2626 
2627 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2628 		return B_NOT_ALLOWED;
2629 
2630 	delete_area(locker.AddressSpace(), area);
2631 	return B_OK;
2632 }
2633 
2634 
2635 /*!	Creates a new cache on top of given cache, moves all areas from
2636 	the old cache to the new one, and changes the protection of all affected
2637 	areas' pages to read-only.
2638 	Preconditions:
2639 	- The given cache must be locked.
2640 	- All of the cache's areas' address spaces must be read locked.
2641 */
2642 static status_t
2643 vm_copy_on_write_area(vm_cache* lowerCache)
2644 {
2645 	vm_cache* upperCache;
2646 
2647 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2648 
2649 	// We need to separate the cache from its areas. The cache goes one level
2650 	// deeper and we create a new cache in between.
2651 
2652 	// create an anonymous cache
2653 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2654 		0, true);
2655 	if (status != B_OK)
2656 		return status;
2657 
2658 	upperCache->Lock();
2659 
2660 	upperCache->temporary = 1;
2661 	upperCache->scan_skip = lowerCache->scan_skip;
2662 	upperCache->virtual_base = lowerCache->virtual_base;
2663 	upperCache->virtual_end = lowerCache->virtual_end;
2664 
2665 	// transfer the lower cache areas to the upper cache
2666 	mutex_lock(&sAreaCacheLock);
2667 
2668 	upperCache->areas = lowerCache->areas;
2669 	lowerCache->areas = NULL;
2670 
2671 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2672 			tempArea = tempArea->cache_next) {
2673 		tempArea->cache = upperCache;
2674 		upperCache->AcquireRefLocked();
2675 		lowerCache->ReleaseRefLocked();
2676 	}
2677 
2678 	mutex_unlock(&sAreaCacheLock);
2679 
2680 	lowerCache->AddConsumer(upperCache);
2681 
2682 	// We now need to remap all pages from all of the cache's areas read-only,
2683 	// so that a copy will be created on the next write access
2684 
2685 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2686 			tempArea = tempArea->cache_next) {
2687 		// The area must be readable in the same way it was previously writable
2688 		uint32 protection = B_KERNEL_READ_AREA;
2689 		if ((tempArea->protection & B_READ_AREA) != 0)
2690 			protection |= B_READ_AREA;
2691 
2692 		vm_translation_map* map = &tempArea->address_space->translation_map;
2693 		map->ops->lock(map);
2694 		map->ops->protect(map, tempArea->base,
2695 			tempArea->base - 1 + tempArea->size, protection);
2696 		map->ops->unlock(map);
2697 	}
2698 
2699 	vm_area_put_locked_cache(upperCache);
2700 
2701 	return B_OK;
2702 }
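
// Rough picture of what vm_copy_on_write_area() above does (added for
// illustration): the areas are moved onto a fresh, empty anonymous cache
// whose source is the old cache, and their pages are write-protected, so a
// write fault copies the touched page up into the new cache while reads
// still fall through to the source.
//
//	before:  areas --> lowerCache (holds the pages)
//	after:   areas --> upperCache (empty, temporary) --source--> lowerCache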
2703 
2704 
2705 area_id
2706 vm_copy_area(team_id team, const char* name, void** _address,
2707 	uint32 addressSpec, uint32 protection, area_id sourceID)
2708 {
2709 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2710 
2711 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2712 		// set the same protection for the kernel as for userland
2713 		protection |= B_KERNEL_READ_AREA;
2714 		if (writableCopy)
2715 			protection |= B_KERNEL_WRITE_AREA;
2716 	}
2717 
2718 	// Do the locking: target address space, all address spaces associated with
2719 	// the source cache, and the cache itself.
2720 	MultiAddressSpaceLocker locker;
2721 	vm_address_space* targetAddressSpace;
2722 	vm_cache* cache;
2723 	vm_area* source;
2724 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2725 	if (status == B_OK) {
2726 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2727 			&cache);
2728 	}
2729 	if (status != B_OK)
2730 		return status;
2731 
2732 	AreaCacheLocker cacheLocker(cache);	// already locked
2733 
2734 	if (addressSpec == B_CLONE_ADDRESS) {
2735 		addressSpec = B_EXACT_ADDRESS;
2736 		*_address = (void*)source->base;
2737 	}
2738 
2739 	bool sharedArea = (source->protection & B_SHARED_AREA) != 0;
2740 
2741 	// First, create a cache on top of the source area, or use the existing
2742 	// one if this is a shared area.
2743 
2744 	vm_area* target;
2745 	status = map_backing_store(targetAddressSpace, cache, _address,
2746 		source->cache_offset, source->size, addressSpec, source->wiring,
2747 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2748 		&target, name, false, true);
2749 	if (status < B_OK)
2750 		return status;
2751 
2752 	if (sharedArea) {
2753 		// The new area uses the old area's cache, but map_backing_store()
2754 		// hasn't acquired a ref. So we have to do that now.
2755 		cache->AcquireRefLocked();
2756 	}
2757 
2758 	// If the source area is writable, we need to move it one layer up as well
2759 
2760 	if (!sharedArea) {
2761 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2762 			// TODO: do something more useful if this fails!
2763 			if (vm_copy_on_write_area(cache) < B_OK)
2764 				panic("vm_copy_on_write_area() failed!\n");
2765 		}
2766 	}
2767 
2768 	// we return the ID of the newly created area
2769 	return target->id;
2770 }
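
// Illustrative sketch, not part of the original source: making a private copy
// of an existing area; if the source is writable, its cache is pushed behind
// a copy-on-write layer by vm_copy_on_write_area() as shown above.
// 'sourceArea' stands for a valid area_id.
//
//	void* address = NULL;
//	area_id copy = vm_copy_area(vm_kernel_address_space_id(), "example copy",
//		&address, B_ANY_KERNEL_ADDRESS,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, sourceArea);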
2771 
2772 
2773 //! You need to hold the cache lock when calling this function
2774 static int32
2775 count_writable_areas(vm_cache* cache, vm_area* ignoreArea)
2776 {
2777 	struct vm_area* area = cache->areas;
2778 	uint32 count = 0;
2779 
2780 	for (; area != NULL; area = area->cache_next) {
2781 		if (area != ignoreArea
2782 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2783 			count++;
2784 	}
2785 
2786 	return count;
2787 }
2788 
2789 
2790 static status_t
2791 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2792 	bool kernel)
2793 {
2794 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2795 		"%#lx)\n", team, areaID, newProtection));
2796 
2797 	if (!arch_vm_supports_protection(newProtection))
2798 		return B_NOT_SUPPORTED;
2799 
2800 	// lock address spaces and cache
2801 	MultiAddressSpaceLocker locker;
2802 	vm_cache* cache;
2803 	vm_area* area;
2804 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2805 		&cache);
2806 	AreaCacheLocker cacheLocker(cache);	// already locked
2807 
2808 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2809 		return B_NOT_ALLOWED;
2810 
2811 	if (area->protection == newProtection)
2812 		return B_OK;
2813 
2814 	if (team != vm_kernel_address_space_id()
2815 		&& area->address_space->id != team) {
2816 		// unless you're the kernel, you are only allowed to set
2817 		// the protection of your own areas
2818 		return B_NOT_ALLOWED;
2819 	}
2820 
2821 	bool changePageProtection = true;
2822 
2823 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2824 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2825 		// writable -> !writable
2826 
2827 		if (cache->source != NULL && cache->temporary) {
2828 			if (count_writable_areas(cache, area) == 0) {
2829 				// Since this cache is now backed by the pages of its source
2830 				// cache, we can change the cache's commitment to account only
2831 				// for the pages that really are in this cache.
2832 
2833 				status = cache->Commit(cache->page_count * B_PAGE_SIZE);
2834 
2835 				// TODO: we may be able to join with our source cache, if
2836 				// count == 0
2837 			}
2838 		}
2839 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
2840 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
2841 		// !writable -> writable
2842 
2843 		if (!list_is_empty(&cache->consumers)) {
2844 			// There are consumers -- we have to insert a new cache. Fortunately
2845 			// vm_copy_on_write_area() does everything that's needed.
2846 			changePageProtection = false;
2847 			status = vm_copy_on_write_area(cache);
2848 		} else {
2849 			// No consumers, so we don't need to insert a new one.
2850 			if (cache->source != NULL && cache->temporary) {
2851 				// the cache's commitment must contain all possible pages
2852 				status = cache->Commit(cache->virtual_end
2853 					- cache->virtual_base);
2854 			}
2855 
2856 			if (status == B_OK && cache->source != NULL) {
2857 				// There's a source cache, hence we can't just change all pages'
2858 				// protection or we might allow writing into pages belonging to
2859 				// a lower cache.
2860 				changePageProtection = false;
2861 
2862 				struct vm_translation_map* map
2863 					= &area->address_space->translation_map;
2864 				map->ops->lock(map);
2865 
2866 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2867 						vm_page* page = it.Next();) {
2868 					addr_t address = area->base
2869 						+ (page->cache_offset << PAGE_SHIFT);
2870 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
2871 						newProtection);
2872 				}
2873 
2874 				map->ops->unlock(map);
2875 			}
2876 		}
2877 	} else {
2878 		// we don't have anything special to do in all other cases
2879 	}
2880 
2881 	if (status == B_OK) {
2882 		// remap existing pages in this cache
2883 		struct vm_translation_map* map = &area->address_space->translation_map;
2884 
2885 		if (changePageProtection) {
2886 			map->ops->lock(map);
2887 			map->ops->protect(map, area->base, area->base - 1 + area->size,
2888 				newProtection);
2889 			map->ops->unlock(map);
2890 		}
2891 
2892 		area->protection = newProtection;
2893 	}
2894 
2895 	return status;
2896 }
2897 
2898 
2899 status_t
2900 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr)
2901 {
2902 	vm_address_space* addressSpace = vm_get_address_space(team);
2903 	if (addressSpace == NULL)
2904 		return B_BAD_TEAM_ID;
2905 
2906 	uint32 dummyFlags;
2907 	status_t status = addressSpace->translation_map.ops->query(
2908 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
2909 
2910 	vm_put_address_space(addressSpace);
2911 	return status;
2912 }
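
// Illustrative sketch, not part of the original source: translating a mapped
// kernel virtual address into its backing physical address. 'buffer' stands
// for any currently mapped kernel pointer.
//
//	addr_t physical = 0;
//	if (vm_get_page_mapping(vm_kernel_address_space_id(), (addr_t)buffer,
//			&physical) == B_OK)
//		dprintf("virtual %p -> physical %#lx\n", buffer, physical);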
2913 
2914 
2915 static inline addr_t
2916 virtual_page_address(vm_area* area, vm_page* page)
2917 {
2918 	return area->base
2919 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
2920 }
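
// For example (values chosen for illustration): with area->base 0x80000000,
// area->cache_offset 0x2000 (bytes) and a page whose cache_offset is 5
// (pages), virtual_page_address() above yields
// 0x80000000 + (5 << PAGE_SHIFT) - 0x2000 = 0x80003000, assuming a 4 KB page
// size (PAGE_SHIFT 12).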
2921 
2922 
2923 bool
2924 vm_test_map_modification(vm_page* page)
2925 {
2926 	MutexLocker locker(sMappingLock);
2927 
2928 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2929 	vm_page_mapping* mapping;
2930 	while ((mapping = iterator.Next()) != NULL) {
2931 		vm_area* area = mapping->area;
2932 		vm_translation_map* map = &area->address_space->translation_map;
2933 
2934 		addr_t physicalAddress;
2935 		uint32 flags;
2936 		map->ops->lock(map);
2937 		map->ops->query(map, virtual_page_address(area, page),
2938 			&physicalAddress, &flags);
2939 		map->ops->unlock(map);
2940 
2941 		if ((flags & PAGE_MODIFIED) != 0)
2942 			return true;
2943 	}
2944 
2945 	return false;
2946 }
2947 
2948 
2949 int32
2950 vm_test_map_activation(vm_page* page, bool* _modified)
2951 {
2952 	int32 activation = 0;
2953 	bool modified = false;
2954 
2955 	MutexLocker locker(sMappingLock);
2956 
2957 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2958 	vm_page_mapping* mapping;
2959 	while ((mapping = iterator.Next()) != NULL) {
2960 		vm_area* area = mapping->area;
2961 		vm_translation_map* map = &area->address_space->translation_map;
2962 
2963 		addr_t physicalAddress;
2964 		uint32 flags;
2965 		map->ops->lock(map);
2966 		map->ops->query(map, virtual_page_address(area, page),
2967 			&physicalAddress, &flags);
2968 		map->ops->unlock(map);
2969 
2970 		if ((flags & PAGE_ACCESSED) != 0)
2971 			activation++;
2972 		if ((flags & PAGE_MODIFIED) != 0)
2973 			modified = true;
2974 	}
2975 
2976 	if (_modified != NULL)
2977 		*_modified = modified;
2978 
2979 	return activation;
2980 }
2981 
2982 
2983 void
2984 vm_clear_map_flags(vm_page* page, uint32 flags)
2985 {
2986 	MutexLocker locker(sMappingLock);
2987 
2988 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2989 	vm_page_mapping* mapping;
2990 	while ((mapping = iterator.Next()) != NULL) {
2991 		vm_area* area = mapping->area;
2992 		vm_translation_map* map = &area->address_space->translation_map;
2993 
2994 		map->ops->lock(map);
2995 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
2996 		map->ops->unlock(map);
2997 	}
2998 }
2999 
3000 
3001 /*!	Removes all mappings from a page.
3002 	After you've called this function, the page is unmapped from memory.
3003 	The accumulated page flags of all mappings can be found in \a _flags.
3004 */
3005 void
3006 vm_remove_all_page_mappings(vm_page* page, uint32* _flags)
3007 {
3008 	uint32 accumulatedFlags = 0;
3009 	MutexLocker locker(sMappingLock);
3010 
3011 	vm_page_mappings queue;
3012 	queue.MoveFrom(&page->mappings);
3013 
3014 	vm_page_mappings::Iterator iterator = queue.GetIterator();
3015 	vm_page_mapping* mapping;
3016 	while ((mapping = iterator.Next()) != NULL) {
3017 		vm_area* area = mapping->area;
3018 		vm_translation_map* map = &area->address_space->translation_map;
3019 		addr_t physicalAddress;
3020 		uint32 flags;
3021 
3022 		map->ops->lock(map);
3023 		addr_t address = virtual_page_address(area, page);
3024 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
3025 		map->ops->flush(map);
3026 		map->ops->query(map, address, &physicalAddress, &flags);
3027 		map->ops->unlock(map);
3028 
3029 		area->mappings.Remove(mapping);
3030 
3031 		accumulatedFlags |= flags;
3032 	}
3033 
3034 	if (page->wired_count == 0 && !queue.IsEmpty())
3035 		atomic_add(&gMappedPagesCount, -1);
3036 
3037 	locker.Unlock();
3038 
3039 	// free now unused mappings
3040 
3041 	while ((mapping = queue.RemoveHead()) != NULL) {
3042 		free(mapping);
3043 	}
3044 
3045 	if (_flags != NULL)
3046 		*_flags = accumulatedFlags;
3047 }
3048 
3049 
3050 bool
3051 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified)
3052 {
3053 	vm_translation_map* map = &area->address_space->translation_map;
3054 
3055 	map->ops->lock(map);
3056 
3057 	addr_t physicalAddress;
3058 	uint32 flags;
3059 	status_t status = map->ops->query(map, virtualAddress, &physicalAddress,
3060 		&flags);
3061 	if (status < B_OK || (flags & PAGE_PRESENT) == 0) {
3062 		map->ops->unlock(map);
3063 		return false;
3064 	}
3065 	vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3066 	if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) {
3067 		panic("area %p looking up page failed for pa 0x%lx\n", area,
3068 			physicalAddress);
3069 	}
3070 
3071 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE)
3072 		decrement_page_wired_count(page);
3073 
3074 	map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1);
3075 
3076 	if (preserveModified) {
3077 		map->ops->flush(map);
3078 
3079 		status = map->ops->query(map, virtualAddress, &physicalAddress, &flags);
3080 		if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED)
3081 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
3082 	}
3083 
3084 	map->ops->unlock(map);
3085 
3086 	if (area->wiring == B_NO_LOCK) {
3087 		vm_page_mapping* mapping;
3088 
3089 		mutex_lock(&sMappingLock);
3090 		map->ops->lock(map);
3091 
3092 		vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3093 		while (iterator.HasNext()) {
3094 			mapping = iterator.Next();
3095 
3096 			if (mapping->area == area) {
3097 				area->mappings.Remove(mapping);
3098 				page->mappings.Remove(mapping);
3099 
3100 				if (page->mappings.IsEmpty() && page->wired_count == 0)
3101 					atomic_add(&gMappedPagesCount, -1);
3102 
3103 				map->ops->unlock(map);
3104 				mutex_unlock(&sMappingLock);
3105 
3106 				free(mapping);
3107 
3108 				return true;
3109 			}
3110 		}
3111 
3112 		map->ops->unlock(map);
3113 		mutex_unlock(&sMappingLock);
3114 
3115 		dprintf("vm_unmap_page: couldn't find mapping for area %p in page %p\n",
3116 			area, page);
3117 	}
3118 
3119 	return true;
3120 }
3121 
3122 
3123 status_t
3124 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified)
3125 {
3126 	vm_translation_map* map = &area->address_space->translation_map;
3127 	addr_t end = base + (size - 1);
3128 
3129 	map->ops->lock(map);
3130 
3131 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
3132 		// iterate through all pages and decrease their wired count
3133 		for (addr_t virtualAddress = base; virtualAddress < end;
3134 				virtualAddress += B_PAGE_SIZE) {
3135 			addr_t physicalAddress;
3136 			uint32 flags;
3137 			status_t status = map->ops->query(map, virtualAddress,
3138 				&physicalAddress, &flags);
3139 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3140 				continue;
3141 
3142 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3143 			if (page == NULL) {
3144 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3145 					physicalAddress);
3146 			}
3147 
3148 			decrement_page_wired_count(page);
3149 		}
3150 	}
3151 
3152 	map->ops->unmap(map, base, end);
3153 	if (preserveModified) {
3154 		map->ops->flush(map);
3155 
3156 		for (addr_t virtualAddress = base; virtualAddress < end;
3157 				virtualAddress += B_PAGE_SIZE) {
3158 			addr_t physicalAddress;
3159 			uint32 flags;
3160 			status_t status = map->ops->query(map, virtualAddress,
3161 				&physicalAddress, &flags);
3162 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3163 				continue;
3164 
3165 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3166 			if (page == NULL) {
3167 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3168 					physicalAddress);
3169 			}
3170 
3171 			if ((flags & PAGE_MODIFIED) != 0
3172 				&& page->state != PAGE_STATE_MODIFIED)
3173 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
3174 		}
3175 	}
3176 	map->ops->unlock(map);
3177 
3178 	if (area->wiring == B_NO_LOCK) {
3179 		uint32 startOffset = (area->cache_offset + base - area->base)
3180 			>> PAGE_SHIFT;
3181 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
3182 		vm_page_mapping* mapping;
3183 		vm_area_mappings queue;
3184 
3185 		mutex_lock(&sMappingLock);
3186 		map->ops->lock(map);
3187 
3188 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3189 		while (iterator.HasNext()) {
3190 			mapping = iterator.Next();
3191 
3192 			vm_page* page = mapping->page;
3193 			if (page->cache_offset < startOffset
3194 				|| page->cache_offset >= endOffset)
3195 				continue;
3196 
3197 			page->mappings.Remove(mapping);
3198 			iterator.Remove();
3199 
3200 			if (page->mappings.IsEmpty() && page->wired_count == 0)
3201 				atomic_add(&gMappedPagesCount, -1);
3202 
3203 			queue.Add(mapping);
3204 		}
3205 
3206 		map->ops->unlock(map);
3207 		mutex_unlock(&sMappingLock);
3208 
3209 		while ((mapping = queue.RemoveHead()) != NULL) {
3210 			free(mapping);
3211 		}
3212 	}
3213 
3214 	return B_OK;
3215 }
3216 
3217 
3218 /*!	When calling this function, you need to have pages reserved! */
3219 status_t
3220 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection)
3221 {
3222 	vm_translation_map* map = &area->address_space->translation_map;
3223 	vm_page_mapping* mapping = NULL;
3224 
3225 	if (area->wiring == B_NO_LOCK) {
3226 		mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping));
3227 		if (mapping == NULL)
3228 			return B_NO_MEMORY;
3229 
3230 		mapping->page = page;
3231 		mapping->area = area;
3232 	}
3233 
3234 	map->ops->lock(map);
3235 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
3236 		protection);
3237 	map->ops->unlock(map);
3238 
3239 	if (area->wiring != B_NO_LOCK) {
3240 		increment_page_wired_count(page);
3241 	} else {
3242 		// insert mapping into lists
3243 		MutexLocker locker(sMappingLock);
3244 
3245 		if (page->mappings.IsEmpty() && page->wired_count == 0)
3246 			atomic_add(&gMappedPagesCount, 1);
3247 
3248 		page->mappings.Add(mapping);
3249 		area->mappings.Add(mapping);
3250 	}
3251 
3252 	if (page->usage_count < 0)
3253 		page->usage_count = 1;
3254 
3255 	if (page->state != PAGE_STATE_MODIFIED)
3256 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
3257 
3258 	return B_OK;
3259 }
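
// Illustrative sketch, not part of the original source: the reservation
// pattern the note above refers to, mirroring what vm_map_physical_memory()
// does -- reserve the worst case number of pages the mapping backend may
// need, map, then drop the reservation. 'area', 'page' and 'address' stand
// for a suitable vm_area, vm_page and virtual address.
//
//	vm_translation_map* map = &area->address_space->translation_map;
//	size_t reservePages = map->ops->map_max_pages_need(map, address,
//		address + B_PAGE_SIZE - 1);
//	vm_page_reserve_pages(reservePages);
//	vm_map_page(area, page, address, B_KERNEL_READ_AREA);
//	vm_page_unreserve_pages(reservePages);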
3260 
3261 
3262 static int
3263 display_mem(int argc, char** argv)
3264 {
3265 	bool physical = false;
3266 	addr_t copyAddress;
3267 	int32 displayWidth;
3268 	int32 itemSize;
3269 	int32 num = -1;
3270 	addr_t address;
3271 	int i = 1, j;
3272 
3273 	if (argc > 1 && argv[1][0] == '-') {
3274 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3275 			physical = true;
3276 			i++;
3277 		} else
3278 			i = 99;
3279 	}
3280 
3281 	if (argc < i + 1 || argc > i + 2) {
3282 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3283 			"\tdl - 8 bytes\n"
3284 			"\tdw - 4 bytes\n"
3285 			"\tds - 2 bytes\n"
3286 			"\tdb - 1 byte\n"
3287 			"\tstring - a whole string\n"
3288 			"  -p or --physical only allows memory from a single page to be "
3289 			"displayed.\n");
3290 		return 0;
3291 	}
3292 
3293 	address = parse_expression(argv[i]);
3294 
3295 	if (argc > i + 1)
3296 		num = parse_expression(argv[i + 1]);
3297 
3298 	// build the format string
3299 	if (strcmp(argv[0], "db") == 0) {
3300 		itemSize = 1;
3301 		displayWidth = 16;
3302 	} else if (strcmp(argv[0], "ds") == 0) {
3303 		itemSize = 2;
3304 		displayWidth = 8;
3305 	} else if (strcmp(argv[0], "dw") == 0) {
3306 		itemSize = 4;
3307 		displayWidth = 4;
3308 	} else if (strcmp(argv[0], "dl") == 0) {
3309 		itemSize = 8;
3310 		displayWidth = 2;
3311 	} else if (strcmp(argv[0], "string") == 0) {
3312 		itemSize = 1;
3313 		displayWidth = -1;
3314 	} else {
3315 		kprintf("display_mem called in an invalid way!\n");
3316 		return 0;
3317 	}
3318 
3319 	if (num <= 0)
3320 		num = displayWidth;
3321 
3322 	void* physicalPageHandle = NULL;
3323 
3324 	if (physical) {
3325 		int32 offset = address & (B_PAGE_SIZE - 1);
3326 		if (num * itemSize + offset > B_PAGE_SIZE) {
3327 			num = (B_PAGE_SIZE - offset) / itemSize;
3328 			kprintf("NOTE: number of bytes has been cut to page size\n");
3329 		}
3330 
3331 		address = ROUNDOWN(address, B_PAGE_SIZE);
3332 
3333 		if (vm_get_physical_page_debug(address, &copyAddress,
3334 				&physicalPageHandle) != B_OK) {
3335 			kprintf("getting the physical page failed.\n");
3336 			return 0;
3337 		}
3338 
3339 		address += offset;
3340 		copyAddress += offset;
3341 	} else
3342 		copyAddress = address;
3343 
3344 	if (!strcmp(argv[0], "string")) {
3345 		kprintf("%p \"", (char*)copyAddress);
3346 
3347 		// string mode
3348 		for (i = 0; true; i++) {
3349 			char c;
3350 			if (user_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
3351 				|| c == '\0')
3352 				break;
3353 
3354 			if (c == '\n')
3355 				kprintf("\\n");
3356 			else if (c == '\t')
3357 				kprintf("\\t");
3358 			else {
3359 				if (!isprint(c))
3360 					c = '.';
3361 
3362 				kprintf("%c", c);
3363 			}
3364 		}
3365 
3366 		kprintf("\"\n");
3367 	} else {
3368 		// number mode
3369 		for (i = 0; i < num; i++) {
3370 			uint32 value;
3371 
3372 			if ((i % displayWidth) == 0) {
3373 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3374 				if (i != 0)
3375 					kprintf("\n");
3376 
3377 				kprintf("[0x%lx]  ", address + i * itemSize);
3378 
3379 				for (j = 0; j < displayed; j++) {
3380 					char c;
3381 					if (user_memcpy(&c, (char*)copyAddress + i * itemSize + j,
3382 							1) != B_OK) {
3383 						displayed = j;
3384 						break;
3385 					}
3386 					if (!isprint(c))
3387 						c = '.';
3388 
3389 					kprintf("%c", c);
3390 				}
3391 				if (num > displayWidth) {
3392 					// make sure the spacing in the last line is correct
3393 					for (j = displayed; j < displayWidth * itemSize; j++)
3394 						kprintf(" ");
3395 				}
3396 				kprintf("  ");
3397 			}
3398 
3399 			if (user_memcpy(&value, (uint8*)copyAddress + i * itemSize,
3400 					itemSize) != B_OK) {
3401 				kprintf("read fault");
3402 				break;
3403 			}
3404 
3405 			switch (itemSize) {
3406 				case 1:
3407 					kprintf(" %02x", *(uint8*)&value);
3408 					break;
3409 				case 2:
3410 					kprintf(" %04x", *(uint16*)&value);
3411 					break;
3412 				case 4:
3413 					kprintf(" %08lx", *(uint32*)&value);
3414 					break;
3415 				case 8:
3416 					kprintf(" %016Lx", *(uint64*)&value);
3417 					break;
3418 			}
3419 		}
3420 
3421 		kprintf("\n");
3422 	}
3423 
3424 	if (physical) {
3425 		copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE);
3426 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3427 	}
3428 	return 0;
3429 }
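
// Example invocations of the debugger command above (addresses made up):
// "dw 0x80001234 8" dumps eight 32-bit values starting at that virtual
// address, "db -p 0x9f000 16" dumps 16 bytes from the physical page
// containing 0x9f000, and "string 0x80001234" prints the NUL-terminated
// string found there.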
3430 
3431 
3432 static void
3433 dump_cache_tree_recursively(vm_cache* cache, int level,
3434 	vm_cache* highlightCache)
3435 {
3436 	// print this cache
3437 	for (int i = 0; i < level; i++)
3438 		kprintf("  ");
3439 	if (cache == highlightCache)
3440 		kprintf("%p <--\n", cache);
3441 	else
3442 		kprintf("%p\n", cache);
3443 
3444 	// recursively print its consumers
3445 	vm_cache* consumer = NULL;
3446 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3447 			consumer)) != NULL) {
3448 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3449 	}
3450 }
3451 
3452 
3453 static int
3454 dump_cache_tree(int argc, char** argv)
3455 {
3456 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3457 		kprintf("usage: %s <address>\n", argv[0]);
3458 		return 0;
3459 	}
3460 
3461 	addr_t address = parse_expression(argv[1]);
3462 	if (address == 0)
3463 		return 0;
3464 
3465 	vm_cache* cache = (vm_cache*)address;
3466 	vm_cache* root = cache;
3467 
3468 	// find the root cache (the transitive source)
3469 	while (root->source != NULL)
3470 		root = root->source;
3471 
3472 	dump_cache_tree_recursively(root, 0, cache);
3473 
3474 	return 0;
3475 }
3476 
3477 
3478 static const char*
3479 cache_type_to_string(int32 type)
3480 {
3481 	switch (type) {
3482 		case CACHE_TYPE_RAM:
3483 			return "RAM";
3484 		case CACHE_TYPE_DEVICE:
3485 			return "device";
3486 		case CACHE_TYPE_VNODE:
3487 			return "vnode";
3488 		case CACHE_TYPE_NULL:
3489 			return "null";
3490 
3491 		default:
3492 			return "unknown";
3493 	}
3494 }
3495 
3496 
3497 #if DEBUG_CACHE_LIST
3498 
3499 static void
3500 update_cache_info_recursively(vm_cache* cache, cache_info& info)
3501 {
3502 	info.page_count += cache->page_count;
3503 	if (cache->type == CACHE_TYPE_RAM)
3504 		info.committed += cache->committed_size;
3505 
3506 	// recurse
3507 	vm_cache* consumer = NULL;
3508 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3509 			consumer)) != NULL) {
3510 		update_cache_info_recursively(consumer, info);
3511 	}
3512 }
3513 
3514 
3515 static int
3516 cache_info_compare_page_count(const void* _a, const void* _b)
3517 {
3518 	const cache_info* a = (const cache_info*)_a;
3519 	const cache_info* b = (const cache_info*)_b;
3520 	if (a->page_count == b->page_count)
3521 		return 0;
3522 	return a->page_count < b->page_count ? 1 : -1;
3523 }
3524 
3525 
3526 static int
3527 cache_info_compare_committed(const void* _a, const void* _b)
3528 {
3529 	const cache_info* a = (const cache_info*)_a;
3530 	const cache_info* b = (const cache_info*)_b;
3531 	if (a->committed == b->committed)
3532 		return 0;
3533 	return a->committed < b->committed ? 1 : -1;
3534 }
3535 
3536 
3537 static void
3538 dump_caches_recursively(vm_cache* cache, cache_info& info, int level)
3539 {
3540 	for (int i = 0; i < level; i++)
3541 		kprintf("  ");
3542 
3543 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3544 		cache_type_to_string(cache->type), cache->virtual_base,
3545 		cache->virtual_end, cache->page_count);
3546 
3547 	if (level == 0)
3548 		kprintf("/%lu", info.page_count);
3549 
3550 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3551 		kprintf(", committed: %lld", cache->committed_size);
3552 
3553 		if (level == 0)
3554 			kprintf("/%lu", info.committed);
3555 	}
3556 
3557 	// areas
3558 	if (cache->areas != NULL) {
3559 		vm_area* area = cache->areas;
3560 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3561 			area->address_space->id);
3562 
3563 		while (area->cache_next != NULL) {
3564 			area = area->cache_next;
3565 			kprintf(", %ld", area->id);
3566 		}
3567 	}
3568 
3569 	kputs("\n");
3570 
3571 	// recurse
3572 	vm_cache* consumer = NULL;
3573 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3574 			consumer)) != NULL) {
3575 		dump_caches_recursively(consumer, info, level + 1);
3576 	}
3577 }
3578 
3579 
3580 static int
3581 dump_caches(int argc, char** argv)
3582 {
3583 	if (sCacheInfoTable == NULL) {
3584 		kprintf("No cache info table!\n");
3585 		return 0;
3586 	}
3587 
3588 	bool sortByPageCount = true;
3589 
3590 	for (int32 i = 1; i < argc; i++) {
3591 		if (strcmp(argv[i], "-c") == 0) {
3592 			sortByPageCount = false;
3593 		} else {
3594 			print_debugger_command_usage(argv[0]);
3595 			return 0;
3596 		}
3597 	}
3598 
3599 	uint32 totalCount = 0;
3600 	uint32 rootCount = 0;
3601 	off_t totalCommitted = 0;
3602 	page_num_t totalPages = 0;
3603 
3604 	vm_cache* cache = gDebugCacheList;
3605 	while (cache) {
3606 		totalCount++;
3607 		if (cache->source == NULL) {
3608 			cache_info stackInfo;
3609 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3610 				? sCacheInfoTable[rootCount] : stackInfo;
3611 			rootCount++;
3612 			info.cache = cache;
3613 			info.page_count = 0;
3614 			info.committed = 0;
3615 			update_cache_info_recursively(cache, info);
3616 			totalCommitted += info.committed;
3617 			totalPages += info.page_count;
3618 		}
3619 
3620 		cache = cache->debug_next;
3621 	}
3622 
3623 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3624 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3625 			sortByPageCount
3626 				? &cache_info_compare_page_count
3627 				: &cache_info_compare_committed);
3628 	}
3629 
3630 	kprintf("total committed memory: %lld, total used pages: %lu\n",
3631 		totalCommitted, totalPages);
3632 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3633 		"tree...\n\n", totalCount, rootCount,
3634 		sortByPageCount ? "page count" : "committed size");
3635 
3636 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3637 		for (uint32 i = 0; i < rootCount; i++) {
3638 			cache_info& info = sCacheInfoTable[i];
3639 			dump_caches_recursively(info.cache, info, 0);
3640 		}
3641 	} else
3642 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3643 
3644 	return 0;
3645 }
3646 
3647 #endif	// DEBUG_CACHE_LIST
3648 
3649 
3650 static int
3651 dump_cache(int argc, char** argv)
3652 {
3653 	vm_cache* cache;
3654 	bool showPages = false;
3655 	int i = 1;
3656 
3657 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3658 		kprintf("usage: %s [-ps] <address>\n"
3659 			"  if -p is specified, all pages are shown; if -s is used,\n"
3660 			"  only the cache info is shown.\n", argv[0]);
3661 		return 0;
3662 	}
3663 	while (argv[i][0] == '-') {
3664 		char* arg = argv[i] + 1;
3665 		while (arg[0]) {
3666 			if (arg[0] == 'p')
3667 				showPages = true;
3668 			arg++;
3669 		}
3670 		i++;
3671 	}
3672 	if (argv[i] == NULL) {
3673 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3674 		return 0;
3675 	}
3676 
3677 	addr_t address = parse_expression(argv[i]);
3678 	if (address == 0)
3679 		return 0;
3680 
3681 	cache = (vm_cache*)address;
3682 
3683 	kprintf("CACHE %p:\n", cache);
3684 	kprintf("  ref_count:    %ld\n", cache->RefCount());
3685 	kprintf("  source:       %p\n", cache->source);
3686 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
3687 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
3688 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
3689 	kprintf("  temporary:    %ld\n", cache->temporary);
3690 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
3691 	kprintf("  lock:         %p\n", cache->GetLock());
3692 #if KDEBUG
3693 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
3694 #endif
3695 	kprintf("  areas:\n");
3696 
3697 	for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) {
3698 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
3699 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
3700 		kprintf("\tprotection: 0x%lx\n", area->protection);
3701 		kprintf("\towner:      0x%lx\n", area->address_space->id);
3702 	}
3703 
3704 	kprintf("  consumers:\n");
3705 	vm_cache* consumer = NULL;
3706 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3707 				consumer)) != NULL) {
3708 		kprintf("\t%p\n", consumer);
3709 	}
3710 
3711 	kprintf("  pages:\n");
3712 	if (showPages) {
3713 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3714 				vm_page* page = it.Next();) {
3715 			if (page->type == PAGE_TYPE_PHYSICAL) {
3716 				kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) "
3717 					"wired_count %u\n", page, page->physical_page_number,
3718 					page->cache_offset, page->type, page->state,
3719 					page_state_to_string(page->state), page->wired_count);
3720 			} else if (page->type == PAGE_TYPE_DUMMY) {
3721 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3722 					page, page->state, page_state_to_string(page->state));
3723 			} else
3724 				kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
3725 		}
3726 	} else
3727 		kprintf("\t%ld in cache\n", cache->page_count);
3728 
3729 	return 0;
3730 }
3731 
3732 
3733 static void
3734 dump_area_struct(vm_area* area, bool mappings)
3735 {
3736 	kprintf("AREA: %p\n", area);
3737 	kprintf("name:\t\t'%s'\n", area->name);
3738 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
3739 	kprintf("id:\t\t0x%lx\n", area->id);
3740 	kprintf("base:\t\t0x%lx\n", area->base);
3741 	kprintf("size:\t\t0x%lx\n", area->size);
3742 	kprintf("protection:\t0x%lx\n", area->protection);
3743 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3744 	kprintf("memory_type:\t0x%x\n", area->memory_type);
3745 	kprintf("cache:\t\t%p\n", area->cache);
3746 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3747 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3748 	kprintf("cache_next:\t%p\n", area->cache_next);
3749 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3750 
3751 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3752 	if (mappings) {
3753 		kprintf("page mappings:\n");
3754 		while (iterator.HasNext()) {
3755 			vm_page_mapping* mapping = iterator.Next();
3756 			kprintf("  %p", mapping->page);
3757 		}
3758 		kprintf("\n");
3759 	} else {
3760 		uint32 count = 0;
3761 		while (iterator.Next() != NULL) {
3762 			count++;
3763 		}
3764 		kprintf("page mappings:\t%lu\n", count);
3765 	}
3766 }
3767 
3768 
3769 static int
3770 dump_area(int argc, char** argv)
3771 {
3772 	bool mappings = false;
3773 	bool found = false;
3774 	int32 index = 1;
3775 	vm_area* area;
3776 	addr_t num;
3777 
3778 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3779 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3780 			"All areas matching either id/address/name are listed. You can\n"
3781 			"restrict the check to a specific attribute by prefixing the specifier\n"
3782 			"with the id/contains/address/name keywords.\n"
3783 			"-m shows the area's mappings as well.\n");
3784 		return 0;
3785 	}
3786 
3787 	if (!strcmp(argv[1], "-m")) {
3788 		mappings = true;
3789 		index++;
3790 	}
3791 
3792 	int32 mode = 0xf;
3793 	if (!strcmp(argv[index], "id"))
3794 		mode = 1;
3795 	else if (!strcmp(argv[index], "contains"))
3796 		mode = 2;
3797 	else if (!strcmp(argv[index], "name"))
3798 		mode = 4;
3799 	else if (!strcmp(argv[index], "address"))
3800 		mode = 0;
3801 	if (mode != 0xf)
3802 		index++;
3803 
3804 	if (index >= argc) {
3805 		kprintf("No area specifier given.\n");
3806 		return 0;
3807 	}
3808 
3809 	num = parse_expression(argv[index]);
3810 
3811 	if (mode == 0) {
3812 		dump_area_struct((struct vm_area*)num, mappings);
3813 	} else {
3814 		// walk through the area list, looking for the arguments as a name
3815 		struct hash_iterator iter;
3816 
3817 		hash_open(sAreaHash, &iter);
3818 		while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
3819 			if (((mode & 4) != 0 && area->name != NULL
3820 					&& !strcmp(argv[index], area->name))
3821 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3822 					|| (((mode & 2) != 0 && area->base <= num
3823 						&& area->base + area->size > num))))) {
3824 				dump_area_struct(area, mappings);
3825 				found = true;
3826 			}
3827 		}
3828 
3829 		if (!found)
3830 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3831 	}
3832 
3833 	return 0;
3834 }
3835 
3836 
3837 static int
3838 dump_area_list(int argc, char** argv)
3839 {
3840 	vm_area* area;
3841 	struct hash_iterator iter;
3842 	const char* name = NULL;
3843 	int32 id = 0;
3844 
3845 	if (argc > 1) {
3846 		id = parse_expression(argv[1]);
3847 		if (id == 0)
3848 			name = argv[1];
3849 	}
3850 
3851 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3852 
3853 	hash_open(sAreaHash, &iter);
3854 	while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
3855 		if ((id != 0 && area->address_space->id != id)
3856 			|| (name != NULL && strstr(area->name, name) == NULL))
3857 			continue;
3858 
3859 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3860 			(void*)area->base, (void*)area->size, area->protection, area->wiring,
3861 			area->name);
3862 	}
3863 	hash_close(sAreaHash, &iter, false);
3864 	return 0;
3865 }
3866 
3867 
3868 static int
3869 dump_available_memory(int argc, char** argv)
3870 {
3871 	kprintf("Available memory: %Ld/%lu bytes\n",
3872 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
3873 	return 0;
3874 }
3875 
3876 
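/*!	Deletes all areas of the given address space: reserved ranges are simply
	removed and freed, all other areas are deleted via delete_area().
*/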
3877 status_t
3878 vm_delete_areas(struct vm_address_space* addressSpace)
3879 {
3880 	vm_area* area;
3881 	vm_area* next;
3882 	vm_area* last = NULL;
3883 
3884 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3885 		addressSpace->id));
3886 
3887 	rw_lock_write_lock(&addressSpace->lock);
3888 
3889 	// remove all reserved areas in this address space
3890 
3891 	for (area = addressSpace->areas; area; area = next) {
3892 		next = area->address_space_next;
3893 
3894 		if (area->id == RESERVED_AREA_ID) {
3895 			// just remove it
3896 			if (last)
3897 				last->address_space_next = area->address_space_next;
3898 			else
3899 				addressSpace->areas = area->address_space_next;
3900 
3901 			vm_put_address_space(addressSpace);
3902 			free(area);
3903 			continue;
3904 		}
3905 
3906 		last = area;
3907 	}
3908 
3909 	// delete all the areas in this address space
3910 
3911 	for (area = addressSpace->areas; area; area = next) {
3912 		next = area->address_space_next;
3913 		delete_area(addressSpace, area);
3914 	}
3915 
3916 	rw_lock_write_unlock(&addressSpace->lock);
3917 	return B_OK;
3918 }
3919 
3920 
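/*!	Returns the ID of the area in the given team's address space that contains
	\a address, B_ERROR if there is no such area, or B_BAD_TEAM_ID if the
	address space could not be locked.
*/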
3921 static area_id
3922 vm_area_for(team_id team, addr_t address)
3923 {
3924 	AddressSpaceReadLocker locker(team);
3925 	if (!locker.IsLocked())
3926 		return B_BAD_TEAM_ID;
3927 
3928 	vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
3929 	if (area != NULL)
3930 		return area->id;
3931 
3932 	return B_ERROR;
3933 }
3934 
3935 
3936 /*!	Frees physical pages that were used during the boot process.
3937 */
3938 static void
3939 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end)
3940 {
3941 	// free all physical pages in the specified range
3942 
3943 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3944 		addr_t physicalAddress;
3945 		uint32 flags;
3946 
3947 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
3948 			vm_page* page = vm_lookup_page(current / B_PAGE_SIZE);
3949 			if (page != NULL)
3950 				vm_page_set_state(page, PAGE_STATE_FREE);
3951 		}
3952 	}
3953 
3954 	// unmap the memory
3955 	map->ops->unmap(map, start, end - 1);
3956 }
3957 
3958 
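/*!	Unmaps and frees all physical pages in the given kernel virtual range that
	are not covered by an area. Relies on the kernel areas being sorted by
	base address.
*/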
3959 void
3960 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3961 {
3962 	vm_translation_map* map = &vm_kernel_address_space()->translation_map;
3963 	addr_t end = start + size;
3964 	addr_t lastEnd = start;
3965 	vm_area* area;
3966 
3967 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3968 		(void*)start, (void*)end));
3969 
3970 	// The areas are sorted in virtual address space order, so
3971 	// we just have to find the holes between them that fall
3972 	// into the area we should dispose
3973 
3974 	map->ops->lock(map);
3975 
3976 	for (area = vm_kernel_address_space()->areas; area != NULL;
3977 			area = area->address_space_next) {
3978 		addr_t areaStart = area->base;
3979 		addr_t areaEnd = areaStart + area->size;
3980 
3981 		if (area->id == RESERVED_AREA_ID)
3982 			continue;
3983 
3984 		if (areaEnd >= end) {
3985 			// we are done, the remaining areas are already beyond what we have to free
3986 			lastEnd = end;
3987 			break;
3988 		}
3989 
3990 		if (areaStart > lastEnd) {
3991 			// this is something we can free
3992 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3993 				(void*)areaStart));
3994 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
3995 		}
3996 
3997 		lastEnd = areaEnd;
3998 	}
3999 
4000 	if (lastEnd < end) {
4001 		// we can also get rid of some space at the end of the area
4002 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4003 			(void*)end));
4004 		unmap_and_free_physical_pages(map, lastEnd, end);
4005 	}
4006 
4007 	map->ops->unlock(map);
4008 }
4009 
4010 
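/*!	Creates the "<image>_text" and "<image>_data" areas for an image that was
	preloaded by the boot loader, deriving the area names from the image's
	file name.
*/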
4011 static void
4012 create_preloaded_image_areas(struct preloaded_image* image)
4013 {
4014 	char name[B_OS_NAME_LENGTH];
4015 	void* address;
4016 	int32 length;
4017 
4018 	// use file name to create a good area name
4019 	char* fileName = strrchr(image->name, '/');
4020 	if (fileName == NULL)
4021 		fileName = image->name;
4022 	else
4023 		fileName++;
4024 
4025 	length = strlen(fileName);
4026 	// make sure there is enough space for the suffix
4027 	if (length > 25)
4028 		length = 25;
4029 
4030 	memcpy(name, fileName, length);
4031 	strcpy(name + length, "_text");
4032 	address = (void*)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
4033 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4034 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4035 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4036 		// this will later be remapped read-only/executable by the
4037 		// ELF initialization code
4038 
4039 	strcpy(name + length, "_data");
4040 	address = (void*)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
4041 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4042 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4043 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4044 }
4045 
4046 
4047 /*!	Frees all kernel arguments areas that were previously allocated from the
4048 	kernel_args structure. Any boot loader resources contained in those
4049 	arguments must not be accessed anymore past this point.
4050 */
4051 void
4052 vm_free_kernel_args(kernel_args* args)
4053 {
4054 	uint32 i;
4055 
4056 	TRACE(("vm_free_kernel_args()\n"));
4057 
4058 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4059 		area_id area = area_for((void*)args->kernel_args_range[i].start);
4060 		if (area >= B_OK)
4061 			delete_area(area);
4062 	}
4063 }
4064 
4065 
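/*!	Creates areas covering the kernel_args ranges, so that the data passed by
	the boot loader stays valid until vm_free_kernel_args() is called.
*/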
4066 static void
4067 allocate_kernel_args(kernel_args* args)
4068 {
4069 	TRACE(("allocate_kernel_args()\n"));
4070 
4071 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4072 		void* address = (void*)args->kernel_args_range[i].start;
4073 
4074 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4075 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4076 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4077 	}
4078 }
4079 
4080 
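/*!	Releases the kernel address range reservations made by
	reserve_boot_loader_ranges().
*/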
4081 static void
4082 unreserve_boot_loader_ranges(kernel_args* args)
4083 {
4084 	TRACE(("unreserve_boot_loader_ranges()\n"));
4085 
4086 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4087 		vm_unreserve_address_range(vm_kernel_address_space_id(),
4088 			(void*)args->virtual_allocated_range[i].start,
4089 			args->virtual_allocated_range[i].size);
4090 	}
4091 }
4092 
4093 
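/*!	Reserves the kernel address ranges that the boot loader has already
	allocated, so that no areas are created on top of them in the meantime.
*/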
4094 static void
4095 reserve_boot_loader_ranges(kernel_args* args)
4096 {
4097 	TRACE(("reserve_boot_loader_ranges()\n"));
4098 
4099 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4100 		void* address = (void*)args->virtual_allocated_range[i].start;
4101 
4102 		// If the address is not a kernel address, we just skip it. The
4103 		// architecture-specific code has to deal with it.
4104 		if (!IS_KERNEL_ADDRESS(address)) {
4105 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
4106 				address, args->virtual_allocated_range[i].size);
4107 			continue;
4108 		}
4109 
4110 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(),
4111 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4112 		if (status < B_OK)
4113 			panic("could not reserve boot loader ranges\n");
4114 	}
4115 }
4116 
4117 
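/*!	Finds a spot of \a size bytes in the kernel address space by looking at
	the gaps between (and before/after) the kernel_args' virtual allocation
	ranges, and extends the adjacent range to cover it. Returns 0 on failure.
*/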
4118 static addr_t
4119 allocate_early_virtual(kernel_args* args, size_t size)
4120 {
4121 	addr_t spot = 0;
4122 	uint32 i;
4123 	int last_valloc_entry = 0;
4124 
4125 	size = PAGE_ALIGN(size);
4126 	// find a slot in the virtual allocation addr range
4127 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
4128 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4129 			+ args->virtual_allocated_range[i - 1].size;
4130 		last_valloc_entry = i;
4131 		// check to see if the space between this one and the last is big enough
4132 		if (previousRangeEnd >= KERNEL_BASE
4133 			&& args->virtual_allocated_range[i].start
4134 				- previousRangeEnd >= size) {
4135 			spot = previousRangeEnd;
4136 			args->virtual_allocated_range[i - 1].size += size;
4137 			goto out;
4138 		}
4139 	}
4140 	if (spot == 0) {
4141 		// we didn't find a gap between the allocation ranges; this is OK.
4142 		// see if there's a gap after the last one
4143 		addr_t lastRangeEnd
4144 			= args->virtual_allocated_range[last_valloc_entry].start
4145 				+ args->virtual_allocated_range[last_valloc_entry].size;
4146 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
4147 			spot = lastRangeEnd;
4148 			args->virtual_allocated_range[last_valloc_entry].size += size;
4149 			goto out;
4150 		}
4151 		// see if there's a gap before the first one
4152 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
4153 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
4154 				args->virtual_allocated_range[0].start -= size;
4155 				spot = args->virtual_allocated_range[0].start;
4156 				goto out;
4157 			}
4158 		}
4159 	}
4160 
4161 out:
4162 	return spot;
4163 }
4164 
4165 
4166 static bool
4167 is_page_in_physical_memory_range(kernel_args* args, addr_t address)
4168 {
4169 	// TODO: horrible brute-force method of determining if the page can be
4170 	// allocated
4171 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4172 		if (address >= args->physical_memory_range[i].start
4173 			&& address < args->physical_memory_range[i].start
4174 				+ args->physical_memory_range[i].size)
4175 			return true;
4176 	}
4177 	return false;
4178 }
4179 
4180 
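/*!	Allocates one physical page by extending one of the kernel_args' physical
	allocation ranges. Returns the page number of the allocated page, or 0 if
	none could be found.
*/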
4181 static addr_t
4182 allocate_early_physical_page(kernel_args* args)
4183 {
4184 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4185 		addr_t nextPage;
4186 
4187 		nextPage = args->physical_allocated_range[i].start
4188 			+ args->physical_allocated_range[i].size;
4189 		// see if the page after this allocated paddr run can be allocated
4190 		if (i + 1 < args->num_physical_allocated_ranges
4191 			&& args->physical_allocated_range[i + 1].size != 0) {
4192 			// see if the next page will collide with the next allocated range
4193 			if (nextPage >= args->physical_allocated_range[i+1].start)
4194 				continue;
4195 		}
4196 		// see if the next physical page fits in the memory block
4197 		if (is_page_in_physical_memory_range(args, nextPage)) {
4198 			// we got one!
4199 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4200 			return nextPage / B_PAGE_SIZE;
4201 		}
4202 	}
4203 
4204 	return 0;
4205 		// could not allocate a block
4206 }
4207 
4208 
4209 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4210 	allocate some pages before the VM is completely up.
4211 */
4212 addr_t
4213 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4214 	uint32 attributes)
4215 {
4216 	if (physicalSize > virtualSize)
4217 		physicalSize = virtualSize;
4218 
4219 	// find the vaddr to allocate at
4220 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
4221 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4222 
4223 	// map the pages
4224 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4225 		addr_t physicalAddress = allocate_early_physical_page(args);
4226 		if (physicalAddress == 0)
4227 			panic("error allocating early page!\n");
4228 
4229 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4230 
4231 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4232 			physicalAddress * B_PAGE_SIZE, attributes,
4233 			&allocate_early_physical_page);
4234 	}
4235 
4236 	return virtualBase;
4237 }
4238 
4239 
4240 /*!	The main entry point for initializing the VM. */
4241 status_t
4242 vm_init(kernel_args* args)
4243 {
4244 	struct preloaded_image* image;
4245 	void* address;
4246 	status_t err = 0;
4247 	uint32 i;
4248 
4249 	TRACE(("vm_init: entry\n"));
4250 	err = arch_vm_translation_map_init(args);
4251 	err = arch_vm_init(args);
4252 
4253 	// initialize some globals
4254 	sNextAreaID = 1;
4255 
4256 	vm_page_init_num_pages(args);
4257 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4258 
4259 	size_t heapSize = INITIAL_HEAP_SIZE;
4260 	// try to accommodate low memory systems
4261 	while (heapSize > sAvailableMemory / 8)
4262 		heapSize /= 2;
4263 	if (heapSize < 1024 * 1024)
4264 		panic("vm_init: go buy some RAM please.");
4265 
4266 	// map in the new heap and initialize it
4267 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4268 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4269 	TRACE(("heap at 0x%lx\n", heapBase));
4270 	heap_init(heapBase, heapSize);
4271 
4272 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
4273 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
4274 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4275 	slab_init(args, slabInitialBase, slabInitialSize);
4276 
4277 	// initialize the free page list and physical page mapper
4278 	vm_page_init(args);
4279 
4280 	// initialize the hash table that stores the pages mapped to caches
4281 	vm_cache_init(args);
4282 
4283 	{
4284 		vm_area* area;
4285 		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
4286 			(addr_t)&area->hash_next - (addr_t)area,
4287 			&area_compare, &area_hash);
4288 		if (sAreaHash == NULL)
4289 			panic("vm_init: error creating area hash table\n");
4290 	}
4291 
4292 	vm_address_space_init();
4293 	reserve_boot_loader_ranges(args);
4294 
4295 	// Do any further initialization that the architecture dependent layers may
4296 	// need now
4297 	arch_vm_translation_map_init_post_area(args);
4298 	arch_vm_init_post_area(args);
4299 	vm_page_init_post_area(args);
4300 
4301 	// allocate areas to represent stuff that already exists
4302 
4303 	address = (void*)ROUNDOWN(heapBase, B_PAGE_SIZE);
4304 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4305 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4306 
4307 	address = (void*)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
4308 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
4309 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
4310 		| B_KERNEL_WRITE_AREA);
4311 
4312 	allocate_kernel_args(args);
4313 
4314 	create_preloaded_image_areas(&args->kernel_image);
4315 
4316 	// allocate areas for preloaded images
4317 	for (image = args->preloaded_images; image != NULL; image = image->next) {
4318 		create_preloaded_image_areas(image);
4319 	}
4320 
4321 	// allocate kernel stacks
4322 	for (i = 0; i < args->num_cpus; i++) {
4323 		char name[64];
4324 
4325 		sprintf(name, "idle thread %lu kstack", i + 1);
4326 		address = (void*)args->cpu_kstack[i].start;
4327 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4328 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4329 	}
4330 
4331 #if DEBUG_CACHE_LIST
4332 	create_area("cache info table", (void**)&sCacheInfoTable,
4333 		B_ANY_KERNEL_ADDRESS,
4334 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4335 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4336 #endif	// DEBUG_CACHE_LIST
4337 
4338 	// add some debugger commands
4339 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4340 	add_debugger_command("area", &dump_area,
4341 		"Dump info about a particular area");
4342 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
4343 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
4344 #if DEBUG_CACHE_LIST
4345 	add_debugger_command_etc("caches", &dump_caches,
4346 		"List all vm_cache trees",
4347 		"[ \"-c\" ]\n"
4348 		"All cache trees are listed sorted in decreasing order by number of\n"
4349 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
4350 		0);
4351 #endif
4352 	add_debugger_command("avail", &dump_available_memory,
4353 		"Dump available memory");
4354 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4355 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4356 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4357 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4358 	add_debugger_command("string", &display_mem, "dump strings");
4359 
4360 	TRACE(("vm_init: exit\n"));
4361 
4362 	return err;
4363 }
4364 
4365 
4366 status_t
4367 vm_init_post_sem(kernel_args* args)
4368 {
4369 	// This frees all unused boot loader resources and makes their space
4370 	// available again
4371 	arch_vm_init_end(args);
4372 	unreserve_boot_loader_ranges(args);
4373 
4374 	// Fill in all of the semaphores that were not allocated before.
4375 	// Since we're still single-threaded and only the kernel address space
4376 	// exists, it isn't that hard to find all of the ones we need to create.
4377 
4378 	arch_vm_translation_map_init_post_sem(args);
4379 	vm_address_space_init_post_sem();
4380 
4381 	slab_init_post_sem();
4382 	return heap_init_post_sem();
4383 }
4384 
4385 
4386 status_t
4387 vm_init_post_thread(kernel_args* args)
4388 {
4389 	vm_page_init_post_thread(args);
4390 	vm_daemon_init();
4391 	slab_init_post_thread();
4392 	return heap_init_post_thread();
4393 }
4394 
4395 
4396 status_t
4397 vm_init_post_modules(kernel_args* args)
4398 {
4399 	return arch_vm_init_post_modules(args);
4400 }
4401 
4402 
4403 void
4404 permit_page_faults(void)
4405 {
4406 	struct thread* thread = thread_get_current_thread();
4407 	if (thread != NULL)
4408 		atomic_add(&thread->page_faults_allowed, 1);
4409 }
4410 
4411 
4412 void
4413 forbid_page_faults(void)
4414 {
4415 	struct thread* thread = thread_get_current_thread();
4416 	if (thread != NULL)
4417 		atomic_add(&thread->page_faults_allowed, -1);
4418 }
4419 
4420 
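/*!	Architecture-independent part of the page fault handling: determines the
	affected address space and lets vm_soft_fault() resolve the fault. On
	failure, kernel faults are redirected to the thread's fault handler via
	\a newIP (or cause a panic), while userland faults result in a SIGSEGV
	and/or a debugger notification.
*/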
4421 status_t
4422 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
4423 	addr_t* newIP)
4424 {
4425 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4426 		faultAddress));
4427 
4428 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4429 
4430 	addr_t pageAddress = ROUNDOWN(address, B_PAGE_SIZE);
4431 	vm_address_space* addressSpace = NULL;
4432 
4433 	status_t status = B_OK;
4434 	*newIP = 0;
4435 	atomic_add((int32*)&sPageFaults, 1);
4436 
4437 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4438 		addressSpace = vm_get_kernel_address_space();
4439 	} else if (IS_USER_ADDRESS(pageAddress)) {
4440 		addressSpace = vm_get_current_user_address_space();
4441 		if (addressSpace == NULL) {
4442 			if (!isUser) {
4443 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4444 					"memory!\n");
4445 				status = B_BAD_ADDRESS;
4446 				TPF(PageFaultError(-1,
4447 					VMPageFaultTracing
4448 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4449 			} else {
4450 				// XXX weird state.
4451 				panic("vm_page_fault: non kernel thread accessing user memory "
4452 					"that doesn't exist!\n");
4453 				status = B_BAD_ADDRESS;
4454 			}
4455 		}
4456 	} else {
4457 		// The hit was probably in the 64k DMZ between kernel and user space.
4458 		// This keeps a user space thread from passing a buffer that crosses
4459 		// into kernel space.
4460 		status = B_BAD_ADDRESS;
4461 		TPF(PageFaultError(-1,
4462 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4463 	}
4464 
4465 	if (status == B_OK)
4466 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser);
4467 
4468 	if (status < B_OK) {
4469 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4470 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4471 			strerror(status), address, faultAddress, isWrite, isUser,
4472 			thread_get_current_thread_id());
4473 		if (!isUser) {
4474 			struct thread* thread = thread_get_current_thread();
4475 			if (thread != NULL && thread->fault_handler != 0) {
4476 				// this will cause the arch-dependent page fault handler to
4477 				// modify the IP on the interrupt frame or whatever to return
4478 				// to this address
4479 				*newIP = thread->fault_handler;
4480 			} else {
4481 				// unhandled page fault in the kernel
4482 				panic("vm_page_fault: unhandled page fault in kernel space at "
4483 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4484 			}
4485 		} else {
4486 #if 1
4487 			rw_lock_read_lock(&addressSpace->lock);
4488 
4489 			// TODO: remove me once we have proper userland debugging support
4490 			// (and tools)
4491 			vm_area* area = vm_area_lookup(addressSpace, faultAddress);
4492 
4493 			struct thread* thread = thread_get_current_thread();
4494 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4495 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4496 				thread->name, thread->id, thread->team->name, thread->team->id,
4497 				isWrite ? "write" : "read", address, faultAddress,
4498 				area ? area->name : "???",
4499 				faultAddress - (area ? area->base : 0x0));
4500 
4501 			// We could print a stack trace of the userland thread here.
4502 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4503 // fault and someone is already waiting for a write lock on the same address
4504 // space. This thread will then try to acquire the lock again and will
4505 // be queued after the writer.
4506 #if 0
4507 			if (area) {
4508 				struct stack_frame {
4509 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4510 						struct stack_frame*	previous;
4511 						void*				return_address;
4512 					#else
4513 						// ...
4514 					#warning writeme
4515 					#endif
4516 				} frame;
4517 #ifdef __INTEL__
4518 				struct iframe* iframe = i386_get_user_iframe();
4519 				if (iframe == NULL)
4520 					panic("iframe is NULL!");
4521 
4522 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4523 					sizeof(struct stack_frame));
4524 #elif defined(__POWERPC__)
4525 				struct iframe* iframe = ppc_get_user_iframe();
4526 				if (iframe == NULL)
4527 					panic("iframe is NULL!");
4528 
4529 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4530 					sizeof(struct stack_frame));
4531 #else
4532 #	warning "vm_page_fault() stack trace won't work"
4533 				status = B_ERROR;
4534 #endif
4535 
4536 				dprintf("stack trace:\n");
4537 				int32 maxFrames = 50;
4538 				while (status == B_OK && --maxFrames >= 0
4539 						&& frame.return_address != NULL) {
4540 					dprintf("  %p", frame.return_address);
4541 					area = vm_area_lookup(addressSpace,
4542 						(addr_t)frame.return_address);
4543 					if (area) {
4544 						dprintf(" (%s + %#lx)", area->name,
4545 							(addr_t)frame.return_address - area->base);
4546 					}
4547 					dprintf("\n");
4548 
4549 					status = user_memcpy(&frame, frame.previous,
4550 						sizeof(struct stack_frame));
4551 				}
4552 			}
4553 #endif	// 0 (stack trace)
4554 
4555 			rw_lock_read_unlock(&addressSpace->lock);
4556 #endif
4557 
4558 			// TODO: the fault_callback is a temporary solution for vm86
4559 			if (thread->fault_callback == NULL
4560 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4561 				// If the thread has a signal handler for SIGSEGV, we simply
4562 				// send it the signal. Otherwise we notify the user debugger
4563 				// first.
4564 				struct sigaction action;
4565 				if (sigaction(SIGSEGV, NULL, &action) == 0
4566 					&& action.sa_handler != SIG_DFL
4567 					&& action.sa_handler != SIG_IGN) {
4568 					send_signal(thread->id, SIGSEGV);
4569 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4570 						SIGSEGV)) {
4571 					send_signal(thread->id, SIGSEGV);
4572 				}
4573 			}
4574 		}
4575 	}
4576 
4577 	if (addressSpace != NULL)
4578 		vm_put_address_space(addressSpace);
4579 
4580 	return B_HANDLED_INTERRUPT;
4581 }
4582 
4583 
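/*!	Helper class that keeps a chain of caches locked and referenced, from the
	top cache down to the lowest cache locked so far; Unlock() releases them
	again in top-to-bottom order.
*/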
4584 class VMCacheChainLocker {
4585 public:
4586 	VMCacheChainLocker()
4587 		:
4588 		fTopCache(NULL),
4589 		fBottomCache(NULL)
4590 	{
4591 	}
4592 
4593 	void SetTo(VMCache* topCache)
4594 	{
4595 		fTopCache = topCache;
4596 		fBottomCache = topCache;
4597 	}
4598 
4599 	VMCache* LockSourceCache()
4600 	{
4601 		if (fBottomCache == NULL || fBottomCache->source == NULL)
4602 			return NULL;
4603 
4604 		fBottomCache = fBottomCache->source;
4605 		fBottomCache->Lock();
4606 		fBottomCache->AcquireRefLocked();
4607 
4608 		return fBottomCache;
4609 	}
4610 
4611 	void Unlock()
4612 	{
4613 		if (fTopCache == NULL)
4614 			return;
4615 
4616 		VMCache* cache = fTopCache;
4617 		while (cache != NULL) {
4618 			VMCache* nextCache = cache->source;
4619 			cache->ReleaseRefAndUnlock();
4620 
4621 			if (cache == fBottomCache)
4622 				break;
4623 
4624 			cache = nextCache;
4625 		}
4626 
4627 		fTopCache = NULL;
4628 		fBottomCache = NULL;
4629 	}
4630 
4631 private:
4632 	VMCache*	fTopCache;
4633 	VMCache*	fBottomCache;
4634 };
4635 
4636 
4637 struct PageFaultContext {
4638 	AddressSpaceReadLocker	addressSpaceLocker;
4639 	VMCacheChainLocker		cacheChainLocker;
4640 
4641 	vm_translation_map*		map;
4642 	vm_cache*				topCache;
4643 	off_t					cacheOffset;
4644 	bool					isWrite;
4645 
4646 	// return values
4647 	vm_page*				page;
4648 	bool					restart;
4649 
4650 
4651 	PageFaultContext(vm_address_space* addressSpace, bool isWrite)
4652 		:
4653 		addressSpaceLocker(addressSpace, true),
4654 		map(&addressSpace->translation_map),
4655 		isWrite(isWrite)
4656 	{
4657 	}
4658 
4659 	~PageFaultContext()
4660 	{
4661 		UnlockAll();
4662 	}
4663 
4664 	void Prepare(VMCache* topCache, off_t cacheOffset)
4665 	{
4666 		this->topCache = topCache;
4667 		this->cacheOffset = cacheOffset;
4668 		page = NULL;
4669 		restart = false;
4670 
4671 		cacheChainLocker.SetTo(topCache);
4672 	}
4673 
4674 	void UnlockAll()
4675 	{
4676 		topCache = NULL;
4677 		addressSpaceLocker.Unlock();
4678 		cacheChainLocker.Unlock();
4679 	}
4680 };
4681 
4682 
4683 /*!	Gets the page that should be mapped into the area.
4684 	Returns an error code other than \c B_OK, if the page couldn't be found or
4685 	paged in. The locking state of the address space and the caches is undefined
4686 	in that case.
4687 	Returns \c B_OK with \c context.restart set to \c true, if the function
4688 	had to unlock the address space and all caches and is supposed to be called
4689 	again.
4690 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4691 	found. It is returned in \c context.page. The address space will still be
4692 	locked as well as all caches starting from the top cache to at least the
4693 	cache the page lives in.
4694 */
4695 static inline status_t
4696 fault_get_page(PageFaultContext& context)
4697 {
4698 	vm_cache* cache = context.topCache;
4699 	vm_cache* lastCache = NULL;
4700 	vm_page* page = NULL;
4701 
4702 	while (cache != NULL) {
4703 		// We already hold the lock of the cache at this point.
4704 
4705 		lastCache = cache;
4706 
4707 		for (;;) {
4708 			page = cache->LookupPage(context.cacheOffset);
4709 			if (page == NULL || page->state != PAGE_STATE_BUSY) {
4710 				// Either there is no page or there is one and it is not busy.
4711 				break;
4712 			}
4713 
4714 			// page must be busy -- wait for it to become unbusy
4715 			ConditionVariableEntry entry;
4716 			entry.Add(page);
4717 			context.UnlockAll();
4718 			entry.Wait();
4719 
4720 			// restart the whole process
4721 			context.restart = true;
4722 			return B_OK;
4723 		}
4724 
4725 		if (page != NULL)
4726 			break;
4727 
4728 		// The current cache does not contain the page we're looking for.
4729 
4730 		// see if the backing store has it
4731 		if (cache->HasPage(context.cacheOffset)) {
4732 			// insert a fresh page and mark it busy -- we're going to read it in
4733 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4734 			cache->InsertPage(page, context.cacheOffset);
4735 
4736 			ConditionVariable busyCondition;
4737 			busyCondition.Publish(page, "page");
4738 
4739 			// We need to unlock all caches and the address space while reading
4740 			// the page in. Keep a reference to the cache around.
4741 			cache->AcquireRefLocked();
4742 			context.UnlockAll();
4743 
4744 			// read the page in
4745 			iovec vec;
4746 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
4747 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
4748 
4749 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4750 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4751 
4752 			cache->Lock();
4753 
4754 			if (status < B_OK) {
4755 				// on error remove and free the page
4756 				dprintf("reading page from cache %p returned: %s!\n",
4757 					cache, strerror(status));
4758 
4759 				busyCondition.Unpublish();
4760 				cache->RemovePage(page);
4761 				vm_page_set_state(page, PAGE_STATE_FREE);
4762 
4763 				cache->ReleaseRefAndUnlock();
4764 				return status;
4765 			}
4766 
4767 			// mark the page unbusy again
4768 			page->state = PAGE_STATE_ACTIVE;
4769 			busyCondition.Unpublish();
4770 
4771 			// Since we needed to unlock everything temporarily, the area
4772 			// situation might have changed. So we need to restart the whole
4773 			// process.
4774 			cache->ReleaseRefAndUnlock();
4775 			context.restart = true;
4776 			return B_OK;
4777 		}
4778 
4779 		cache = context.cacheChainLocker.LockSourceCache();
4780 	}
4781 
4782 	if (page == NULL) {
4783 		// There was no adequate page; determine the cache for a clean one.
4784 		// Read-only pages go in the deepest cache; only the topmost cache
4785 		// may have direct write access.
4786 		cache = context.isWrite ? context.topCache : lastCache;
4787 
4788 		// allocate a clean page
4789 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4790 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4791 			page->physical_page_number));
4792 
4793 		// insert the new page into our cache
4794 		cache->InsertPage(page, context.cacheOffset);
4795 
4796 	} else if (page->cache != context.topCache && context.isWrite) {
4797 		// We have a page that has the data we want, but in the wrong cache
4798 		// object so we need to copy it and stick it into the top cache.
4799 		vm_page* sourcePage = page;
4800 
4801 		// TODO: If memory is low, it might be a good idea to steal the page
4802 		// from our source cache -- if possible, that is.
4803 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4804 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4805 
4806 		// copy the page
4807 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4808 			sourcePage->physical_page_number * B_PAGE_SIZE);
4809 
4810 		// insert the new page into our cache
4811 		context.topCache->InsertPage(page, context.cacheOffset);
4812 	}
4813 
4814 	context.page = page;
4815 	return B_OK;
4816 }
4817 
4818 
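/*!	Resolves a page fault at \a originalAddress in the given address space:
	looks up the area, checks the access permissions, retrieves the page via
	fault_get_page(), and maps it -- read-only if the page doesn't live in the
	area's top cache, so that copy-on-write semantics are preserved.
*/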
4819 static status_t
4820 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress,
4821 	bool isWrite, bool isUser)
4822 {
4823 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4824 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4825 
4826 	PageFaultContext context(addressSpace, isWrite);
4827 
4828 	addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
4829 	status_t status = B_OK;
4830 
4831 	atomic_add(&addressSpace->fault_count, 1);
4832 
4833 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4834 	// the pages upfront makes sure we don't have any cache locked, so that the
4835 	// page daemon/thief can do their job without problems.
4836 	size_t reservePages = 2 + context.map->ops->map_max_pages_need(context.map,
4837 		originalAddress, originalAddress);
4838 	context.addressSpaceLocker.Unlock();
4839 	vm_page_reserve_pages(reservePages);
4840 
4841 	while (true) {
4842 		context.addressSpaceLocker.Lock();
4843 
4844 		// get the area the fault was in
4845 		vm_area* area = vm_area_lookup(addressSpace, address);
4846 		if (area == NULL) {
4847 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4848 				"space\n", originalAddress);
4849 			TPF(PageFaultError(-1,
4850 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4851 			status = B_BAD_ADDRESS;
4852 			break;
4853 		}
4854 
4855 		// check permissions
4856 		uint32 protection = get_area_page_protection(area, address);
4857 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4858 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4859 				(void*)originalAddress);
4860 			TPF(PageFaultError(area->id,
4861 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4862 			status = B_PERMISSION_DENIED;
4863 			break;
4864 		}
4865 		if (isWrite && (protection
4866 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4867 			dprintf("write access attempted on read-only area 0x%lx at %p\n",
4868 				area->id, (void*)originalAddress);
4869 			TPF(PageFaultError(area->id,
4870 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_ONLY));
4871 			status = B_PERMISSION_DENIED;
4872 			break;
4873 		}
4874 
4875 		// We have the area, it was a valid access, so let's try to resolve the
4876 		// page fault now.
4877 		// At first, the topmost cache of the area is investigated.
4878 
4879 		context.Prepare(vm_area_get_locked_cache(area),
4880 			address - area->base + area->cache_offset);
4881 
4882 		// See if this cache has a fault handler -- this will do all the work
4883 		// for us.
4884 		{
4885 			// Note, since the page fault is resolved with interrupts enabled,
4886 			// the fault handler could be called more than once for the same
4887 			// reason -- the store must take this into account.
4888 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4889 			if (status != B_BAD_HANDLER)
4890 				break;
4891 		}
4892 
4893 		// The top most cache has no fault handler, so let's see if the cache or
4894 		// its sources already have the page we're searching for (we're going
4895 		// from top to bottom).
4896 		status = fault_get_page(context);
4897 		if (status != B_OK) {
4898 			TPF(PageFaultError(area->id, status));
4899 			break;
4900 		}
4901 
4902 		if (context.restart)
4903 			continue;
4904 
4905 		// All went fine, all there is left to do is to map the page into the
4906 		// address space.
4907 		TPF(PageFaultDone(area->id, context.topCache, context.page->cache,
4908 			context.page));
4909 
4910 		// If the page doesn't reside in the area's cache, we need to make sure
4911 		// it's mapped in read-only, so that we cannot overwrite someone else's
4912 		// data (copy-on-write)
4913 		uint32 newProtection = protection;
4914 		if (context.page->cache != context.topCache && !isWrite)
4915 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4916 
4917 		bool unmapPage = false;
4918 		bool mapPage = true;
4919 
4920 		// check whether there's already a page mapped at the address
4921 		context.map->ops->lock(context.map);
4922 
4923 		addr_t physicalAddress;
4924 		uint32 flags;
4925 		vm_page* mappedPage;
4926 		if (context.map->ops->query(context.map, address, &physicalAddress,
4927 				&flags) == B_OK
4928 			&& (flags & PAGE_PRESENT) != 0
4929 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4930 				!= NULL) {
4931 			// Yep there's already a page. If it's ours, we can simply adjust
4932 			// its protection. Otherwise we have to unmap it.
4933 			if (mappedPage == context.page) {
4934 				context.map->ops->protect(context.map, address,
4935 					address + (B_PAGE_SIZE - 1), newProtection);
4936 
4937 				mapPage = false;
4938 			} else
4939 				unmapPage = true;
4940 		}
4941 
4942 		context.map->ops->unlock(context.map);
4943 
4944 		if (unmapPage)
4945 			vm_unmap_page(area, address, true);
4946 
4947 		if (mapPage)
4948 			vm_map_page(area, context.page, address, newProtection);
4949 
4950 		break;
4951 	}
4952 
4953 	vm_page_unreserve_pages(reservePages);
4954 
4955 	return status;
4956 }
4957 
4958 
4959 /*!	The address space must be locked by the caller (a read lock suffices). */
4960 vm_area*
4961 vm_area_lookup(vm_address_space* addressSpace, addr_t address)
4962 {
4963 	vm_area* area;
4964 
4965 	// check the area hint first
4966 	area = addressSpace->area_hint;
4967 	if (area != NULL
4968 		&& area->base <= address
4969 		&& area->base + (area->size - 1) >= address)
4970 		goto found;
4971 
4972 	for (area = addressSpace->areas; area != NULL;
4973 			area = area->address_space_next) {
4974 		if (area->id == RESERVED_AREA_ID)
4975 			continue;
4976 
4977 		if (area->base <= address && area->base + (area->size - 1) >= address)
4978 			break;
4979 	}
4980 
4981 found:
4982 	if (area)
4983 		addressSpace->area_hint = area;
4984 
4985 	return area;
4986 }
4987 
4988 
4989 status_t
4990 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
4991 {
4992 	return vm_kernel_address_space()->translation_map.ops->get_physical_page(
4993 		paddr, _vaddr, _handle);
4994 }
4995 
4996 status_t
4997 vm_put_physical_page(addr_t vaddr, void* handle)
4998 {
4999 	return vm_kernel_address_space()->translation_map.ops->put_physical_page(
5000 		vaddr, handle);
5001 }
5002 
5003 
5004 status_t
5005 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
5006 {
5007 	return vm_kernel_address_space()->translation_map.ops
5008 		->get_physical_page_current_cpu(paddr, _vaddr, _handle);
5009 }
5010 
5011 status_t
5012 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5013 {
5014 	return vm_kernel_address_space()->translation_map.ops
5015 		->put_physical_page_current_cpu(vaddr, handle);
5016 }
5017 
5018 
5019 status_t
5020 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
5021 {
5022 	return vm_kernel_address_space()->translation_map.ops
5023 		->get_physical_page_debug(paddr, _vaddr, _handle);
5024 }
5025 
5026 status_t
5027 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5028 {
5029 	return vm_kernel_address_space()->translation_map.ops
5030 		->put_physical_page_debug(vaddr, handle);
5031 }
5032 
5033 
5034 void
5035 vm_get_info(system_memory_info* info)
5036 {
5037 	swap_get_info(info);
5038 
5039 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
5040 	info->page_faults = sPageFaults;
5041 
5042 	MutexLocker locker(sAvailableMemoryLock);
5043 	info->free_memory = sAvailableMemory;
5044 	info->needed_memory = sNeededMemory;
5045 }
5046 
5047 
5048 uint32
5049 vm_num_page_faults(void)
5050 {
5051 	return sPageFaults;
5052 }
5053 
5054 
5055 off_t
5056 vm_available_memory(void)
5057 {
5058 	MutexLocker locker(sAvailableMemoryLock);
5059 	return sAvailableMemory;
5060 }
5061 
5062 
5063 off_t
5064 vm_available_not_needed_memory(void)
5065 {
5066 	MutexLocker locker(sAvailableMemoryLock);
5067 	return sAvailableMemory - sNeededMemory;
5068 }
5069 
5070 
5071 void
5072 vm_unreserve_memory(size_t amount)
5073 {
5074 	mutex_lock(&sAvailableMemoryLock);
5075 
5076 	sAvailableMemory += amount;
5077 
5078 	mutex_unlock(&sAvailableMemoryLock);
5079 }
5080 
5081 
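/*!	Tries to reserve \a amount bytes of memory. If not enough memory is
	available, the low resource manager is notified and the function waits
	until the memory could be reserved or \a timeout has passed.
*/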
5082 status_t
5083 vm_try_reserve_memory(size_t amount, bigtime_t timeout)
5084 {
5085 	MutexLocker locker(sAvailableMemoryLock);
5086 
5087 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5088 
5089 	if (sAvailableMemory >= amount) {
5090 		sAvailableMemory -= amount;
5091 		return B_OK;
5092 	}
5093 
5094 	if (timeout <= 0)
5095 		return B_NO_MEMORY;
5096 
5097 	// turn timeout into an absolute timeout
5098 	timeout += system_time();
5099 
5100 	// loop until we've got the memory or the timeout occurs
5101 	do {
5102 		sNeededMemory += amount;
5103 
5104 		// call the low resource manager
5105 		locker.Unlock();
5106 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5107 			B_ABSOLUTE_TIMEOUT, timeout);
5108 		locker.Lock();
5109 
5110 		sNeededMemory -= amount;
5111 
5112 		if (sAvailableMemory >= amount) {
5113 			sAvailableMemory -= amount;
5114 			return B_OK;
5115 		}
5116 	} while (timeout > system_time());
5117 
5118 	return B_NO_MEMORY;
5119 }
5120 
5121 
5122 status_t
5123 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
5124 {
5125 	AddressSpaceReadLocker locker;
5126 	vm_area* area;
5127 	status_t status = locker.SetFromArea(id, area);
5128 	if (status != B_OK)
5129 		return status;
5130 
5131 	return arch_vm_set_memory_type(area, physicalBase, type);
5132 }
5133 
5134 
5135 /*!	This function enforces some protection properties:
5136 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5137 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5138 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5139 	   and B_KERNEL_WRITE_AREA.
5140 */
5141 static void
5142 fix_protection(uint32* protection)
5143 {
5144 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5145 		if ((*protection & B_USER_PROTECTION) == 0
5146 			|| (*protection & B_WRITE_AREA) != 0)
5147 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5148 		else
5149 			*protection |= B_KERNEL_READ_AREA;
5150 	}
5151 }
5152 
5153 
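/*!	Fills in the given area_info structure for the area. The reported RAM size
	is approximated by the page count of the area's cache.
*/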
5154 static void
5155 fill_area_info(struct vm_area* area, area_info* info, size_t size)
5156 {
5157 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5158 	info->area = area->id;
5159 	info->address = (void*)area->base;
5160 	info->size = area->size;
5161 	info->protection = area->protection;
5162 	info->lock = B_FULL_LOCK;
5163 	info->team = area->address_space->id;
5164 	info->copy_count = 0;
5165 	info->in_count = 0;
5166 	info->out_count = 0;
5167 		// TODO: retrieve real values here!
5168 
5169 	vm_cache* cache = vm_area_get_locked_cache(area);
5170 
5171 	// Note, this is a simplification; the cache could be larger than this area
5172 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5173 
5174 	vm_area_put_locked_cache(cache);
5175 }
5176 
5177 
5178 /*!
5179 	Tests whether the area that contains the specified address actually
5180 	exists and whether its memory needs any kind of locking.
5181 	Used by both lock_memory() and unlock_memory().
5182 */
5183 static status_t
5184 test_lock_memory(vm_address_space* addressSpace, addr_t address,
5185 	bool& needsLocking)
5186 {
5187 	rw_lock_read_lock(&addressSpace->lock);
5188 
5189 	vm_area* area = vm_area_lookup(addressSpace, address);
5190 	if (area != NULL) {
5191 		// This determines if we need to lock the memory at all
5192 		needsLocking = area->cache_type != CACHE_TYPE_NULL
5193 			&& area->cache_type != CACHE_TYPE_DEVICE
5194 			&& area->wiring != B_FULL_LOCK
5195 			&& area->wiring != B_CONTIGUOUS;
5196 	}
5197 
5198 	rw_lock_read_unlock(&addressSpace->lock);
5199 
5200 	if (area == NULL)
5201 		return B_BAD_ADDRESS;
5202 
5203 	return B_OK;
5204 }
5205 
5206 
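/*!	Resizes the area with the given ID -- and, since all areas of a cache
	share its size, all other areas of the same cache as well. Only RAM caches
	can be resized; growing fails if a neighboring area is in the way, unless
	that neighbor is a reserved range that can be shrunk instead.
*/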
5207 static status_t
5208 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5209 {
5210 	// is newSize a multiple of B_PAGE_SIZE?
5211 	if (newSize & (B_PAGE_SIZE - 1))
5212 		return B_BAD_VALUE;
5213 
5214 	// lock all affected address spaces and the cache
5215 	vm_area* area;
5216 	vm_cache* cache;
5217 
5218 	MultiAddressSpaceLocker locker;
5219 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
5220 		&cache);
5221 	if (status != B_OK)
5222 		return status;
5223 	AreaCacheLocker cacheLocker(cache);	// already locked
5224 
5225 	// enforce restrictions
5226 	if (!kernel) {
5227 		if ((area->protection & B_KERNEL_AREA) != 0)
5228 			return B_NOT_ALLOWED;
5229 		// TODO: Enforce all restrictions (team, etc.)!
5230 	}
5231 
5232 	size_t oldSize = area->size;
5233 	if (newSize == oldSize)
5234 		return B_OK;
5235 
5236 	// Resize all areas of this area's cache
5237 
5238 	if (cache->type != CACHE_TYPE_RAM)
5239 		return B_NOT_ALLOWED;
5240 
5241 	if (oldSize < newSize) {
5242 		// We need to check if all areas of this cache can be resized
5243 
5244 		for (vm_area* current = cache->areas; current != NULL;
5245 				current = current->cache_next) {
5246 			vm_area* next = current->address_space_next;
5247 			if (next != NULL && next->base <= (current->base + newSize)) {
5248 				// If the area was created inside a reserved area, it can
5249 				// also be resized in that area
5250 				// TODO: if there is free space after the reserved area, it could
5251 				// be used as well...
5252 				if (next->id == RESERVED_AREA_ID
5253 					&& next->cache_offset <= current->base
5254 					&& next->base - 1 + next->size
5255 						>= current->base - 1 + newSize)
5256 					continue;
5257 
5258 				return B_ERROR;
5259 			}
5260 		}
5261 	}
5262 
5263 	// Okay, looks good so far, so let's do it
5264 
5265 	if (oldSize < newSize) {
5266 		// Growing the cache can fail, so we do it first.
5267 		status = cache->Resize(cache->virtual_base + newSize);
5268 		if (status != B_OK)
5269 			return status;
5270 	}
5271 
5272 	for (vm_area* current = cache->areas; current != NULL;
5273 			current = current->cache_next) {
5274 		vm_area* next = current->address_space_next;
5275 		if (next != NULL && next->base <= (current->base + newSize)) {
5276 			if (next->id == RESERVED_AREA_ID
5277 				&& next->cache_offset <= current->base
5278 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
5279 				// resize reserved area
5280 				addr_t offset = current->base + newSize - next->base;
5281 				if (next->size <= offset) {
5282 					current->address_space_next = next->address_space_next;
5283 					free(next);
5284 				} else {
5285 					next->size -= offset;
5286 					next->base += offset;
5287 				}
5288 			} else {
5289 				panic("resize situation for area %p has changed although we "
5290 					"should have the address space lock", current);
5291 				status = B_ERROR;
5292 				break;
5293 			}
5294 		}
5295 
5296 		current->size = newSize;
5297 
5298 		// We also need to unmap all pages beyond the new size, if the area has
5299 		// shrunk
5300 		if (newSize < oldSize) {
5301 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
5302 				false);
5303 		}
5304 	}
5305 
5306 	// shrinking the cache can't fail, so we do it now
5307 	if (status == B_OK && newSize < oldSize)
5308 		status = cache->Resize(cache->virtual_base + newSize);
5309 
5310 	if (status < B_OK) {
5311 		// This shouldn't really be possible, but hey, who knows
5312 		for (vm_area* current = cache->areas; current != NULL;
5313 				current = current->cache_next) {
5314 			current->size = oldSize;
5315 		}
5316 
5317 		cache->Resize(cache->virtual_base + oldSize);
5318 	}
5319 
5320 	// TODO: we must honour the lock restrictions of this area
5321 	return status;
5322 }
5323 
5324 
5325 status_t
5326 vm_memset_physical(addr_t address, int value, size_t length)
5327 {
5328 	return vm_kernel_address_space()->translation_map.ops->memset_physical(
5329 		address, value, length);
5330 }
5331 
5332 
5333 status_t
5334 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
5335 {
5336 	return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical(
5337 		to, from, length, user);
5338 }
5339 
5340 
5341 status_t
5342 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
5343 {
5344 	return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical(
5345 		to, _from, length, user);
5346 }
5347 
5348 
5349 void
5350 vm_memcpy_physical_page(addr_t to, addr_t from)
5351 {
5352 	return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page(
5353 		to, from);
5354 }
5355 
5356 
5357 //	#pragma mark - kernel public API
5358 
5359 
5360 status_t
5361 user_memcpy(void* to, const void* from, size_t size)
5362 {
5363 	if (arch_cpu_user_memcpy(to, from, size,
5364 			&thread_get_current_thread()->fault_handler) < B_OK)
5365 		return B_BAD_ADDRESS;
5366 
5367 	return B_OK;
5368 }
5369 
5370 
5371 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5372 	the string in \a to, NULL-terminating the result.
5373 
5374 	\param to Pointer to the destination C-string.
5375 	\param from Pointer to the source C-string.
5376 	\param size Size in bytes of the string buffer pointed to by \a to.
5377 
5378 	\return The length of \a from, i.e. \c strlen(\a from), or \c B_BAD_ADDRESS
		if an invalid address was encountered while copying.
5379 */
5380 ssize_t
5381 user_strlcpy(char* to, const char* from, size_t size)
5382 {
5383 	return arch_cpu_user_strlcpy(to, from, size,
5384 		&thread_get_current_thread()->fault_handler);
5385 }
5386 
5387 
5388 status_t
5389 user_memset(void* s, char c, size_t count)
5390 {
5391 	if (arch_cpu_user_memset(s, c, count,
5392 			&thread_get_current_thread()->fault_handler) < B_OK)
5393 		return B_BAD_ADDRESS;
5394 
5395 	return B_OK;
5396 }
5397 
5398 
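/*!	Makes sure all pages in the given range are present and increments their
	wired count, soft-faulting pages in where necessary. If \a flags contains
	B_READ_DEVICE, the caller intends to write to the memory, so the pages are
	faulted in writable.
*/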
5399 status_t
5400 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5401 {
5402 	vm_address_space* addressSpace = NULL;
5403 	struct vm_translation_map* map;
5404 	addr_t unalignedBase = (addr_t)address;
5405 	addr_t end = unalignedBase + numBytes;
5406 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
5407 	bool isUser = IS_USER_ADDRESS(address);
5408 	bool needsLocking = true;
5409 
5410 	if (isUser) {
5411 		if (team == B_CURRENT_TEAM)
5412 			addressSpace = vm_get_current_user_address_space();
5413 		else
5414 			addressSpace = vm_get_address_space(team);
5415 	} else
5416 		addressSpace = vm_get_kernel_address_space();
5417 	if (addressSpace == NULL)
5418 		return B_ERROR;
5419 
5420 	// test if we're on an area that allows faults at all
5421 
5422 	map = &addressSpace->translation_map;
5423 
5424 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5425 	if (status < B_OK)
5426 		goto out;
5427 	if (!needsLocking)
5428 		goto out;
5429 
5430 	for (; base < end; base += B_PAGE_SIZE) {
5431 		addr_t physicalAddress;
5432 		uint32 protection;
5433 		status_t status;
5434 
5435 		map->ops->lock(map);
5436 		status = map->ops->query(map, base, &physicalAddress, &protection);
5437 		map->ops->unlock(map);
5438 
5439 		if (status < B_OK)
5440 			goto out;
5441 
5442 		if ((protection & PAGE_PRESENT) != 0) {
5443 			// if B_READ_DEVICE is set, the caller intends to write to the locked
5444 			// memory, so if it hasn't been mapped writable, we'll try the soft
5445 			// fault anyway
5446 			if ((flags & B_READ_DEVICE) == 0
5447 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
5448 				// update wiring
5449 				vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5450 				if (page == NULL)
5451 					panic("couldn't lookup physical page just allocated\n");
5452 
5453 				increment_page_wired_count(page);
5454 				continue;
5455 			}
5456 		}
5457 
5458 		status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
5459 			isUser);
5460 		if (status != B_OK)	{
5461 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) "
5462 				"failed: %s\n", (void*)unalignedBase, numBytes, flags,
5463 				strerror(status));
5464 			goto out;
5465 		}
5466 
5467 		// TODO: Here's a race condition. We should probably add a parameter
5468 		// to vm_soft_fault() that would cause the page's wired count to be
5469 		// incremented immediately.
5470 		// TODO: After memory has been locked in an area, we need to prevent the
5471 		// area from being deleted, resized, cut, etc. That could be done using
5472 		// a "locked pages" count in vm_area, and maybe a condition variable, if
5473 		// we want to allow waiting for the area to become eligible for these
5474 		// operations again.
5475 
5476 		map->ops->lock(map);
5477 		status = map->ops->query(map, base, &physicalAddress, &protection);
5478 		map->ops->unlock(map);
5479 
5480 		if (status < B_OK)
5481 			goto out;
5482 
5483 		// update wiring
5484 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5485 		if (page == NULL)
5486 			panic("couldn't lookup physical page");
5487 
5488 		increment_page_wired_count(page);
5489 			// TODO: needs to be atomic on all platforms!
5490 	}
5491 
5492 out:
5493 	vm_put_address_space(addressSpace);
5494 	return status;
5495 }
5496 
5497 
5498 status_t
5499 lock_memory(void* address, size_t numBytes, uint32 flags)
5500 {
5501 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5502 }
5503 
5504 
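/*!	Reverts lock_memory_etc(): decrements the wired count of all pages in the
	given range. The memory is expected to still be mapped.
*/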
5505 status_t
5506 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5507 {
5508 	vm_address_space* addressSpace = NULL;
5509 	struct vm_translation_map* map;
5510 	addr_t unalignedBase = (addr_t)address;
5511 	addr_t end = unalignedBase + numBytes;
5512 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
5513 	bool needsLocking = true;
5514 
5515 	if (IS_USER_ADDRESS(address)) {
5516 		if (team == B_CURRENT_TEAM)
5517 			addressSpace = vm_get_current_user_address_space();
5518 		else
5519 			addressSpace = vm_get_address_space(team);
5520 	} else
5521 		addressSpace = vm_get_kernel_address_space();
5522 	if (addressSpace == NULL)
5523 		return B_ERROR;
5524 
5525 	map = &addressSpace->translation_map;
5526 
5527 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5528 	if (status < B_OK)
5529 		goto out;
5530 	if (!needsLocking)
5531 		goto out;
5532 
5533 	for (; base < end; base += B_PAGE_SIZE) {
5534 		map->ops->lock(map);
5535 
5536 		addr_t physicalAddress;
5537 		uint32 protection;
5538 		status = map->ops->query(map, base, &physicalAddress,
5539 			&protection);
5540 
5541 		map->ops->unlock(map);
5542 
5543 		if (status < B_OK)
5544 			goto out;
5545 		if ((protection & PAGE_PRESENT) == 0)
5546 			panic("calling unlock_memory() on unmapped memory!");
5547 
5548 		// update wiring
5549 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5550 		if (page == NULL)
5551 			panic("couldn't lookup physical page");
5552 
5553 		decrement_page_wired_count(page);
5554 	}
5555 
5556 out:
5557 	vm_put_address_space(addressSpace);
5558 	return status;
5559 }
5560 
5561 
5562 status_t
5563 unlock_memory(void* address, size_t numBytes, uint32 flags)
5564 {
5565 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5566 }
5567 
5568 
5569 /*!	Similar to get_memory_map(), but also allows specifying the address space
5570 	for the memory in question and has saner semantics.
5571 	Returns \c B_OK when the complete range could be translated or
5572 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5573 	case the actual number of entries is written to \c *_numEntries. Any other
5574 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5575 	in this case.
5576 */
5577 status_t
5578 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5579 	physical_entry* table, uint32* _numEntries)
5580 {
5581 	uint32 numEntries = *_numEntries;
5582 	*_numEntries = 0;
5583 
5584 	vm_address_space* addressSpace;
5585 	addr_t virtualAddress = (addr_t)address;
5586 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5587 	addr_t physicalAddress;
5588 	status_t status = B_OK;
5589 	int32 index = -1;
5590 	addr_t offset = 0;
5591 	bool interrupts = are_interrupts_enabled();
5592 
5593 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5594 		address, numBytes, numEntries));
5595 
5596 	if (numEntries == 0 || numBytes == 0)
5597 		return B_BAD_VALUE;
5598 
5599 	// in which address space is the address to be found?
5600 	if (IS_USER_ADDRESS(virtualAddress)) {
5601 		if (team == B_CURRENT_TEAM)
5602 			addressSpace = vm_get_current_user_address_space();
5603 		else
5604 			addressSpace = vm_get_address_space(team);
5605 	} else
5606 		addressSpace = vm_get_kernel_address_space();
5607 
5608 	if (addressSpace == NULL)
5609 		return B_ERROR;
5610 
5611 	vm_translation_map* map = &addressSpace->translation_map;
5612 
5613 	if (interrupts)
5614 		map->ops->lock(map);
5615 
5616 	while (offset < numBytes) {
5617 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5618 		uint32 flags;
5619 
5620 		if (interrupts) {
5621 			status = map->ops->query(map, (addr_t)address + offset,
5622 				&physicalAddress, &flags);
5623 		} else {
5624 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
5625 				&physicalAddress, &flags);
5626 		}
5627 		if (status < B_OK)
5628 			break;
5629 		if ((flags & PAGE_PRESENT) == 0) {
5630 			panic("get_memory_map() called on unmapped memory!");
5631 			return B_BAD_ADDRESS;
5632 		}
5633 
5634 		if (index < 0 && pageOffset > 0) {
5635 			physicalAddress += pageOffset;
5636 			if (bytes > B_PAGE_SIZE - pageOffset)
5637 				bytes = B_PAGE_SIZE - pageOffset;
5638 		}
5639 
5640 		// need to switch to the next physical_entry?
5641 		if (index < 0 || (addr_t)table[index].address
5642 				!= physicalAddress - table[index].size) {
5643 			if ((uint32)++index + 1 > numEntries) {
5644 				// table too small
5645 				status = B_BUFFER_OVERFLOW;
5646 				break;
5647 			}
5648 			table[index].address = (void*)physicalAddress;
5649 			table[index].size = bytes;
5650 		} else {
5651 			// page does fit in current entry
5652 			table[index].size += bytes;
5653 		}
5654 
5655 		offset += bytes;
5656 	}
5657 
5658 	if (interrupts)
5659 		map->ops->unlock(map);
5660 
5661 	if (status != B_OK)
5662 		return status;
5663 
5664 	if ((uint32)index + 1 > numEntries) {
5665 		*_numEntries = index;
5666 		return B_BUFFER_OVERFLOW;
5667 	}
5668 
5669 	*_numEntries = index + 1;
5670 	return B_OK;
5671 }
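

// A sketch (not compiled) of the calling convention described above: the
// caller passes the capacity of the table in *_numEntries and reads back how
// many entries were actually filled. The 16-entry table and the use of
// B_CURRENT_TEAM are arbitrary choices for illustration.
#if 0
static status_t
example_print_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[16];
	uint32 numEntries = 16;

	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &numEntries);
	if (status != B_OK && status != B_BUFFER_OVERFLOW)
		return status;

	// on B_BUFFER_OVERFLOW the first numEntries runs are still valid, they
	// just don't cover the complete range
	for (uint32 i = 0; i < numEntries; i++) {
		dprintf("run %lu: %p, %lu bytes\n", i, table[i].address,
			table[i].size);
	}

	return status;
}
#endif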
5672 
5673 
5674 /*!	According to the BeBook, this function should always succeed.
5675 	This is no longer the case.
5676 */
5677 long
5678 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
5679 	long numEntries)
5680 {
5681 	uint32 entriesRead = numEntries;
5682 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5683 		table, &entriesRead);
5684 	if (error != B_OK)
5685 		return error;
5686 
5687 	// close the entry list
5688 
5689 	// if it's only one entry, we will silently accept the missing ending
5690 	if (numEntries == 1)
5691 		return B_OK;
5692 
5693 	if (entriesRead + 1 > (uint32)numEntries)
5694 		return B_BUFFER_OVERFLOW;
5695 
5696 	table[entriesRead].address = NULL;
5697 	table[entriesRead].size = 0;
5698 
5699 	return B_OK;
5700 }
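

// The classic BeOS-style way to consume the result (sketch, not compiled):
// since get_memory_map() terminates the table with a zero-sized entry
// whenever there is room for it, callers can walk the table without keeping
// a separate count. The table size of 8 is arbitrary.
#if 0
static void
example_walk_memory_map(const void* buffer, size_t length)
{
	physical_entry table[8];
	if (get_memory_map(buffer, length, table, 8) != B_OK)
		return;

	for (int32 i = 0; i < 8 && table[i].size != 0; i++)
		dprintf("%p, %lu bytes\n", table[i].address, table[i].size);
}
#endif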
5701 
5702 
5703 area_id
5704 area_for(void* address)
5705 {
5706 	team_id space;
5707 
5708 	if (IS_USER_ADDRESS(address)) {
5709 		// we try the user team address space, if any
5710 		space = vm_current_user_address_space_id();
5711 		if (space < B_OK)
5712 			return space;
5713 	} else
5714 		space = vm_kernel_address_space_id();
5715 
5716 	return vm_area_for(space, (addr_t)address);
5717 }
5718 
5719 
5720 area_id
5721 find_area(const char* name)
5722 {
5723 	rw_lock_read_lock(&sAreaHashLock);
5724 	struct hash_iterator iterator;
5725 	hash_open(sAreaHash, &iterator);
5726 
5727 	vm_area* area;
5728 	area_id id = B_NAME_NOT_FOUND;
5729 	while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) {
5730 		if (area->id == RESERVED_AREA_ID)
5731 			continue;
5732 
5733 		if (!strcmp(area->name, name)) {
5734 			id = area->id;
5735 			break;
5736 		}
5737 	}
5738 
5739 	hash_close(sAreaHash, &iterator, false);
5740 	rw_lock_read_unlock(&sAreaHashLock);
5741 
5742 	return id;
5743 }
5744 
5745 
5746 status_t
5747 _get_area_info(area_id id, area_info* info, size_t size)
5748 {
5749 	if (size != sizeof(area_info) || info == NULL)
5750 		return B_BAD_VALUE;
5751 
5752 	AddressSpaceReadLocker locker;
5753 	vm_area* area;
5754 	status_t status = locker.SetFromArea(id, area);
5755 	if (status != B_OK)
5756 		return status;
5757 
5758 	fill_area_info(area, info, size);
5759 	return B_OK;
5760 }
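

// Sketch (not compiled): looking up a named area from elsewhere in the kernel
// and retrieving its bounds via the public get_area_info() wrapper around
// _get_area_info(). The area name used here is made up.
#if 0
static void*
example_find_shared_buffer(size_t* _size)
{
	area_id id = find_area("some shared buffer");
		// hypothetical area name
	if (id < B_OK)
		return NULL;

	area_info info;
	if (get_area_info(id, &info) != B_OK)
		return NULL;

	*_size = info.size;
	return info.address;
}
#endif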
5761 
5762 
5763 status_t
5764 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5765 {
5766 	addr_t nextBase = *(addr_t*)cookie;
5767 
5768 	// we're already through the list
5769 	if (nextBase == (addr_t)-1)
5770 		return B_ENTRY_NOT_FOUND;
5771 
5772 	if (team == B_CURRENT_TEAM)
5773 		team = team_get_current_team_id();
5774 
5775 	AddressSpaceReadLocker locker(team);
5776 	if (!locker.IsLocked())
5777 		return B_BAD_TEAM_ID;
5778 
5779 	vm_area* area;
5780 	for (area = locker.AddressSpace()->areas; area != NULL;
5781 			area = area->address_space_next) {
5782 		if (area->id == RESERVED_AREA_ID)
5783 			continue;
5784 
5785 		if (area->base > nextBase)
5786 			break;
5787 	}
5788 
5789 	if (area == NULL) {
5790 		nextBase = (addr_t)-1;
5791 		return B_ENTRY_NOT_FOUND;
5792 	}
5793 
5794 	fill_area_info(area, info, size);
5795 	*cookie = (int32)(area->base);
5796 
5797 	return B_OK;
5798 }
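

// Sketch (not compiled): enumerating all areas of a team with the public
// get_next_area_info() wrapper. The cookie has to start out as 0; the loop
// ends once B_ENTRY_NOT_FOUND (or any other non-B_OK status) is returned.
#if 0
static void
example_dump_team_areas(team_id team)
{
	int32 cookie = 0;
	area_info info;

	while (get_next_area_info(team, &cookie, &info) == B_OK) {
		dprintf("%s: %p, %lu bytes\n", info.name, info.address,
			info.size);
	}
}
#endif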
5799 
5800 
5801 status_t
5802 set_area_protection(area_id area, uint32 newProtection)
5803 {
5804 	fix_protection(&newProtection);
5805 
5806 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
5807 		newProtection, true);
5808 }
5809 
5810 
5811 status_t
5812 resize_area(area_id areaID, size_t newSize)
5813 {
5814 	return vm_resize_area(areaID, newSize, true);
5815 }
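

// Sketch (not compiled): a typical sequence on a kernel-owned area: grow it
// to its final size, then drop write access. The area id is assumed to come
// from an earlier create_area() call.
#if 0
static status_t
example_seal_area(area_id area, size_t newSize)
{
	status_t status = resize_area(area, newSize);
	if (status != B_OK)
		return status;

	// keep the area mapped read-only for the kernel from now on
	return set_area_protection(area, B_KERNEL_READ_AREA);
}
#endif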
5816 
5817 
5818 /*!	Transfers the specified area to a new team. The caller must be the owner
5819 	of the area (not yet enforced but probably should be).
5820 	This function is currently not exported to the kernel namespace, but is
5821 	only accessible using the _kern_transfer_area() syscall.
5822 */
5823 static area_id
5824 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5825 	bool kernel)
5826 {
5827 	area_info info;
5828 	status_t status = get_area_info(id, &info);
5829 	if (status < B_OK)
5830 		return status;
5831 
5832 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5833 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5834 	if (clonedArea < B_OK)
5835 		return clonedArea;
5836 
5837 	status = vm_delete_area(info.team, id, kernel);
5838 	if (status < B_OK) {
5839 		vm_delete_area(target, clonedArea, kernel);
5840 		return status;
5841 	}
5842 
5843 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5844 
5845 	return clonedArea;
5846 }
5847 
5848 
5849 area_id
5850 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
5851 	uint32 addressSpec, uint32 protection, void** _virtualAddress)
5852 {
5853 	if (!arch_vm_supports_protection(protection))
5854 		return B_NOT_SUPPORTED;
5855 
5856 	fix_protection(&protection);
5857 
5858 	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
5859 		_virtualAddress, addressSpec, numBytes, protection,
5860 		(addr_t)physicalAddress);
5861 }
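

// Sketch (not compiled) of the common driver pattern: map a device's register
// window into the kernel address space and keep the returned area id around
// for later cleanup with delete_area(). The physical base address and the
// one-page size are placeholders.
#if 0
static area_id
example_map_device_registers(addr_t physicalBase, void** _registers)
{
	return map_physical_memory("example device registers",
		(void*)physicalBase, B_PAGE_SIZE, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, _registers);
}
#endif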
5862 
5863 
5864 area_id
5865 clone_area(const char* name, void** _address, uint32 addressSpec,
5866 	uint32 protection, area_id source)
5867 {
5868 	if ((protection & B_KERNEL_PROTECTION) == 0)
5869 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5870 
5871 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
5872 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5873 }
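

// Sketch (not compiled): cloning a named area into the kernel address space,
// for instance to inspect a buffer that some other team exported. A read-only
// clone is requested explicitly here; as clone_area() above shows, the full
// kernel protection is only added when the caller specifies none at all.
#if 0
static area_id
example_clone_named_area(const char* name, void** _address)
{
	area_id source = find_area(name);
	if (source < B_OK)
		return source;

	*_address = NULL;
		// with B_ANY_KERNEL_ADDRESS the given value is ignored and the
		// chosen address is returned here
	return clone_area("example cloned area", _address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA, source);
}
#endif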
5874 
5875 
5876 area_id
5877 create_area_etc(team_id team, const char* name, void** address,
5878 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
5879 	uint32 flags)
5880 {
5881 	fix_protection(&protection);
5882 
5883 	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
5884 		size, lock, protection, flags, true);
5885 }
5886 
5887 
5888 area_id
5889 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
5890 	uint32 lock, uint32 protection)
5891 {
5892 	fix_protection(&protection);
5893 
5894 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name,
5895 		_address, addressSpec, size, lock, protection, 0, true);
5896 }
5897 
5898 
5899 status_t
5900 delete_area(area_id area)
5901 {
5902 	return vm_delete_area(vm_kernel_address_space_id(), area, true);
5903 }
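

// Sketch (not compiled): the usual lifetime of a temporary kernel buffer
// area. Name and size are placeholders; B_FULL_LOCK keeps the pages resident
// for the whole lifetime of the area.
#if 0
static status_t
example_with_temporary_area(void)
{
	void* address;
	area_id area = create_area("example scratch buffer", &address,
		B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (area < B_OK)
		return area;

	memset(address, 0, 4 * B_PAGE_SIZE);
		// ... use the buffer ...

	return delete_area(area);
}
#endif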
5904 
5905 
5906 //	#pragma mark - Userland syscalls
5907 
5908 
5909 status_t
5910 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size)
5911 {
5912 	// filter out some unavailable values (for userland)
5913 	switch (addressSpec) {
5914 		case B_ANY_KERNEL_ADDRESS:
5915 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5916 			return B_BAD_VALUE;
5917 	}
5918 
5919 	addr_t address;
5920 
5921 	if (!IS_USER_ADDRESS(userAddress)
5922 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5923 		return B_BAD_ADDRESS;
5924 
5925 	status_t status = vm_reserve_address_range(
5926 		vm_current_user_address_space_id(), (void**)&address, addressSpec, size,
5927 		RESERVED_AVOID_BASE);
5928 	if (status != B_OK)
5929 		return status;
5930 
5931 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5932 		vm_unreserve_address_range(vm_current_user_address_space_id(),
5933 			(void*)address, size);
5934 		return B_BAD_ADDRESS;
5935 	}
5936 
5937 	return B_OK;
5938 }
5939 
5940 
5941 status_t
5942 _user_unreserve_address_range(addr_t address, addr_t size)
5943 {
5944 	return vm_unreserve_address_range(vm_current_user_address_space_id(),
5945 		(void*)address, size);
5946 }
5947 
5948 
5949 area_id
5950 _user_area_for(void* address)
5951 {
5952 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
5953 }
5954 
5955 
5956 area_id
5957 _user_find_area(const char* userName)
5958 {
5959 	char name[B_OS_NAME_LENGTH];
5960 
5961 	if (!IS_USER_ADDRESS(userName)
5962 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5963 		return B_BAD_ADDRESS;
5964 
5965 	return find_area(name);
5966 }
5967 
5968 
5969 status_t
5970 _user_get_area_info(area_id area, area_info* userInfo)
5971 {
5972 	if (!IS_USER_ADDRESS(userInfo))
5973 		return B_BAD_ADDRESS;
5974 
5975 	area_info info;
5976 	status_t status = get_area_info(area, &info);
5977 	if (status < B_OK)
5978 		return status;
5979 
5980 	// TODO: do we want to prevent userland from seeing kernel protections?
5981 	//info.protection &= B_USER_PROTECTION;
5982 
5983 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5984 		return B_BAD_ADDRESS;
5985 
5986 	return status;
5987 }
5988 
5989 
5990 status_t
5991 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5992 {
5993 	int32 cookie;
5994 
5995 	if (!IS_USER_ADDRESS(userCookie)
5996 		|| !IS_USER_ADDRESS(userInfo)
5997 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5998 		return B_BAD_ADDRESS;
5999 
6000 	area_info info;
6001 	status_t status = _get_next_area_info(team, &cookie, &info,
6002 		sizeof(area_info));
6003 	if (status != B_OK)
6004 		return status;
6005 
6006 	//info.protection &= B_USER_PROTECTION;
6007 
6008 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
6009 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6010 		return B_BAD_ADDRESS;
6011 
6012 	return status;
6013 }
6014 
6015 
6016 status_t
6017 _user_set_area_protection(area_id area, uint32 newProtection)
6018 {
6019 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6020 		return B_BAD_VALUE;
6021 
6022 	fix_protection(&newProtection);
6023 
6024 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
6025 		newProtection, false);
6026 }
6027 
6028 
6029 status_t
6030 _user_resize_area(area_id area, size_t newSize)
6031 {
6032 	// TODO: Since we restrict deleting of areas to those owned by the team,
6033 	// we should also do that for resizing (check other functions, too).
6034 	return vm_resize_area(area, newSize, false);
6035 }
6036 
6037 
6038 area_id
6039 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6040 	team_id target)
6041 {
6042 	// filter out some unavailable values (for userland)
6043 	switch (addressSpec) {
6044 		case B_ANY_KERNEL_ADDRESS:
6045 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6046 			return B_BAD_VALUE;
6047 	}
6048 
6049 	void* address;
6050 	if (!IS_USER_ADDRESS(userAddress)
6051 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6052 		return B_BAD_ADDRESS;
6053 
6054 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6055 	if (newArea < B_OK)
6056 		return newArea;
6057 
6058 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6059 		return B_BAD_ADDRESS;
6060 
6061 	return newArea;
6062 }
6063 
6064 
6065 area_id
6066 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6067 	uint32 protection, area_id sourceArea)
6068 {
6069 	char name[B_OS_NAME_LENGTH];
6070 	void* address;
6071 
6072 	// filter out some unavailable values (for userland)
6073 	switch (addressSpec) {
6074 		case B_ANY_KERNEL_ADDRESS:
6075 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6076 			return B_BAD_VALUE;
6077 	}
6078 	if ((protection & ~B_USER_PROTECTION) != 0)
6079 		return B_BAD_VALUE;
6080 
6081 	if (!IS_USER_ADDRESS(userName)
6082 		|| !IS_USER_ADDRESS(userAddress)
6083 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6084 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6085 		return B_BAD_ADDRESS;
6086 
6087 	fix_protection(&protection);
6088 
6089 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
6090 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6091 		false);
6092 	if (clonedArea < B_OK)
6093 		return clonedArea;
6094 
6095 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6096 		delete_area(clonedArea);
6097 		return B_BAD_ADDRESS;
6098 	}
6099 
6100 	return clonedArea;
6101 }
6102 
6103 
6104 area_id
6105 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6106 	size_t size, uint32 lock, uint32 protection)
6107 {
6108 	char name[B_OS_NAME_LENGTH];
6109 	void* address;
6110 
6111 	// filter out some unavailable values (for userland)
6112 	switch (addressSpec) {
6113 		case B_ANY_KERNEL_ADDRESS:
6114 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6115 			return B_BAD_VALUE;
6116 	}
6117 	if ((protection & ~B_USER_PROTECTION) != 0)
6118 		return B_BAD_VALUE;
6119 
6120 	if (!IS_USER_ADDRESS(userName)
6121 		|| !IS_USER_ADDRESS(userAddress)
6122 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6123 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6124 		return B_BAD_ADDRESS;
6125 
6126 	if (addressSpec == B_EXACT_ADDRESS
6127 		&& IS_KERNEL_ADDRESS(address))
6128 		return B_BAD_VALUE;
6129 
6130 	fix_protection(&protection);
6131 
6132 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
6133 		(char*)name, &address, addressSpec, size, lock, protection, 0, false);
6134 
6135 	if (area >= B_OK
6136 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6137 		delete_area(area);
6138 		return B_BAD_ADDRESS;
6139 	}
6140 
6141 	return area;
6142 }
6143 
6144 
6145 status_t
6146 _user_delete_area(area_id area)
6147 {
6148 	// Unlike the BeOS implementation, you can now only delete areas
6149 	// that you have created yourself from userland.
6150 	// The documentation for delete_area() explicitly states that this
6151 	// will be restricted in the future, and so it will.
6152 	return vm_delete_area(vm_current_user_address_space_id(), area, false);
6153 }
6154 
6155 
6156 // TODO: create a BeOS-style call for this!
6157 
6158 area_id
6159 _user_map_file(const char* userName, void** userAddress, int addressSpec,
6160 	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
6161 	off_t offset)
6162 {
6163 	char name[B_OS_NAME_LENGTH];
6164 	void* address;
6165 	area_id area;
6166 
6167 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6168 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6169 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6170 		return B_BAD_ADDRESS;
6171 
6172 	if (addressSpec == B_EXACT_ADDRESS) {
6173 		if ((addr_t)address + size < (addr_t)address)
6174 			return B_BAD_VALUE;
6175 		if (!IS_USER_ADDRESS(address)
6176 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6177 			return B_BAD_ADDRESS;
6178 		}
6179 	}
6180 
6181 	// userland-created areas can always be accessed by the kernel
6182 	protection |= B_KERNEL_READ_AREA
6183 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6184 
6185 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
6186 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6187 		false);
6188 	if (area < B_OK)
6189 		return area;
6190 
6191 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6192 		return B_BAD_ADDRESS;
6193 
6194 	return area;
6195 }
6196 
6197 
6198 status_t
6199 _user_unmap_memory(void* _address, size_t size)
6200 {
6201 	addr_t address = (addr_t)_address;
6202 
6203 	// check params
6204 	if (size == 0 || (addr_t)address + size < (addr_t)address)
6205 		return B_BAD_VALUE;
6206 
6207 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6208 		return B_BAD_ADDRESS;
6209 
6210 	// write lock the address space
6211 	AddressSpaceWriteLocker locker;
6212 	status_t status = locker.SetTo(team_get_current_team_id());
6213 	if (status != B_OK)
6214 		return status;
6215 
6216 	// unmap
6217 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6218 }
6219 
6220 
6221 status_t
6222 _user_set_memory_protection(void* _address, size_t size, int protection)
6223 {
6224 	// check address range
6225 	addr_t address = (addr_t)_address;
6226 	size = PAGE_ALIGN(size);
6227 
6228 	if ((address % B_PAGE_SIZE) != 0)
6229 		return B_BAD_VALUE;
6230 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6231 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6232 		// weird error code required by POSIX
6233 		return ENOMEM;
6234 	}
6235 
6236 	// extend and check protection
6237 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
6238 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
6239 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6240 
6241 	if (!arch_vm_supports_protection(actualProtection))
6242 		return B_NOT_SUPPORTED;
6243 
6244 	// We need to write lock the address space, since we're going to play with
6245 	// the areas.
6246 	AddressSpaceWriteLocker locker;
6247 	status_t status = locker.SetTo(team_get_current_team_id());
6248 	if (status != B_OK)
6249 		return status;
6250 
6251 	// First round: Check whether the whole range is covered by areas and we are
6252 	// allowed to modify them.
6253 	addr_t currentAddress = address;
6254 	size_t sizeLeft = size;
6255 	while (sizeLeft > 0) {
6256 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6257 		if (area == NULL)
6258 			return B_NO_MEMORY;
6259 
6260 		if ((area->protection & B_KERNEL_AREA) != 0)
6261 			return B_NOT_ALLOWED;
6262 
6263 		// TODO: For (shared) mapped files we should check whether the new
6264 		// protections are compatible with the file permissions. We don't have
6265 		// a way to do that yet, though.
6266 
6267 		addr_t offset = currentAddress - area->base;
6268 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6269 
6270 		currentAddress += rangeSize;
6271 		sizeLeft -= rangeSize;
6272 	}
6273 
6274 	// Second round: If the protections differ from that of the area, create a
6275 	// page protection array and re-map mapped pages.
6276 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
6277 	currentAddress = address;
6278 	sizeLeft = size;
6279 	while (sizeLeft > 0) {
6280 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6281 		if (area == NULL)
6282 			return B_NO_MEMORY;
6283 
6284 		addr_t offset = currentAddress - area->base;
6285 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6286 
6287 		currentAddress += rangeSize;
6288 		sizeLeft -= rangeSize;
6289 
6290 		if (area->page_protections == NULL) {
6291 			if (area->protection == actualProtection)
6292 				continue;
6293 
6294 			// In the page protections we store only the three user protections,
6295 			// so we use 4 bits per page.
6296 			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
6297 			area->page_protections = (uint8*)malloc(bytes);
6298 			if (area->page_protections == NULL)
6299 				return B_NO_MEMORY;
6300 
6301 			// init the page protections for all pages to that of the area
6302 			uint32 areaProtection = area->protection
6303 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6304 			memset(area->page_protections,
6305 				areaProtection | (areaProtection << 4), bytes);
6306 		}
6307 
6308 		for (addr_t pageAddress = area->base + offset;
6309 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6310 			map->ops->lock(map);
6311 
6312 			set_area_page_protection(area, pageAddress, protection);
6313 
6314 			addr_t physicalAddress;
6315 			uint32 flags;
6316 
6317 			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
6318 				&flags);
6319 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6320 				map->ops->unlock(map);
6321 				continue;
6322 			}
6323 
6324 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6325 			if (page == NULL) {
6326 				panic("area %p looking up page failed for pa 0x%lx\n", area,
6327 					physicalAddress);
6328 				map->ops->unlock(map);
6329 				return B_ERROR;
6330 			}
6331 
6332 			// If the page is not in the topmost cache and write access is
6333 			// requested, we have to unmap it. Otherwise we can re-map it with
6334 			// the new protection.
6335 			bool unmapPage = page->cache != area->cache
6336 				&& (protection & B_WRITE_AREA) != 0;
6337 
6338 			if (!unmapPage) {
6339 				map->ops->unmap(map, pageAddress,
6340 					pageAddress + B_PAGE_SIZE - 1);
6341 				map->ops->map(map, pageAddress, physicalAddress,
6342 					actualProtection);
6343 			}
6344 
6345 			map->ops->unlock(map);
6346 
6347 			if (unmapPage)
6348 				vm_unmap_page(area, pageAddress, true);
6349 		}
6350 	}
6351 
6352 	return B_OK;
6353 }
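

// The page_protections array used above stores only the three user
// protection bits and therefore packs two pages into each byte. A
// self-contained sketch of such a 4-bits-per-page encoding (not compiled;
// the actual helpers used by this file are set_area_page_protection() and
// friends, defined elsewhere in this file):
#if 0
static void
example_pack_page_protection(uint8* protections, uint32 pageIndex,
	uint32 protection)
{
	// only the three user protection bits are stored
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;

	uint8& entry = protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		entry = (entry & 0xf0) | protection;
			// even page goes into the low nibble
	else
		entry = (entry & 0x0f) | (protection << 4);
			// odd page goes into the high nibble
}

static uint32
example_unpack_page_protection(const uint8* protections, uint32 pageIndex)
{
	uint8 entry = protections[pageIndex / 2];
	return pageIndex % 2 == 0 ? (uint32)(entry & 0x0f) : (uint32)(entry >> 4);
}
#endif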
6354 
6355 
6356 status_t
6357 _user_sync_memory(void* _address, size_t size, int flags)
6358 {
6359 	addr_t address = (addr_t)_address;
6360 	size = PAGE_ALIGN(size);
6361 
6362 	// check params
6363 	if ((address % B_PAGE_SIZE) != 0)
6364 		return B_BAD_VALUE;
6365 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6366 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6367 		// weird error code required by POSIX
6368 		return ENOMEM;
6369 	}
6370 
6371 	bool writeSync = (flags & MS_SYNC) != 0;
6372 	bool writeAsync = (flags & MS_ASYNC) != 0;
6373 	if (writeSync && writeAsync)
6374 		return B_BAD_VALUE;
6375 
6376 	if (size == 0 || (!writeSync && !writeAsync))
6377 		return B_OK;
6378 
6379 	// iterate through the range and sync all concerned areas
6380 	while (size > 0) {
6381 		// read lock the address space
6382 		AddressSpaceReadLocker locker;
6383 		status_t error = locker.SetTo(team_get_current_team_id());
6384 		if (error != B_OK)
6385 			return error;
6386 
6387 		// get the first area
6388 		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
6389 		if (area == NULL)
6390 			return B_NO_MEMORY;
6391 
6392 		uint32 offset = address - area->base;
6393 		size_t rangeSize = min_c(area->size - offset, size);
6394 		offset += area->cache_offset;
6395 
6396 		// lock the cache
6397 		AreaCacheLocker cacheLocker(area);
6398 		if (!cacheLocker)
6399 			return B_BAD_VALUE;
6400 		vm_cache* cache = area->cache;
6401 
6402 		locker.Unlock();
6403 
6404 		uint32 firstPage = offset >> PAGE_SHIFT;
6405 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6406 
6407 		// write the pages
6408 		if (cache->type == CACHE_TYPE_VNODE) {
6409 			if (writeSync) {
6410 				// synchronous
6411 				error = vm_page_write_modified_page_range(cache, firstPage,
6412 					endPage);
6413 				if (error != B_OK)
6414 					return error;
6415 			} else {
6416 				// asynchronous
6417 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6418 				// TODO: This is probably not quite what is supposed to happen.
6419 				// Especially when a lot has to be written, it might take ages
6420 				// until it really hits the disk.
6421 			}
6422 		}
6423 
6424 		address += rangeSize;
6425 		size -= rangeSize;
6426 	}
6427 
6428 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6429 	// synchronize multiple mappings of the same file. In our VM they never get
6430 	// out of sync, though, so we don't have to do anything.
6431 
6432 	return B_OK;
6433 }
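

// For reference, a sketch of the userland call this syscall backs (not
// compiled here, and not kernel code): flushing a shared file mapping
// synchronously with msync(MS_SYNC). The fd and length are placeholders.
#if 0
static int
example_flush_mapping(int fd, size_t length)
{
	void* mapping = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
		fd, 0);
	if (mapping == MAP_FAILED)
		return -1;

	// ... modify the mapped pages ...

	int result = msync(mapping, length, MS_SYNC);
	munmap(mapping, length);
	return result;
}
#endif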
6434 
6435 
6436 status_t
6437 _user_memory_advice(void* address, size_t size, int advice)
6438 {
6439 	// TODO: Implement!
6440 	return B_OK;
6441 }
6442