xref: /haiku/src/system/kernel/vm/vm.cpp (revision 0dfceb2b8af9cd27312407bf35879ae38980a664)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <arch/cpu.h>
27 #include <arch/vm.h>
28 #include <boot/elf.h>
29 #include <boot/stage2.h>
30 #include <condition_variable.h>
31 #include <console.h>
32 #include <debug.h>
33 #include <file_cache.h>
34 #include <fs/fd.h>
35 #include <heap.h>
36 #include <kernel.h>
37 #include <int.h>
38 #include <lock.h>
39 #include <low_resource_manager.h>
40 #include <smp.h>
41 #include <system_info.h>
42 #include <thread.h>
43 #include <team.h>
44 #include <tracing.h>
45 #include <util/AutoLock.h>
46 #include <util/khash.h>
47 #include <vm_address_space.h>
48 #include <vm_cache.h>
49 #include <vm_page.h>
50 #include <vm_priv.h>
51 
52 #include "VMAnonymousCache.h"
53 #include "IORequest.h"
54 
55 
56 //#define TRACE_VM
57 //#define TRACE_FAULTS
58 #ifdef TRACE_VM
59 #	define TRACE(x) dprintf x
60 #else
61 #	define TRACE(x) ;
62 #endif
63 #ifdef TRACE_FAULTS
64 #	define FTRACE(x) dprintf x
65 #else
66 #	define FTRACE(x) ;
67 #endif
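
// Usage note (illustrative; not part of the original source): TRACE and
// FTRACE take a doubly parenthesized argument list so that the whole
// dprintf() call disappears when the corresponding tracing switch is off,
// for example:
//
//	TRACE(("map_backing_store: cache %p, size %lu\n", cache, size));
//	FTRACE(("page fault at address %#lx\n", address));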
68 
69 
70 class AddressSpaceReadLocker {
71 public:
72 	AddressSpaceReadLocker(team_id team);
73 	AddressSpaceReadLocker(vm_address_space* space, bool getNewReference);
74 	AddressSpaceReadLocker();
75 	~AddressSpaceReadLocker();
76 
77 	status_t SetTo(team_id team);
78 	void SetTo(vm_address_space* space, bool getNewReference);
79 	status_t SetFromArea(area_id areaID, vm_area*& area);
80 
81 	bool IsLocked() const { return fLocked; }
82 	bool Lock();
83 	void Unlock();
84 
85 	void Unset();
86 
87 	vm_address_space* AddressSpace() { return fSpace; }
88 
89 private:
90 	vm_address_space* fSpace;
91 	bool	fLocked;
92 };
93 
94 class AddressSpaceWriteLocker {
95 public:
96 	AddressSpaceWriteLocker(team_id team);
97 	AddressSpaceWriteLocker();
98 	~AddressSpaceWriteLocker();
99 
100 	status_t SetTo(team_id team);
101 	status_t SetFromArea(area_id areaID, vm_area*& area);
102 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
103 		vm_area*& area);
104 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
105 
106 	bool IsLocked() const { return fLocked; }
107 	void Unlock();
108 
109 	void DegradeToReadLock();
110 	void Unset();
111 
112 	vm_address_space* AddressSpace() { return fSpace; }
113 
114 private:
115 	vm_address_space* fSpace;
116 	bool	fLocked;
117 	bool	fDegraded;
118 };
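
// Illustrative usage sketch (not part of the original source; "team" stands
// for an arbitrary team_id): both locker classes are RAII helpers, so the
// destructor drops the rw_lock and the address space reference again.
//
//	AddressSpaceWriteLocker locker(team);
//	if (!locker.IsLocked())
//		return B_BAD_TEAM_ID;
//
//	vm_address_space* addressSpace = locker.AddressSpace();
//	// ... modify the address space's areas under the write lock ...
//
//	locker.DegradeToReadLock();
//	// ... continue with read-only work; ~AddressSpaceWriteLocker() cleans up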
119 
120 class MultiAddressSpaceLocker {
121 public:
122 	MultiAddressSpaceLocker();
123 	~MultiAddressSpaceLocker();
124 
125 	inline status_t AddTeam(team_id team, bool writeLock,
126 		vm_address_space** _space = NULL);
127 	inline status_t AddArea(area_id area, bool writeLock,
128 		vm_address_space** _space = NULL);
129 
130 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
131 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL);
132 
133 	status_t Lock();
134 	void Unlock();
135 	bool IsLocked() const { return fLocked; }
136 
137 	void Unset();
138 
139 private:
140 	struct lock_item {
141 		vm_address_space*	space;
142 		bool				write_lock;
143 	};
144 
145 	bool _ResizeIfNeeded();
146 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
147 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
148 		vm_address_space** _space);
149 
150 	static int _CompareItems(const void* _a, const void* _b);
151 
152 	lock_item*	fItems;
153 	int32		fCapacity;
154 	int32		fCount;
155 	bool		fLocked;
156 };
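
// Illustrative usage sketch (not part of the original source; teamA and teamB
// are placeholder team IDs): address spaces are collected first and then
// locked in a canonical order (sorted by ID in Lock()), which avoids
// lock-order inversions when several teams are involved.
//
//	MultiAddressSpaceLocker locker;
//	status_t status = locker.AddTeam(teamA, true);
//	if (status == B_OK)
//		status = locker.AddTeam(teamB, false);
//	if (status == B_OK)
//		status = locker.Lock();
//	if (status != B_OK)
//		return status;
//	// ... both address spaces are locked now ...
//	locker.Unlock();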
157 
158 
159 class AreaCacheLocking {
160 public:
161 	inline bool Lock(vm_cache* lockable)
162 	{
163 		return false;
164 	}
165 
166 	inline void Unlock(vm_cache* lockable)
167 	{
168 		vm_area_put_locked_cache(lockable);
169 	}
170 };
171 
172 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
173 public:
174 	inline AreaCacheLocker(vm_cache* cache = NULL)
175 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
176 	{
177 	}
178 
179 	inline AreaCacheLocker(vm_area* area)
180 		: AutoLocker<vm_cache, AreaCacheLocking>()
181 	{
182 		SetTo(area);
183 	}
184 
185 	inline void SetTo(vm_area* area)
186 	{
187 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
188 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
189 	}
190 };
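
// Illustrative usage sketch (not part of the original source): constructing
// the locker from a vm_area fetches and locks the area's cache via
// vm_area_get_locked_cache(); the destructor hands it back through
// vm_area_put_locked_cache(). The area's address space is expected to be
// locked by the caller.
//
//	AreaCacheLocker cacheLocker(area);
//	vm_cache* cache = area->cache;
//	// ... operate on the locked cache ...
//	// cacheLocker's destructor puts the cache again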
191 
192 
193 #define AREA_HASH_TABLE_SIZE 1024
194 static area_id sNextAreaID = 1;
195 static hash_table* sAreaHash;
196 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash");
197 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings");
198 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache");
199 
200 static off_t sAvailableMemory;
201 static off_t sNeededMemory;
202 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
203 static uint32 sPageFaults;
204 
205 #if DEBUG_CACHE_LIST
206 
207 struct cache_info {
208 	vm_cache*	cache;
209 	addr_t		page_count;
210 	addr_t		committed;
211 };
212 
213 static const int kCacheInfoTableCount = 100 * 1024;
214 static cache_info* sCacheInfoTable;
215 
216 #endif	// DEBUG_CACHE_LIST
217 
218 
219 // function declarations
220 static void delete_area(vm_address_space* addressSpace, vm_area* area);
221 static vm_address_space* get_address_space_by_area_id(area_id id);
222 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address,
223 	bool isWrite, bool isUser);
224 static status_t map_backing_store(vm_address_space* addressSpace,
225 	vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size,
226 	uint32 addressSpec, int wiring, int protection, int mapping,
227 	vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel);
228 
229 
230 static size_t sKernelAddressSpaceLeft = KERNEL_SIZE;
231 
232 
233 //	#pragma mark -
234 
235 
236 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
237 	:
238 	fSpace(NULL),
239 	fLocked(false)
240 {
241 	SetTo(team);
242 }
243 
244 
245 /*! Takes over the reference of the address space, if \a getNewReference is
246 	\c false.
247 */
248 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space,
249 		bool getNewReference)
250 	:
251 	fSpace(NULL),
252 	fLocked(false)
253 {
254 	SetTo(space, getNewReference);
255 }
256 
257 
258 AddressSpaceReadLocker::AddressSpaceReadLocker()
259 	:
260 	fSpace(NULL),
261 	fLocked(false)
262 {
263 }
264 
265 
266 AddressSpaceReadLocker::~AddressSpaceReadLocker()
267 {
268 	Unset();
269 }
270 
271 
272 void
273 AddressSpaceReadLocker::Unset()
274 {
275 	Unlock();
276 	if (fSpace != NULL)
277 		vm_put_address_space(fSpace);
278 }
279 
280 
281 status_t
282 AddressSpaceReadLocker::SetTo(team_id team)
283 {
284 	fSpace = vm_get_address_space(team);
285 	if (fSpace == NULL)
286 		return B_BAD_TEAM_ID;
287 
288 	rw_lock_read_lock(&fSpace->lock);
289 	fLocked = true;
290 	return B_OK;
291 }
292 
293 
294 /*! Takes over the reference of the address space, if \a getNewReference is
295 	\c false.
296 */
297 void
298 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference)
299 {
300 	fSpace = space;
301 
302 	if (getNewReference)
303 		atomic_add(&fSpace->ref_count, 1);
304 
305 	rw_lock_read_lock(&fSpace->lock);
306 	fLocked = true;
307 }
308 
309 
310 status_t
311 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
312 {
313 	fSpace = get_address_space_by_area_id(areaID);
314 	if (fSpace == NULL)
315 		return B_BAD_TEAM_ID;
316 
317 	rw_lock_read_lock(&fSpace->lock);
318 
319 	rw_lock_read_lock(&sAreaHashLock);
320 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
321 	rw_lock_read_unlock(&sAreaHashLock);
322 
323 	if (area == NULL || area->address_space != fSpace) {
324 		rw_lock_read_unlock(&fSpace->lock);
325 		return B_BAD_VALUE;
326 	}
327 
328 	fLocked = true;
329 	return B_OK;
330 }
331 
332 
333 bool
334 AddressSpaceReadLocker::Lock()
335 {
336 	if (fLocked)
337 		return true;
338 	if (fSpace == NULL)
339 		return false;
340 
341 	rw_lock_read_lock(&fSpace->lock);
342 	fLocked = true;
343 
344 	return true;
345 }
346 
347 
348 void
349 AddressSpaceReadLocker::Unlock()
350 {
351 	if (fLocked) {
352 		rw_lock_read_unlock(&fSpace->lock);
353 		fLocked = false;
354 	}
355 }
356 
357 
358 //	#pragma mark -
359 
360 
361 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
362 	:
363 	fSpace(NULL),
364 	fLocked(false),
365 	fDegraded(false)
366 {
367 	SetTo(team);
368 }
369 
370 
371 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
372 	:
373 	fSpace(NULL),
374 	fLocked(false),
375 	fDegraded(false)
376 {
377 }
378 
379 
380 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
381 {
382 	Unset();
383 }
384 
385 
386 void
387 AddressSpaceWriteLocker::Unset()
388 {
389 	Unlock();
390 	if (fSpace != NULL)
391 		vm_put_address_space(fSpace);
392 }
393 
394 
395 status_t
396 AddressSpaceWriteLocker::SetTo(team_id team)
397 {
398 	fSpace = vm_get_address_space(team);
399 	if (fSpace == NULL)
400 		return B_BAD_TEAM_ID;
401 
402 	rw_lock_write_lock(&fSpace->lock);
403 	fLocked = true;
404 	return B_OK;
405 }
406 
407 
408 status_t
409 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
410 {
411 	fSpace = get_address_space_by_area_id(areaID);
412 	if (fSpace == NULL)
413 		return B_BAD_VALUE;
414 
415 	rw_lock_write_lock(&fSpace->lock);
416 
417 	rw_lock_read_lock(&sAreaHashLock);
418 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
419 	rw_lock_read_unlock(&sAreaHashLock);
420 
421 	if (area == NULL || area->address_space != fSpace) {
422 		rw_lock_write_unlock(&fSpace->lock);
423 		return B_BAD_VALUE;
424 	}
425 
426 	fLocked = true;
427 	return B_OK;
428 }
429 
430 
431 status_t
432 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
433 	bool allowKernel, vm_area*& area)
434 {
435 	rw_lock_read_lock(&sAreaHashLock);
436 
437 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
438 	if (area != NULL
439 		&& (area->address_space->id == team
440 			|| (allowKernel && team == vm_kernel_address_space_id()))) {
441 		fSpace = area->address_space;
442 		atomic_add(&fSpace->ref_count, 1);
443 	}
444 
445 	rw_lock_read_unlock(&sAreaHashLock);
446 
447 	if (fSpace == NULL)
448 		return B_BAD_VALUE;
449 
450 	// Second try to get the area -- this time with the address space
451 	// write lock held
452 
453 	rw_lock_write_lock(&fSpace->lock);
454 
455 	rw_lock_read_lock(&sAreaHashLock);
456 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
457 	rw_lock_read_unlock(&sAreaHashLock);
458 
459 	if (area == NULL) {
460 		rw_lock_write_unlock(&fSpace->lock);
461 		return B_BAD_VALUE;
462 	}
463 
464 	fLocked = true;
465 	return B_OK;
466 }
467 
468 
469 status_t
470 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
471 	vm_area*& area)
472 {
473 	return SetFromArea(team, areaID, false, area);
474 }
475 
476 
477 void
478 AddressSpaceWriteLocker::Unlock()
479 {
480 	if (fLocked) {
481 		if (fDegraded)
482 			rw_lock_read_unlock(&fSpace->lock);
483 		else
484 			rw_lock_write_unlock(&fSpace->lock);
485 		fLocked = false;
486 		fDegraded = false;
487 	}
488 }
489 
490 
491 void
492 AddressSpaceWriteLocker::DegradeToReadLock()
493 {
494 	// TODO: the current R/W lock implementation just keeps the write lock here
495 	rw_lock_read_lock(&fSpace->lock);
496 	rw_lock_write_unlock(&fSpace->lock);
497 	fDegraded = true;
498 }
499 
500 
501 //	#pragma mark -
502 
503 
504 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
505 	:
506 	fItems(NULL),
507 	fCapacity(0),
508 	fCount(0),
509 	fLocked(false)
510 {
511 }
512 
513 
514 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
515 {
516 	Unset();
517 	free(fItems);
518 }
519 
520 
521 /*static*/ int
522 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
523 {
524 	lock_item* a = (lock_item*)_a;
525 	lock_item* b = (lock_item*)_b;
526 	return a->space->id - b->space->id;
527 }
528 
529 
530 bool
531 MultiAddressSpaceLocker::_ResizeIfNeeded()
532 {
533 	if (fCount == fCapacity) {
534 		lock_item* items = (lock_item*)realloc(fItems,
535 			(fCapacity + 4) * sizeof(lock_item));
536 		if (items == NULL)
537 			return false;
538 
539 		fCapacity += 4;
540 		fItems = items;
541 	}
542 
543 	return true;
544 }
545 
546 
547 int32
548 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
549 {
550 	for (int32 i = 0; i < fCount; i++) {
551 		if (fItems[i].space == space)
552 			return i;
553 	}
554 
555 	return -1;
556 }
557 
558 
559 status_t
560 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
561 	bool writeLock, vm_address_space** _space)
562 {
563 	if (!space)
564 		return B_BAD_VALUE;
565 
566 	int32 index = _IndexOfAddressSpace(space);
567 	if (index < 0) {
568 		if (!_ResizeIfNeeded()) {
569 			vm_put_address_space(space);
570 			return B_NO_MEMORY;
571 		}
572 
573 		lock_item& item = fItems[fCount++];
574 		item.space = space;
575 		item.write_lock = writeLock;
576 	} else {
577 
578 		// one reference is enough
579 		vm_put_address_space(space);
580 
581 		fItems[index].write_lock |= writeLock;
582 	}
583 
584 	if (_space != NULL)
585 		*_space = space;
586 
587 	return B_OK;
588 }
589 
590 
591 inline status_t
592 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
593 	vm_address_space** _space)
594 {
595 	return _AddAddressSpace(vm_get_address_space(team), writeLock,
596 		_space);
597 }
598 
599 
600 inline status_t
601 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
602 	vm_address_space** _space)
603 {
604 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
605 		_space);
606 }
607 
608 
609 void
610 MultiAddressSpaceLocker::Unset()
611 {
612 	Unlock();
613 
614 	for (int32 i = 0; i < fCount; i++)
615 		vm_put_address_space(fItems[i].space);
616 
617 	fCount = 0;
618 }
619 
620 
621 status_t
622 MultiAddressSpaceLocker::Lock()
623 {
624 	ASSERT(!fLocked);
625 
626 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
627 
628 	for (int32 i = 0; i < fCount; i++) {
629 		status_t status;
630 		if (fItems[i].write_lock)
631 			status = rw_lock_write_lock(&fItems[i].space->lock);
632 		else
633 			status = rw_lock_read_lock(&fItems[i].space->lock);
634 
635 		if (status < B_OK) {
636 			while (--i >= 0) {
637 				if (fItems[i].write_lock)
638 					rw_lock_write_unlock(&fItems[i].space->lock);
639 				else
640 					rw_lock_read_unlock(&fItems[i].space->lock);
641 			}
642 			return status;
643 		}
644 	}
645 
646 	fLocked = true;
647 	return B_OK;
648 }
649 
650 
651 void
652 MultiAddressSpaceLocker::Unlock()
653 {
654 	if (!fLocked)
655 		return;
656 
657 	for (int32 i = 0; i < fCount; i++) {
658 		if (fItems[i].write_lock)
659 			rw_lock_write_unlock(&fItems[i].space->lock);
660 		else
661 			rw_lock_read_unlock(&fItems[i].space->lock);
662 	}
663 
664 	fLocked = false;
665 }
666 
667 
668 /*!	Adds all address spaces of the areas associated with the given area's cache,
669 	locks them, and locks the cache (including a reference to it). It retries
670 	until the situation is stable (i.e. neither the cache nor the cache's
671 	areas changed) or an error occurs.
672 */
673 status_t
674 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
675 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
676 	vm_cache** _cache)
677 {
678 	// remember the original state
679 	int originalCount = fCount;
680 	lock_item* originalItems = NULL;
681 	if (fCount > 0) {
682 		originalItems = new(nothrow) lock_item[fCount];
683 		if (originalItems == NULL)
684 			return B_NO_MEMORY;
685 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
686 	}
687 	ArrayDeleter<lock_item> _(originalItems);
688 
689 	// get the cache
690 	vm_cache* cache;
691 	vm_area* area;
692 	status_t error;
693 	{
694 		AddressSpaceReadLocker locker;
695 		error = locker.SetFromArea(areaID, area);
696 		if (error != B_OK)
697 			return error;
698 
699 		cache = vm_area_get_locked_cache(area);
700 	}
701 
702 	while (true) {
703 		// add all areas
704 		vm_area* firstArea = cache->areas;
705 		for (vm_area* current = firstArea; current;
706 				current = current->cache_next) {
707 			error = AddArea(current->id,
708 				current == area ? writeLockThisOne : writeLockOthers);
709 			if (error != B_OK) {
710 				vm_area_put_locked_cache(cache);
711 				return error;
712 			}
713 		}
714 
715 		// unlock the cache and attempt to lock the address spaces
716 		vm_area_put_locked_cache(cache);
717 
718 		error = Lock();
719 		if (error != B_OK)
720 			return error;
721 
722 		// lock the cache again and check whether anything has changed
723 
724 		// check whether the area is gone in the meantime
725 		rw_lock_read_lock(&sAreaHashLock);
726 		area = (vm_area*)hash_lookup(sAreaHash, &areaID);
727 		rw_lock_read_unlock(&sAreaHashLock);
728 
729 		if (area == NULL) {
730 			Unlock();
731 			return B_BAD_VALUE;
732 		}
733 
734 		// lock the cache
735 		vm_cache* oldCache = cache;
736 		cache = vm_area_get_locked_cache(area);
737 
738 		// If neither the area's cache nor its area list has changed, we're
739 		// done.
740 		if (cache == oldCache && firstArea == cache->areas) {
741 			_area = area;
742 			if (_cache != NULL)
743 				*_cache = cache;
744 			return B_OK;
745 		}
746 
747 		// Restore the original state and try again.
748 
749 		// Unlock the address spaces, but keep the cache locked for the next
750 		// iteration.
751 		Unlock();
752 
753 		// Get an additional reference to the original address spaces.
754 		for (int32 i = 0; i < originalCount; i++)
755 			atomic_add(&originalItems[i].space->ref_count, 1);
756 
757 		// Release all references to the current address spaces.
758 		for (int32 i = 0; i < fCount; i++)
759 			vm_put_address_space(fItems[i].space);
760 
761 		// Copy over the original state.
762 		fCount = originalCount;
763 		if (originalItems != NULL)
764 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
765 	}
766 }
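
// Illustrative caller sketch (not part of the original source; "areaID" is a
// placeholder): operations that affect every mapping of an area's cache
// typically combine AddAreaCacheAndLock() with an AreaCacheLocker, since the
// method returns with the cache already locked and referenced.
//
//	MultiAddressSpaceLocker locker;
//	vm_area* area;
//	vm_cache* cache;
//	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
//		&cache);
//	if (status != B_OK)
//		return status;
//	AreaCacheLocker cacheLocker(cache);
//		// the cache is returned locked; the locker only adopts it
//	// ... all mapping address spaces and the cache are now locked ...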
767 
768 
769 //	#pragma mark -
770 
771 
772 #if VM_PAGE_FAULT_TRACING
773 
774 namespace VMPageFaultTracing {
775 
776 class PageFaultStart : public AbstractTraceEntry {
777 public:
778 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
779 		:
780 		fAddress(address),
781 		fPC(pc),
782 		fWrite(write),
783 		fUser(user)
784 	{
785 		Initialized();
786 	}
787 
788 	virtual void AddDump(TraceOutput& out)
789 	{
790 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
791 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
792 	}
793 
794 private:
795 	addr_t	fAddress;
796 	addr_t	fPC;
797 	bool	fWrite;
798 	bool	fUser;
799 };
800 
801 
802 // page fault errors
803 enum {
804 	PAGE_FAULT_ERROR_NO_AREA		= 0,
805 	PAGE_FAULT_ERROR_KERNEL_ONLY,
806 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
807 	PAGE_FAULT_ERROR_READ_PROTECTED,
808 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
809 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
810 };
811 
812 
813 class PageFaultError : public AbstractTraceEntry {
814 public:
815 	PageFaultError(area_id area, status_t error)
816 		:
817 		fArea(area),
818 		fError(error)
819 	{
820 		Initialized();
821 	}
822 
823 	virtual void AddDump(TraceOutput& out)
824 	{
825 		switch (fError) {
826 			case PAGE_FAULT_ERROR_NO_AREA:
827 				out.Print("page fault error: no area");
828 				break;
829 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
830 				out.Print("page fault error: area: %ld, kernel only", fArea);
831 				break;
832 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
833 				out.Print("page fault error: area: %ld, write protected",
834 					fArea);
835 				break;
836 			case PAGE_FAULT_ERROR_READ_PROTECTED:
837 				out.Print("page fault error: area: %ld, read protected", fArea);
838 				break;
839 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
840 				out.Print("page fault error: kernel touching bad user memory");
841 				break;
842 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
843 				out.Print("page fault error: no address space");
844 				break;
845 			default:
846 				out.Print("page fault error: area: %ld, error: %s", fArea,
847 					strerror(fError));
848 				break;
849 		}
850 	}
851 
852 private:
853 	area_id		fArea;
854 	status_t	fError;
855 };
856 
857 
858 class PageFaultDone : public AbstractTraceEntry {
859 public:
860 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
861 			vm_page* page)
862 		:
863 		fArea(area),
864 		fTopCache(topCache),
865 		fCache(cache),
866 		fPage(page)
867 	{
868 		Initialized();
869 	}
870 
871 	virtual void AddDump(TraceOutput& out)
872 	{
873 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
874 			"page: %p", fArea, fTopCache, fCache, fPage);
875 	}
876 
877 private:
878 	area_id		fArea;
879 	VMCache*	fTopCache;
880 	VMCache*	fCache;
881 	vm_page*	fPage;
882 };
883 
884 }	// namespace VMPageFaultTracing
885 
886 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
887 #else
888 #	define TPF(x) ;
889 #endif	// VM_PAGE_FAULT_TRACING
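
// Usage note (illustrative, not in the original source): the entries above
// are created through the TPF() macro, e.g. from the page fault path, and
// compile to nothing when VM_PAGE_FAULT_TRACING is disabled:
//
//	TPF(PageFaultStart(address, isWrite, isUser, pc));
//	TPF(PageFaultError(area->id, PAGE_FAULT_ERROR_WRITE_PROTECTED));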
890 
891 
892 //	#pragma mark -
893 
894 
895 static int
896 area_compare(void* _area, const void* key)
897 {
898 	vm_area* area = (vm_area*)_area;
899 	const area_id* id = (const area_id*)key;
900 
901 	if (area->id == *id)
902 		return 0;
903 
904 	return -1;
905 }
906 
907 
908 static uint32
909 area_hash(void* _area, const void* key, uint32 range)
910 {
911 	vm_area* area = (vm_area*)_area;
912 	const area_id* id = (const area_id*)key;
913 
914 	if (area != NULL)
915 		return area->id % range;
916 
917 	return (uint32)*id % range;
918 }
919 
920 
921 static vm_address_space*
922 get_address_space_by_area_id(area_id id)
923 {
924 	vm_address_space* addressSpace = NULL;
925 
926 	rw_lock_read_lock(&sAreaHashLock);
927 
928 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
929 	if (area != NULL) {
930 		addressSpace = area->address_space;
931 		atomic_add(&addressSpace->ref_count, 1);
932 	}
933 
934 	rw_lock_read_unlock(&sAreaHashLock);
935 
936 	return addressSpace;
937 }
938 
939 
940 //! You need to have the address space locked when calling this function
941 static vm_area*
942 lookup_area(vm_address_space* addressSpace, area_id id)
943 {
944 	rw_lock_read_lock(&sAreaHashLock);
945 
946 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
947 	if (area != NULL && area->address_space != addressSpace)
948 		area = NULL;
949 
950 	rw_lock_read_unlock(&sAreaHashLock);
951 
952 	return area;
953 }
954 
955 
956 static vm_area*
957 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags)
958 {
959 	vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area));
960 	if (reserved == NULL)
961 		return NULL;
962 
963 	memset(reserved, 0, sizeof(vm_area));
964 	reserved->id = RESERVED_AREA_ID;
965 		// this marks it as reserved space
966 	reserved->protection = flags;
967 	reserved->address_space = addressSpace;
968 
969 	return reserved;
970 }
971 
972 
973 static vm_area*
974 create_area_struct(vm_address_space* addressSpace, const char* name,
975 	uint32 wiring, uint32 protection)
976 {
977 	// restrict the area name to B_OS_NAME_LENGTH
978 	size_t length = strlen(name) + 1;
979 	if (length > B_OS_NAME_LENGTH)
980 		length = B_OS_NAME_LENGTH;
981 
982 	vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area));
983 	if (area == NULL)
984 		return NULL;
985 
986 	area->name = (char*)malloc_nogrow(length);
987 	if (area->name == NULL) {
988 		free(area);
989 		return NULL;
990 	}
991 	strlcpy(area->name, name, length);
992 
993 	area->id = atomic_add(&sNextAreaID, 1);
994 	area->base = 0;
995 	area->size = 0;
996 	area->protection = protection;
997 	area->wiring = wiring;
998 	area->memory_type = 0;
999 
1000 	area->cache = NULL;
1001 	area->cache_offset = 0;
1002 
1003 	area->address_space = addressSpace;
1004 	area->address_space_next = NULL;
1005 	area->cache_next = area->cache_prev = NULL;
1006 	area->hash_next = NULL;
1007 	new (&area->mappings) vm_area_mappings;
1008 	area->page_protections = NULL;
1009 
1010 	return area;
1011 }
1012 
1013 
1014 /*!	Finds a reserved area that covers the region spanned by \a start and
1015 	\a size, inserts the \a area into that region and makes sure that
1016 	there are reserved regions for the remaining parts.
1017 */
1018 static status_t
1019 find_reserved_area(vm_address_space* addressSpace, addr_t start,
1020 	addr_t size, vm_area* area)
1021 {
1022 	vm_area* last = NULL;
1023 	vm_area* next;
1024 
1025 	next = addressSpace->areas;
1026 	while (next != NULL) {
1027 		if (next->base <= start
1028 			&& next->base + (next->size - 1) >= start + (size - 1)) {
1029 			// This area covers the requested range
1030 			if (next->id != RESERVED_AREA_ID) {
1031 				// but it's not reserved space, it's a real area
1032 				return B_BAD_VALUE;
1033 			}
1034 
1035 			break;
1036 		}
1037 
1038 		last = next;
1039 		next = next->address_space_next;
1040 	}
1041 
1042 	if (next == NULL)
1043 		return B_ENTRY_NOT_FOUND;
1044 
1045 	// Now we have to transfer the requested part of the reserved
1046 	// range to the new area - and remove, resize or split the old
1047 	// reserved area.
1048 
1049 	if (start == next->base) {
1050 		// the area starts at the beginning of the reserved range
1051 		if (last)
1052 			last->address_space_next = area;
1053 		else
1054 			addressSpace->areas = area;
1055 
1056 		if (size == next->size) {
1057 			// the new area fully covers the reserved range
1058 			area->address_space_next = next->address_space_next;
1059 			vm_put_address_space(addressSpace);
1060 			free(next);
1061 		} else {
1062 			// resize the reserved range behind the area
1063 			area->address_space_next = next;
1064 			next->base += size;
1065 			next->size -= size;
1066 		}
1067 	} else if (start + size == next->base + next->size) {
1068 		// the area is at the end of the reserved range
1069 		area->address_space_next = next->address_space_next;
1070 		next->address_space_next = area;
1071 
1072 		// resize the reserved range before the area
1073 		next->size = start - next->base;
1074 	} else {
1075 		// the area splits the reserved range into two separate ones
1076 		// we need a new reserved area to cover this space
1077 		vm_area* reserved = create_reserved_area_struct(addressSpace,
1078 			next->protection);
1079 		if (reserved == NULL)
1080 			return B_NO_MEMORY;
1081 
1082 		atomic_add(&addressSpace->ref_count, 1);
1083 		reserved->address_space_next = next->address_space_next;
1084 		area->address_space_next = reserved;
1085 		next->address_space_next = area;
1086 
1087 		// resize regions
1088 		reserved->size = next->base + next->size - start - size;
1089 		next->size = start - next->base;
1090 		reserved->base = start + size;
1091 		reserved->cache_offset = next->cache_offset;
1092 	}
1093 
1094 	area->base = start;
1095 	area->size = size;
1096 	addressSpace->change_count++;
1097 
1098 	return B_OK;
1099 }
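
// Illustration (not part of the original source): inserting an area into the
// middle of a reserved range splits the reservation, while inserting it at
// either end only shrinks the existing reserved area:
//
//	before:  [............ reserved ............]
//	after:   [ reserved ][ new area ][ reserved' ]   (reserved' newly created)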
1100 
1101 
1102 /*!	Verifies that an area with the given aligned base and size fits into
1103 	the spot defined by base and limit, and checks for overflows.
1104 */
1105 static inline bool
1106 is_valid_spot(addr_t base, addr_t alignedBase, addr_t size, addr_t limit)
1107 {
1108 	return (alignedBase >= base && alignedBase + (size - 1) > alignedBase
1109 		&& alignedBase + (size - 1) <= limit);
1110 }
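
// Worked example (illustrative, not in the original source): with
// base == 0x1000, alignedBase == 0x2000, size == 0x3000 and limit == 0x4fff
// the spot is accepted, since 0x2000 >= 0x1000 and
// 0x2000 + 0x2fff == 0x4fff <= limit. The middle condition additionally
// rejects spots where alignedBase + (size - 1) wraps around the top of the
// address space.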
1111 
1112 
1113 /*!	Must be called with this address space's write lock held */
1114 static status_t
1115 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start,
1116 	addr_t size, addr_t end, uint32 addressSpec, vm_area* area)
1117 {
1118 	vm_area* last = NULL;
1119 	vm_area* next;
1120 	bool foundSpot = false;
1121 
1122 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
1123 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
1124 		size, end, addressSpec, area));
1125 
1126 	// do some sanity checking
1127 	if (start < addressSpace->base || size == 0
1128 		|| end > addressSpace->base + (addressSpace->size - 1)
1129 		|| start + (size - 1) > end)
1130 		return B_BAD_ADDRESS;
1131 
1132 	if (addressSpec == B_EXACT_ADDRESS && area->id != RESERVED_AREA_ID) {
1133 		// search for a reserved area
1134 		status_t status = find_reserved_area(addressSpace, start, size, area);
1135 		if (status == B_OK || status == B_BAD_VALUE)
1136 			return status;
1137 
1138 		// There was no reserved area, and the slot doesn't seem to be used
1139 		// already
1140 		// TODO: this could be further optimized.
1141 	}
1142 
1143 	size_t alignment = B_PAGE_SIZE;
1144 	if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) {
1145 		// align the memory to the next power of two of the size
1146 		while (alignment < size)
1147 			alignment <<= 1;
1148 	}
1149 
1150 	start = ROUNDUP(start, alignment);
1151 
1152 	// walk up to the spot where we should start searching
1153 second_chance:
1154 	next = addressSpace->areas;
1155 	while (next != NULL) {
1156 		if (next->base > start + (size - 1)) {
1157 			// we have a winner
1158 			break;
1159 		}
1160 
1161 		last = next;
1162 		next = next->address_space_next;
1163 	}
1164 
1165 	// find the right spot depending on the address specification - the area
1166 	// will be inserted directly after "last" ("next" is not referenced anymore)
1167 
1168 	switch (addressSpec) {
1169 		case B_ANY_ADDRESS:
1170 		case B_ANY_KERNEL_ADDRESS:
1171 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1172 		{
1173 			// find a hole big enough for a new area
1174 			if (last == NULL) {
1175 				// see if we can build it at the beginning of the virtual map
1176 				addr_t alignedBase = ROUNDUP(addressSpace->base, alignment);
1177 				if (is_valid_spot(addressSpace->base, alignedBase, size,
1178 						next == NULL ? end : next->base)) {
1179 					foundSpot = true;
1180 					area->base = alignedBase;
1181 					break;
1182 				}
1183 
1184 				last = next;
1185 				next = next->address_space_next;
1186 			}
1187 
1188 			// keep walking
1189 			while (next != NULL) {
1190 				addr_t alignedBase = ROUNDUP(last->base + last->size, alignment);
1191 				if (is_valid_spot(last->base + (last->size - 1), alignedBase,
1192 						size, next->base)) {
1193 					foundSpot = true;
1194 					area->base = alignedBase;
1195 					break;
1196 				}
1197 
1198 				last = next;
1199 				next = next->address_space_next;
1200 			}
1201 
1202 			if (foundSpot)
1203 				break;
1204 
1205 			addr_t alignedBase = ROUNDUP(last->base + last->size, alignment);
1206 			if (is_valid_spot(last->base + (last->size - 1), alignedBase,
1207 					size, end)) {
1208 				// got a spot
1209 				foundSpot = true;
1210 				area->base = alignedBase;
1211 				break;
1212 			} else if (area->id != RESERVED_AREA_ID) {
1213 				// We didn't find a free spot - if there are any reserved areas,
1214 				// we can now test those for free space
1215 				// TODO: it would make sense to start with the biggest of them
1216 				next = addressSpace->areas;
1217 				for (last = NULL; next != NULL;
1218 						next = next->address_space_next) {
1219 					if (next->id != RESERVED_AREA_ID) {
1220 						last = next;
1221 						continue;
1222 					}
1223 
1224 					// TODO: take free space after the reserved area into
1225 					// account!
1226 					addr_t alignedBase = ROUNDUP(next->base, alignment);
1227 					if (next->base == alignedBase && next->size == size) {
1228 						// The reserved area is entirely covered, and thus,
1229 						// removed
1230 						if (last)
1231 							last->address_space_next = next->address_space_next;
1232 						else
1233 							addressSpace->areas = next->address_space_next;
1234 
1235 						foundSpot = true;
1236 						area->base = alignedBase;
1237 						free(next);
1238 						break;
1239 					}
1240 
1241 					if ((next->protection & RESERVED_AVOID_BASE) == 0
1242 						&&  alignedBase == next->base && next->size >= size) {
1243 						// The new area will be placed at the beginning of the
1244 						// reserved area and the reserved area will be offset
1245 						// and resized
1246 						foundSpot = true;
1247 						next->base += size;
1248 						next->size -= size;
1249 						area->base = alignedBase;
1250 						break;
1251 					}
1252 
1253 					if (is_valid_spot(next->base, alignedBase, size,
1254 							next->base + (next->size - 1))) {
1255 						// The new area will be placed at the end of the
1256 						// reserved area, and the reserved area will be resized
1257 						// to make space
1258 						alignedBase = ROUNDDOWN(next->base + next->size - size,
1259 							alignment);
1260 
1261 						foundSpot = true;
1262 						next->size = alignedBase - next->base;
1263 						area->base = alignedBase;
1264 						last = next;
1265 						break;
1266 					}
1267 
1268 					last = next;
1269 				}
1270 			}
1271 			break;
1272 		}
1273 
1274 		case B_BASE_ADDRESS:
1275 		{
1276 			// find a hole big enough for a new area beginning with "start"
1277 			if (last == NULL) {
1278 				// see if we can put the area at the specified start address
1279 				if (next == NULL || next->base > start + (size - 1)) {
1280 					foundSpot = true;
1281 					area->base = start;
1282 					break;
1283 				}
1284 
1285 				last = next;
1286 				next = next->address_space_next;
1287 			}
1288 
1289 			// keep walking
1290 			while (next != NULL) {
1291 				if (next->base - (last->base + last->size) >= size) {
1292 					// we found a spot (it'll be filled up below)
1293 					break;
1294 				}
1295 
1296 				last = next;
1297 				next = next->address_space_next;
1298 			}
1299 
1300 			addr_t lastEnd = last->base + (last->size - 1);
1301 			if (next != NULL || end - lastEnd >= size) {
1302 				// got a spot
1303 				foundSpot = true;
1304 				if (lastEnd < start)
1305 					area->base = start;
1306 				else
1307 					area->base = lastEnd + 1;
1308 				break;
1309 			}
1310 
1311 			// we didn't find a free spot in the requested range, so we'll
1312 			// try again without any restrictions
1313 			start = addressSpace->base;
1314 			addressSpec = B_ANY_ADDRESS;
1315 			last = NULL;
1316 			goto second_chance;
1317 		}
1318 
1319 		case B_EXACT_ADDRESS:
1320 			// see if we can create it exactly here
1321 			if ((last == NULL || last->base + (last->size - 1) < start)
1322 				&& (next == NULL || next->base > start + (size - 1))) {
1323 				foundSpot = true;
1324 				area->base = start;
1325 				break;
1326 			}
1327 			break;
1328 		default:
1329 			return B_BAD_VALUE;
1330 	}
1331 
1332 	if (!foundSpot)
1333 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1334 
1335 	area->size = size;
1336 	if (last) {
1337 		area->address_space_next = last->address_space_next;
1338 		last->address_space_next = area;
1339 	} else {
1340 		area->address_space_next = addressSpace->areas;
1341 		addressSpace->areas = area;
1342 	}
1343 
1344 	addressSpace->change_count++;
1345 	return B_OK;
1346 }
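
// Worked example (illustrative, not in the original source): for
// B_ANY_KERNEL_BLOCK_ADDRESS the alignment is raised to the first power of
// two that is >= the requested size, so a 24 KB request (size == 0x6000)
// with B_PAGE_SIZE == 0x1000 ends up 32 KB aligned:
//
//	size_t alignment = B_PAGE_SIZE;	// 0x1000
//	while (alignment < 0x6000)
//		alignment <<= 1;			// 0x2000 -> 0x4000 -> 0x8000, then stop
//	// candidate bases are then rounded up with ROUNDUP(start, alignment)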
1347 
1348 
1349 /*!	This inserts the area you pass into the specified address space.
1350 	It will also set the "_address" argument to its base address when
1351 	the call succeeds.
1352 	You need to hold the vm_address_space write lock.
1353 */
1354 static status_t
1355 insert_area(vm_address_space* addressSpace, void** _address,
1356 	uint32 addressSpec, addr_t size, vm_area* area)
1357 {
1358 	addr_t searchBase, searchEnd;
1359 	status_t status;
1360 
1361 	switch (addressSpec) {
1362 		case B_EXACT_ADDRESS:
1363 			searchBase = (addr_t)*_address;
1364 			searchEnd = (addr_t)*_address + (size - 1);
1365 			break;
1366 
1367 		case B_BASE_ADDRESS:
1368 			searchBase = (addr_t)*_address;
1369 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1370 			break;
1371 
1372 		case B_ANY_ADDRESS:
1373 		case B_ANY_KERNEL_ADDRESS:
1374 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1375 			searchBase = addressSpace->base;
1376 			// TODO: remove this again when vm86 mode is moved into the kernel
1377 			// completely (currently needs a userland address space!)
1378 			if (searchBase == USER_BASE)
1379 				searchBase = USER_BASE_ANY;
1380 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1381 			break;
1382 
1383 		default:
1384 			return B_BAD_VALUE;
1385 	}
1386 
1387 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1388 		searchEnd, addressSpec, area);
1389 	if (status == B_OK) {
1390 		*_address = (void*)area->base;
1391 
1392 		if (addressSpace == vm_kernel_address_space())
1393 			sKernelAddressSpaceLeft -= area->size;
1394 	}
1395 
1396 	return status;
1397 }
1398 
1399 
1400 static inline void
1401 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection)
1402 {
1403 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
1404 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1405 	uint8& entry = area->page_protections[pageIndex / 2];
1406 	if (pageIndex % 2 == 0)
1407 		entry = (entry & 0xf0) | protection;
1408 	else
1409 		entry = (entry & 0x0f) | (protection << 4);
1410 }
1411 
1412 
1413 static inline uint32
1414 get_area_page_protection(vm_area* area, addr_t pageAddress)
1415 {
1416 	if (area->page_protections == NULL)
1417 		return area->protection;
1418 
1419 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1420 	uint32 protection = area->page_protections[pageIndex / 2];
1421 	if (pageIndex % 2 == 0)
1422 		protection &= 0x0f;
1423 	else
1424 		protection >>= 4;
1425 
1426 	return protection | B_KERNEL_READ_AREA
1427 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1428 }
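
// Worked example (illustrative, not in the original source): per-page
// protections are packed as two 4-bit values per byte. For page index 5 the
// relevant byte is page_protections[2], and because the index is odd the
// protection sits in the high nibble:
//
//	uint8 entry = area->page_protections[5 / 2];
//	uint32 protection = entry >> 4;		// odd index: high nibble
//	// an even index (e.g. 4) would use "entry & 0x0f" instead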
1429 
1430 
1431 /*!	Cuts a piece out of an area. If the given cut range covers the complete
1432 	area, it is deleted. If it covers the beginning or the end, the area is
1433 	resized accordingly. If the range covers some part in the middle of the
1434 	area, it is split in two; in this case the second area is returned via
1435 	\a _secondArea (the variable is left untouched in the other cases).
1436 	The address space must be write locked.
1437 */
1438 static status_t
1439 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address,
1440 	addr_t lastAddress, vm_area** _secondArea, bool kernel)
1441 {
1442 	// Does the cut range intersect with the area at all?
1443 	addr_t areaLast = area->base + (area->size - 1);
1444 	if (area->base > lastAddress || areaLast < address)
1445 		return B_OK;
1446 
1447 	// Is the area fully covered?
1448 	if (area->base >= address && areaLast <= lastAddress) {
1449 		delete_area(addressSpace, area);
1450 		return B_OK;
1451 	}
1452 
1453 	AreaCacheLocker cacheLocker(area);
1454 	vm_cache* cache = area->cache;
1455 
1456 	// Cut the end only?
1457 	if (areaLast <= lastAddress) {
1458 		addr_t newSize = address - area->base;
1459 
1460 		// unmap pages
1461 		vm_unmap_pages(area, address, area->size - newSize, false);
1462 
1463 		// If no one else uses the area's cache, we can resize it, too.
1464 		if (cache->areas == area && area->cache_next == NULL
1465 			&& list_is_empty(&cache->consumers)) {
1466 			status_t error = cache->Resize(cache->virtual_base + newSize);
1467 			if (error != B_OK)
1468 				return error;
1469 		}
1470 
1471 		area->size = newSize;
1472 
1473 		return B_OK;
1474 	}
1475 
1476 	// Cut the beginning only?
1477 	if (area->base >= address) {
1478 		addr_t newBase = lastAddress + 1;
1479 		addr_t newSize = areaLast - lastAddress;
1480 
1481 		// unmap pages
1482 		vm_unmap_pages(area, area->base, newBase - area->base, false);
1483 
1484 		// TODO: If no one else uses the area's cache, we should resize it, too!
1485 
1486 		area->cache_offset += newBase - area->base;
1487 		area->base = newBase;
1488 		area->size = newSize;
1489 
1490 		return B_OK;
1491 	}
1492 
1493 	// The tough part -- cut a piece out of the middle of the area.
1494 	// We do that by shrinking the area to the begin section and creating a
1495 	// new area for the end section.
1496 
1497 	addr_t firstNewSize = address - area->base;
1498 	addr_t secondBase = lastAddress + 1;
1499 	addr_t secondSize = areaLast - lastAddress;
1500 
1501 	// unmap pages
1502 	vm_unmap_pages(area, address, area->size - firstNewSize, false);
1503 
1504 	// resize the area
1505 	addr_t oldSize = area->size;
1506 	area->size = firstNewSize;
1507 
1508 	// TODO: If no one else uses the area's cache, we might want to create a
1509 	// new cache for the second area, transfer the concerned pages from the
1510 	// first cache to it and resize the first cache.
1511 
1512 	// map the second area
1513 	vm_area* secondArea;
1514 	void* secondBaseAddress = (void*)secondBase;
1515 	status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress,
1516 		area->cache_offset + (secondBase - area->base), secondSize,
1517 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
1518 		&secondArea, area->name, false, kernel);
1519 	if (error != B_OK) {
1520 		area->size = oldSize;
1521 		return error;
1522 	}
1523 
1524 	// We need a cache reference for the new area.
1525 	cache->AcquireRefLocked();
1526 
1527 	if (_secondArea != NULL)
1528 		*_secondArea = secondArea;
1529 
1530 	return B_OK;
1531 }
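
// Illustration (not part of the original source) of the cut_area() cases for
// an area covering [base, areaLast]:
//
//	range covers the whole area -> the area is deleted
//	range covers the tail       -> area is shrunk to [base, address - 1]
//	range covers the head       -> area becomes [lastAddress + 1, areaLast],
//	                               with cache_offset advanced accordingly
//	range lies in the middle    -> the area keeps [base, address - 1] and a
//	                               second area for [lastAddress + 1, areaLast]
//	                               is mapped onto the same cache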
1532 
1533 
1534 static inline void
1535 increment_page_wired_count(vm_page* page)
1536 {
1537 	// TODO: needs to be atomic on all platforms!
1538 	// ... but at least the check isn't. Consequently we should hold
1539 	// sMappingLock, which would allow us to even avoid atomic_add() on
1540 	// gMappedPagesCount.
1541 	if (page->wired_count++ == 0) {
1542 		if (page->mappings.IsEmpty())
1543 			atomic_add(&gMappedPagesCount, 1);
1544 	}
1545 }
1546 
1547 
1548 static inline void
1549 decrement_page_wired_count(vm_page* page)
1550 {
1551 	if (--page->wired_count == 0) {
1552 		// TODO: needs to be atomic on all platforms!
1553 		// See above!
1554 		if (page->mappings.IsEmpty())
1555 			atomic_add(&gMappedPagesCount, -1);
1556 	}
1557 }
1558 
1559 
1560 /*!	Deletes all areas in the given address range.
1561 	The address space must be write-locked.
1562 */
1563 static status_t
1564 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size,
1565 	bool kernel)
1566 {
1567 	size = PAGE_ALIGN(size);
1568 	addr_t lastAddress = address + (size - 1);
1569 
1570 	// Check whether the caller is allowed to modify the concerned areas.
1571 	vm_area* area;
1572 	if (!kernel) {
1573 		area = addressSpace->areas;
1574 		while (area != NULL) {
1575 			vm_area* nextArea = area->address_space_next;
1576 
1577 			if (area->id != RESERVED_AREA_ID) {
1578 				addr_t areaLast = area->base + (area->size - 1);
1579 				if (area->base < lastAddress && address < areaLast) {
1580 					if ((area->protection & B_KERNEL_AREA) != 0)
1581 						return B_NOT_ALLOWED;
1582 				}
1583 			}
1584 
1585 			area = nextArea;
1586 		}
1587 	}
1588 
1589 	area = addressSpace->areas;
1590 	while (area != NULL) {
1591 		vm_area* nextArea = area->address_space_next;
1592 
1593 		if (area->id != RESERVED_AREA_ID) {
1594 			addr_t areaLast = area->base + (area->size - 1);
1595 			if (area->base < lastAddress && address < areaLast) {
1596 				status_t error = cut_area(addressSpace, area, address,
1597 					lastAddress, NULL, kernel);
1598 				if (error != B_OK)
1599 					return error;
1600 					// Failing after already messing with areas is ugly, but we
1601 					// can't do anything about it.
1602 			}
1603 		}
1604 
1605 		area = nextArea;
1606 	}
1607 
1608 	return B_OK;
1609 }
1610 
1611 
1612 /*! You need to hold the lock of the cache and the write lock of the address
1613 	space when calling this function.
1614 	Note that in case of error your cache will be temporarily unlocked.
1615 */
1616 static status_t
1617 map_backing_store(vm_address_space* addressSpace, vm_cache* cache,
1618 	void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1619 	int wiring, int protection, int mapping, vm_area** _area,
1620 	const char* areaName, bool unmapAddressRange, bool kernel)
1621 {
1622 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, "
1623 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
1624 		"'%s'\n", addressSpace, cache, *_virtualAddress, offset, size,
1625 		addressSpec, wiring, protection, _area, areaName));
1626 	cache->AssertLocked();
1627 
1628 	vm_area* area = create_area_struct(addressSpace, areaName, wiring,
1629 		protection);
1630 	if (area == NULL)
1631 		return B_NO_MEMORY;
1632 
1633 	status_t status;
1634 
1635 	// if this is a private map, we need to create a new cache
1636 	// to handle the private copies of pages as they are written to
1637 	vm_cache* sourceCache = cache;
1638 	if (mapping == REGION_PRIVATE_MAP) {
1639 		vm_cache* newCache;
1640 
1641 		// create an anonymous cache
1642 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1643 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true);
1644 		if (status != B_OK)
1645 			goto err1;
1646 
1647 		newCache->Lock();
1648 		newCache->temporary = 1;
1649 		newCache->scan_skip = cache->scan_skip;
1650 		newCache->virtual_base = offset;
1651 		newCache->virtual_end = offset + size;
1652 
1653 		cache->AddConsumer(newCache);
1654 
1655 		cache = newCache;
1656 	}
1657 
1658 	status = cache->SetMinimalCommitment(size);
1659 	if (status != B_OK)
1660 		goto err2;
1661 
1662 	// check to see if this address space has entered DELETE state
1663 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1664 		// okay, someone is trying to delete this address space now, so we can't
1665 		// insert the area; back out
1666 		status = B_BAD_TEAM_ID;
1667 		goto err2;
1668 	}
1669 
1670 	if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) {
1671 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
1672 			size, kernel);
1673 		if (status != B_OK)
1674 			goto err2;
1675 	}
1676 
1677 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1678 	if (status != B_OK) {
1679 		// TODO: wait and try again once this is working in the backend
1680 #if 0
1681 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
1682 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
1683 				0, 0);
1684 		}
1685 #endif
1686 		goto err2;
1687 	}
1688 
1689 	// attach the cache to the area
1690 	area->cache = cache;
1691 	area->cache_offset = offset;
1692 
1693 	// point the cache back to the area
1694 	cache->InsertAreaLocked(area);
1695 	if (mapping == REGION_PRIVATE_MAP)
1696 		cache->Unlock();
1697 
1698 	// insert the area in the global area hash table
1699 	rw_lock_write_lock(&sAreaHashLock);
1700 	hash_insert(sAreaHash, area);
1701 	rw_lock_write_unlock(&sAreaHashLock);
1702 
1703 	// grab a ref to the address space (the area holds this)
1704 	atomic_add(&addressSpace->ref_count, 1);
1705 
1706 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1707 //		cache, sourceCache, areaName, area);
1708 
1709 	*_area = area;
1710 	return B_OK;
1711 
1712 err2:
1713 	if (mapping == REGION_PRIVATE_MAP) {
1714 		// We created this cache, so we must delete it again. Note that we
1715 		// need to temporarily unlock the source cache or we'll otherwise
1716 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1717 		sourceCache->Unlock();
1718 		cache->ReleaseRefAndUnlock();
1719 		sourceCache->Lock();
1720 	}
1721 err1:
1722 	free(area->name);
1723 	free(area);
1724 	return status;
1725 }
1726 
1727 
1728 status_t
1729 vm_block_address_range(const char* name, void* address, addr_t size)
1730 {
1731 	if (!arch_vm_supports_protection(0))
1732 		return B_NOT_SUPPORTED;
1733 
1734 	AddressSpaceWriteLocker locker;
1735 	status_t status = locker.SetTo(vm_kernel_address_space_id());
1736 	if (status != B_OK)
1737 		return status;
1738 
1739 	vm_address_space* addressSpace = locker.AddressSpace();
1740 
1741 	// create an anonymous cache
1742 	vm_cache* cache;
1743 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false);
1744 	if (status != B_OK)
1745 		return status;
1746 
1747 	cache->temporary = 1;
1748 	cache->virtual_end = size;
1749 	cache->scan_skip = 1;
1750 	cache->Lock();
1751 
1752 	vm_area* area;
1753 	void* areaAddress = address;
1754 	status = map_backing_store(addressSpace, cache, &areaAddress, 0, size,
1755 		B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name,
1756 		false, true);
1757 	if (status != B_OK) {
1758 		cache->ReleaseRefAndUnlock();
1759 		return status;
1760 	}
1761 
1762 	cache->Unlock();
1763 	area->cache_type = CACHE_TYPE_RAM;
1764 	return area->id;
1765 }
1766 
1767 
1768 status_t
1769 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1770 {
1771 	AddressSpaceWriteLocker locker(team);
1772 	if (!locker.IsLocked())
1773 		return B_BAD_TEAM_ID;
1774 
1775 	// check to see if this address space has entered DELETE state
1776 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1777 		// okay, someone is trying to delete this address space now, so we can't
1778 		// insert the area; back out
1779 		return B_BAD_TEAM_ID;
1780 	}
1781 
1782 	// search area list and remove any matching reserved ranges
1783 
1784 	vm_area* area = locker.AddressSpace()->areas;
1785 	vm_area* last = NULL;
1786 	while (area) {
1787 		// the area must be completely part of the reserved range
1788 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1789 			&& area->base + area->size <= (addr_t)address + size) {
1790 			// remove reserved range
1791 			vm_area* reserved = area;
1792 			if (last)
1793 				last->address_space_next = reserved->address_space_next;
1794 			else
1795 				locker.AddressSpace()->areas = reserved->address_space_next;
1796 
1797 			area = reserved->address_space_next;
1798 			vm_put_address_space(locker.AddressSpace());
1799 			free(reserved);
1800 			continue;
1801 		}
1802 
1803 		last = area;
1804 		area = area->address_space_next;
1805 	}
1806 
1807 	return B_OK;
1808 }
1809 
1810 
1811 status_t
1812 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1813 	addr_t size, uint32 flags)
1814 {
1815 	if (size == 0)
1816 		return B_BAD_VALUE;
1817 
1818 	AddressSpaceWriteLocker locker(team);
1819 	if (!locker.IsLocked())
1820 		return B_BAD_TEAM_ID;
1821 
1822 	// check to see if this address space has entered DELETE state
1823 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1824 		// okay, someone is trying to delete this address space now, so we
1825 		// can't insert the area, let's back out
1826 		return B_BAD_TEAM_ID;
1827 	}
1828 
1829 	vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags);
1830 	if (area == NULL)
1831 		return B_NO_MEMORY;
1832 
1833 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1834 		size, area);
1835 	if (status != B_OK) {
1836 		free(area);
1837 		return status;
1838 	}
1839 
1840 	// the area is now reserved!
1841 
1842 	area->cache_offset = area->base;
1843 		// we cache the original base address here
1844 
1845 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1846 	return B_OK;
1847 }
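
// Illustrative usage sketch (not part of the original source; "team" and the
// base address are placeholders): a reservation keeps a range free so that a
// later B_EXACT_ADDRESS area can be placed into it (see find_reserved_area());
// it is released again with vm_unreserve_address_range().
//
//	void* base = (void*)0x60000000;
//	status_t status = vm_reserve_address_range(team, &base, B_EXACT_ADDRESS,
//		16 * B_PAGE_SIZE, RESERVED_AVOID_BASE);
//	if (status == B_OK) {
//		// ... create areas within the reserved range ...
//		vm_unreserve_address_range(team, base, 16 * B_PAGE_SIZE);
//	}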
1848 
1849 
1850 area_id
1851 vm_create_anonymous_area(team_id team, const char* name, void** address,
1852 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
1853 	addr_t physicalAddress, uint32 flags, bool kernel)
1854 {
1855 	vm_area* area;
1856 	vm_cache* cache;
1857 	vm_page* page = NULL;
1858 	bool isStack = (protection & B_STACK_AREA) != 0;
1859 	page_num_t guardPages;
1860 	bool canOvercommit = false;
1861 	uint32 newPageState = (flags & CREATE_AREA_DONT_CLEAR) != 0
1862 		? PAGE_STATE_FREE : PAGE_STATE_CLEAR;
1863 
1864 	TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size));
1865 
1866 	size = PAGE_ALIGN(size);
1867 
1868 	if (size == 0)
1869 		return B_BAD_VALUE;
1870 	if (!arch_vm_supports_protection(protection))
1871 		return B_NOT_SUPPORTED;
1872 
1873 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1874 		canOvercommit = true;
1875 
1876 #ifdef DEBUG_KERNEL_STACKS
1877 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1878 		isStack = true;
1879 #endif
1880 
1881 	// check parameters
1882 	switch (addressSpec) {
1883 		case B_ANY_ADDRESS:
1884 		case B_EXACT_ADDRESS:
1885 		case B_BASE_ADDRESS:
1886 		case B_ANY_KERNEL_ADDRESS:
1887 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1888 			break;
1889 		case B_PHYSICAL_BASE_ADDRESS:
1890 			physicalAddress = (addr_t)*address;
1891 			addressSpec = B_ANY_KERNEL_ADDRESS;
1892 			break;
1893 
1894 		default:
1895 			return B_BAD_VALUE;
1896 	}
1897 
1898 	if (physicalAddress != 0)
1899 		wiring = B_CONTIGUOUS;
1900 
1901 	bool doReserveMemory = false;
1902 	switch (wiring) {
1903 		case B_NO_LOCK:
1904 			break;
1905 		case B_FULL_LOCK:
1906 		case B_LAZY_LOCK:
1907 		case B_CONTIGUOUS:
1908 			doReserveMemory = true;
1909 			break;
1910 		case B_ALREADY_WIRED:
1911 			break;
1912 		case B_LOMEM:
1913 		//case B_SLOWMEM:
1914 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1915 			wiring = B_FULL_LOCK;
1916 			doReserveMemory = true;
1917 			break;
1918 		default:
1919 			return B_BAD_VALUE;
1920 	}
1921 
1922 	// For full lock or contiguous areas we're also going to map the pages and
1923 	// thus need to reserve pages for the mapping backend upfront.
1924 	addr_t reservedMapPages = 0;
1925 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1926 		AddressSpaceWriteLocker locker;
1927 		status_t status = locker.SetTo(team);
1928 		if (status != B_OK)
1929 			return status;
1930 
1931 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
1932 		reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
1933 	}
1934 
1935 	// Reserve memory before acquiring the address space lock. This reduces the
1936 	// chances of failure, since while holding the write lock to the address
1937 	// space (if it is the kernel address space, that is), the low memory handler
1938 	// won't be able to free anything for us.
1939 	addr_t reservedMemory = 0;
1940 	if (doReserveMemory) {
1941 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1942 		if (vm_try_reserve_memory(size, timeout) != B_OK)
1943 			return B_NO_MEMORY;
1944 		reservedMemory = size;
1945 		// TODO: We don't reserve the memory for the pages for the page
1946 		// directories/tables. We actually need to do since we currently don't
1947 		// directories/tables. We actually need to, since we currently don't
1948 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1949 		// there are actually fewer physical pages than there should be, which
1950 	}
1951 
1952 	AddressSpaceWriteLocker locker;
1953 	vm_address_space* addressSpace;
1954 	status_t status;
1955 
1956 	// For full lock areas reserve the pages before locking the address
1957 	// space. E.g. block caches can't release their memory while we hold the
1958 	// address space lock.
1959 	page_num_t reservedPages = reservedMapPages;
1960 	if (wiring == B_FULL_LOCK)
1961 		reservedPages += size / B_PAGE_SIZE;
1962 	if (reservedPages > 0) {
1963 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1964 			if (!vm_page_try_reserve_pages(reservedPages)) {
1965 				reservedPages = 0;
1966 				status = B_WOULD_BLOCK;
1967 				goto err0;
1968 			}
1969 		} else
1970 			vm_page_reserve_pages(reservedPages);
1971 	}
1972 
1973 	status = locker.SetTo(team);
1974 	if (status != B_OK)
1975 		goto err0;
1976 
1977 	addressSpace = locker.AddressSpace();
1978 
1979 	if (wiring == B_CONTIGUOUS) {
1980 		// we try to allocate the page run here upfront as this may easily
1981 		// fail for obvious reasons
1982 		page = vm_page_allocate_page_run(newPageState, physicalAddress,
1983 			size / B_PAGE_SIZE);
1984 		if (page == NULL) {
1985 			status = B_NO_MEMORY;
1986 			goto err0;
1987 		}
1988 	}
1989 
1990 	// create an anonymous cache
1991 	// if it's a stack, make sure that at least two pages are available
1992 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1993 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1994 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1995 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1996 		wiring == B_NO_LOCK);
1997 	if (status != B_OK)
1998 		goto err1;
1999 
2000 	cache->temporary = 1;
2001 	cache->virtual_end = size;
2002 	cache->committed_size = reservedMemory;
2003 		// TODO: This should be done via a method.
2004 	reservedMemory = 0;
2005 
2006 	switch (wiring) {
2007 		case B_LAZY_LOCK:
2008 		case B_FULL_LOCK:
2009 		case B_CONTIGUOUS:
2010 		case B_ALREADY_WIRED:
2011 			cache->scan_skip = 1;
2012 			break;
2013 		case B_NO_LOCK:
2014 			cache->scan_skip = 0;
2015 			break;
2016 	}
2017 
2018 	cache->Lock();
2019 
2020 	status = map_backing_store(addressSpace, cache, address, 0, size,
2021 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
2022 		(flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel);
2023 
2024 	if (status != B_OK) {
2025 		cache->ReleaseRefAndUnlock();
2026 		goto err1;
2027 	}
2028 
2029 	locker.DegradeToReadLock();
2030 
2031 	switch (wiring) {
2032 		case B_NO_LOCK:
2033 		case B_LAZY_LOCK:
2034 			// do nothing - the pages are mapped in as needed
2035 			break;
2036 
2037 		case B_FULL_LOCK:
2038 		{
2039 			// Allocate and map all pages for this area
2040 
2041 			off_t offset = 0;
2042 			for (addr_t address = area->base;
2043 					address < area->base + (area->size - 1);
2044 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
2045 #ifdef DEBUG_KERNEL_STACKS
2046 #	ifdef STACK_GROWS_DOWNWARDS
2047 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
2048 						* B_PAGE_SIZE)
2049 #	else
2050 				if (isStack && address >= area->base + area->size
2051 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
2052 #	endif
2053 					continue;
2054 #endif
2055 				vm_page* page = vm_page_allocate_page(newPageState, true);
2056 				cache->InsertPage(page, offset);
2057 				vm_map_page(area, page, address, protection);
2058 
2059 				// Periodically unreserve pages we've already allocated, so that
2060 				// we don't unnecessarily increase the pressure on the VM.
2061 				if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) {
2062 					page_num_t toUnreserve = 128;
2063 					vm_page_unreserve_pages(toUnreserve);
2064 					reservedPages -= toUnreserve;
2065 				}
2066 			}
2067 
2068 			break;
2069 		}
2070 
2071 		case B_ALREADY_WIRED:
2072 		{
2073 			// The pages should already be mapped. This is only really useful
2074 			// during boot time. Find the appropriate vm_page objects and stick
2075 			// them in the cache object.
2076 			vm_translation_map* map = &addressSpace->translation_map;
2077 			off_t offset = 0;
2078 
2079 			if (!gKernelStartup)
2080 				panic("ALREADY_WIRED flag used outside kernel startup\n");
2081 
2082 			map->ops->lock(map);
2083 
2084 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
2085 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2086 					offset += B_PAGE_SIZE) {
2087 				addr_t physicalAddress;
2088 				uint32 flags;
2089 				status = map->ops->query(map, virtualAddress,
2090 					&physicalAddress, &flags);
2091 				if (status < B_OK) {
2092 					panic("looking up mapping failed for va 0x%lx\n",
2093 						virtualAddress);
2094 				}
2095 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2096 				if (page == NULL) {
2097 					panic("looking up page failed for pa 0x%lx\n",
2098 						physicalAddress);
2099 				}
2100 
2101 				increment_page_wired_count(page);
2102 				vm_page_set_state(page, PAGE_STATE_WIRED);
2103 				cache->InsertPage(page, offset);
2104 			}
2105 
2106 			map->ops->unlock(map);
2107 			break;
2108 		}
2109 
2110 		case B_CONTIGUOUS:
2111 		{
2112 			// We have already allocated our contiguous page run, so we can
2113 			// now just map it into the address space
2114 			vm_translation_map* map = &addressSpace->translation_map;
2115 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
2116 			addr_t virtualAddress = area->base;
2117 			off_t offset = 0;
2118 
2119 			map->ops->lock(map);
2120 
2121 			for (virtualAddress = area->base; virtualAddress < area->base
2122 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2123 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
2124 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2125 				if (page == NULL)
2126 					panic("couldn't lookup physical page just allocated\n");
2127 
2128 				status = map->ops->map(map, virtualAddress, physicalAddress,
2129 					protection);
2130 				if (status < B_OK)
2131 					panic("couldn't map physical page in page run\n");
2132 
2133 				increment_page_wired_count(page);
2134 				vm_page_set_state(page, PAGE_STATE_WIRED);
2135 				cache->InsertPage(page, offset);
2136 			}
2137 
2138 			map->ops->unlock(map);
2139 			break;
2140 		}
2141 
2142 		default:
2143 			break;
2144 	}
2145 
2146 	cache->Unlock();
2147 
2148 	if (reservedPages > 0)
2149 		vm_page_unreserve_pages(reservedPages);
2150 
2151 	TRACE(("vm_create_anonymous_area: done\n"));
2152 
2153 	area->cache_type = CACHE_TYPE_RAM;
2154 	return area->id;
2155 
2156 err1:
2157 	if (wiring == B_CONTIGUOUS) {
2158 		// we had allocated the contiguous page run upfront; free it again
2159 		addr_t pageNumber = page->physical_page_number;
2160 		int32 i;
2161 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
2162 			page = vm_lookup_page(pageNumber);
2163 			if (page == NULL)
2164 				panic("couldn't lookup physical page just allocated\n");
2165 
2166 			vm_page_set_state(page, PAGE_STATE_FREE);
2167 		}
2168 	}
2169 
2170 err0:
2171 	if (reservedPages > 0)
2172 		vm_page_unreserve_pages(reservedPages);
2173 	if (reservedMemory > 0)
2174 		vm_unreserve_memory(reservedMemory);
2175 
2176 	return status;
2177 }
2178 
2179 
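/*!	Maps the given range of physical memory into the specified team's address
	space as a wired, device-backed area. The physical address does not have
	to be page aligned; the address returned in \a _address is adjusted by
	the same in-page offset. A B_MTR_* memory type can be ORed into
	\a addressSpec to request a specific caching mode.

	Usage sketch (a hypothetical driver mapping one page of MMIO registers;
	the physical address is made up for illustration):

		void* registers;
		area_id area = vm_map_physical_memory(vm_kernel_address_space_id(),
			"mmio registers", &registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfee00000);
		// a negative value indicates failure; on success "registers" points
		// to the mapped register block
*/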
2180 area_id
2181 vm_map_physical_memory(team_id team, const char* name, void** _address,
2182 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
2183 {
2184 	vm_area* area;
2185 	vm_cache* cache;
2186 	addr_t mapOffset;
2187 
2188 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
2189 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
2190 		name, _address, addressSpec, size, protection, physicalAddress));
2191 
2192 	if (!arch_vm_supports_protection(protection))
2193 		return B_NOT_SUPPORTED;
2194 
2195 	AddressSpaceWriteLocker locker(team);
2196 	if (!locker.IsLocked())
2197 		return B_BAD_TEAM_ID;
2198 
2199 	// if the physical address is not page aligned, move the actual area
2200 	// down to align on a page boundary
2201 	mapOffset = physicalAddress % B_PAGE_SIZE;
2202 	size += mapOffset;
2203 	physicalAddress -= mapOffset;
2204 
2205 	size = PAGE_ALIGN(size);
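	// For instance, a (made-up) request for physicalAddress 0x12345 with
	// size 0x1000 ends up with mapOffset 0x345, physicalAddress 0x12000 and
	// an aligned size of 0x2000 (assuming 4 KB pages).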
2206 
2207 	// create a device cache
2208 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
2209 	if (status != B_OK)
2210 		return status;
2211 
2212 	// tell the page scanner to skip over this area, its pages are special
2213 	cache->scan_skip = 1;
2214 	cache->virtual_end = size;
2215 
2216 	cache->Lock();
2217 
2218 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2219 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2220 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2221 
2222 	if (status < B_OK)
2223 		cache->ReleaseRefLocked();
2224 
2225 	cache->Unlock();
2226 
2227 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
2228 		// set requested memory type
2229 		status = arch_vm_set_memory_type(area, physicalAddress,
2230 			addressSpec & B_MTR_MASK);
2231 		if (status < B_OK)
2232 			delete_area(locker.AddressSpace(), area);
2233 	}
2234 
2235 	if (status >= B_OK) {
2236 		// make sure our area is mapped in completely
2237 
2238 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2239 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2240 			area->base + (size - 1));
2241 
2242 		vm_page_reserve_pages(reservePages);
2243 		map->ops->lock(map);
2244 
2245 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2246 			map->ops->map(map, area->base + offset, physicalAddress + offset,
2247 				protection);
2248 		}
2249 
2250 		map->ops->unlock(map);
2251 		vm_page_unreserve_pages(reservePages);
2252 	}
2253 
2254 	if (status < B_OK)
2255 		return status;
2256 
2257 	// modify the pointer returned to be offset back into the new area
2258 	// the same way the physical address in was offset
2259 	*_address = (void*)((addr_t)*_address + mapOffset);
2260 
2261 	area->cache_type = CACHE_TYPE_DEVICE;
2262 	return area->id;
2263 }
2264 
2265 
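/*!	Like vm_map_physical_memory(), but maps a list of page aligned physical
	ranges (given as \a vecs) back to back into one virtually contiguous
	area. On success the total mapped size is stored in \a _size, if given.
*/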
2266 area_id
2267 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2268 	uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs,
2269 	uint32 vecCount)
2270 {
2271 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
2272 		"spec = %ld, protection = %ld, vecCount = %ld)\n", team, name,
2273 		_address, addressSpec, protection, vecCount));
2274 
2275 	if (!arch_vm_supports_protection(protection)
2276 		|| (addressSpec & B_MTR_MASK) != 0) {
2277 		return B_NOT_SUPPORTED;
2278 	}
2279 
2280 	AddressSpaceWriteLocker locker(team);
2281 	if (!locker.IsLocked())
2282 		return B_BAD_TEAM_ID;
2283 
2284 	if (vecCount == 0)
2285 		return B_BAD_VALUE;
2286 
2287 	addr_t size = 0;
2288 	for (uint32 i = 0; i < vecCount; i++) {
2289 		if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0
2290 			|| vecs[i].iov_len % B_PAGE_SIZE != 0) {
2291 			return B_BAD_VALUE;
2292 		}
2293 
2294 		size += vecs[i].iov_len;
2295 	}
2296 
2297 	// create a device cache
2298 	vm_cache* cache;
2299 	status_t result = VMCacheFactory::CreateDeviceCache(cache,
2300 		(addr_t)vecs[0].iov_base);
2301 	if (result != B_OK)
2302 		return result;
2303 
2304 	// tell the page scanner to skip over this area, its pages are special
2305 	cache->scan_skip = 1;
2306 	cache->virtual_end = size;
2307 
2308 	cache->Lock();
2309 
2310 	vm_area* area;
2311 	result = map_backing_store(locker.AddressSpace(), cache, _address,
2312 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2313 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2314 
2315 	if (result != B_OK)
2316 		cache->ReleaseRefLocked();
2317 
2318 	cache->Unlock();
2319 
2320 	if (result != B_OK)
2321 		return result;
2322 
2323 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
2324 	size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2325 		area->base + (size - 1));
2326 
2327 	vm_page_reserve_pages(reservePages);
2328 	map->ops->lock(map);
2329 
2330 	uint32 vecIndex = 0;
2331 	size_t vecOffset = 0;
2332 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2333 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].iov_len) {
2334 			vecOffset = 0;
2335 			vecIndex++;
2336 		}
2337 
2338 		if (vecIndex >= vecCount)
2339 			break;
2340 
2341 		map->ops->map(map, area->base + offset,
2342 			(addr_t)vecs[vecIndex].iov_base + vecOffset, protection);
2343 
2344 		vecOffset += B_PAGE_SIZE;
2345 	}
2346 
2347 	map->ops->unlock(map);
2348 	vm_page_unreserve_pages(reservePages);
2349 
2350 	if (_size != NULL)
2351 		*_size = size;
2352 
2353 	area->cache_type = CACHE_TYPE_DEVICE;
2354 	return area->id;
2355 }
2356 
2357 
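/*!	Creates an area backed by a null cache. No pages are ever mapped into
	such an area; it merely reserves the covered range of the team's address
	space.
*/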
2358 area_id
2359 vm_create_null_area(team_id team, const char* name, void** address,
2360 	uint32 addressSpec, addr_t size)
2361 {
2362 	vm_area* area;
2363 	vm_cache* cache;
2364 	status_t status;
2365 
2366 	AddressSpaceWriteLocker locker(team);
2367 	if (!locker.IsLocked())
2368 		return B_BAD_TEAM_ID;
2369 
2370 	size = PAGE_ALIGN(size);
2371 
2372 	// create a null cache
2373 	status = VMCacheFactory::CreateNullCache(cache);
2374 	if (status != B_OK)
2375 		return status;
2376 
2377 	// tell the page scanner to skip over this area, no pages will be mapped here
2378 	cache->scan_skip = 1;
2379 	cache->virtual_end = size;
2380 
2381 	cache->Lock();
2382 
2383 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
2384 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name,
2385 		false, true);
2386 
2387 	if (status < B_OK) {
2388 		cache->ReleaseRefAndUnlock();
2389 		return status;
2390 	}
2391 
2392 	cache->Unlock();
2393 
2394 	area->cache_type = CACHE_TYPE_NULL;
2395 	return area->id;
2396 }
2397 
2398 
2399 /*!	Creates the vnode cache for the specified \a vnode.
2400 	The vnode has to be marked busy when calling this function.
2401 */
2402 status_t
2403 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2404 {
2405 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2406 }
2407 
2408 
2409 /*!	\a cache must be locked. The area's address space must be read-locked.
2410 */
2411 static void
2412 pre_map_area_pages(vm_area* area, VMCache* cache)
2413 {
2414 	addr_t baseAddress = area->base;
2415 	addr_t cacheOffset = area->cache_offset;
2416 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2417 	page_num_t endPage = firstPage + area->size / B_PAGE_SIZE;
2418 
2419 	for (VMCachePagesTree::Iterator it
2420 				= cache->pages.GetIterator(firstPage, true, true);
2421 			vm_page* page = it.Next();) {
2422 		if (page->cache_offset >= endPage)
2423 			break;
2424 
2425 		// skip busy and inactive pages
2426 		if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0)
2427 			continue;
2428 
2429 		vm_map_page(area, page,
2430 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2431 			B_READ_AREA | B_KERNEL_READ_AREA);
2432 	}
2433 }
2434 
2435 
2436 /*!	Will map the file specified by \a fd to an area in memory.
2437 	The file will be mirrored beginning at the specified \a offset. The
2438 	\a offset and \a size arguments have to be page aligned.
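	If \a fd is negative, no file is mapped at all; the function then falls
	back to creating a plain anonymous area (see the fd < 0 branch below).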
2439 */
2440 static area_id
2441 _vm_map_file(team_id team, const char* name, void** _address,
2442 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2443 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2444 {
2445 	// TODO: for binary files, we want to make sure that they map the
2446 	//	file's contents as they were at a given point in time, i.e. later
2447 	//	changes should not make it into the mapped copy -- this will need
2448 	//	quite some changes to be done in a nice way
2449 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
2450 		fd, offset, size, mapping));
2451 
2452 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2453 	size = PAGE_ALIGN(size);
2454 
2455 	if (mapping == REGION_NO_PRIVATE_MAP)
2456 		protection |= B_SHARED_AREA;
2457 	if (addressSpec != B_EXACT_ADDRESS)
2458 		unmapAddressRange = false;
2459 
2460 	if (fd < 0) {
2461 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2462 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
2463 			B_NO_LOCK, protection, 0, flags, kernel);
2464 	}
2465 
2466 	// get the open flags of the FD
2467 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2468 	if (descriptor == NULL)
2469 		return EBADF;
2470 	int32 openMode = descriptor->open_mode;
2471 	put_fd(descriptor);
2472 
2473 	// The FD must be open for reading in any case. For a shared mapping
2474 	// with write access, the FD must additionally be open for writing.
2475 	if ((openMode & O_ACCMODE) == O_WRONLY
2476 		|| (mapping == REGION_NO_PRIVATE_MAP
2477 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2478 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2479 		return EACCES;
2480 	}
2481 
2482 	// get the vnode for the object, this also grabs a ref to it
2483 	struct vnode* vnode = NULL;
2484 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2485 	if (status < B_OK)
2486 		return status;
2487 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2488 
2489 	// If we're going to pre-map pages, we need to reserve the pages needed by
2490 	// the mapping backend upfront.
2491 	page_num_t reservedPreMapPages = 0;
2492 	if ((protection & B_READ_AREA) != 0) {
2493 		AddressSpaceWriteLocker locker;
2494 		status = locker.SetTo(team);
2495 		if (status != B_OK)
2496 			return status;
2497 
2498 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2499 		reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
2500 
2501 		locker.Unlock();
2502 
2503 		vm_page_reserve_pages(reservedPreMapPages);
2504 	}
2505 
2506 	struct PageUnreserver {
2507 		PageUnreserver(page_num_t count)
2508 			: fCount(count)
2509 		{
2510 		}
2511 
2512 		~PageUnreserver()
2513 		{
2514 			if (fCount > 0)
2515 				vm_page_unreserve_pages(fCount);
2516 		}
2517 
2518 		page_num_t	fCount;
2519 	} pageUnreserver(reservedPreMapPages);
2520 
2521 	AddressSpaceWriteLocker locker(team);
2522 	if (!locker.IsLocked())
2523 		return B_BAD_TEAM_ID;
2524 
2525 	// TODO: this only works for file systems that use the file cache
2526 	vm_cache* cache;
2527 	status = vfs_get_vnode_cache(vnode, &cache, false);
2528 	if (status < B_OK)
2529 		return status;
2530 
2531 	cache->Lock();
2532 
2533 	vm_area* area;
2534 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2535 		offset, size, addressSpec, 0, protection, mapping, &area, name,
2536 		unmapAddressRange, kernel);
2537 
2538 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2539 		// map_backing_store() cannot know we no longer need the ref
2540 		cache->ReleaseRefLocked();
2541 	}
2542 
2543 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2544 		pre_map_area_pages(area, cache);
2545 
2546 	cache->Unlock();
2547 
2548 	if (status == B_OK) {
2549 		// TODO: this probably deserves a smarter solution, ie. don't always
2550 		// prefetch stuff, and also, probably don't trigger it at this place.
2551 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2552 			// prefetches at max 10 MB starting from "offset"
2553 	}
2554 
2555 	if (status != B_OK)
2556 		return status;
2557 
2558 	area->cache_type = CACHE_TYPE_VNODE;
2559 	return area->id;
2560 }
2561 
2562 
2563 area_id
2564 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2565 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2566 	int fd, off_t offset)
2567 {
2568 	if (!arch_vm_supports_protection(protection))
2569 		return B_NOT_SUPPORTED;
2570 
2571 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2572 		mapping, unmapAddressRange, fd, offset, true);
2573 }
2574 
2575 
2576 vm_cache*
2577 vm_area_get_locked_cache(vm_area* area)
2578 {
2579 	mutex_lock(&sAreaCacheLock);
2580 
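	// Note: SwitchLock() gives up sAreaCacheLock in favor of the cache's own
	// lock and fails if the cache has been deleted in the meantime. Since
	// the area's cache may also be replaced while we hold neither lock, we
	// re-acquire sAreaCacheLock and re-check area->cache below, retrying
	// until it is stable under both locks.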
2581 	while (true) {
2582 		vm_cache* cache = area->cache;
2583 
2584 		if (!cache->SwitchLock(&sAreaCacheLock)) {
2585 			// cache has been deleted
2586 			mutex_lock(&sAreaCacheLock);
2587 			continue;
2588 		}
2589 
2590 		mutex_lock(&sAreaCacheLock);
2591 
2592 		if (cache == area->cache) {
2593 			cache->AcquireRefLocked();
2594 			mutex_unlock(&sAreaCacheLock);
2595 			return cache;
2596 		}
2597 
2598 		// the cache changed in the meantime
2599 		cache->Unlock();
2600 	}
2601 }
2602 
2603 
2604 void
2605 vm_area_put_locked_cache(vm_cache* cache)
2606 {
2607 	cache->ReleaseRefAndUnlock();
2608 }
2609 
2610 
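/*!	Clones the area identified by \a sourceID into the address space of
	\a team, sharing the source area's cache. Both the source and the newly
	created area are marked B_SHARED_AREA, so that neither takes part in
	copy-on-write.
*/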
2611 area_id
2612 vm_clone_area(team_id team, const char* name, void** address,
2613 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2614 	bool kernel)
2615 {
2616 	vm_area* newArea = NULL;
2617 	vm_area* sourceArea;
2618 
2619 	// Check whether the source area exists and is cloneable. If so, mark it
2620 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2621 	{
2622 		AddressSpaceWriteLocker locker;
2623 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2624 		if (status != B_OK)
2625 			return status;
2626 
2627 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2628 			return B_NOT_ALLOWED;
2629 
2630 		sourceArea->protection |= B_SHARED_AREA;
2631 		protection |= B_SHARED_AREA;
2632 	}
2633 
2634 	// Now lock both address spaces and actually do the cloning.
2635 
2636 	MultiAddressSpaceLocker locker;
2637 	vm_address_space* sourceAddressSpace;
2638 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2639 	if (status != B_OK)
2640 		return status;
2641 
2642 	vm_address_space* targetAddressSpace;
2643 	status = locker.AddTeam(team, true, &targetAddressSpace);
2644 	if (status != B_OK)
2645 		return status;
2646 
2647 	status = locker.Lock();
2648 	if (status != B_OK)
2649 		return status;
2650 
2651 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2652 	if (sourceArea == NULL)
2653 		return B_BAD_VALUE;
2654 
2655 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2656 		return B_NOT_ALLOWED;
2657 
2658 	vm_cache* cache = vm_area_get_locked_cache(sourceArea);
2659 
2660 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2661 	//	have been adapted. Maybe it should be part of the kernel settings,
2662 	//	anyway (so that old drivers can always work).
2663 #if 0
2664 	if (sourceArea->aspace == vm_kernel_address_space()
2665 		&& addressSpace != vm_kernel_address_space()
2666 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2667 		// kernel areas must not be cloned in userland, unless explicitly
2668 		// declared user-cloneable upon construction
2669 		status = B_NOT_ALLOWED;
2670 	} else
2671 #endif
2672 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2673 		status = B_NOT_ALLOWED;
2674 	else {
2675 		status = map_backing_store(targetAddressSpace, cache, address,
2676 			sourceArea->cache_offset, sourceArea->size, addressSpec,
2677 			sourceArea->wiring, protection, mapping, &newArea, name, false,
2678 			kernel);
2679 	}
2680 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2681 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2682 		// to create a new cache, and has therefore already acquired a reference
2683 		// to the source cache - but otherwise it has no idea that we need
2684 		// one.
2685 		cache->AcquireRefLocked();
2686 	}
2687 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2688 		// we need to map in everything at this point
2689 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2690 			// we don't have actual pages to map but a physical area
2691 			vm_translation_map* map
2692 				= &sourceArea->address_space->translation_map;
2693 			map->ops->lock(map);
2694 
2695 			addr_t physicalAddress;
2696 			uint32 oldProtection;
2697 			map->ops->query(map, sourceArea->base, &physicalAddress,
2698 				&oldProtection);
2699 
2700 			map->ops->unlock(map);
2701 
2702 			map = &targetAddressSpace->translation_map;
2703 			size_t reservePages = map->ops->map_max_pages_need(map,
2704 				newArea->base, newArea->base + (newArea->size - 1));
2705 
2706 			vm_page_reserve_pages(reservePages);
2707 			map->ops->lock(map);
2708 
2709 			for (addr_t offset = 0; offset < newArea->size;
2710 					offset += B_PAGE_SIZE) {
2711 				map->ops->map(map, newArea->base + offset,
2712 					physicalAddress + offset, protection);
2713 			}
2714 
2715 			map->ops->unlock(map);
2716 			vm_page_unreserve_pages(reservePages);
2717 		} else {
2718 			vm_translation_map* map = &targetAddressSpace->translation_map;
2719 			size_t reservePages = map->ops->map_max_pages_need(map,
2720 				newArea->base, newArea->base + (newArea->size - 1));
2721 			vm_page_reserve_pages(reservePages);
2722 
2723 			// map in all pages from source
2724 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2725 					vm_page* page  = it.Next();) {
2726 				vm_map_page(newArea, page, newArea->base
2727 					+ ((page->cache_offset << PAGE_SHIFT)
2728 					- newArea->cache_offset), protection);
2729 			}
2730 
2731 			vm_page_unreserve_pages(reservePages);
2732 		}
2733 	}
2734 	if (status == B_OK)
2735 		newArea->cache_type = sourceArea->cache_type;
2736 
2737 	vm_area_put_locked_cache(cache);
2738 
2739 	if (status < B_OK)
2740 		return status;
2741 
2742 	return newArea->id;
2743 }
2744 
2745 
2746 //! The address space must be write locked at this point
2747 static void
2748 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area)
2749 {
2750 	vm_area* temp = addressSpace->areas;
2751 	vm_area* last = NULL;
2752 
2753 	while (temp != NULL) {
2754 		if (area == temp) {
2755 			if (last != NULL) {
2756 				last->address_space_next = temp->address_space_next;
2757 			} else {
2758 				addressSpace->areas = temp->address_space_next;
2759 			}
2760 			addressSpace->change_count++;
2761 			break;
2762 		}
2763 		last = temp;
2764 		temp = temp->address_space_next;
2765 	}
2766 	if (area == addressSpace->area_hint)
2767 		addressSpace->area_hint = NULL;
2768 
2769 	if (addressSpace == vm_kernel_address_space())
2770 		sKernelAddressSpaceLeft -= area->size;
2771 
2772 	if (temp == NULL)
2773 		panic("remove_area_from_address_space: area not found in area list\n");
2774 }
2775 
2776 
2777 static void
2778 delete_area(vm_address_space* addressSpace, vm_area* area)
2779 {
2780 	rw_lock_write_lock(&sAreaHashLock);
2781 	hash_remove(sAreaHash, area);
2782 	rw_lock_write_unlock(&sAreaHashLock);
2783 
2784 	// At this point the area is removed from the global hash table, but
2785 	// still exists in the area list.
2786 
2787 	// Unmap the virtual address space the area occupied
2788 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2789 
2790 	if (!area->cache->temporary)
2791 		area->cache->WriteModified();
2792 
2793 	arch_vm_unset_memory_type(area);
2794 	remove_area_from_address_space(addressSpace, area);
2795 	vm_put_address_space(addressSpace);
2796 
2797 	area->cache->RemoveArea(area);
2798 	area->cache->ReleaseRef();
2799 
2800 	free(area->page_protections);
2801 	free(area->name);
2802 	free(area);
2803 }
2804 
2805 
2806 status_t
2807 vm_delete_area(team_id team, area_id id, bool kernel)
2808 {
2809 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2810 
2811 	AddressSpaceWriteLocker locker;
2812 	vm_area* area;
2813 	status_t status = locker.SetFromArea(team, id, area);
2814 	if (status != B_OK)
2815 		return status;
2816 
2817 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2818 		return B_NOT_ALLOWED;
2819 
2820 	delete_area(locker.AddressSpace(), area);
2821 	return B_OK;
2822 }
2823 
2824 
2825 /*!	Creates a new cache on top of the given cache, moves all areas from
2826 	the old cache to the new one, and changes the protection of all affected
2827 	areas' pages to read-only.
2828 	Preconditions:
2829 	- The given cache must be locked.
2830 	- All of the cache's areas' address spaces must be read locked.
2831 */
2832 static status_t
2833 vm_copy_on_write_area(vm_cache* lowerCache)
2834 {
2835 	vm_cache* upperCache;
2836 
2837 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2838 
2839 	// We need to separate the cache from its areas. The cache goes one level
2840 	// deeper and we create a new cache in between.
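	// Roughly:
	//   before:  area(s) -> lowerCache -> ...
	//   after:   area(s) -> upperCache (new, empty) -> lowerCache -> ...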
2841 
2842 	// create an anonymous cache
2843 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2844 		0, true);
2845 	if (status != B_OK)
2846 		return status;
2847 
2848 	upperCache->Lock();
2849 
2850 	upperCache->temporary = 1;
2851 	upperCache->scan_skip = lowerCache->scan_skip;
2852 	upperCache->virtual_base = lowerCache->virtual_base;
2853 	upperCache->virtual_end = lowerCache->virtual_end;
2854 
2855 	// transfer the lower cache areas to the upper cache
2856 	mutex_lock(&sAreaCacheLock);
2857 
2858 	upperCache->areas = lowerCache->areas;
2859 	lowerCache->areas = NULL;
2860 
2861 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2862 			tempArea = tempArea->cache_next) {
2863 		tempArea->cache = upperCache;
2864 		upperCache->AcquireRefLocked();
2865 		lowerCache->ReleaseRefLocked();
2866 	}
2867 
2868 	mutex_unlock(&sAreaCacheLock);
2869 
2870 	lowerCache->AddConsumer(upperCache);
2871 
2872 	// We now need to remap all pages from all of the cache's areas
2873 	// read-only, so that a copy will be created on next write access
2874 
2875 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2876 			tempArea = tempArea->cache_next) {
2877 		// The area must be readable in the same way it was previously writable
2878 		uint32 protection = B_KERNEL_READ_AREA;
2879 		if ((tempArea->protection & B_READ_AREA) != 0)
2880 			protection |= B_READ_AREA;
2881 
2882 		vm_translation_map* map = &tempArea->address_space->translation_map;
2883 		map->ops->lock(map);
2884 		map->ops->protect(map, tempArea->base,
2885 			tempArea->base - 1 + tempArea->size, protection);
2886 		map->ops->unlock(map);
2887 	}
2888 
2889 	vm_area_put_locked_cache(upperCache);
2890 
2891 	return B_OK;
2892 }
2893 
2894 
2895 area_id
2896 vm_copy_area(team_id team, const char* name, void** _address,
2897 	uint32 addressSpec, uint32 protection, area_id sourceID)
2898 {
2899 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2900 
2901 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2902 		// set the same protection for the kernel as for userland
2903 		protection |= B_KERNEL_READ_AREA;
2904 		if (writableCopy)
2905 			protection |= B_KERNEL_WRITE_AREA;
2906 	}
2907 
2908 	// Do the locking: target address space, all address spaces associated with
2909 	// the source cache, and the cache itself.
2910 	MultiAddressSpaceLocker locker;
2911 	vm_address_space* targetAddressSpace;
2912 	vm_cache* cache;
2913 	vm_area* source;
2914 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2915 	if (status == B_OK) {
2916 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2917 			&cache);
2918 	}
2919 	if (status != B_OK)
2920 		return status;
2921 
2922 	AreaCacheLocker cacheLocker(cache);	// already locked
2923 
2924 	if (addressSpec == B_CLONE_ADDRESS) {
2925 		addressSpec = B_EXACT_ADDRESS;
2926 		*_address = (void*)source->base;
2927 	}
2928 
2929 	bool sharedArea = (source->protection & B_SHARED_AREA) != 0;
2930 
2931 	// First, create a cache on top of the source area, or reuse the
2932 	// existing one if this is a shared area.
2933 
2934 	vm_area* target;
2935 	status = map_backing_store(targetAddressSpace, cache, _address,
2936 		source->cache_offset, source->size, addressSpec, source->wiring,
2937 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2938 		&target, name, false, true);
2939 	if (status < B_OK)
2940 		return status;
2941 
2942 	if (sharedArea) {
2943 		// The new area uses the old area's cache, but map_backing_store()
2944 		// hasn't acquired a ref. So we have to do that now.
2945 		cache->AcquireRefLocked();
2946 	}
2947 
2948 	// If the source area is writable, we need to move it one layer up as well
2949 
2950 	if (!sharedArea) {
2951 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2952 			// TODO: do something more useful if this fails!
2953 			if (vm_copy_on_write_area(cache) < B_OK)
2954 				panic("vm_copy_on_write_area() failed!\n");
2955 		}
2956 	}
2957 
2958 	// we return the ID of the newly created area
2959 	return target->id;
2960 }
2961 
2962 
2963 //! You need to hold the cache lock when calling this function
2964 static int32
2965 count_writable_areas(vm_cache* cache, vm_area* ignoreArea)
2966 {
2967 	struct vm_area* area = cache->areas;
2968 	uint32 count = 0;
2969 
2970 	for (; area != NULL; area = area->cache_next) {
2971 		if (area != ignoreArea
2972 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2973 			count++;
2974 	}
2975 
2976 	return count;
2977 }
2978 
2979 
2980 static status_t
2981 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2982 	bool kernel)
2983 {
2984 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2985 		"%#lx)\n", team, areaID, newProtection));
2986 
2987 	if (!arch_vm_supports_protection(newProtection))
2988 		return B_NOT_SUPPORTED;
2989 
2990 	// lock address spaces and cache
2991 	MultiAddressSpaceLocker locker;
2992 	vm_cache* cache;
2993 	vm_area* area;
2994 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2995 		&cache);
	if (status != B_OK)
		return status;
2996 	AreaCacheLocker cacheLocker(cache);	// already locked
2997 
2998 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2999 		return B_NOT_ALLOWED;
3000 
3001 	if (area->protection == newProtection)
3002 		return B_OK;
3003 
3004 	if (team != vm_kernel_address_space_id()
3005 		&& area->address_space->id != team) {
3006 		// unless you're the kernel, you are only allowed to set
3007 		// the protection of your own areas
3008 		return B_NOT_ALLOWED;
3009 	}
3010 
3011 	bool changePageProtection = true;
3012 
3013 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
3014 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
3015 		// writable -> !writable
3016 
3017 		if (cache->source != NULL && cache->temporary) {
3018 			if (count_writable_areas(cache, area) == 0) {
3019 				// Since this cache is now backed by the pages of its source
3020 				// cache, we can change its commitment to take only those
3021 				// pages into account that really are in this cache.
3022 
3023 				status = cache->Commit(cache->page_count * B_PAGE_SIZE);
3024 
3025 				// TODO: we may be able to join with our source cache, if
3026 				// count == 0
3027 			}
3028 		}
3029 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
3030 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
3031 		// !writable -> writable
3032 
3033 		if (!list_is_empty(&cache->consumers)) {
3034 			// There are consumers -- we have to insert a new cache. Fortunately
3035 			// vm_copy_on_write_area() does everything that's needed.
3036 			changePageProtection = false;
3037 			status = vm_copy_on_write_area(cache);
3038 		} else {
3039 			// No consumers, so we don't need to insert a new one.
3040 			if (cache->source != NULL && cache->temporary) {
3041 				// the cache's commitment must contain all possible pages
3042 				status = cache->Commit(cache->virtual_end
3043 					- cache->virtual_base);
3044 			}
3045 
3046 			if (status == B_OK && cache->source != NULL) {
3047 				// There's a source cache, hence we can't just change all pages'
3048 				// protection or we might allow writing into pages belonging to
3049 				// a lower cache.
3050 				changePageProtection = false;
3051 
3052 				struct vm_translation_map* map
3053 					= &area->address_space->translation_map;
3054 				map->ops->lock(map);
3055 
3056 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3057 						vm_page* page = it.Next();) {
3058 					addr_t address = area->base
3059 						+ (page->cache_offset << PAGE_SHIFT);
3060 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
3061 						newProtection);
3062 				}
3063 
3064 				map->ops->unlock(map);
3065 			}
3066 		}
3067 	} else {
3068 		// we don't have anything special to do in all other cases
3069 	}
3070 
3071 	if (status == B_OK) {
3072 		// remap existing pages in this cache
3073 		struct vm_translation_map* map = &area->address_space->translation_map;
3074 
3075 		if (changePageProtection) {
3076 			map->ops->lock(map);
3077 			map->ops->protect(map, area->base, area->base - 1 + area->size,
3078 				newProtection);
3079 			map->ops->unlock(map);
3080 		}
3081 
3082 		area->protection = newProtection;
3083 	}
3084 
3085 	return status;
3086 }
3087 
3088 
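/*!	Looks up the physical address that \a vaddr is currently mapped to in
	the given team's address space and stores it in \a paddr.
*/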
3089 status_t
3090 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr)
3091 {
3092 	vm_address_space* addressSpace = vm_get_address_space(team);
3093 	if (addressSpace == NULL)
3094 		return B_BAD_TEAM_ID;
3095 
3096 	uint32 dummyFlags;
3097 	status_t status = addressSpace->translation_map.ops->query(
3098 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
3099 
3100 	vm_put_address_space(addressSpace);
3101 	return status;
3102 }
3103 
3104 
3105 static inline addr_t
3106 virtual_page_address(vm_area* area, vm_page* page)
3107 {
3108 	return area->base
3109 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
3110 }
3111 
3112 
3113 bool
3114 vm_test_map_modification(vm_page* page)
3115 {
3116 	MutexLocker locker(sMappingLock);
3117 
3118 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3119 	vm_page_mapping* mapping;
3120 	while ((mapping = iterator.Next()) != NULL) {
3121 		vm_area* area = mapping->area;
3122 		vm_translation_map* map = &area->address_space->translation_map;
3123 
3124 		addr_t physicalAddress;
3125 		uint32 flags;
3126 		map->ops->lock(map);
3127 		map->ops->query(map, virtual_page_address(area, page),
3128 			&physicalAddress, &flags);
3129 		map->ops->unlock(map);
3130 
3131 		if ((flags & PAGE_MODIFIED) != 0)
3132 			return true;
3133 	}
3134 
3135 	return false;
3136 }
3137 
3138 
3139 int32
3140 vm_test_map_activation(vm_page* page, bool* _modified)
3141 {
3142 	int32 activation = 0;
3143 	bool modified = false;
3144 
3145 	MutexLocker locker(sMappingLock);
3146 
3147 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3148 	vm_page_mapping* mapping;
3149 	while ((mapping = iterator.Next()) != NULL) {
3150 		vm_area* area = mapping->area;
3151 		vm_translation_map* map = &area->address_space->translation_map;
3152 
3153 		addr_t physicalAddress;
3154 		uint32 flags;
3155 		map->ops->lock(map);
3156 		map->ops->query(map, virtual_page_address(area, page),
3157 			&physicalAddress, &flags);
3158 		map->ops->unlock(map);
3159 
3160 		if ((flags & PAGE_ACCESSED) != 0)
3161 			activation++;
3162 		if ((flags & PAGE_MODIFIED) != 0)
3163 			modified = true;
3164 	}
3165 
3166 	if (_modified != NULL)
3167 		*_modified = modified;
3168 
3169 	return activation;
3170 }
3171 
3172 
3173 void
3174 vm_clear_map_flags(vm_page* page, uint32 flags)
3175 {
3176 	MutexLocker locker(sMappingLock);
3177 
3178 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3179 	vm_page_mapping* mapping;
3180 	while ((mapping = iterator.Next()) != NULL) {
3181 		vm_area* area = mapping->area;
3182 		vm_translation_map* map = &area->address_space->translation_map;
3183 
3184 		map->ops->lock(map);
3185 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
3186 		map->ops->unlock(map);
3187 	}
3188 }
3189 
3190 
3191 /*!	Removes all mappings from a page.
3192 	After you've called this function, the page is unmapped from memory.
3193 	The accumulated page flags of all mappings can be found in \a _flags.
3194 */
3195 void
3196 vm_remove_all_page_mappings(vm_page* page, uint32* _flags)
3197 {
3198 	uint32 accumulatedFlags = 0;
3199 	MutexLocker locker(sMappingLock);
3200 
3201 	vm_page_mappings queue;
3202 	queue.MoveFrom(&page->mappings);
3203 
3204 	vm_page_mappings::Iterator iterator = queue.GetIterator();
3205 	vm_page_mapping* mapping;
3206 	while ((mapping = iterator.Next()) != NULL) {
3207 		vm_area* area = mapping->area;
3208 		vm_translation_map* map = &area->address_space->translation_map;
3209 		addr_t physicalAddress;
3210 		uint32 flags;
3211 
3212 		map->ops->lock(map);
3213 		addr_t address = virtual_page_address(area, page);
3214 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
3215 		map->ops->flush(map);
3216 		map->ops->query(map, address, &physicalAddress, &flags);
3217 		map->ops->unlock(map);
3218 
3219 		area->mappings.Remove(mapping);
3220 
3221 		accumulatedFlags |= flags;
3222 	}
3223 
3224 	if (page->wired_count == 0 && !queue.IsEmpty())
3225 		atomic_add(&gMappedPagesCount, -1);
3226 
3227 	locker.Unlock();
3228 
3229 	// free now unused mappings
3230 
3231 	while ((mapping = queue.RemoveHead()) != NULL) {
3232 		free(mapping);
3233 	}
3234 
3235 	if (_flags != NULL)
3236 		*_flags = accumulatedFlags;
3237 }
3238 
3239 
3240 bool
3241 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified)
3242 {
3243 	vm_translation_map* map = &area->address_space->translation_map;
3244 
3245 	map->ops->lock(map);
3246 
3247 	addr_t physicalAddress;
3248 	uint32 flags;
3249 	status_t status = map->ops->query(map, virtualAddress, &physicalAddress,
3250 		&flags);
3251 	if (status < B_OK || (flags & PAGE_PRESENT) == 0) {
3252 		map->ops->unlock(map);
3253 		return false;
3254 	}
3255 	vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3256 	if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) {
3257 		panic("area %p looking up page failed for pa 0x%lx\n", area,
3258 			physicalAddress);
3259 	}
3260 
3261 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE)
3262 		decrement_page_wired_count(page);
3263 
3264 	map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1);
3265 
3266 	if (preserveModified) {
3267 		map->ops->flush(map);
3268 
3269 		status = map->ops->query(map, virtualAddress, &physicalAddress, &flags);
3270 		if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED)
3271 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
3272 	}
3273 
3274 	map->ops->unlock(map);
3275 
3276 	if (area->wiring == B_NO_LOCK) {
3277 		vm_page_mapping* mapping;
3278 
3279 		mutex_lock(&sMappingLock);
3280 		map->ops->lock(map);
3281 
3282 		vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3283 		while (iterator.HasNext()) {
3284 			mapping = iterator.Next();
3285 
3286 			if (mapping->area == area) {
3287 				area->mappings.Remove(mapping);
3288 				page->mappings.Remove(mapping);
3289 
3290 				if (page->mappings.IsEmpty() && page->wired_count == 0)
3291 					atomic_add(&gMappedPagesCount, -1);
3292 
3293 				map->ops->unlock(map);
3294 				mutex_unlock(&sMappingLock);
3295 
3296 				free(mapping);
3297 
3298 				return true;
3299 			}
3300 		}
3301 
3302 		map->ops->unlock(map);
3303 		mutex_unlock(&sMappingLock);
3304 
3305 		dprintf("vm_unmap_page: couldn't find mapping for area %p in page %p\n",
3306 			area, page);
3307 	}
3308 
3309 	return true;
3310 }
3311 
3312 
3313 status_t
3314 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified)
3315 {
3316 	vm_translation_map* map = &area->address_space->translation_map;
3317 	addr_t end = base + (size - 1);
3318 
3319 	map->ops->lock(map);
3320 
3321 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
3322 		// iterate through all pages and decrease their wired count
3323 		for (addr_t virtualAddress = base; virtualAddress < end;
3324 				virtualAddress += B_PAGE_SIZE) {
3325 			addr_t physicalAddress;
3326 			uint32 flags;
3327 			status_t status = map->ops->query(map, virtualAddress,
3328 				&physicalAddress, &flags);
3329 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3330 				continue;
3331 
3332 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3333 			if (page == NULL) {
3334 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3335 					physicalAddress);
3336 			}
3337 
3338 			decrement_page_wired_count(page);
3339 		}
3340 	}
3341 
3342 	map->ops->unmap(map, base, end);
3343 	if (preserveModified) {
3344 		map->ops->flush(map);
3345 
3346 		for (addr_t virtualAddress = base; virtualAddress < end;
3347 				virtualAddress += B_PAGE_SIZE) {
3348 			addr_t physicalAddress;
3349 			uint32 flags;
3350 			status_t status = map->ops->query(map, virtualAddress,
3351 				&physicalAddress, &flags);
3352 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3353 				continue;
3354 
3355 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3356 			if (page == NULL) {
3357 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3358 					physicalAddress);
3359 			}
3360 
3361 			if ((flags & PAGE_MODIFIED) != 0
3362 				&& page->state != PAGE_STATE_MODIFIED)
3363 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
3364 		}
3365 	}
3366 	map->ops->unlock(map);
3367 
3368 	if (area->wiring == B_NO_LOCK) {
3369 		uint32 startOffset = (area->cache_offset + base - area->base)
3370 			>> PAGE_SHIFT;
3371 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
3372 		vm_page_mapping* mapping;
3373 		vm_area_mappings queue;
3374 
3375 		mutex_lock(&sMappingLock);
3376 		map->ops->lock(map);
3377 
3378 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3379 		while (iterator.HasNext()) {
3380 			mapping = iterator.Next();
3381 
3382 			vm_page* page = mapping->page;
3383 			if (page->cache_offset < startOffset
3384 				|| page->cache_offset >= endOffset)
3385 				continue;
3386 
3387 			page->mappings.Remove(mapping);
3388 			iterator.Remove();
3389 
3390 			if (page->mappings.IsEmpty() && page->wired_count == 0)
3391 				atomic_add(&gMappedPagesCount, -1);
3392 
3393 			queue.Add(mapping);
3394 		}
3395 
3396 		map->ops->unlock(map);
3397 		mutex_unlock(&sMappingLock);
3398 
3399 		while ((mapping = queue.RemoveHead()) != NULL) {
3400 			free(mapping);
3401 		}
3402 	}
3403 
3404 	return B_OK;
3405 }
3406 
3407 
3408 /*!	When calling this function, you need to have pages reserved! */
3409 status_t
3410 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection)
3411 {
3412 	vm_translation_map* map = &area->address_space->translation_map;
3413 	vm_page_mapping* mapping = NULL;
3414 
3415 	if (area->wiring == B_NO_LOCK) {
3416 		mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping));
3417 		if (mapping == NULL)
3418 			return B_NO_MEMORY;
3419 
3420 		mapping->page = page;
3421 		mapping->area = area;
3422 	}
3423 
3424 	map->ops->lock(map);
3425 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
3426 		protection);
3427 	map->ops->unlock(map);
3428 
3429 	if (area->wiring != B_NO_LOCK) {
3430 		increment_page_wired_count(page);
3431 	} else {
3432 		// insert mapping into lists
3433 		MutexLocker locker(sMappingLock);
3434 
3435 		if (page->mappings.IsEmpty() && page->wired_count == 0)
3436 			atomic_add(&gMappedPagesCount, 1);
3437 
3438 		page->mappings.Add(mapping);
3439 		area->mappings.Add(mapping);
3440 	}
3441 
3442 	if (page->usage_count < 0)
3443 		page->usage_count = 1;
3444 
3445 	if (page->state != PAGE_STATE_MODIFIED)
3446 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
3447 
3448 	return B_OK;
3449 }
3450 
3451 
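// Kernel debugger command backing "dl", "dw", "ds", "db" and "string".
// For example, "dw 0x80001234 8" (address made up) dumps eight 32-bit
// values, and the -p/--physical flag interprets the address as a physical
// one.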
3452 static int
3453 display_mem(int argc, char** argv)
3454 {
3455 	bool physical = false;
3456 	addr_t copyAddress;
3457 	int32 displayWidth;
3458 	int32 itemSize;
3459 	int32 num = -1;
3460 	addr_t address;
3461 	int i = 1, j;
3462 
3463 	if (argc > 1 && argv[1][0] == '-') {
3464 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3465 			physical = true;
3466 			i++;
3467 		} else
3468 			i = 99;
3469 	}
3470 
3471 	if (argc < i + 1 || argc > i + 2) {
3472 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3473 			"\tdl - 8 bytes\n"
3474 			"\tdw - 4 bytes\n"
3475 			"\tds - 2 bytes\n"
3476 			"\tdb - 1 byte\n"
3477 			"\tstring - a whole string\n"
3478 			"  -p or --physical only allows memory from a single page to be "
3479 			"displayed.\n");
3480 		return 0;
3481 	}
3482 
3483 	address = parse_expression(argv[i]);
3484 
3485 	if (argc > i + 1)
3486 		num = parse_expression(argv[i + 1]);
3487 
3488 	// build the format string
3489 	if (strcmp(argv[0], "db") == 0) {
3490 		itemSize = 1;
3491 		displayWidth = 16;
3492 	} else if (strcmp(argv[0], "ds") == 0) {
3493 		itemSize = 2;
3494 		displayWidth = 8;
3495 	} else if (strcmp(argv[0], "dw") == 0) {
3496 		itemSize = 4;
3497 		displayWidth = 4;
3498 	} else if (strcmp(argv[0], "dl") == 0) {
3499 		itemSize = 8;
3500 		displayWidth = 2;
3501 	} else if (strcmp(argv[0], "string") == 0) {
3502 		itemSize = 1;
3503 		displayWidth = -1;
3504 	} else {
3505 		kprintf("display_mem called in an invalid way!\n");
3506 		return 0;
3507 	}
3508 
3509 	if (num <= 0)
3510 		num = displayWidth;
3511 
3512 	void* physicalPageHandle = NULL;
3513 
3514 	if (physical) {
3515 		int32 offset = address & (B_PAGE_SIZE - 1);
3516 		if (num * itemSize + offset > B_PAGE_SIZE) {
3517 			num = (B_PAGE_SIZE - offset) / itemSize;
3518 			kprintf("NOTE: number of bytes has been cut to page size\n");
3519 		}
3520 
3521 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3522 
3523 		if (vm_get_physical_page_debug(address, &copyAddress,
3524 				&physicalPageHandle) != B_OK) {
3525 			kprintf("getting the hardware page failed.\n");
3526 			return 0;
3527 		}
3528 
3529 		address += offset;
3530 		copyAddress += offset;
3531 	} else
3532 		copyAddress = address;
3533 
3534 	if (!strcmp(argv[0], "string")) {
3535 		kprintf("%p \"", (char*)copyAddress);
3536 
3537 		// string mode
3538 		for (i = 0; true; i++) {
3539 			char c;
3540 			if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
3541 				|| c == '\0')
3542 				break;
3543 
3544 			if (c == '\n')
3545 				kprintf("\\n");
3546 			else if (c == '\t')
3547 				kprintf("\\t");
3548 			else {
3549 				if (!isprint(c))
3550 					c = '.';
3551 
3552 				kprintf("%c", c);
3553 			}
3554 		}
3555 
3556 		kprintf("\"\n");
3557 	} else {
3558 		// number mode
3559 		for (i = 0; i < num; i++) {
3560 			uint32 value;
3561 
3562 			if ((i % displayWidth) == 0) {
3563 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3564 				if (i != 0)
3565 					kprintf("\n");
3566 
3567 				kprintf("[0x%lx]  ", address + i * itemSize);
3568 
3569 				for (j = 0; j < displayed; j++) {
3570 					char c;
3571 					if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j,
3572 							1) != B_OK) {
3573 						displayed = j;
3574 						break;
3575 					}
3576 					if (!isprint(c))
3577 						c = '.';
3578 
3579 					kprintf("%c", c);
3580 				}
3581 				if (num > displayWidth) {
3582 					// make sure the spacing in the last line is correct
3583 					for (j = displayed; j < displayWidth * itemSize; j++)
3584 						kprintf(" ");
3585 				}
3586 				kprintf("  ");
3587 			}
3588 
3589 			if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize,
3590 					itemSize) != B_OK) {
3591 				kprintf("read fault");
3592 				break;
3593 			}
3594 
3595 			switch (itemSize) {
3596 				case 1:
3597 					kprintf(" %02x", *(uint8*)&value);
3598 					break;
3599 				case 2:
3600 					kprintf(" %04x", *(uint16*)&value);
3601 					break;
3602 				case 4:
3603 					kprintf(" %08lx", *(uint32*)&value);
3604 					break;
3605 				case 8:
3606 					kprintf(" %016Lx", *(uint64*)&value);
3607 					break;
3608 			}
3609 		}
3610 
3611 		kprintf("\n");
3612 	}
3613 
3614 	if (physical) {
3615 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3616 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3617 	}
3618 	return 0;
3619 }
3620 
3621 
3622 static void
3623 dump_cache_tree_recursively(vm_cache* cache, int level,
3624 	vm_cache* highlightCache)
3625 {
3626 	// print this cache
3627 	for (int i = 0; i < level; i++)
3628 		kprintf("  ");
3629 	if (cache == highlightCache)
3630 		kprintf("%p <--\n", cache);
3631 	else
3632 		kprintf("%p\n", cache);
3633 
3634 	// recursively print its consumers
3635 	vm_cache* consumer = NULL;
3636 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3637 			consumer)) != NULL) {
3638 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3639 	}
3640 }
3641 
3642 
3643 static int
3644 dump_cache_tree(int argc, char** argv)
3645 {
3646 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3647 		kprintf("usage: %s <address>\n", argv[0]);
3648 		return 0;
3649 	}
3650 
3651 	addr_t address = parse_expression(argv[1]);
3652 	if (address == 0)
3653 		return 0;
3654 
3655 	vm_cache* cache = (vm_cache*)address;
3656 	vm_cache* root = cache;
3657 
3658 	// find the root cache (the transitive source)
3659 	while (root->source != NULL)
3660 		root = root->source;
3661 
3662 	dump_cache_tree_recursively(root, 0, cache);
3663 
3664 	return 0;
3665 }
3666 
3667 
3668 static const char*
3669 cache_type_to_string(int32 type)
3670 {
3671 	switch (type) {
3672 		case CACHE_TYPE_RAM:
3673 			return "RAM";
3674 		case CACHE_TYPE_DEVICE:
3675 			return "device";
3676 		case CACHE_TYPE_VNODE:
3677 			return "vnode";
3678 		case CACHE_TYPE_NULL:
3679 			return "null";
3680 
3681 		default:
3682 			return "unknown";
3683 	}
3684 }
3685 
3686 
3687 #if DEBUG_CACHE_LIST
3688 
3689 static void
3690 update_cache_info_recursively(vm_cache* cache, cache_info& info)
3691 {
3692 	info.page_count += cache->page_count;
3693 	if (cache->type == CACHE_TYPE_RAM)
3694 		info.committed += cache->committed_size;
3695 
3696 	// recurse
3697 	vm_cache* consumer = NULL;
3698 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3699 			consumer)) != NULL) {
3700 		update_cache_info_recursively(consumer, info);
3701 	}
3702 }
3703 
3704 
3705 static int
3706 cache_info_compare_page_count(const void* _a, const void* _b)
3707 {
3708 	const cache_info* a = (const cache_info*)_a;
3709 	const cache_info* b = (const cache_info*)_b;
3710 	if (a->page_count == b->page_count)
3711 		return 0;
3712 	return a->page_count < b->page_count ? 1 : -1;
3713 }
3714 
3715 
3716 static int
3717 cache_info_compare_committed(const void* _a, const void* _b)
3718 {
3719 	const cache_info* a = (const cache_info*)_a;
3720 	const cache_info* b = (const cache_info*)_b;
3721 	if (a->committed == b->committed)
3722 		return 0;
3723 	return a->committed < b->committed ? 1 : -1;
3724 }
3725 
3726 
3727 static void
3728 dump_caches_recursively(vm_cache* cache, cache_info& info, int level)
3729 {
3730 	for (int i = 0; i < level; i++)
3731 		kprintf("  ");
3732 
3733 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3734 		cache_type_to_string(cache->type), cache->virtual_base,
3735 		cache->virtual_end, cache->page_count);
3736 
3737 	if (level == 0)
3738 		kprintf("/%lu", info.page_count);
3739 
3740 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3741 		kprintf(", committed: %lld", cache->committed_size);
3742 
3743 		if (level == 0)
3744 			kprintf("/%lu", info.committed);
3745 	}
3746 
3747 	// areas
3748 	if (cache->areas != NULL) {
3749 		vm_area* area = cache->areas;
3750 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3751 			area->address_space->id);
3752 
3753 		while (area->cache_next != NULL) {
3754 			area = area->cache_next;
3755 			kprintf(", %ld", area->id);
3756 		}
3757 	}
3758 
3759 	kputs("\n");
3760 
3761 	// recurse
3762 	vm_cache* consumer = NULL;
3763 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3764 			consumer)) != NULL) {
3765 		dump_caches_recursively(consumer, info, level + 1);
3766 	}
3767 }
3768 
3769 
3770 static int
3771 dump_caches(int argc, char** argv)
3772 {
3773 	if (sCacheInfoTable == NULL) {
3774 		kprintf("No cache info table!\n");
3775 		return 0;
3776 	}
3777 
3778 	bool sortByPageCount = true;
3779 
3780 	for (int32 i = 1; i < argc; i++) {
3781 		if (strcmp(argv[i], "-c") == 0) {
3782 			sortByPageCount = false;
3783 		} else {
3784 			print_debugger_command_usage(argv[0]);
3785 			return 0;
3786 		}
3787 	}
3788 
3789 	uint32 totalCount = 0;
3790 	uint32 rootCount = 0;
3791 	off_t totalCommitted = 0;
3792 	page_num_t totalPages = 0;
3793 
3794 	vm_cache* cache = gDebugCacheList;
3795 	while (cache) {
3796 		totalCount++;
3797 		if (cache->source == NULL) {
3798 			cache_info stackInfo;
3799 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3800 				? sCacheInfoTable[rootCount] : stackInfo;
3801 			rootCount++;
3802 			info.cache = cache;
3803 			info.page_count = 0;
3804 			info.committed = 0;
3805 			update_cache_info_recursively(cache, info);
3806 			totalCommitted += info.committed;
3807 			totalPages += info.page_count;
3808 		}
3809 
3810 		cache = cache->debug_next;
3811 	}
3812 
3813 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3814 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3815 			sortByPageCount
3816 				? &cache_info_compare_page_count
3817 				: &cache_info_compare_committed);
3818 	}
3819 
3820 	kprintf("total committed memory: %lld, total used pages: %lu\n",
3821 		totalCommitted, totalPages);
3822 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3823 		"tree...\n\n", totalCount, rootCount,
3824 		sortByPageCount ? "page count" : "committed size");
3825 
3826 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3827 		for (uint32 i = 0; i < rootCount; i++) {
3828 			cache_info& info = sCacheInfoTable[i];
3829 			dump_caches_recursively(info.cache, info, 0);
3830 		}
3831 	} else
3832 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3833 
3834 	return 0;
3835 }
3836 
3837 #endif	// DEBUG_CACHE_LIST
3838 
3839 
3840 static int
3841 dump_cache(int argc, char** argv)
3842 {
3843 	vm_cache* cache;
3844 	bool showPages = false;
3845 	int i = 1;
3846 
3847 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3848 		kprintf("usage: %s [-ps] <address>\n"
3849 			"  if -p is specified, the cache's pages are shown as well;\n"
3850 			"  otherwise only the cache info is printed.\n", argv[0]);
3851 		return 0;
3852 	}
3853 	while (argv[i][0] == '-') {
3854 		char* arg = argv[i] + 1;
3855 		while (arg[0]) {
3856 			if (arg[0] == 'p')
3857 				showPages = true;
3858 			arg++;
3859 		}
3860 		i++;
3861 	}
3862 	if (argv[i] == NULL) {
3863 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3864 		return 0;
3865 	}
3866 
3867 	addr_t address = parse_expression(argv[i]);
3868 	if (address == 0)
3869 		return 0;
3870 
3871 	cache = (vm_cache*)address;
3872 
3873 	kprintf("CACHE %p:\n", cache);
3874 	kprintf("  ref_count:    %ld\n", cache->RefCount());
3875 	kprintf("  source:       %p\n", cache->source);
3876 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
3877 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
3878 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
3879 	kprintf("  temporary:    %ld\n", cache->temporary);
3880 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
3881 	kprintf("  lock:         %p\n", cache->GetLock());
3882 #if KDEBUG
3883 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
3884 #endif
3885 	kprintf("  areas:\n");
3886 
3887 	for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) {
3888 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
3889 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
3890 		kprintf("\tprotection: 0x%lx\n", area->protection);
3891 		kprintf("\towner:      0x%lx\n", area->address_space->id);
3892 	}
3893 
3894 	kprintf("  consumers:\n");
3895 	vm_cache* consumer = NULL;
3896 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3897 				consumer)) != NULL) {
3898 		kprintf("\t%p\n", consumer);
3899 	}
3900 
3901 	kprintf("  pages:\n");
3902 	if (showPages) {
3903 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3904 				vm_page* page = it.Next();) {
3905 			if (page->type == PAGE_TYPE_PHYSICAL) {
3906 				kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) "
3907 					"wired_count %u\n", page, page->physical_page_number,
3908 					page->cache_offset, page->type, page->state,
3909 					page_state_to_string(page->state), page->wired_count);
			} else if (page->type == PAGE_TYPE_DUMMY) {
3911 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3912 					page, page->state, page_state_to_string(page->state));
3913 			} else
3914 				kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
3915 		}
3916 	} else
3917 		kprintf("\t%ld in cache\n", cache->page_count);
3918 
3919 	return 0;
3920 }
3921 
3922 
3923 static void
3924 dump_area_struct(vm_area* area, bool mappings)
3925 {
3926 	kprintf("AREA: %p\n", area);
3927 	kprintf("name:\t\t'%s'\n", area->name);
3928 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
3929 	kprintf("id:\t\t0x%lx\n", area->id);
3930 	kprintf("base:\t\t0x%lx\n", area->base);
3931 	kprintf("size:\t\t0x%lx\n", area->size);
3932 	kprintf("protection:\t0x%lx\n", area->protection);
3933 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3934 	kprintf("memory_type:\t0x%x\n", area->memory_type);
3935 	kprintf("cache:\t\t%p\n", area->cache);
3936 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3937 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3938 	kprintf("cache_next:\t%p\n", area->cache_next);
3939 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3940 
3941 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3942 	if (mappings) {
3943 		kprintf("page mappings:\n");
3944 		while (iterator.HasNext()) {
3945 			vm_page_mapping* mapping = iterator.Next();
3946 			kprintf("  %p", mapping->page);
3947 		}
3948 		kprintf("\n");
3949 	} else {
3950 		uint32 count = 0;
3951 		while (iterator.Next() != NULL) {
3952 			count++;
3953 		}
3954 		kprintf("page mappings:\t%lu\n", count);
3955 	}
3956 }
3957 
3958 
3959 static int
3960 dump_area(int argc, char** argv)
3961 {
3962 	bool mappings = false;
3963 	bool found = false;
3964 	int32 index = 1;
3965 	vm_area* area;
3966 	addr_t num;
3967 
3968 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3969 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3970 			"All areas matching either id/address/name are listed. You can\n"
3971 			"force to check only a specific item by prefixing the specifier\n"
3972 			"with the id/contains/address/name keywords.\n"
3973 			"-m shows the area's mappings as well.\n");
3974 		return 0;
3975 	}
3976 
3977 	if (!strcmp(argv[1], "-m")) {
3978 		mappings = true;
3979 		index++;
3980 	}
3981 
3982 	int32 mode = 0xf;
3983 	if (!strcmp(argv[index], "id"))
3984 		mode = 1;
3985 	else if (!strcmp(argv[index], "contains"))
3986 		mode = 2;
3987 	else if (!strcmp(argv[index], "name"))
3988 		mode = 4;
3989 	else if (!strcmp(argv[index], "address"))
3990 		mode = 0;
3991 	if (mode != 0xf)
3992 		index++;
3993 
3994 	if (index >= argc) {
3995 		kprintf("No area specifier given.\n");
3996 		return 0;
3997 	}
3998 
3999 	num = parse_expression(argv[index]);
4000 
4001 	if (mode == 0) {
4002 		dump_area_struct((struct vm_area*)num, mappings);
4003 	} else {
4004 		// walk through the area list, looking for the arguments as a name
4005 		struct hash_iterator iter;
4006 
4007 		hash_open(sAreaHash, &iter);
4008 		while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
4009 			if (((mode & 4) != 0 && area->name != NULL
4010 					&& !strcmp(argv[index], area->name))
4011 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
4012 					|| (((mode & 2) != 0 && area->base <= num
4013 						&& area->base + area->size > num))))) {
4014 				dump_area_struct(area, mappings);
4015 				found = true;
4016 			}
4017 		}
4018 
4019 		if (!found)
4020 			kprintf("could not find area %s (%ld)\n", argv[index], num);
4021 	}
4022 
4023 	return 0;
4024 }
4025 
4026 
4027 static int
4028 dump_area_list(int argc, char** argv)
4029 {
4030 	vm_area* area;
4031 	struct hash_iterator iter;
4032 	const char* name = NULL;
4033 	int32 id = 0;
4034 
4035 	if (argc > 1) {
4036 		id = parse_expression(argv[1]);
4037 		if (id == 0)
4038 			name = argv[1];
4039 	}
4040 
4041 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
4042 
4043 	hash_open(sAreaHash, &iter);
4044 	while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
4045 		if ((id != 0 && area->address_space->id != id)
4046 			|| (name != NULL && strstr(area->name, name) == NULL))
4047 			continue;
4048 
4049 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
4050 			(void*)area->base, (void*)area->size, area->protection, area->wiring,
4051 			area->name);
4052 	}
4053 	hash_close(sAreaHash, &iter, false);
4054 	return 0;
4055 }
4056 
4057 
4058 static int
4059 dump_available_memory(int argc, char** argv)
4060 {
4061 	kprintf("Available memory: %Ld/%lu bytes\n",
4062 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
4063 	return 0;
4064 }
4065 
4066 
4067 status_t
4068 vm_delete_areas(struct vm_address_space* addressSpace)
4069 {
4070 	vm_area* area;
4071 	vm_area* next;
4072 	vm_area* last = NULL;
4073 
4074 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
4075 		addressSpace->id));
4076 
4077 	rw_lock_write_lock(&addressSpace->lock);
4078 
4079 	// remove all reserved areas in this address space
4080 
4081 	for (area = addressSpace->areas; area; area = next) {
4082 		next = area->address_space_next;
4083 
4084 		if (area->id == RESERVED_AREA_ID) {
4085 			// just remove it
4086 			if (last)
4087 				last->address_space_next = area->address_space_next;
4088 			else
4089 				addressSpace->areas = area->address_space_next;
4090 
4091 			vm_put_address_space(addressSpace);
4092 			free(area);
4093 			continue;
4094 		}
4095 
4096 		last = area;
4097 	}
4098 
4099 	// delete all the areas in this address space
4100 
4101 	for (area = addressSpace->areas; area; area = next) {
4102 		next = area->address_space_next;
4103 		delete_area(addressSpace, area);
4104 	}
4105 
4106 	rw_lock_write_unlock(&addressSpace->lock);
4107 	return B_OK;
4108 }
4109 
4110 
4111 static area_id
4112 vm_area_for(addr_t address, bool kernel)
4113 {
4114 	team_id team;
4115 	if (IS_USER_ADDRESS(address)) {
4116 		// we try the user team address space, if any
4117 		team = vm_current_user_address_space_id();
4118 		if (team < 0)
4119 			return team;
4120 	} else
4121 		team = vm_kernel_address_space_id();
4122 
4123 	AddressSpaceReadLocker locker(team);
4124 	if (!locker.IsLocked())
4125 		return B_BAD_TEAM_ID;
4126 
4127 	vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
4128 	if (area != NULL) {
4129 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
4130 			return B_ERROR;
4131 
4132 		return area->id;
4133 	}
4134 
4135 	return B_ERROR;
4136 }
4137 
4138 
4139 /*!	Frees physical pages that were used during the boot process.
4140 */
4141 static void
4142 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end)
4143 {
4144 	// free all physical pages in the specified range
4145 
4146 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
4147 		addr_t physicalAddress;
4148 		uint32 flags;
4149 
4150 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
4151 			vm_page* page = vm_lookup_page(current / B_PAGE_SIZE);
4152 			if (page != NULL)
4153 				vm_page_set_state(page, PAGE_STATE_FREE);
4154 		}
4155 	}
4156 
4157 	// unmap the memory
4158 	map->ops->unmap(map, start, end - 1);
4159 }
4160 
4161 
4162 void
4163 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4164 {
4165 	vm_translation_map* map = &vm_kernel_address_space()->translation_map;
4166 	addr_t end = start + size;
4167 	addr_t lastEnd = start;
4168 	vm_area* area;
4169 
4170 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4171 		(void*)start, (void*)end));
4172 
4173 	// The areas are sorted in virtual address space order, so
4174 	// we just have to find the holes between them that fall
4175 	// into the area we should dispose
4176 
4177 	map->ops->lock(map);
4178 
4179 	for (area = vm_kernel_address_space()->areas; area != NULL;
4180 			area = area->address_space_next) {
4181 		addr_t areaStart = area->base;
4182 		addr_t areaEnd = areaStart + area->size;
4183 
4184 		if (area->id == RESERVED_AREA_ID)
4185 			continue;
4186 
4187 		if (areaEnd >= end) {
			// we are done, the areas are already beyond what we have to free
4189 			lastEnd = end;
4190 			break;
4191 		}
4192 
4193 		if (areaStart > lastEnd) {
4194 			// this is something we can free
4195 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4196 				(void*)areaStart));
4197 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
4198 		}
4199 
4200 		lastEnd = areaEnd;
4201 	}
4202 
4203 	if (lastEnd < end) {
4204 		// we can also get rid of some space at the end of the area
4205 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4206 			(void*)end));
4207 		unmap_and_free_physical_pages(map, lastEnd, end);
4208 	}
4209 
4210 	map->ops->unlock(map);
4211 }
4212 
4213 
4214 static void
4215 create_preloaded_image_areas(struct preloaded_image* image)
4216 {
4217 	char name[B_OS_NAME_LENGTH];
4218 	void* address;
4219 	int32 length;
4220 
4221 	// use file name to create a good area name
4222 	char* fileName = strrchr(image->name, '/');
4223 	if (fileName == NULL)
4224 		fileName = image->name;
4225 	else
4226 		fileName++;
4227 
4228 	length = strlen(fileName);
4229 	// make sure there is enough space for the suffix
4230 	if (length > 25)
4231 		length = 25;
4232 
4233 	memcpy(name, fileName, length);
4234 	strcpy(name + length, "_text");
4235 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4236 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4237 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4238 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4239 		// this will later be remapped read-only/executable by the
4240 		// ELF initialization code
4241 
4242 	strcpy(name + length, "_data");
4243 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4244 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4245 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4246 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4247 }
4248 
4249 
4250 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
4251 	Any boot loader resources contained in that arguments must not be accessed
4252 	anymore past this point.
4253 */
4254 void
4255 vm_free_kernel_args(kernel_args* args)
4256 {
4257 	uint32 i;
4258 
4259 	TRACE(("vm_free_kernel_args()\n"));
4260 
4261 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4262 		area_id area = area_for((void*)args->kernel_args_range[i].start);
4263 		if (area >= B_OK)
4264 			delete_area(area);
4265 	}
4266 }
4267 
4268 
4269 static void
4270 allocate_kernel_args(kernel_args* args)
4271 {
4272 	TRACE(("allocate_kernel_args()\n"));
4273 
4274 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4275 		void* address = (void*)args->kernel_args_range[i].start;
4276 
4277 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4278 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4279 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4280 	}
4281 }
4282 
4283 
4284 static void
4285 unreserve_boot_loader_ranges(kernel_args* args)
4286 {
4287 	TRACE(("unreserve_boot_loader_ranges()\n"));
4288 
4289 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4290 		vm_unreserve_address_range(vm_kernel_address_space_id(),
4291 			(void*)args->virtual_allocated_range[i].start,
4292 			args->virtual_allocated_range[i].size);
4293 	}
4294 }
4295 
4296 
4297 static void
4298 reserve_boot_loader_ranges(kernel_args* args)
4299 {
4300 	TRACE(("reserve_boot_loader_ranges()\n"));
4301 
4302 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4303 		void* address = (void*)args->virtual_allocated_range[i].start;
4304 
		// If the address is not a kernel address, we just skip it. The
		// architecture specific code has to deal with it.
4307 		if (!IS_KERNEL_ADDRESS(address)) {
4308 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
4309 				address, args->virtual_allocated_range[i].size);
4310 			continue;
4311 		}
4312 
4313 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(),
4314 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4315 		if (status < B_OK)
4316 			panic("could not reserve boot loader ranges\n");
4317 	}
4318 }
4319 
4320 
4321 static addr_t
4322 allocate_early_virtual(kernel_args* args, size_t size)
4323 {
4324 	addr_t spot = 0;
4325 	uint32 i;
4326 	int last_valloc_entry = 0;
4327 
4328 	size = PAGE_ALIGN(size);
4329 	// find a slot in the virtual allocation addr range
4330 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
4331 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4332 			+ args->virtual_allocated_range[i - 1].size;
4333 		last_valloc_entry = i;
4334 		// check to see if the space between this one and the last is big enough
4335 		if (previousRangeEnd >= KERNEL_BASE
4336 			&& args->virtual_allocated_range[i].start
4337 				- previousRangeEnd >= size) {
4338 			spot = previousRangeEnd;
4339 			args->virtual_allocated_range[i - 1].size += size;
4340 			goto out;
4341 		}
4342 	}
4343 	if (spot == 0) {
		// we didn't find a gap between the allocation ranges -- that's OK;
		// see if there's a gap after the last one
4346 		addr_t lastRangeEnd
4347 			= args->virtual_allocated_range[last_valloc_entry].start
4348 				+ args->virtual_allocated_range[last_valloc_entry].size;
4349 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
4350 			spot = lastRangeEnd;
4351 			args->virtual_allocated_range[last_valloc_entry].size += size;
4352 			goto out;
4353 		}
4354 		// see if there's a gap before the first one
4355 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
4356 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
4357 				args->virtual_allocated_range[0].start -= size;
4358 				spot = args->virtual_allocated_range[0].start;
4359 				goto out;
4360 			}
4361 		}
4362 	}
4363 
4364 out:
4365 	return spot;
4366 }
4367 
4368 
4369 static bool
4370 is_page_in_physical_memory_range(kernel_args* args, addr_t address)
4371 {
4372 	// TODO: horrible brute-force method of determining if the page can be
4373 	// allocated
4374 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4375 		if (address >= args->physical_memory_range[i].start
4376 			&& address < args->physical_memory_range[i].start
4377 				+ args->physical_memory_range[i].size)
4378 			return true;
4379 	}
4380 	return false;
4381 }
4382 
4383 
4384 static addr_t
4385 allocate_early_physical_page(kernel_args* args)
4386 {
4387 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4388 		addr_t nextPage;
4389 
4390 		nextPage = args->physical_allocated_range[i].start
4391 			+ args->physical_allocated_range[i].size;
		// see if the page after this allocated paddr run can be allocated
4393 		if (i + 1 < args->num_physical_allocated_ranges
4394 			&& args->physical_allocated_range[i + 1].size != 0) {
4395 			// see if the next page will collide with the next allocated range
4396 			if (nextPage >= args->physical_allocated_range[i+1].start)
4397 				continue;
4398 		}
4399 		// see if the next physical page fits in the memory block
4400 		if (is_page_in_physical_memory_range(args, nextPage)) {
4401 			// we got one!
4402 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4403 			return nextPage / B_PAGE_SIZE;
4404 		}
4405 	}
4406 
4407 	return 0;
4408 		// could not allocate a block
4409 }
4410 
4411 
4412 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4413 	allocate some pages before the VM is completely up.
4414 */
4415 addr_t
4416 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4417 	uint32 attributes)
4418 {
4419 	if (physicalSize > virtualSize)
4420 		physicalSize = virtualSize;
4421 
4422 	// find the vaddr to allocate at
4423 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
4424 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
4425 
4426 	// map the pages
4427 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4428 		addr_t physicalAddress = allocate_early_physical_page(args);
4429 		if (physicalAddress == 0)
4430 			panic("error allocating early page!\n");
4431 
4432 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4433 
4434 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4435 			physicalAddress * B_PAGE_SIZE, attributes,
4436 			&allocate_early_physical_page);
4437 	}
4438 
4439 	return virtualBase;
4440 }
4441 
4442 
4443 /*!	The main entrance point to initialize the VM. */
4444 status_t
4445 vm_init(kernel_args* args)
4446 {
4447 	struct preloaded_image* image;
4448 	void* address;
4449 	status_t err = 0;
4450 	uint32 i;
4451 
4452 	TRACE(("vm_init: entry\n"));
4453 	err = arch_vm_translation_map_init(args);
4454 	err = arch_vm_init(args);
4455 
4456 	// initialize some globals
4457 	sNextAreaID = 1;
4458 
4459 	vm_page_init_num_pages(args);
4460 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4461 
4462 	size_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
4464 	while (heapSize > sAvailableMemory / 8)
4465 		heapSize /= 2;
4466 	if (heapSize < 1024 * 1024)
4467 		panic("vm_init: go buy some RAM please.");
4468 
4469 	// map in the new heap and initialize it
4470 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4471 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4472 	TRACE(("heap at 0x%lx\n", heapBase));
4473 	heap_init(heapBase, heapSize);
4474 
4475 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
4476 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
4477 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4478 	slab_init(args, slabInitialBase, slabInitialSize);
4479 
4480 	// initialize the free page list and physical page mapper
4481 	vm_page_init(args);
4482 
4483 	// initialize the hash table that stores the pages mapped to caches
4484 	vm_cache_init(args);
4485 
4486 	{
4487 		vm_area* area;
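		// Note: the uninitialized "area" pointer is never dereferenced here;
		// the expression below only computes the offset of the hash_next
		// member within vm_area.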
4488 		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
4489 			(addr_t)&area->hash_next - (addr_t)area,
4490 			&area_compare, &area_hash);
4491 		if (sAreaHash == NULL)
4492 			panic("vm_init: error creating aspace hash table\n");
4493 	}
4494 
4495 	vm_address_space_init();
4496 	reserve_boot_loader_ranges(args);
4497 
	// Do any further initialization that the architecture dependent layers may
4499 	// need now
4500 	arch_vm_translation_map_init_post_area(args);
4501 	arch_vm_init_post_area(args);
4502 	vm_page_init_post_area(args);
4503 
4504 	// allocate areas to represent stuff that already exists
4505 
4506 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4507 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4508 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4509 
4510 	address = (void*)ROUNDDOWN(slabInitialBase, B_PAGE_SIZE);
4511 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
4512 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
4513 		| B_KERNEL_WRITE_AREA);
4514 
4515 	allocate_kernel_args(args);
4516 
4517 	create_preloaded_image_areas(&args->kernel_image);
4518 
4519 	// allocate areas for preloaded images
4520 	for (image = args->preloaded_images; image != NULL; image = image->next) {
4521 		create_preloaded_image_areas(image);
4522 	}
4523 
4524 	// allocate kernel stacks
4525 	for (i = 0; i < args->num_cpus; i++) {
4526 		char name[64];
4527 
4528 		sprintf(name, "idle thread %lu kstack", i + 1);
4529 		address = (void*)args->cpu_kstack[i].start;
4530 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4531 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4532 	}
4533 
4534 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4535 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4536 
4537 #if DEBUG_CACHE_LIST
4538 	create_area("cache info table", (void**)&sCacheInfoTable,
4539 		B_ANY_KERNEL_ADDRESS,
4540 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4541 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4542 #endif	// DEBUG_CACHE_LIST
4543 
4544 	// add some debugger commands
4545 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4546 	add_debugger_command("area", &dump_area,
4547 		"Dump info about a particular area");
4548 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
4549 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
4550 #if DEBUG_CACHE_LIST
4551 	add_debugger_command_etc("caches", &dump_caches,
4552 		"List all vm_cache trees",
4553 		"[ \"-c\" ]\n"
4554 		"All cache trees are listed sorted in decreasing order by number of\n"
4555 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
4556 		0);
4557 #endif
4558 	add_debugger_command("avail", &dump_available_memory,
4559 		"Dump available memory");
4560 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4561 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4562 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4563 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4564 	add_debugger_command("string", &display_mem, "dump strings");
4565 
4566 	TRACE(("vm_init: exit\n"));
4567 
4568 	return err;
4569 }
4570 
4571 
4572 status_t
4573 vm_init_post_sem(kernel_args* args)
4574 {
	// This frees all unused boot loader resources and makes their space available
4576 	// again
4577 	arch_vm_init_end(args);
4578 	unreserve_boot_loader_ranges(args);
4579 
	// Fill in all of the semaphores that were not allocated before. Since
	// we're still single threaded and only the kernel address space exists,
	// it isn't that hard to find all of the ones we need to create.
4583 
4584 	arch_vm_translation_map_init_post_sem(args);
4585 	vm_address_space_init_post_sem();
4586 
4587 	slab_init_post_sem();
4588 	return heap_init_post_sem();
4589 }
4590 
4591 
4592 status_t
4593 vm_init_post_thread(kernel_args* args)
4594 {
4595 	vm_page_init_post_thread(args);
4596 	vm_daemon_init();
4597 	slab_init_post_thread();
4598 	return heap_init_post_thread();
4599 }
4600 
4601 
4602 status_t
4603 vm_init_post_modules(kernel_args* args)
4604 {
4605 	return arch_vm_init_post_modules(args);
4606 }
4607 
4608 
4609 void
4610 permit_page_faults(void)
4611 {
4612 	struct thread* thread = thread_get_current_thread();
4613 	if (thread != NULL)
4614 		atomic_add(&thread->page_faults_allowed, 1);
4615 }
4616 
4617 
4618 void
4619 forbid_page_faults(void)
4620 {
4621 	struct thread* thread = thread_get_current_thread();
4622 	if (thread != NULL)
4623 		atomic_add(&thread->page_faults_allowed, -1);
4624 }
4625 
4626 
4627 status_t
4628 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
4629 	addr_t* newIP)
4630 {
4631 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4632 		faultAddress));
4633 
4634 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4635 
4636 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4637 	vm_address_space* addressSpace = NULL;
4638 
4639 	status_t status = B_OK;
4640 	*newIP = 0;
4641 	atomic_add((int32*)&sPageFaults, 1);
4642 
4643 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4644 		addressSpace = vm_get_kernel_address_space();
4645 	} else if (IS_USER_ADDRESS(pageAddress)) {
4646 		addressSpace = vm_get_current_user_address_space();
4647 		if (addressSpace == NULL) {
4648 			if (!isUser) {
4649 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4650 					"memory!\n");
4651 				status = B_BAD_ADDRESS;
4652 				TPF(PageFaultError(-1,
4653 					VMPageFaultTracing
4654 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4655 			} else {
4656 				// XXX weird state.
4657 				panic("vm_page_fault: non kernel thread accessing user memory "
4658 					"that doesn't exist!\n");
4659 				status = B_BAD_ADDRESS;
4660 			}
4661 		}
4662 	} else {
		// The hit was probably in the 64k DMZ between kernel and user space;
		// this keeps a user space thread from passing a buffer that crosses
		// into kernel space.
4666 		status = B_BAD_ADDRESS;
4667 		TPF(PageFaultError(-1,
4668 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4669 	}
4670 
4671 	if (status == B_OK)
4672 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser);
4673 
4674 	if (status < B_OK) {
4675 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4676 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4677 			strerror(status), address, faultAddress, isWrite, isUser,
4678 			thread_get_current_thread_id());
4679 		if (!isUser) {
4680 			struct thread* thread = thread_get_current_thread();
4681 			if (thread != NULL && thread->fault_handler != 0) {
				// this will cause the arch dependent page fault handler to
4683 				// modify the IP on the interrupt frame or whatever to return
4684 				// to this address
4685 				*newIP = thread->fault_handler;
4686 			} else {
4687 				// unhandled page fault in the kernel
4688 				panic("vm_page_fault: unhandled page fault in kernel space at "
4689 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4690 			}
4691 		} else {
4692 #if 1
4693 			rw_lock_read_lock(&addressSpace->lock);
4694 
4695 			// TODO: remove me once we have proper userland debugging support
4696 			// (and tools)
4697 			vm_area* area = vm_area_lookup(addressSpace, faultAddress);
4698 
4699 			struct thread* thread = thread_get_current_thread();
4700 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4701 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4702 				thread->name, thread->id, thread->team->name, thread->team->id,
4703 				isWrite ? "write" : "read", address, faultAddress,
4704 				area ? area->name : "???",
4705 				faultAddress - (area ? area->base : 0x0));
4706 
4707 			// We can print a stack trace of the userland thread here.
4708 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4709 // fault and someone is already waiting for a write lock on the same address
4710 // space. This thread will then try to acquire the lock again and will
4711 // be queued after the writer.
4712 #	if 0
4713 			if (area) {
4714 				struct stack_frame {
4715 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4716 						struct stack_frame*	previous;
4717 						void*				return_address;
4718 					#else
4719 						// ...
4720 					#warning writeme
4721 					#endif
4722 				} frame;
4723 #		ifdef __INTEL__
4724 				struct iframe* iframe = i386_get_user_iframe();
4725 				if (iframe == NULL)
4726 					panic("iframe is NULL!");
4727 
4728 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4729 					sizeof(struct stack_frame));
4730 #		elif defined(__POWERPC__)
4731 				struct iframe* iframe = ppc_get_user_iframe();
4732 				if (iframe == NULL)
4733 					panic("iframe is NULL!");
4734 
4735 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4736 					sizeof(struct stack_frame));
4737 #		else
4738 #			warning "vm_page_fault() stack trace won't work"
4739 				status = B_ERROR;
4740 #		endif
4741 
4742 				dprintf("stack trace:\n");
4743 				int32 maxFrames = 50;
4744 				while (status == B_OK && --maxFrames >= 0
4745 						&& frame.return_address != NULL) {
4746 					dprintf("  %p", frame.return_address);
4747 					area = vm_area_lookup(addressSpace,
4748 						(addr_t)frame.return_address);
4749 					if (area) {
4750 						dprintf(" (%s + %#lx)", area->name,
4751 							(addr_t)frame.return_address - area->base);
4752 					}
4753 					dprintf("\n");
4754 
4755 					status = user_memcpy(&frame, frame.previous,
4756 						sizeof(struct stack_frame));
4757 				}
4758 			}
4759 #	endif	// 0 (stack trace)
4760 
4761 			rw_lock_read_unlock(&addressSpace->lock);
4762 #endif
4763 
4764 			// TODO: the fault_callback is a temporary solution for vm86
4765 			if (thread->fault_callback == NULL
4766 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4767 				// If the thread has a signal handler for SIGSEGV, we simply
4768 				// send it the signal. Otherwise we notify the user debugger
4769 				// first.
4770 				struct sigaction action;
4771 				if (sigaction(SIGSEGV, NULL, &action) == 0
4772 					&& action.sa_handler != SIG_DFL
4773 					&& action.sa_handler != SIG_IGN) {
4774 					send_signal(thread->id, SIGSEGV);
4775 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4776 						SIGSEGV)) {
4777 					send_signal(thread->id, SIGSEGV);
4778 				}
4779 			}
4780 		}
4781 	}
4782 
4783 	if (addressSpace != NULL)
4784 		vm_put_address_space(addressSpace);
4785 
4786 	return B_HANDLED_INTERRUPT;
4787 }
4788 
4789 
4790 class VMCacheChainLocker {
4791 public:
4792 	VMCacheChainLocker()
4793 		:
4794 		fTopCache(NULL),
4795 		fBottomCache(NULL)
4796 	{
4797 	}
4798 
4799 	void SetTo(VMCache* topCache)
4800 	{
4801 		fTopCache = topCache;
4802 		fBottomCache = topCache;
4803 	}
4804 
4805 	VMCache* LockSourceCache()
4806 	{
4807 		if (fBottomCache == NULL || fBottomCache->source == NULL)
4808 			return NULL;
4809 
4810 		fBottomCache = fBottomCache->source;
4811 		fBottomCache->Lock();
4812 		fBottomCache->AcquireRefLocked();
4813 
4814 		return fBottomCache;
4815 	}
4816 
4817 	void Unlock()
4818 	{
4819 		if (fTopCache == NULL)
4820 			return;
4821 
4822 		VMCache* cache = fTopCache;
4823 		while (cache != NULL) {
4824 			VMCache* nextCache = cache->source;
4825 			cache->ReleaseRefAndUnlock();
4826 
4827 			if (cache == fBottomCache)
4828 				break;
4829 
4830 			cache = nextCache;
4831 		}
4832 
4833 		fTopCache = NULL;
4834 		fBottomCache = NULL;
4835 	}
4836 
4837 private:
4838 	VMCache*	fTopCache;
4839 	VMCache*	fBottomCache;
4840 };
4841 
4842 
4843 struct PageFaultContext {
4844 	AddressSpaceReadLocker	addressSpaceLocker;
4845 	VMCacheChainLocker		cacheChainLocker;
4846 
4847 	vm_translation_map*		map;
4848 	vm_cache*				topCache;
4849 	off_t					cacheOffset;
4850 	bool					isWrite;
4851 
4852 	// return values
4853 	vm_page*				page;
4854 	bool					restart;
4855 
4856 
4857 	PageFaultContext(vm_address_space* addressSpace, bool isWrite)
4858 		:
4859 		addressSpaceLocker(addressSpace, true),
4860 		map(&addressSpace->translation_map),
4861 		isWrite(isWrite)
4862 	{
4863 	}
4864 
4865 	~PageFaultContext()
4866 	{
4867 		UnlockAll();
4868 	}
4869 
4870 	void Prepare(VMCache* topCache, off_t cacheOffset)
4871 	{
4872 		this->topCache = topCache;
4873 		this->cacheOffset = cacheOffset;
4874 		page = NULL;
4875 		restart = false;
4876 
4877 		cacheChainLocker.SetTo(topCache);
4878 	}
4879 
4880 	void UnlockAll()
4881 	{
4882 		topCache = NULL;
4883 		addressSpaceLocker.Unlock();
4884 		cacheChainLocker.Unlock();
4885 	}
4886 };
4887 
4888 
4889 /*!	Gets the page that should be mapped into the area.
4890 	Returns an error code other than \c B_OK, if the page couldn't be found or
4891 	paged in. The locking state of the address space and the caches is undefined
4892 	in that case.
	Returns \c B_OK with \c context.restart set to \c true, if the function
	had to unlock the address space and all caches and is supposed to be
	called again.
4896 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4897 	found. It is returned in \c context.page. The address space will still be
4898 	locked as well as all caches starting from the top cache to at least the
4899 	cache the page lives in.
4900 */
4901 static inline status_t
4902 fault_get_page(PageFaultContext& context)
4903 {
4904 	vm_cache* cache = context.topCache;
4905 	vm_cache* lastCache = NULL;
4906 	vm_page* page = NULL;
4907 
4908 	while (cache != NULL) {
4909 		// We already hold the lock of the cache at this point.
4910 
4911 		lastCache = cache;
4912 
4913 		for (;;) {
4914 			page = cache->LookupPage(context.cacheOffset);
4915 			if (page == NULL || page->state != PAGE_STATE_BUSY) {
4916 				// Either there is no page or there is one and it is not busy.
4917 				break;
4918 			}
4919 
4920 			// page must be busy -- wait for it to become unbusy
4921 			ConditionVariableEntry entry;
4922 			entry.Add(page);
4923 			context.UnlockAll();
4924 			entry.Wait();
4925 
4926 			// restart the whole process
4927 			context.restart = true;
4928 			return B_OK;
4929 		}
4930 
4931 		if (page != NULL)
4932 			break;
4933 
4934 		// The current cache does not contain the page we're looking for.
4935 
4936 		// see if the backing store has it
4937 		if (cache->HasPage(context.cacheOffset)) {
4938 			// insert a fresh page and mark it busy -- we're going to read it in
4939 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4940 			cache->InsertPage(page, context.cacheOffset);
4941 
4942 			ConditionVariable busyCondition;
4943 			busyCondition.Publish(page, "page");
4944 
4945 			// We need to unlock all caches and the address space while reading
4946 			// the page in. Keep a reference to the cache around.
4947 			cache->AcquireRefLocked();
4948 			context.UnlockAll();
4949 
4950 			// read the page in
4951 			iovec vec;
4952 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
4953 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
4954 
4955 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4956 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4957 
4958 			cache->Lock();
4959 
4960 			if (status < B_OK) {
4961 				// on error remove and free the page
4962 				dprintf("reading page from cache %p returned: %s!\n",
4963 					cache, strerror(status));
4964 
4965 				busyCondition.Unpublish();
4966 				cache->RemovePage(page);
4967 				vm_page_set_state(page, PAGE_STATE_FREE);
4968 
4969 				cache->ReleaseRefAndUnlock();
4970 				return status;
4971 			}
4972 
4973 			// mark the page unbusy again
4974 			page->state = PAGE_STATE_ACTIVE;
4975 			busyCondition.Unpublish();
4976 
4977 			// Since we needed to unlock everything temporarily, the area
4978 			// situation might have changed. So we need to restart the whole
4979 			// process.
4980 			cache->ReleaseRefAndUnlock();
4981 			context.restart = true;
4982 			return B_OK;
4983 		}
4984 
4985 		cache = context.cacheChainLocker.LockSourceCache();
4986 	}
4987 
4988 	if (page == NULL) {
		// There was no adequate page; determine the cache for a clean one.
		// Read-only pages go into the deepest cache; only the topmost cache
		// may have direct write access.
4992 		cache = context.isWrite ? context.topCache : lastCache;
4993 
4994 		// allocate a clean page
4995 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4996 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4997 			page->physical_page_number));
4998 
4999 		// insert the new page into our cache
5000 		cache->InsertPage(page, context.cacheOffset);
5001 
5002 	} else if (page->cache != context.topCache && context.isWrite) {
5003 		// We have a page that has the data we want, but in the wrong cache
5004 		// object so we need to copy it and stick it into the top cache.
5005 		vm_page* sourcePage = page;
5006 
5007 		// TODO: If memory is low, it might be a good idea to steal the page
5008 		// from our source cache -- if possible, that is.
5009 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
5010 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
5011 
5012 		// copy the page
5013 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
5014 			sourcePage->physical_page_number * B_PAGE_SIZE);
5015 
5016 		// insert the new page into our cache
5017 		context.topCache->InsertPage(page, context.cacheOffset);
5018 	}
5019 
5020 	context.page = page;
5021 	return B_OK;
5022 }
5023 
5024 
5025 static status_t
5026 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress,
5027 	bool isWrite, bool isUser)
5028 {
5029 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
5030 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
5031 
5032 	PageFaultContext context(addressSpace, isWrite);
5033 
5034 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
5035 	status_t status = B_OK;
5036 
5037 	atomic_add(&addressSpace->fault_count, 1);
5038 
5039 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
5040 	// the pages upfront makes sure we don't have any cache locked, so that the
5041 	// page daemon/thief can do their job without problems.
5042 	size_t reservePages = 2 + context.map->ops->map_max_pages_need(context.map,
5043 		originalAddress, originalAddress);
5044 	context.addressSpaceLocker.Unlock();
5045 	vm_page_reserve_pages(reservePages);
5046 
5047 	while (true) {
5048 		context.addressSpaceLocker.Lock();
5049 
5050 		// get the area the fault was in
5051 		vm_area* area = vm_area_lookup(addressSpace, address);
5052 		if (area == NULL) {
5053 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
5054 				"space\n", originalAddress);
5055 			TPF(PageFaultError(-1,
5056 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
5057 			status = B_BAD_ADDRESS;
5058 			break;
5059 		}
5060 
5061 		// check permissions
5062 		uint32 protection = get_area_page_protection(area, address);
5063 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
5064 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
5065 				(void*)originalAddress);
5066 			TPF(PageFaultError(area->id,
5067 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
5068 			status = B_PERMISSION_DENIED;
5069 			break;
5070 		}
5071 		if (isWrite && (protection
5072 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
5073 			dprintf("write access attempted on write-protected area 0x%lx at"
5074 				" %p\n", area->id, (void*)originalAddress);
5075 			TPF(PageFaultError(area->id,
5076 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
5077 			status = B_PERMISSION_DENIED;
5078 			break;
5079 		} else if (!isWrite && (protection
5080 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
5081 			dprintf("read access attempted on read-protected area 0x%lx at"
5082 				" %p\n", area->id, (void*)originalAddress);
5083 			TPF(PageFaultError(area->id,
5084 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
5085 			status = B_PERMISSION_DENIED;
5086 			break;
5087 		}
5088 
		// We have the area, and it was a valid access, so let's try to resolve
		// the page fault now. First, the topmost cache of the area is
		// investigated.
5092 
5093 		context.Prepare(vm_area_get_locked_cache(area),
5094 			address - area->base + area->cache_offset);
5095 
5096 		// See if this cache has a fault handler -- this will do all the work
5097 		// for us.
5098 		{
5099 			// Note, since the page fault is resolved with interrupts enabled,
5100 			// the fault handler could be called more than once for the same
5101 			// reason -- the store must take this into account.
5102 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
5103 			if (status != B_BAD_HANDLER)
5104 				break;
5105 		}
5106 
		// The topmost cache has no fault handler, so let's see if the cache or
5108 		// its sources already have the page we're searching for (we're going
5109 		// from top to bottom).
5110 		status = fault_get_page(context);
5111 		if (status != B_OK) {
5112 			TPF(PageFaultError(area->id, status));
5113 			break;
5114 		}
5115 
5116 		if (context.restart)
5117 			continue;
5118 
5119 		// All went fine, all there is left to do is to map the page into the
5120 		// address space.
5121 		TPF(PageFaultDone(area->id, context.topCache, context.page->cache,
5122 			context.page));
5123 
5124 		// If the page doesn't reside in the area's cache, we need to make sure
5125 		// it's mapped in read-only, so that we cannot overwrite someone else's
5126 		// data (copy-on-write)
5127 		uint32 newProtection = protection;
5128 		if (context.page->cache != context.topCache && !isWrite)
5129 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
5130 
5131 		bool unmapPage = false;
5132 		bool mapPage = true;
5133 
5134 		// check whether there's already a page mapped at the address
5135 		context.map->ops->lock(context.map);
5136 
5137 		addr_t physicalAddress;
5138 		uint32 flags;
5139 		vm_page* mappedPage;
5140 		if (context.map->ops->query(context.map, address, &physicalAddress,
5141 				&flags) == B_OK
5142 			&& (flags & PAGE_PRESENT) != 0
5143 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5144 				!= NULL) {
5145 			// Yep there's already a page. If it's ours, we can simply adjust
5146 			// its protection. Otherwise we have to unmap it.
5147 			if (mappedPage == context.page) {
5148 				context.map->ops->protect(context.map, address,
5149 					address + (B_PAGE_SIZE - 1), newProtection);
5150 
5151 				mapPage = false;
5152 			} else
5153 				unmapPage = true;
5154 		}
5155 
5156 		context.map->ops->unlock(context.map);
5157 
5158 		if (unmapPage)
5159 			vm_unmap_page(area, address, true);
5160 
5161 		if (mapPage)
5162 			vm_map_page(area, context.page, address, newProtection);
5163 
5164 		break;
5165 	}
5166 
5167 	vm_page_unreserve_pages(reservePages);
5168 
5169 	return status;
5170 }
5171 
5172 
/*!	You must have the address space locked when calling this function. */
5174 vm_area*
5175 vm_area_lookup(vm_address_space* addressSpace, addr_t address)
5176 {
5177 	vm_area* area;
5178 
5179 	// check the areas list first
5180 	area = addressSpace->area_hint;
5181 	if (area != NULL
5182 		&& area->base <= address
5183 		&& area->base + (area->size - 1) >= address)
5184 		goto found;
5185 
5186 	for (area = addressSpace->areas; area != NULL;
5187 			area = area->address_space_next) {
5188 		if (area->id == RESERVED_AREA_ID)
5189 			continue;
5190 
5191 		if (area->base <= address && area->base + (area->size - 1) >= address)
5192 			break;
5193 	}
5194 
5195 found:
5196 	if (area)
5197 		addressSpace->area_hint = area;
5198 
5199 	return area;
5200 }
5201 
5202 
5203 status_t
5204 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
5205 {
5206 	return vm_kernel_address_space()->translation_map.ops->get_physical_page(
5207 		paddr, _vaddr, _handle);
5208 }
5209 
5210 status_t
5211 vm_put_physical_page(addr_t vaddr, void* handle)
5212 {
5213 	return vm_kernel_address_space()->translation_map.ops->put_physical_page(
5214 		vaddr, handle);
5215 }
5216 
5217 
5218 status_t
5219 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
5220 {
5221 	return vm_kernel_address_space()->translation_map.ops
5222 		->get_physical_page_current_cpu(paddr, _vaddr, _handle);
5223 }
5224 
5225 status_t
5226 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5227 {
5228 	return vm_kernel_address_space()->translation_map.ops
5229 		->put_physical_page_current_cpu(vaddr, handle);
5230 }
5231 
5232 
5233 status_t
5234 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
5235 {
5236 	return vm_kernel_address_space()->translation_map.ops
5237 		->get_physical_page_debug(paddr, _vaddr, _handle);
5238 }
5239 
5240 status_t
5241 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5242 {
5243 	return vm_kernel_address_space()->translation_map.ops
5244 		->put_physical_page_debug(vaddr, handle);
5245 }
5246 
5247 
5248 void
5249 vm_get_info(system_memory_info* info)
5250 {
5251 	swap_get_info(info);
5252 
5253 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
5254 	info->page_faults = sPageFaults;
5255 
5256 	MutexLocker locker(sAvailableMemoryLock);
5257 	info->free_memory = sAvailableMemory;
5258 	info->needed_memory = sNeededMemory;
5259 }
5260 
5261 
5262 uint32
5263 vm_num_page_faults(void)
5264 {
5265 	return sPageFaults;
5266 }
5267 
5268 
5269 off_t
5270 vm_available_memory(void)
5271 {
5272 	MutexLocker locker(sAvailableMemoryLock);
5273 	return sAvailableMemory;
5274 }
5275 
5276 
5277 off_t
5278 vm_available_not_needed_memory(void)
5279 {
5280 	MutexLocker locker(sAvailableMemoryLock);
5281 	return sAvailableMemory - sNeededMemory;
5282 }
5283 
5284 
5285 size_t
5286 vm_kernel_address_space_left(void)
5287 {
5288 	return sKernelAddressSpaceLeft;
5289 }
5290 
5291 
5292 void
5293 vm_unreserve_memory(size_t amount)
5294 {
5295 	mutex_lock(&sAvailableMemoryLock);
5296 
5297 	sAvailableMemory += amount;
5298 
5299 	mutex_unlock(&sAvailableMemoryLock);
5300 }
5301 
5302 
5303 status_t
5304 vm_try_reserve_memory(size_t amount, bigtime_t timeout)
5305 {
5306 	MutexLocker locker(sAvailableMemoryLock);
5307 
5308 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5309 
5310 	if (sAvailableMemory >= amount) {
5311 		sAvailableMemory -= amount;
5312 		return B_OK;
5313 	}
5314 
5315 	if (timeout <= 0)
5316 		return B_NO_MEMORY;
5317 
5318 	// turn timeout into an absolute timeout
5319 	timeout += system_time();
5320 
5321 	// loop until we've got the memory or the timeout occurs
5322 	do {
5323 		sNeededMemory += amount;
5324 
5325 		// call the low resource manager
5326 		locker.Unlock();
5327 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5328 			B_ABSOLUTE_TIMEOUT, timeout);
5329 		locker.Lock();
5330 
5331 		sNeededMemory -= amount;
5332 
5333 		if (sAvailableMemory >= amount) {
5334 			sAvailableMemory -= amount;
5335 			return B_OK;
5336 		}
5337 	} while (timeout > system_time());
5338 
5339 	return B_NO_MEMORY;
5340 }
5341 
5342 
5343 status_t
5344 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
5345 {
5346 	AddressSpaceReadLocker locker;
5347 	vm_area* area;
5348 	status_t status = locker.SetFromArea(id, area);
5349 	if (status != B_OK)
5350 		return status;
5351 
5352 	return arch_vm_set_memory_type(area, physicalBase, type);
5353 }
5354 
5355 
5356 /*!	This function enforces some protection properties:
	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5358 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5359 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5360 	   and B_KERNEL_WRITE_AREA.
5361 */
5362 static void
5363 fix_protection(uint32* protection)
5364 {
5365 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5366 		if ((*protection & B_USER_PROTECTION) == 0
5367 			|| (*protection & B_WRITE_AREA) != 0)
5368 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5369 		else
5370 			*protection |= B_KERNEL_READ_AREA;
5371 	}
5372 }
5373 
5374 
5375 static void
5376 fill_area_info(struct vm_area* area, area_info* info, size_t size)
5377 {
5378 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5379 	info->area = area->id;
5380 	info->address = (void*)area->base;
5381 	info->size = area->size;
5382 	info->protection = area->protection;
5383 	info->lock = B_FULL_LOCK;
5384 	info->team = area->address_space->id;
5385 	info->copy_count = 0;
5386 	info->in_count = 0;
5387 	info->out_count = 0;
5388 		// TODO: retrieve real values here!
5389 
5390 	vm_cache* cache = vm_area_get_locked_cache(area);
5391 
5392 	// Note, this is a simplification; the cache could be larger than this area
5393 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5394 
5395 	vm_area_put_locked_cache(cache);
5396 }
5397 
5398 
5399 /*!
	Tests whether the area that contains the specified address actually
	exists, and whether it needs any kind of locking.
5402 	Used by both lock_memory() and unlock_memory().
5403 */
5404 static status_t
5405 test_lock_memory(vm_address_space* addressSpace, addr_t address,
5406 	bool& needsLocking)
5407 {
5408 	rw_lock_read_lock(&addressSpace->lock);
5409 
5410 	vm_area* area = vm_area_lookup(addressSpace, address);
5411 	if (area != NULL) {
5412 		// This determines if we need to lock the memory at all
5413 		needsLocking = area->cache_type != CACHE_TYPE_NULL
5414 			&& area->cache_type != CACHE_TYPE_DEVICE
5415 			&& area->wiring != B_FULL_LOCK
5416 			&& area->wiring != B_CONTIGUOUS;
5417 	}
5418 
5419 	rw_lock_read_unlock(&addressSpace->lock);
5420 
5421 	if (area == NULL)
5422 		return B_BAD_ADDRESS;
5423 
5424 	return B_OK;
5425 }
5426 
5427 
5428 static status_t
5429 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5430 {
5431 	// is newSize a multiple of B_PAGE_SIZE?
5432 	if (newSize & (B_PAGE_SIZE - 1))
5433 		return B_BAD_VALUE;
5434 
5435 	// lock all affected address spaces and the cache
5436 	vm_area* area;
5437 	vm_cache* cache;
5438 
5439 	MultiAddressSpaceLocker locker;
5440 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
5441 		&cache);
5442 	if (status != B_OK)
5443 		return status;
5444 	AreaCacheLocker cacheLocker(cache);	// already locked
5445 
5446 	// enforce restrictions
5447 	if (!kernel) {
5448 		if ((area->protection & B_KERNEL_AREA) != 0)
5449 			return B_NOT_ALLOWED;
5450 		// TODO: Enforce all restrictions (team, etc.)!
5451 	}
5452 
5453 	size_t oldSize = area->size;
5454 	if (newSize == oldSize)
5455 		return B_OK;
5456 
5457 	// Resize all areas of this area's cache
5458 
5459 	if (cache->type != CACHE_TYPE_RAM)
5460 		return B_NOT_ALLOWED;
5461 
5462 	if (oldSize < newSize) {
5463 		// We need to check if all areas of this cache can be resized
5464 
5465 		for (vm_area* current = cache->areas; current != NULL;
5466 				current = current->cache_next) {
5467 			vm_area* next = current->address_space_next;
5468 			if (next != NULL && next->base <= (current->base + newSize)) {
5469 				// If the area was created inside a reserved area, it can
5470 				// also be resized in that area
5471 				// TODO: if there is free space after the reserved area, it could
5472 				// be used as well...
5473 				if (next->id == RESERVED_AREA_ID
5474 					&& next->cache_offset <= current->base
5475 					&& next->base - 1 + next->size
5476 						>= current->base - 1 + newSize)
5477 					continue;
5478 
5479 				return B_ERROR;
5480 			}
5481 		}
5482 	}
5483 
5484 	// Okay, looks good so far, so let's do it
5485 
5486 	if (oldSize < newSize) {
5487 		// Growing the cache can fail, so we do it first.
5488 		status = cache->Resize(cache->virtual_base + newSize);
5489 		if (status != B_OK)
5490 			return status;
5491 	}
5492 
5493 	for (vm_area* current = cache->areas; current != NULL;
5494 			current = current->cache_next) {
5495 		vm_area* next = current->address_space_next;
5496 		if (next != NULL && next->base <= (current->base + newSize)) {
5497 			if (next->id == RESERVED_AREA_ID
5498 				&& next->cache_offset <= current->base
5499 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
5500 				// resize reserved area
5501 				addr_t offset = current->base + newSize - next->base;
5502 				if (next->size <= offset) {
5503 					current->address_space_next = next->address_space_next;
5504 					free(next);
5505 				} else {
5506 					next->size -= offset;
5507 					next->base += offset;
5508 				}
5509 			} else {
5510 				panic("resize situation for area %p has changed although we "
5511 					"should have the address space lock", current);
5512 				status = B_ERROR;
5513 				break;
5514 			}
5515 		}
5516 
5517 		current->size = newSize;
5518 
5519 		// We also need to unmap all pages beyond the new size, if the area has
		// shrunk
5521 		if (newSize < oldSize) {
5522 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
5523 				false);
5524 		}
5525 	}
5526 
5527 	// shrinking the cache can't fail, so we do it now
5528 	if (status == B_OK && newSize < oldSize)
5529 		status = cache->Resize(cache->virtual_base + newSize);
5530 
5531 	if (status < B_OK) {
5532 		// This shouldn't really be possible, but hey, who knows
5533 		for (vm_area* current = cache->areas; current != NULL;
5534 				current = current->cache_next) {
5535 			current->size = oldSize;
5536 		}
5537 
5538 		cache->Resize(cache->virtual_base + oldSize);
5539 	}
5540 
5541 	// TODO: we must honour the lock restrictions of this area
5542 	return status;
5543 }
5544 
5545 
5546 status_t
5547 vm_memset_physical(addr_t address, int value, size_t length)
5548 {
5549 	return vm_kernel_address_space()->translation_map.ops->memset_physical(
5550 		address, value, length);
5551 }
5552 
5553 
5554 status_t
5555 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
5556 {
5557 	return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical(
5558 		to, from, length, user);
5559 }
5560 
5561 
5562 status_t
5563 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
5564 {
5565 	return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical(
5566 		to, _from, length, user);
5567 }
5568 
5569 
5570 void
5571 vm_memcpy_physical_page(addr_t to, addr_t from)
5572 {
5573 	return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page(
5574 		to, from);
5575 }
5576 
5577 
5578 //	#pragma mark - kernel public API
5579 
5580 
5581 status_t
5582 user_memcpy(void* to, const void* from, size_t size)
5583 {
5584 	// don't allow address overflows
5585 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5586 		return B_BAD_ADDRESS;
5587 
5588 	if (arch_cpu_user_memcpy(to, from, size,
5589 			&thread_get_current_thread()->fault_handler) < B_OK)
5590 		return B_BAD_ADDRESS;
5591 
5592 	return B_OK;
5593 }
5594 
5595 
5596 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5597 	the string in \a to, NULL-terminating the result.
5598 
5599 	\param to Pointer to the destination C-string.
5600 	\param from Pointer to the source C-string.
5601 	\param size Size in bytes of the string buffer pointed to by \a to.
5602 
5603 	\return strlen(\a from).
5604 */
5605 ssize_t
5606 user_strlcpy(char* to, const char* from, size_t size)
5607 {
5608 	if (to == NULL && size != 0)
5609 		return B_BAD_VALUE;
5610 	if (from == NULL)
5611 		return B_BAD_ADDRESS;
5612 
5613 	// limit size to avoid address overflows
5614 	size_t maxSize = std::min(size,
5615 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5616 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5617 		// the source address might still overflow.
5618 
5619 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5620 		&thread_get_current_thread()->fault_handler);
5621 
5622 	// If we hit the address overflow boundary, fail.
5623 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
5624 		return B_BAD_ADDRESS;
5625 
5626 	return result;
5627 }
5628 
5629 
5630 status_t
5631 user_memset(void* s, char c, size_t count)
5632 {
5633 	// don't allow address overflows
5634 	if ((addr_t)s + count < (addr_t)s)
5635 		return B_BAD_ADDRESS;
5636 
5637 	if (arch_cpu_user_memset(s, c, count,
5638 			&thread_get_current_thread()->fault_handler) < B_OK)
5639 		return B_BAD_ADDRESS;
5640 
5641 	return B_OK;
5642 }
5643 
5644 
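/*!	Wires the memory range [\a address, \a address + \a numBytes) of the given
	team into physical memory (if the containing area requires locking at
	all), soft-faulting pages in as needed and incrementing their wired
	counts. If \c B_READ_DEVICE is set in \a flags, the caller intends to
	write to the locked memory, so pages not yet mapped writable are faulted
	in for writing. Must be balanced by a matching unlock_memory_etc() call.

	A hedged driver-side sketch (\c userBuffer and \c bufferSize are
	hypothetical, standing in for a buffer passed down from userland):
	\code
	status_t status = lock_memory_etc(B_CURRENT_TEAM, userBuffer, bufferSize,
		B_READ_DEVICE);
	if (status != B_OK)
		return status;
	// ... perform the I/O into the buffer ...
	unlock_memory_etc(B_CURRENT_TEAM, userBuffer, bufferSize, B_READ_DEVICE);
	\endcode
*/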
5645 status_t
5646 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5647 {
5648 	vm_address_space* addressSpace = NULL;
5649 	struct vm_translation_map* map;
5650 	addr_t unalignedBase = (addr_t)address;
5651 	addr_t end = unalignedBase + numBytes;
5652 	addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
5653 	bool isUser = IS_USER_ADDRESS(address);
5654 	bool needsLocking = true;
5655 
5656 	if (isUser) {
5657 		if (team == B_CURRENT_TEAM)
5658 			addressSpace = vm_get_current_user_address_space();
5659 		else
5660 			addressSpace = vm_get_address_space(team);
5661 	} else
5662 		addressSpace = vm_get_kernel_address_space();
5663 	if (addressSpace == NULL)
5664 		return B_ERROR;
5665 
5666 	// test if we're on an area that allows faults at all
5667 
5668 	map = &addressSpace->translation_map;
5669 
5670 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5671 	if (status < B_OK)
5672 		goto out;
5673 	if (!needsLocking)
5674 		goto out;
5675 
5676 	for (; base < end; base += B_PAGE_SIZE) {
5677 		addr_t physicalAddress;
5678 		uint32 protection;
5680 
5681 		map->ops->lock(map);
5682 		status = map->ops->query(map, base, &physicalAddress, &protection);
5683 		map->ops->unlock(map);
5684 
5685 		if (status < B_OK)
5686 			goto out;
5687 
5688 		if ((protection & PAGE_PRESENT) != 0) {
5689 			// if B_READ_DEVICE is set, the caller intends to write to the locked
5690 			// memory, so if it hasn't been mapped writable, we'll try the soft
5691 			// fault anyway
5692 			if ((flags & B_READ_DEVICE) == 0
5693 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
5694 				// update wiring
5695 				vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5696 				if (page == NULL)
5697 					panic("couldn't lookup physical page just allocated\n");
5698 
5699 				increment_page_wired_count(page);
5700 				continue;
5701 			}
5702 		}
5703 
5704 		status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
5705 			isUser);
5706 		if (status != B_OK)	{
5707 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) "
5708 				"failed: %s\n", (void*)unalignedBase, numBytes, flags,
5709 				strerror(status));
5710 			goto out;
5711 		}
5712 
5713 		// TODO: Here's a race condition. We should probably add a parameter
5714 		// to vm_soft_fault() that would cause the page's wired count to be
5715 		// incremented immediately.
5716 		// TODO: After memory has been locked in an area, we need to prevent the
5717 		// area from being deleted, resized, cut, etc. That could be done using
5718 		// a "locked pages" count in vm_area, and maybe a condition variable, if
5719 		// we want to allow waiting for the area to become eligible for these
5720 		// operations again.
5721 
5722 		map->ops->lock(map);
5723 		status = map->ops->query(map, base, &physicalAddress, &protection);
5724 		map->ops->unlock(map);
5725 
5726 		if (status < B_OK)
5727 			goto out;
5728 
5729 		// update wiring
5730 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5731 		if (page == NULL)
5732 			panic("couldn't lookup physical page");
5733 
5734 		increment_page_wired_count(page);
5735 			// TODO: needs to be atomic on all platforms!
5736 	}
5737 
5738 out:
5739 	vm_put_address_space(addressSpace);
5740 	return status;
5741 }
5742 
5743 
5744 status_t
5745 lock_memory(void* address, size_t numBytes, uint32 flags)
5746 {
5747 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5748 }
5749 
5750 
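/*!	Counterpart to lock_memory_etc(): decrements the wired count of every page
	in the given range again. Calling it on memory that was never locked (and
	is hence unmapped) is an error and currently panics.
*/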
5751 status_t
5752 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5753 {
5754 	vm_address_space* addressSpace = NULL;
5755 	struct vm_translation_map* map;
5756 	addr_t unalignedBase = (addr_t)address;
5757 	addr_t end = unalignedBase + numBytes;
5758 	addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
5759 	bool needsLocking = true;
5760 
5761 	if (IS_USER_ADDRESS(address)) {
5762 		if (team == B_CURRENT_TEAM)
5763 			addressSpace = vm_get_current_user_address_space();
5764 		else
5765 			addressSpace = vm_get_address_space(team);
5766 	} else
5767 		addressSpace = vm_get_kernel_address_space();
5768 	if (addressSpace == NULL)
5769 		return B_ERROR;
5770 
5771 	map = &addressSpace->translation_map;
5772 
5773 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5774 	if (status < B_OK)
5775 		goto out;
5776 	if (!needsLocking)
5777 		goto out;
5778 
5779 	for (; base < end; base += B_PAGE_SIZE) {
5780 		map->ops->lock(map);
5781 
5782 		addr_t physicalAddress;
5783 		uint32 protection;
5784 		status = map->ops->query(map, base, &physicalAddress,
5785 			&protection);
5786 
5787 		map->ops->unlock(map);
5788 
5789 		if (status < B_OK)
5790 			goto out;
5791 		if ((protection & PAGE_PRESENT) == 0)
5792 			panic("calling unlock_memory() on unmapped memory!");
5793 
5794 		// update wiring
5795 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5796 		if (page == NULL)
5797 			panic("couldn't lookup physical page");
5798 
5799 		decrement_page_wired_count(page);
5800 	}
5801 
5802 out:
5803 	vm_put_address_space(addressSpace);
5804 	return status;
5805 }
5806 
5807 
5808 status_t
5809 unlock_memory(void* address, size_t numBytes, uint32 flags)
5810 {
5811 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5812 }
5813 
5814 
5815 /*!	Similar to get_memory_map(), but also allows specifying the address space
5816 	for the memory in question and has saner semantics.
5817 	Returns \c B_OK when the complete range could be translated or
5818 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5819 	case the actual number of entries is written to \c *_numEntries. Any other
5820 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5821 	in this case.
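
	A hedged usage sketch (\c buffer and \c bufferSize are hypothetical
	kernel-side values):
	\code
	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
		entries, &count);
	if (status != B_OK && status != B_BUFFER_OVERFLOW)
		return status;
	// per the contract above, entries[0 .. count - 1] now describe the
	// physical runs backing the (beginning of the) buffer
	\endcode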
5822 */
5823 status_t
5824 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5825 	physical_entry* table, uint32* _numEntries)
5826 {
5827 	uint32 numEntries = *_numEntries;
5828 	*_numEntries = 0;
5829 
5830 	vm_address_space* addressSpace;
5831 	addr_t virtualAddress = (addr_t)address;
5832 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5833 	addr_t physicalAddress;
5834 	status_t status = B_OK;
5835 	int32 index = -1;
5836 	addr_t offset = 0;
5837 	bool interrupts = are_interrupts_enabled();
5838 
5839 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5840 		address, numBytes, numEntries));
5841 
5842 	if (numEntries == 0 || numBytes == 0)
5843 		return B_BAD_VALUE;
5844 
5845 	// in which address space is the address to be found?
5846 	if (IS_USER_ADDRESS(virtualAddress)) {
5847 		if (team == B_CURRENT_TEAM)
5848 			addressSpace = vm_get_current_user_address_space();
5849 		else
5850 			addressSpace = vm_get_address_space(team);
5851 	} else
5852 		addressSpace = vm_get_kernel_address_space();
5853 
5854 	if (addressSpace == NULL)
5855 		return B_ERROR;
5856 
5857 	vm_translation_map* map = &addressSpace->translation_map;
5858 
5859 	if (interrupts)
5860 		map->ops->lock(map);
5861 
5862 	while (offset < numBytes) {
5863 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5864 		uint32 flags;
5865 
5866 		if (interrupts) {
5867 			status = map->ops->query(map, (addr_t)address + offset,
5868 				&physicalAddress, &flags);
5869 		} else {
5870 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
5871 				&physicalAddress, &flags);
5872 		}
5873 		if (status < B_OK)
5874 			break;
5875 		if ((flags & PAGE_PRESENT) == 0) {
5876 			panic("get_memory_map() called on unmapped memory!");
5877 			return B_BAD_ADDRESS;
5878 		}
5879 
5880 		if (index < 0 && pageOffset > 0) {
5881 			physicalAddress += pageOffset;
5882 			if (bytes > B_PAGE_SIZE - pageOffset)
5883 				bytes = B_PAGE_SIZE - pageOffset;
5884 		}
5885 
5886 		// need to switch to the next physical_entry?
5887 		if (index < 0 || (addr_t)table[index].address
5888 				!= physicalAddress - table[index].size) {
5889 			if ((uint32)++index + 1 > numEntries) {
5890 				// table too small
5891 				status = B_BUFFER_OVERFLOW;
5892 				break;
5893 			}
5894 			table[index].address = (void*)physicalAddress;
5895 			table[index].size = bytes;
5896 		} else {
5897 			// this page is contiguous with the current entry, so just extend it
5898 			table[index].size += bytes;
5899 		}
5900 
5901 		offset += bytes;
5902 	}
5903 
5904 	if (interrupts)
5905 		map->ops->unlock(map);
5906 
5907 	if (status != B_OK)
5908 		return status;
5909 
5910 	if ((uint32)index + 1 > numEntries) {
5911 		*_numEntries = index;
5912 		return B_BUFFER_OVERFLOW;
5913 	}
5914 
5915 	*_numEntries = index + 1;
5916 	return B_OK;
5917 }
5918 
5919 
5920 /*!	According to the BeBook, this function should always succeed.
5921 	This is no longer the case.
5922 */
5923 long
5924 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
5925 	long numEntries)
5926 {
5927 	uint32 entriesRead = numEntries;
5928 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5929 		table, &entriesRead);
5930 	if (error != B_OK)
5931 		return error;
5932 
5933 	// close the entry list
5934 
5935 	// if the caller provided only a single entry, we silently accept the
5936 	// missing terminating entry
5936 	if (numEntries == 1)
5937 		return B_OK;
5938 
5939 	if (entriesRead + 1 > (uint32)numEntries)
5940 		return B_BUFFER_OVERFLOW;
5941 
5942 	table[entriesRead].address = NULL;
5943 	table[entriesRead].size = 0;
5944 
5945 	return B_OK;
5946 }
5947 
5948 
5949 area_id
5950 area_for(void* address)
5951 {
5952 	return vm_area_for((addr_t)address, true);
5953 }
5954 
5955 
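/*!	Looks up an area by name by iterating over the global area hash (read
	locked). Returns the ID of the first area with a matching name, or
	\c B_NAME_NOT_FOUND.
*/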
5956 area_id
5957 find_area(const char* name)
5958 {
5959 	rw_lock_read_lock(&sAreaHashLock);
5960 	struct hash_iterator iterator;
5961 	hash_open(sAreaHash, &iterator);
5962 
5963 	vm_area* area;
5964 	area_id id = B_NAME_NOT_FOUND;
5965 	while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) {
5966 		if (area->id == RESERVED_AREA_ID)
5967 			continue;
5968 
5969 		if (!strcmp(area->name, name)) {
5970 			id = area->id;
5971 			break;
5972 		}
5973 	}
5974 
5975 	hash_close(sAreaHash, &iterator, false);
5976 	rw_lock_read_unlock(&sAreaHashLock);
5977 
5978 	return id;
5979 }
5980 
5981 
5982 status_t
5983 _get_area_info(area_id id, area_info* info, size_t size)
5984 {
5985 	if (size != sizeof(area_info) || info == NULL)
5986 		return B_BAD_VALUE;
5987 
5988 	AddressSpaceReadLocker locker;
5989 	vm_area* area;
5990 	status_t status = locker.SetFromArea(id, area);
5991 	if (status != B_OK)
5992 		return status;
5993 
5994 	fill_area_info(area, info, size);
5995 	return B_OK;
5996 }
5997 
5998 
5999 status_t
6000 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
6001 {
6002 	addr_t nextBase = *(addr_t*)cookie;
6003 
6004 	// we're already through the list
6005 	if (nextBase == (addr_t)-1)
6006 		return B_ENTRY_NOT_FOUND;
6007 
6008 	if (team == B_CURRENT_TEAM)
6009 		team = team_get_current_team_id();
6010 
6011 	AddressSpaceReadLocker locker(team);
6012 	if (!locker.IsLocked())
6013 		return B_BAD_TEAM_ID;
6014 
6015 	vm_area* area;
6016 	for (area = locker.AddressSpace()->areas; area != NULL;
6017 			area = area->address_space_next) {
6018 		if (area->id == RESERVED_AREA_ID)
6019 			continue;
6020 
6021 		if (area->base > nextBase)
6022 			break;
6023 	}
6024 
6025 	if (area == NULL) {
6026 		*(addr_t*)cookie = nextBase = (addr_t)-1;
6027 		return B_ENTRY_NOT_FOUND;
6028 	}
6029 
6030 	fill_area_info(area, info, size);
6031 	*cookie = (int32)(area->base);
6032 
6033 	return B_OK;
6034 }
6035 
6036 
6037 status_t
6038 set_area_protection(area_id area, uint32 newProtection)
6039 {
6040 	fix_protection(&newProtection);
6041 
6042 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
6043 		newProtection, true);
6044 }
6045 
6046 
6047 status_t
6048 resize_area(area_id areaID, size_t newSize)
6049 {
6050 	return vm_resize_area(areaID, newSize, true);
6051 }
6052 
6053 
6054 /*!	Transfers the specified area to a new team. The caller must be the owner
6055 	of the area (not yet enforced but probably should be).
6056 */
6057 area_id
6058 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6059 	bool kernel)
6060 {
6061 	area_info info;
6062 	status_t status = get_area_info(id, &info);
6063 	if (status != B_OK)
6064 		return status;
6065 
6066 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6067 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6068 	if (clonedArea < 0)
6069 		return clonedArea;
6070 
6071 	status = vm_delete_area(info.team, id, kernel);
6072 	if (status != B_OK) {
6073 		vm_delete_area(target, clonedArea, kernel);
6074 		return status;
6075 	}
6076 
6077 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6078 
6079 	return clonedArea;
6080 }
6081 
6082 
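/*!	Maps \a numBytes of physical memory starting at \a physicalAddress into
	the kernel address space and returns the ID of the created area; the
	virtual address is returned in \a _virtualAddress.

	A hedged sketch for mapping a device's register page (\c registerBase is a
	hypothetical physical address, e.g. taken from a PCI BAR):
	\code
	void* registers;
	area_id area = map_physical_memory("device registers",
		(void*)registerBase, B_PAGE_SIZE, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, &registers);
	if (area < B_OK)
		return area;
	\endcode
*/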
6083 area_id
6084 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
6085 	uint32 addressSpec, uint32 protection, void** _virtualAddress)
6086 {
6087 	if (!arch_vm_supports_protection(protection))
6088 		return B_NOT_SUPPORTED;
6089 
6090 	fix_protection(&protection);
6091 
6092 	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
6093 		_virtualAddress, addressSpec, numBytes, protection,
6094 		(addr_t)physicalAddress);
6095 }
6096 
6097 
6098 area_id
6099 clone_area(const char* name, void** _address, uint32 addressSpec,
6100 	uint32 protection, area_id source)
6101 {
6102 	if ((protection & B_KERNEL_PROTECTION) == 0)
6103 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6104 
6105 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
6106 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6107 }
6108 
6109 
6110 area_id
6111 create_area_etc(team_id team, const char* name, void** address,
6112 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
6113 	addr_t physicalAddress, uint32 flags)
6114 {
6115 	fix_protection(&protection);
6116 
6117 	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
6118 		size, lock, protection, physicalAddress, flags, true);
6119 }
6120 
6121 
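/*!	Creates an anonymous area in the kernel address space. \a protection is
	run through fix_protection() first, so the kernel protection bits are
	added automatically.

	A hedged sketch allocating one locked page of kernel memory:
	\code
	void* address;
	area_id area = create_area("some buffer", &address, B_ANY_KERNEL_ADDRESS,
		B_PAGE_SIZE, B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (area < B_OK)
		return area;
	\endcode
*/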
6122 area_id
6123 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
6124 	uint32 lock, uint32 protection)
6125 {
6126 	fix_protection(&protection);
6127 
6128 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name,
6129 		_address, addressSpec, size, lock, protection, 0, 0, true);
6130 }
6131 
6132 
6133 status_t
6134 delete_area(area_id area)
6135 {
6136 	return vm_delete_area(vm_kernel_address_space_id(), area, true);
6137 }
6138 
6139 
6140 //	#pragma mark - Userland syscalls
6141 
6142 
6143 status_t
6144 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6145 	addr_t size)
6146 {
6147 	// filter out some unavailable values (for userland)
6148 	switch (addressSpec) {
6149 		case B_ANY_KERNEL_ADDRESS:
6150 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6151 			return B_BAD_VALUE;
6152 	}
6153 
6154 	addr_t address;
6155 
6156 	if (!IS_USER_ADDRESS(userAddress)
6157 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6158 		return B_BAD_ADDRESS;
6159 
6160 	status_t status = vm_reserve_address_range(
6161 		vm_current_user_address_space_id(), (void**)&address, addressSpec, size,
6162 		RESERVED_AVOID_BASE);
6163 	if (status != B_OK)
6164 		return status;
6165 
6166 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6167 		vm_unreserve_address_range(vm_current_user_address_space_id(),
6168 			(void*)address, size);
6169 		return B_BAD_ADDRESS;
6170 	}
6171 
6172 	return B_OK;
6173 }
6174 
6175 
6176 status_t
6177 _user_unreserve_address_range(addr_t address, addr_t size)
6178 {
6179 	return vm_unreserve_address_range(vm_current_user_address_space_id(),
6180 		(void*)address, size);
6181 }
6182 
6183 
6184 area_id
6185 _user_area_for(void* address)
6186 {
6187 	return vm_area_for((addr_t)address, false);
6188 }
6189 
6190 
6191 area_id
6192 _user_find_area(const char* userName)
6193 {
6194 	char name[B_OS_NAME_LENGTH];
6195 
6196 	if (!IS_USER_ADDRESS(userName)
6197 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6198 		return B_BAD_ADDRESS;
6199 
6200 	return find_area(name);
6201 }
6202 
6203 
6204 status_t
6205 _user_get_area_info(area_id area, area_info* userInfo)
6206 {
6207 	if (!IS_USER_ADDRESS(userInfo))
6208 		return B_BAD_ADDRESS;
6209 
6210 	area_info info;
6211 	status_t status = get_area_info(area, &info);
6212 	if (status < B_OK)
6213 		return status;
6214 
6215 	// TODO: do we want to prevent userland from seeing kernel protections?
6216 	//info.protection &= B_USER_PROTECTION;
6217 
6218 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6219 		return B_BAD_ADDRESS;
6220 
6221 	return status;
6222 }
6223 
6224 
6225 status_t
6226 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
6227 {
6228 	int32 cookie;
6229 
6230 	if (!IS_USER_ADDRESS(userCookie)
6231 		|| !IS_USER_ADDRESS(userInfo)
6232 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
6233 		return B_BAD_ADDRESS;
6234 
6235 	area_info info;
6236 	status_t status = _get_next_area_info(team, &cookie, &info,
6237 		sizeof(area_info));
6238 	if (status != B_OK)
6239 		return status;
6240 
6241 	//info.protection &= B_USER_PROTECTION;
6242 
6243 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
6244 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6245 		return B_BAD_ADDRESS;
6246 
6247 	return status;
6248 }
6249 
6250 
6251 status_t
6252 _user_set_area_protection(area_id area, uint32 newProtection)
6253 {
6254 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6255 		return B_BAD_VALUE;
6256 
6257 	fix_protection(&newProtection);
6258 
6259 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
6260 		newProtection, false);
6261 }
6262 
6263 
6264 status_t
6265 _user_resize_area(area_id area, size_t newSize)
6266 {
6267 	// TODO: Since we restrict deleting of areas to those owned by the team,
6268 	// we should also do that for resizing (check other functions, too).
6269 	return vm_resize_area(area, newSize, false);
6270 }
6271 
6272 
6273 area_id
6274 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6275 	team_id target)
6276 {
6277 	// filter out some unavailable values (for userland)
6278 	switch (addressSpec) {
6279 		case B_ANY_KERNEL_ADDRESS:
6280 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6281 			return B_BAD_VALUE;
6282 	}
6283 
6284 	void* address;
6285 	if (!IS_USER_ADDRESS(userAddress)
6286 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6287 		return B_BAD_ADDRESS;
6288 
6289 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6290 	if (newArea < B_OK)
6291 		return newArea;
6292 
6293 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6294 		return B_BAD_ADDRESS;
6295 
6296 	return newArea;
6297 }
6298 
6299 
6300 area_id
6301 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6302 	uint32 protection, area_id sourceArea)
6303 {
6304 	char name[B_OS_NAME_LENGTH];
6305 	void* address;
6306 
6307 	// filter out some unavailable values (for userland)
6308 	switch (addressSpec) {
6309 		case B_ANY_KERNEL_ADDRESS:
6310 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6311 			return B_BAD_VALUE;
6312 	}
6313 	if ((protection & ~B_USER_PROTECTION) != 0)
6314 		return B_BAD_VALUE;
6315 
6316 	if (!IS_USER_ADDRESS(userName)
6317 		|| !IS_USER_ADDRESS(userAddress)
6318 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6319 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6320 		return B_BAD_ADDRESS;
6321 
6322 	fix_protection(&protection);
6323 
6324 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
6325 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6326 		false);
6327 	if (clonedArea < B_OK)
6328 		return clonedArea;
6329 
6330 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6331 		delete_area(clonedArea);
6332 		return B_BAD_ADDRESS;
6333 	}
6334 
6335 	return clonedArea;
6336 }
6337 
6338 
6339 area_id
6340 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6341 	size_t size, uint32 lock, uint32 protection)
6342 {
6343 	char name[B_OS_NAME_LENGTH];
6344 	void* address;
6345 
6346 	// filter out some unavailable values (for userland)
6347 	switch (addressSpec) {
6348 		case B_ANY_KERNEL_ADDRESS:
6349 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6350 			return B_BAD_VALUE;
6351 	}
6352 	if ((protection & ~B_USER_PROTECTION) != 0)
6353 		return B_BAD_VALUE;
6354 
6355 	if (!IS_USER_ADDRESS(userName)
6356 		|| !IS_USER_ADDRESS(userAddress)
6357 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6358 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6359 		return B_BAD_ADDRESS;
6360 
6361 	if (addressSpec == B_EXACT_ADDRESS
6362 		&& IS_KERNEL_ADDRESS(address))
6363 		return B_BAD_VALUE;
6364 
6365 	fix_protection(&protection);
6366 
6367 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
6368 		(char*)name, &address, addressSpec, size, lock, protection, 0, 0,
6369 		false);
6370 
6371 	if (area >= B_OK
6372 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6373 		delete_area(area);
6374 		return B_BAD_ADDRESS;
6375 	}
6376 
6377 	return area;
6378 }
6379 
6380 
6381 status_t
6382 _user_delete_area(area_id area)
6383 {
6384 	// Unlike the BeOS implementation, you can now only delete areas
6385 	// that you have created yourself from userland.
6386 	// The documentation for delete_area() explicitly states that this
6387 	// will be restricted in the future, and so it will.
6388 	return vm_delete_area(vm_current_user_address_space_id(), area, false);
6389 }
6390 
6391 
6392 // TODO: create a BeOS style call for this!
6393 
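/*!	Syscall backing mmap() style file mappings in userland: maps \a size bytes
	of the file referred to by \a fd at \a offset into the calling team's
	address space. The kernel always gets read (and, for writable mappings,
	write) access to the created area.
*/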
6394 area_id
6395 _user_map_file(const char* userName, void** userAddress, int addressSpec,
6396 	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
6397 	off_t offset)
6398 {
6399 	char name[B_OS_NAME_LENGTH];
6400 	void* address;
6401 	area_id area;
6402 
6403 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6404 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6405 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6406 		return B_BAD_ADDRESS;
6407 
6408 	if (addressSpec == B_EXACT_ADDRESS) {
6409 		if ((addr_t)address + size < (addr_t)address)
6410 			return B_BAD_VALUE;
6411 		if (!IS_USER_ADDRESS(address)
6412 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6413 			return B_BAD_ADDRESS;
6414 		}
6415 	}
6416 
6417 	// userland-created areas can always be accessed by the kernel
6418 	protection |= B_KERNEL_READ_AREA
6419 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6420 
6421 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
6422 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6423 		false);
6424 	if (area < B_OK)
6425 		return area;
6426 
6427 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6428 		return B_BAD_ADDRESS;
6429 
6430 	return area;
6431 }
6432 
6433 
6434 status_t
6435 _user_unmap_memory(void* _address, size_t size)
6436 {
6437 	addr_t address = (addr_t)_address;
6438 
6439 	// check params
6440 	if (size == 0 || (addr_t)address + size < (addr_t)address)
6441 		return B_BAD_VALUE;
6442 
6443 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6444 		return B_BAD_ADDRESS;
6445 
6446 	// write lock the address space
6447 	AddressSpaceWriteLocker locker;
6448 	status_t status = locker.SetTo(team_get_current_team_id());
6449 	if (status != B_OK)
6450 		return status;
6451 
6452 	// unmap
6453 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6454 }
6455 
6456 
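/*!	Syscall used to change the protection of a page-aligned range of the
	calling team's address space (this is what mprotect() style functionality
	maps to). Works in two passes: the first verifies that the whole range is
	covered by areas the caller is allowed to modify, the second installs
	per-page protections and re-maps (or unmaps) the affected pages.
*/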
6457 status_t
6458 _user_set_memory_protection(void* _address, size_t size, int protection)
6459 {
6460 	// check address range
6461 	addr_t address = (addr_t)_address;
6462 	size = PAGE_ALIGN(size);
6463 
6464 	if ((address % B_PAGE_SIZE) != 0)
6465 		return B_BAD_VALUE;
6466 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6467 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6468 		// weird error code required by POSIX
6469 		return ENOMEM;
6470 	}
6471 
6472 	// extend and check protection
6473 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
6474 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
6475 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6476 
6477 	if (!arch_vm_supports_protection(actualProtection))
6478 		return B_NOT_SUPPORTED;
6479 
6480 	// We need to write lock the address space, since we're going to play with
6481 	// the areas.
6482 	AddressSpaceWriteLocker locker;
6483 	status_t status = locker.SetTo(team_get_current_team_id());
6484 	if (status != B_OK)
6485 		return status;
6486 
6487 	// First round: Check whether the whole range is covered by areas and we are
6488 	// allowed to modify them.
6489 	addr_t currentAddress = address;
6490 	size_t sizeLeft = size;
6491 	while (sizeLeft > 0) {
6492 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6493 		if (area == NULL)
6494 			return B_NO_MEMORY;
6495 
6496 		if ((area->protection & B_KERNEL_AREA) != 0)
6497 			return B_NOT_ALLOWED;
6498 
6499 		// TODO: For (shared) mapped files we should check whether the new
6500 		// protections are compatible with the file permissions. We don't have
6501 		// a way to do that yet, though.
6502 
6503 		addr_t offset = currentAddress - area->base;
6504 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6505 
6506 		currentAddress += rangeSize;
6507 		sizeLeft -= rangeSize;
6508 	}
6509 
6510 	// Second round: If the new protections differ from those of an area, create
6511 	// a per-page protection array for it and re-map the mapped pages.
6512 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
6513 	currentAddress = address;
6514 	sizeLeft = size;
6515 	while (sizeLeft > 0) {
6516 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6517 		if (area == NULL)
6518 			return B_NO_MEMORY;
6519 
6520 		addr_t offset = currentAddress - area->base;
6521 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6522 
6523 		currentAddress += rangeSize;
6524 		sizeLeft -= rangeSize;
6525 
6526 		if (area->page_protections == NULL) {
6527 			if (area->protection == actualProtection)
6528 				continue;
6529 
6530 			// In the page protections we store only the three user protections,
6531 			// so we use 4 bits per page.
6532 			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
6533 			area->page_protections = (uint8*)malloc(bytes);
6534 			if (area->page_protections == NULL)
6535 				return B_NO_MEMORY;
6536 
6537 			// init the page protections for all pages to that of the area
6538 			uint32 areaProtection = area->protection
6539 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6540 			memset(area->page_protections,
6541 				areaProtection | (areaProtection << 4), bytes);
6542 		}
6543 
6544 		for (addr_t pageAddress = area->base + offset;
6545 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6546 			map->ops->lock(map);
6547 
6548 			set_area_page_protection(area, pageAddress, protection);
6549 
6550 			addr_t physicalAddress;
6551 			uint32 flags;
6552 
6553 			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
6554 				&flags);
6555 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6556 				map->ops->unlock(map);
6557 				continue;
6558 			}
6559 
6560 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6561 			if (page == NULL) {
6562 				panic("area %p looking up page failed for pa 0x%lx\n", area,
6563 					physicalAddress);
6564 				map->ops->unlock(map);
6565 				return B_ERROR;
6566 			}
6567 
6568 			// If the page is not in the topmost cache and write access is
6569 			// requested, we have to unmap it. Otherwise we can re-map it with
6570 			// the new protection.
6571 			bool unmapPage = page->cache != area->cache
6572 				&& (protection & B_WRITE_AREA) != 0;
6573 
6574 			if (!unmapPage) {
6575 				map->ops->unmap(map, pageAddress,
6576 					pageAddress + B_PAGE_SIZE - 1);
6577 				map->ops->map(map, pageAddress, physicalAddress,
6578 					actualProtection);
6579 			}
6580 
6581 			map->ops->unlock(map);
6582 
6583 			if (unmapPage)
6584 				vm_unmap_page(area, pageAddress, true);
6585 		}
6586 	}
6587 
6588 	return B_OK;
6589 }
6590 
6591 
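/*!	Syscall backing msync() style semantics: writes back modified pages of
	file (vnode) backed mappings in the given range, either synchronously
	(\c MS_SYNC) or asynchronously (\c MS_ASYNC).
*/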
6592 status_t
6593 _user_sync_memory(void* _address, size_t size, int flags)
6594 {
6595 	addr_t address = (addr_t)_address;
6596 	size = PAGE_ALIGN(size);
6597 
6598 	// check params
6599 	if ((address % B_PAGE_SIZE) != 0)
6600 		return B_BAD_VALUE;
6601 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6602 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6603 		// weird error code required by POSIX
6604 		return ENOMEM;
6605 	}
6606 
6607 	bool writeSync = (flags & MS_SYNC) != 0;
6608 	bool writeAsync = (flags & MS_ASYNC) != 0;
6609 	if (writeSync && writeAsync)
6610 		return B_BAD_VALUE;
6611 
6612 	if (size == 0 || (!writeSync && !writeAsync))
6613 		return B_OK;
6614 
6615 	// iterate through the range and sync all concerned areas
6616 	while (size > 0) {
6617 		// read lock the address space
6618 		AddressSpaceReadLocker locker;
6619 		status_t error = locker.SetTo(team_get_current_team_id());
6620 		if (error != B_OK)
6621 			return error;
6622 
6623 		// get the first area
6624 		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
6625 		if (area == NULL)
6626 			return B_NO_MEMORY;
6627 
6628 		uint32 offset = address - area->base;
6629 		size_t rangeSize = min_c(area->size - offset, size);
6630 		offset += area->cache_offset;
6631 
6632 		// lock the cache
6633 		AreaCacheLocker cacheLocker(area);
6634 		if (!cacheLocker)
6635 			return B_BAD_VALUE;
6636 		vm_cache* cache = area->cache;
6637 
6638 		locker.Unlock();
6639 
6640 		uint32 firstPage = offset >> PAGE_SHIFT;
6641 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6642 
6643 		// write the pages
6644 		if (cache->type == CACHE_TYPE_VNODE) {
6645 			if (writeSync) {
6646 				// synchronous
6647 				error = vm_page_write_modified_page_range(cache, firstPage,
6648 					endPage);
6649 				if (error != B_OK)
6650 					return error;
6651 			} else {
6652 				// asynchronous
6653 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6654 				// TODO: This is probably not quite what is supposed to happen.
6655 				// Especially when a lot has to be written, it might take ages
6656 				// until it really hits the disk.
6657 			}
6658 		}
6659 
6660 		address += rangeSize;
6661 		size -= rangeSize;
6662 	}
6663 
6664 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6665 	// synchronize multiple mappings of the same file. In our VM they never get
6666 	// out of sync, though, so we don't have to do anything.
6667 
6668 	return B_OK;
6669 }
6670 
6671 
6672 status_t
6673 _user_memory_advice(void* address, size_t size, int advice)
6674 {
6675 	// TODO: Implement!
6676 	return B_OK;
6677 }
6678