xref: /haiku/src/system/kernel/vm/vm.cpp (revision 239222b2369c39dc52df52b0a7cdd6cc0a91bc92)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <arch/cpu.h>
27 #include <arch/vm.h>
28 #include <boot/elf.h>
29 #include <boot/stage2.h>
30 #include <condition_variable.h>
31 #include <console.h>
32 #include <debug.h>
33 #include <file_cache.h>
34 #include <fs/fd.h>
35 #include <heap.h>
36 #include <kernel.h>
37 #include <int.h>
38 #include <lock.h>
39 #include <low_resource_manager.h>
40 #include <smp.h>
41 #include <system_info.h>
42 #include <thread.h>
43 #include <team.h>
44 #include <tracing.h>
45 #include <util/AutoLock.h>
46 #include <util/khash.h>
47 #include <vm_address_space.h>
48 #include <vm_cache.h>
49 #include <vm_page.h>
50 #include <vm_priv.h>
51 
52 #include "VMAnonymousCache.h"
53 #include "IORequest.h"
54 
55 
56 //#define TRACE_VM
57 //#define TRACE_FAULTS
58 #ifdef TRACE_VM
59 #	define TRACE(x) dprintf x
60 #else
61 #	define TRACE(x) ;
62 #endif
63 #ifdef TRACE_FAULTS
64 #	define FTRACE(x) dprintf x
65 #else
66 #	define FTRACE(x) ;
67 #endif
68 
69 
70 class AddressSpaceReadLocker {
71 public:
72 	AddressSpaceReadLocker(team_id team);
73 	AddressSpaceReadLocker(vm_address_space* space, bool getNewReference);
74 	AddressSpaceReadLocker();
75 	~AddressSpaceReadLocker();
76 
77 	status_t SetTo(team_id team);
78 	void SetTo(vm_address_space* space, bool getNewReference);
79 	status_t SetFromArea(area_id areaID, vm_area*& area);
80 
81 	bool IsLocked() const { return fLocked; }
82 	bool Lock();
83 	void Unlock();
84 
85 	void Unset();
86 
87 	vm_address_space* AddressSpace() { return fSpace; }
88 
89 private:
90 	vm_address_space* fSpace;
91 	bool	fLocked;
92 };
93 
94 class AddressSpaceWriteLocker {
95 public:
96 	AddressSpaceWriteLocker(team_id team);
97 	AddressSpaceWriteLocker();
98 	~AddressSpaceWriteLocker();
99 
100 	status_t SetTo(team_id team);
101 	status_t SetFromArea(area_id areaID, vm_area*& area);
102 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
103 		vm_area*& area);
104 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
105 
106 	bool IsLocked() const { return fLocked; }
107 	void Unlock();
108 
109 	void DegradeToReadLock();
110 	void Unset();
111 
112 	vm_address_space* AddressSpace() { return fSpace; }
113 
114 private:
115 	vm_address_space* fSpace;
116 	bool	fLocked;
117 	bool	fDegraded;
118 };
119 
120 class MultiAddressSpaceLocker {
121 public:
122 	MultiAddressSpaceLocker();
123 	~MultiAddressSpaceLocker();
124 
125 	inline status_t AddTeam(team_id team, bool writeLock,
126 		vm_address_space** _space = NULL);
127 	inline status_t AddArea(area_id area, bool writeLock,
128 		vm_address_space** _space = NULL);
129 
130 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
131 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL);
132 
133 	status_t Lock();
134 	void Unlock();
135 	bool IsLocked() const { return fLocked; }
136 
137 	void Unset();
138 
139 private:
140 	struct lock_item {
141 		vm_address_space*	space;
142 		bool				write_lock;
143 	};
144 
145 	bool _ResizeIfNeeded();
146 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
147 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
148 		vm_address_space** _space);
149 
150 	static int _CompareItems(const void* _a, const void* _b);
151 
152 	lock_item*	fItems;
153 	int32		fCapacity;
154 	int32		fCount;
155 	bool		fLocked;
156 };
157 
158 
159 class AreaCacheLocking {
160 public:
161 	inline bool Lock(vm_cache* lockable)
162 	{
163 		return false;
164 	}
165 
166 	inline void Unlock(vm_cache* lockable)
167 	{
168 		vm_area_put_locked_cache(lockable);
169 	}
170 };
171 
172 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
173 public:
174 	inline AreaCacheLocker(vm_cache* cache = NULL)
175 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
176 	{
177 	}
178 
179 	inline AreaCacheLocker(vm_area* area)
180 		: AutoLocker<vm_cache, AreaCacheLocking>()
181 	{
182 		SetTo(area);
183 	}
184 
185 	inline void SetTo(vm_area* area)
186 	{
187 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
188 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
189 	}
190 };
191 
192 
193 #define AREA_HASH_TABLE_SIZE 1024
194 static area_id sNextAreaID = 1;
195 static hash_table* sAreaHash;
196 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash");
197 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings");
198 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache");
199 
200 static off_t sAvailableMemory;
201 static off_t sNeededMemory;
202 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
203 static uint32 sPageFaults;
204 
205 #if DEBUG_CACHE_LIST
206 
207 struct cache_info {
208 	vm_cache*	cache;
209 	addr_t		page_count;
210 	addr_t		committed;
211 };
212 
213 static const int kCacheInfoTableCount = 100 * 1024;
214 static cache_info* sCacheInfoTable;
215 
216 #endif	// DEBUG_CACHE_LIST
217 
218 
219 // function declarations
220 static void delete_area(vm_address_space* addressSpace, vm_area* area);
221 static vm_address_space* get_address_space_by_area_id(area_id id);
222 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address,
223 	bool isWrite, bool isUser);
224 static status_t map_backing_store(vm_address_space* addressSpace,
225 	vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size,
226 	uint32 addressSpec, int wiring, int protection, int mapping,
227 	vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel);
228 
229 
230 //	#pragma mark -
231 
232 
233 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
234 	:
235 	fSpace(NULL),
236 	fLocked(false)
237 {
238 	SetTo(team);
239 }
240 
241 
242 /*! Takes over the reference of the address space, if \a getNewReference is
243 	\c false.
244 */
245 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space,
246 		bool getNewReference)
247 	:
248 	fSpace(NULL),
249 	fLocked(false)
250 {
251 	SetTo(space, getNewReference);
252 }
253 
254 
255 AddressSpaceReadLocker::AddressSpaceReadLocker()
256 	:
257 	fSpace(NULL),
258 	fLocked(false)
259 {
260 }
261 
262 
263 AddressSpaceReadLocker::~AddressSpaceReadLocker()
264 {
265 	Unset();
266 }
267 
268 
269 void
270 AddressSpaceReadLocker::Unset()
271 {
272 	Unlock();
273 	if (fSpace != NULL)
274 		vm_put_address_space(fSpace);
275 }
276 
277 
278 status_t
279 AddressSpaceReadLocker::SetTo(team_id team)
280 {
281 	fSpace = vm_get_address_space(team);
282 	if (fSpace == NULL)
283 		return B_BAD_TEAM_ID;
284 
285 	rw_lock_read_lock(&fSpace->lock);
286 	fLocked = true;
287 	return B_OK;
288 }
289 
290 
291 /*! Takes over the reference of the address space, if \a getNewReference is
292 	\c false.
293 */
294 void
295 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference)
296 {
297 	fSpace = space;
298 
299 	if (getNewReference)
300 		atomic_add(&fSpace->ref_count, 1);
301 
302 	rw_lock_read_lock(&fSpace->lock);
303 	fLocked = true;
304 }
305 
306 
307 status_t
308 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
309 {
310 	fSpace = get_address_space_by_area_id(areaID);
311 	if (fSpace == NULL)
312 		return B_BAD_TEAM_ID;
313 
314 	rw_lock_read_lock(&fSpace->lock);
315 
316 	rw_lock_read_lock(&sAreaHashLock);
317 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
318 	rw_lock_read_unlock(&sAreaHashLock);
319 
320 	if (area == NULL || area->address_space != fSpace) {
321 		rw_lock_read_unlock(&fSpace->lock);
322 		return B_BAD_VALUE;
323 	}
324 
325 	fLocked = true;
326 	return B_OK;
327 }
328 
329 
330 bool
331 AddressSpaceReadLocker::Lock()
332 {
333 	if (fLocked)
334 		return true;
335 	if (fSpace == NULL)
336 		return false;
337 
338 	rw_lock_read_lock(&fSpace->lock);
339 	fLocked = true;
340 
341 	return true;
342 }
343 
344 
345 void
346 AddressSpaceReadLocker::Unlock()
347 {
348 	if (fLocked) {
349 		rw_lock_read_unlock(&fSpace->lock);
350 		fLocked = false;
351 	}
352 }
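
// Illustrative usage sketch -- added for this annotated listing, not part of
// the original vm.cpp. It shows how AddressSpaceReadLocker::SetFromArea() is
// meant to be used: on success the area's address space is referenced and
// read-locked until the locker goes out of scope. The helper name is
// hypothetical; only APIs declared in this file are assumed.
#if 0
static void
example_inspect_area(area_id id)
{
	vm_area* area;
	AddressSpaceReadLocker locker;
	if (locker.SetFromArea(id, area) != B_OK)
		return;

	// The address space is read-locked here, so area->base and area->size
	// cannot change underneath us.
	dprintf("area %ld: base 0x%lx, size 0x%lx\n", area->id, area->base,
		area->size);
}	// ~AddressSpaceReadLocker() unlocks and drops the reference
#endif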
353 
354 
355 //	#pragma mark -
356 
357 
358 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
359 	:
360 	fSpace(NULL),
361 	fLocked(false),
362 	fDegraded(false)
363 {
364 	SetTo(team);
365 }
366 
367 
368 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
369 	:
370 	fSpace(NULL),
371 	fLocked(false),
372 	fDegraded(false)
373 {
374 }
375 
376 
377 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
378 {
379 	Unset();
380 }
381 
382 
383 void
384 AddressSpaceWriteLocker::Unset()
385 {
386 	Unlock();
387 	if (fSpace != NULL)
388 		vm_put_address_space(fSpace);
389 }
390 
391 
392 status_t
393 AddressSpaceWriteLocker::SetTo(team_id team)
394 {
395 	fSpace = vm_get_address_space(team);
396 	if (fSpace == NULL)
397 		return B_BAD_TEAM_ID;
398 
399 	rw_lock_write_lock(&fSpace->lock);
400 	fLocked = true;
401 	return B_OK;
402 }
403 
404 
405 status_t
406 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
407 {
408 	fSpace = get_address_space_by_area_id(areaID);
409 	if (fSpace == NULL)
410 		return B_BAD_VALUE;
411 
412 	rw_lock_write_lock(&fSpace->lock);
413 
414 	rw_lock_read_lock(&sAreaHashLock);
415 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
416 	rw_lock_read_unlock(&sAreaHashLock);
417 
418 	if (area == NULL || area->address_space != fSpace) {
419 		rw_lock_write_unlock(&fSpace->lock);
420 		return B_BAD_VALUE;
421 	}
422 
423 	fLocked = true;
424 	return B_OK;
425 }
426 
427 
428 status_t
429 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
430 	bool allowKernel, vm_area*& area)
431 {
432 	rw_lock_read_lock(&sAreaHashLock);
433 
434 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
435 	if (area != NULL
436 		&& (area->address_space->id == team
437 			|| (allowKernel && team == vm_kernel_address_space_id()))) {
438 		fSpace = area->address_space;
439 		atomic_add(&fSpace->ref_count, 1);
440 	}
441 
442 	rw_lock_read_unlock(&sAreaHashLock);
443 
444 	if (fSpace == NULL)
445 		return B_BAD_VALUE;
446 
447 	// Try to find the area again -- this time with the address space
448 	// write lock held
449 
450 	rw_lock_write_lock(&fSpace->lock);
451 
452 	rw_lock_read_lock(&sAreaHashLock);
453 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
454 	rw_lock_read_unlock(&sAreaHashLock);
455 
456 	if (area == NULL) {
457 		rw_lock_write_unlock(&fSpace->lock);
458 		return B_BAD_VALUE;
459 	}
460 
461 	fLocked = true;
462 	return B_OK;
463 }
464 
465 
466 status_t
467 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
468 	vm_area*& area)
469 {
470 	return SetFromArea(team, areaID, false, area);
471 }
472 
473 
474 void
475 AddressSpaceWriteLocker::Unlock()
476 {
477 	if (fLocked) {
478 		if (fDegraded)
479 			rw_lock_read_unlock(&fSpace->lock);
480 		else
481 			rw_lock_write_unlock(&fSpace->lock);
482 		fLocked = false;
483 		fDegraded = false;
484 	}
485 }
486 
487 
488 void
489 AddressSpaceWriteLocker::DegradeToReadLock()
490 {
491 	// TODO: the current R/W lock implementation just keeps the write lock here
492 	rw_lock_read_lock(&fSpace->lock);
493 	rw_lock_write_unlock(&fSpace->lock);
494 	fDegraded = true;
495 }
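
// Illustrative sketch -- added here, not part of the original source. It
// shows the write-then-degrade pattern that vm_create_anonymous_area()
// further below uses: modify the area list under the write lock, then keep
// only a read lock while the (potentially slow) page mapping work is done.
// The helper name is hypothetical; only APIs from this file are assumed.
#if 0
static status_t
example_create_then_map(team_id team)
{
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team);
	if (status != B_OK)
		return status;

	// ... insert an area while holding the write lock ...

	locker.DegradeToReadLock();
		// other threads may now read-lock the address space concurrently

	// ... allocate and map pages under the read lock ...

	return B_OK;
}	// Unlock()/Unset() happen in the destructor
#endif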
496 
497 
498 //	#pragma mark -
499 
500 
501 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
502 	:
503 	fItems(NULL),
504 	fCapacity(0),
505 	fCount(0),
506 	fLocked(false)
507 {
508 }
509 
510 
511 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
512 {
513 	Unset();
514 	free(fItems);
515 }
516 
517 
518 /*static*/ int
519 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
520 {
521 	lock_item* a = (lock_item*)_a;
522 	lock_item* b = (lock_item*)_b;
523 	return a->space->id - b->space->id;
524 }
525 
526 
527 bool
528 MultiAddressSpaceLocker::_ResizeIfNeeded()
529 {
530 	if (fCount == fCapacity) {
531 		lock_item* items = (lock_item*)realloc(fItems,
532 			(fCapacity + 4) * sizeof(lock_item));
533 		if (items == NULL)
534 			return false;
535 
536 		fCapacity += 4;
537 		fItems = items;
538 	}
539 
540 	return true;
541 }
542 
543 
544 int32
545 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
546 {
547 	for (int32 i = 0; i < fCount; i++) {
548 		if (fItems[i].space == space)
549 			return i;
550 	}
551 
552 	return -1;
553 }
554 
555 
556 status_t
557 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
558 	bool writeLock, vm_address_space** _space)
559 {
560 	if (!space)
561 		return B_BAD_VALUE;
562 
563 	int32 index = _IndexOfAddressSpace(space);
564 	if (index < 0) {
565 		if (!_ResizeIfNeeded()) {
566 			vm_put_address_space(space);
567 			return B_NO_MEMORY;
568 		}
569 
570 		lock_item& item = fItems[fCount++];
571 		item.space = space;
572 		item.write_lock = writeLock;
573 	} else {
574 
575 		// one reference is enough
576 		vm_put_address_space(space);
577 
578 		fItems[index].write_lock |= writeLock;
579 	}
580 
581 	if (_space != NULL)
582 		*_space = space;
583 
584 	return B_OK;
585 }
586 
587 
588 inline status_t
589 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
590 	vm_address_space** _space)
591 {
592 	return _AddAddressSpace(vm_get_address_space(team), writeLock,
593 		_space);
594 }
595 
596 
597 inline status_t
598 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
599 	vm_address_space** _space)
600 {
601 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
602 		_space);
603 }
604 
605 
606 void
607 MultiAddressSpaceLocker::Unset()
608 {
609 	Unlock();
610 
611 	for (int32 i = 0; i < fCount; i++)
612 		vm_put_address_space(fItems[i].space);
613 
614 	fCount = 0;
615 }
616 
617 
618 status_t
619 MultiAddressSpaceLocker::Lock()
620 {
621 	ASSERT(!fLocked);
622 
623 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
624 
625 	for (int32 i = 0; i < fCount; i++) {
626 		status_t status;
627 		if (fItems[i].write_lock)
628 			status = rw_lock_write_lock(&fItems[i].space->lock);
629 		else
630 			status = rw_lock_read_lock(&fItems[i].space->lock);
631 
632 		if (status < B_OK) {
633 			while (--i >= 0) {
634 				if (fItems[i].write_lock)
635 					rw_lock_write_unlock(&fItems[i].space->lock);
636 				else
637 					rw_lock_read_unlock(&fItems[i].space->lock);
638 			}
639 			return status;
640 		}
641 	}
642 
643 	fLocked = true;
644 	return B_OK;
645 }
646 
647 
648 void
649 MultiAddressSpaceLocker::Unlock()
650 {
651 	if (!fLocked)
652 		return;
653 
654 	for (int32 i = 0; i < fCount; i++) {
655 		if (fItems[i].write_lock)
656 			rw_lock_write_unlock(&fItems[i].space->lock);
657 		else
658 			rw_lock_read_unlock(&fItems[i].space->lock);
659 	}
660 
661 	fLocked = false;
662 }
663 
664 
665 /*!	Adds all address spaces of the areas associated with the given area's cache,
666 	locks them, and locks the cache (getting a reference to it). It retries
667 	until the situation is stable (i.e. neither the cache nor the cache's
668 	area list has changed) or an error occurs.
669 */
670 status_t
671 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
672 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
673 	vm_cache** _cache)
674 {
675 	// remember the original state
676 	int originalCount = fCount;
677 	lock_item* originalItems = NULL;
678 	if (fCount > 0) {
679 		originalItems = new(nothrow) lock_item[fCount];
680 		if (originalItems == NULL)
681 			return B_NO_MEMORY;
682 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
683 	}
684 	ArrayDeleter<lock_item> _(originalItems);
685 
686 	// get the cache
687 	vm_cache* cache;
688 	vm_area* area;
689 	status_t error;
690 	{
691 		AddressSpaceReadLocker locker;
692 		error = locker.SetFromArea(areaID, area);
693 		if (error != B_OK)
694 			return error;
695 
696 		cache = vm_area_get_locked_cache(area);
697 	}
698 
699 	while (true) {
700 		// add all areas
701 		vm_area* firstArea = cache->areas;
702 		for (vm_area* current = firstArea; current;
703 				current = current->cache_next) {
704 			error = AddArea(current->id,
705 				current == area ? writeLockThisOne : writeLockOthers);
706 			if (error != B_OK) {
707 				vm_area_put_locked_cache(cache);
708 				return error;
709 			}
710 		}
711 
712 		// unlock the cache and attempt to lock the address spaces
713 		vm_area_put_locked_cache(cache);
714 
715 		error = Lock();
716 		if (error != B_OK)
717 			return error;
718 
719 		// lock the cache again and check whether anything has changed
720 
721 		// check whether the area is gone in the meantime
722 		rw_lock_read_lock(&sAreaHashLock);
723 		area = (vm_area*)hash_lookup(sAreaHash, &areaID);
724 		rw_lock_read_unlock(&sAreaHashLock);
725 
726 		if (area == NULL) {
727 			Unlock();
728 			return B_BAD_VALUE;
729 		}
730 
731 		// lock the cache
732 		vm_cache* oldCache = cache;
733 		cache = vm_area_get_locked_cache(area);
734 
735 		// If neither the area's cache has changed nor its area list we're
736 		// done.
737 		if (cache == oldCache && firstArea == cache->areas) {
738 			_area = area;
739 			if (_cache != NULL)
740 				*_cache = cache;
741 			return B_OK;
742 		}
743 
744 		// Restore the original state and try again.
745 
746 		// Unlock the address spaces, but keep the cache locked for the next
747 		// iteration.
748 		Unlock();
749 
750 		// Get an additional reference to the original address spaces.
751 		for (int32 i = 0; i < originalCount; i++)
752 			atomic_add(&originalItems[i].space->ref_count, 1);
753 
754 		// Release all references to the current address spaces.
755 		for (int32 i = 0; i < fCount; i++)
756 			vm_put_address_space(fItems[i].space);
757 
758 		// Copy over the original state.
759 		fCount = originalCount;
760 		if (originalItems != NULL)
761 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
762 	}
763 }
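
// Illustrative sketch -- an addition to this listing, not original code. It
// shows the intended use of AddAreaCacheAndLock(): lock every address space
// that has an area on the given area's cache, plus the cache itself, before
// doing something that affects all mappings (e.g. resizing). The helper name
// is hypothetical; only APIs declared in this file are assumed.
#if 0
static status_t
example_lock_cache_and_mappers(area_id id)
{
	MultiAddressSpaceLocker locker;
	vm_area* area;
	vm_cache* cache;
	status_t status = locker.AddAreaCacheAndLock(id, true, true, area, &cache);
	if (status != B_OK)
		return status;

	AreaCacheLocker cacheLocker(cache);
		// the cache comes back locked and referenced; the locker takes over

	// All address spaces with areas on "cache" are now write-locked, so
	// walking cache->areas here is safe.

	return B_OK;
}	// cache and address spaces are released by the lockers' destructors
#endif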
764 
765 
766 //	#pragma mark -
767 
768 
769 #if VM_PAGE_FAULT_TRACING
770 
771 namespace VMPageFaultTracing {
772 
773 class PageFaultStart : public AbstractTraceEntry {
774 public:
775 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
776 		:
777 		fAddress(address),
778 		fPC(pc),
779 		fWrite(write),
780 		fUser(user)
781 	{
782 		Initialized();
783 	}
784 
785 	virtual void AddDump(TraceOutput& out)
786 	{
787 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
788 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
789 	}
790 
791 private:
792 	addr_t	fAddress;
793 	addr_t	fPC;
794 	bool	fWrite;
795 	bool	fUser;
796 };
797 
798 
799 // page fault errors
800 enum {
801 	PAGE_FAULT_ERROR_NO_AREA		= 0,
802 	PAGE_FAULT_ERROR_KERNEL_ONLY,
803 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
804 	PAGE_FAULT_ERROR_READ_PROTECTED,
805 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
806 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
807 };
808 
809 
810 class PageFaultError : public AbstractTraceEntry {
811 public:
812 	PageFaultError(area_id area, status_t error)
813 		:
814 		fArea(area),
815 		fError(error)
816 	{
817 		Initialized();
818 	}
819 
820 	virtual void AddDump(TraceOutput& out)
821 	{
822 		switch (fError) {
823 			case PAGE_FAULT_ERROR_NO_AREA:
824 				out.Print("page fault error: no area");
825 				break;
826 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
827 				out.Print("page fault error: area: %ld, kernel only", fArea);
828 				break;
829 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
830 				out.Print("page fault error: area: %ld, write protected",
831 					fArea);
832 				break;
833 			case PAGE_FAULT_ERROR_READ_PROTECTED:
834 				out.Print("page fault error: area: %ld, read protected", fArea);
835 				break;
836 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
837 				out.Print("page fault error: kernel touching bad user memory");
838 				break;
839 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
840 				out.Print("page fault error: no address space");
841 				break;
842 			default:
843 				out.Print("page fault error: area: %ld, error: %s", fArea,
844 					strerror(fError));
845 				break;
846 		}
847 	}
848 
849 private:
850 	area_id		fArea;
851 	status_t	fError;
852 };
853 
854 
855 class PageFaultDone : public AbstractTraceEntry {
856 public:
857 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
858 			vm_page* page)
859 		:
860 		fArea(area),
861 		fTopCache(topCache),
862 		fCache(cache),
863 		fPage(page)
864 	{
865 		Initialized();
866 	}
867 
868 	virtual void AddDump(TraceOutput& out)
869 	{
870 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
871 			"page: %p", fArea, fTopCache, fCache, fPage);
872 	}
873 
874 private:
875 	area_id		fArea;
876 	VMCache*	fTopCache;
877 	VMCache*	fCache;
878 	vm_page*	fPage;
879 };
880 
881 }	// namespace VMPageFaultTracing
882 
883 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
884 #else
885 #	define TPF(x) ;
886 #endif	// VM_PAGE_FAULT_TRACING
887 
888 
889 //	#pragma mark -
890 
891 
892 static int
893 area_compare(void* _area, const void* key)
894 {
895 	vm_area* area = (vm_area*)_area;
896 	const area_id* id = (const area_id*)key;
897 
898 	if (area->id == *id)
899 		return 0;
900 
901 	return -1;
902 }
903 
904 
905 static uint32
906 area_hash(void* _area, const void* key, uint32 range)
907 {
908 	vm_area* area = (vm_area*)_area;
909 	const area_id* id = (const area_id*)key;
910 
911 	if (area != NULL)
912 		return area->id % range;
913 
914 	return (uint32)*id % range;
915 }
916 
917 
918 static vm_address_space*
919 get_address_space_by_area_id(area_id id)
920 {
921 	vm_address_space* addressSpace = NULL;
922 
923 	rw_lock_read_lock(&sAreaHashLock);
924 
925 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
926 	if (area != NULL) {
927 		addressSpace = area->address_space;
928 		atomic_add(&addressSpace->ref_count, 1);
929 	}
930 
931 	rw_lock_read_unlock(&sAreaHashLock);
932 
933 	return addressSpace;
934 }
935 
936 
937 //! You need to have the address space locked when calling this function
938 static vm_area*
939 lookup_area(vm_address_space* addressSpace, area_id id)
940 {
941 	rw_lock_read_lock(&sAreaHashLock);
942 
943 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
944 	if (area != NULL && area->address_space != addressSpace)
945 		area = NULL;
946 
947 	rw_lock_read_unlock(&sAreaHashLock);
948 
949 	return area;
950 }
951 
952 
953 static vm_area*
954 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags)
955 {
956 	vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area));
957 	if (reserved == NULL)
958 		return NULL;
959 
960 	memset(reserved, 0, sizeof(vm_area));
961 	reserved->id = RESERVED_AREA_ID;
962 		// this marks it as reserved space
963 	reserved->protection = flags;
964 	reserved->address_space = addressSpace;
965 
966 	return reserved;
967 }
968 
969 
970 static vm_area*
971 create_area_struct(vm_address_space* addressSpace, const char* name,
972 	uint32 wiring, uint32 protection)
973 {
974 	// restrict the area name to B_OS_NAME_LENGTH
975 	size_t length = strlen(name) + 1;
976 	if (length > B_OS_NAME_LENGTH)
977 		length = B_OS_NAME_LENGTH;
978 
979 	vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area));
980 	if (area == NULL)
981 		return NULL;
982 
983 	area->name = (char*)malloc_nogrow(length);
984 	if (area->name == NULL) {
985 		free(area);
986 		return NULL;
987 	}
988 	strlcpy(area->name, name, length);
989 
990 	area->id = atomic_add(&sNextAreaID, 1);
991 	area->base = 0;
992 	area->size = 0;
993 	area->protection = protection;
994 	area->wiring = wiring;
995 	area->memory_type = 0;
996 
997 	area->cache = NULL;
998 	area->cache_offset = 0;
999 
1000 	area->address_space = addressSpace;
1001 	area->address_space_next = NULL;
1002 	area->cache_next = area->cache_prev = NULL;
1003 	area->hash_next = NULL;
1004 	new (&area->mappings) vm_area_mappings;
1005 	area->page_protections = NULL;
1006 
1007 	return area;
1008 }
1009 
1010 
1011 /*!	Finds a reserved area that covers the region spanned by \a start and
1012 	\a size, inserts the \a area into that region and makes sure that
1013 	there are reserved regions for the remaining parts.
1014 */
1015 static status_t
1016 find_reserved_area(vm_address_space* addressSpace, addr_t start,
1017 	addr_t size, vm_area* area)
1018 {
1019 	vm_area* last = NULL;
1020 	vm_area* next;
1021 
1022 	next = addressSpace->areas;
1023 	while (next != NULL) {
1024 		if (next->base <= start
1025 			&& next->base + (next->size - 1) >= start + (size - 1)) {
1026 			// This area covers the requested range
1027 			if (next->id != RESERVED_AREA_ID) {
1028 				// but it's not reserved space, it's a real area
1029 				return B_BAD_VALUE;
1030 			}
1031 
1032 			break;
1033 		}
1034 
1035 		last = next;
1036 		next = next->address_space_next;
1037 	}
1038 
1039 	if (next == NULL)
1040 		return B_ENTRY_NOT_FOUND;
1041 
1042 	// Now we have to transfer the requested part of the reserved
1043 	// range to the new area - and remove, resize or split the old
1044 	// reserved area.
1045 
1046 	if (start == next->base) {
1047 		// the area starts at the beginning of the reserved range
1048 		if (last)
1049 			last->address_space_next = area;
1050 		else
1051 			addressSpace->areas = area;
1052 
1053 		if (size == next->size) {
1054 			// the new area fully covers the reserved range
1055 			area->address_space_next = next->address_space_next;
1056 			vm_put_address_space(addressSpace);
1057 			free(next);
1058 		} else {
1059 			// resize the reserved range behind the area
1060 			area->address_space_next = next;
1061 			next->base += size;
1062 			next->size -= size;
1063 		}
1064 	} else if (start + size == next->base + next->size) {
1065 		// the area is at the end of the reserved range
1066 		area->address_space_next = next->address_space_next;
1067 		next->address_space_next = area;
1068 
1069 		// resize the reserved range before the area
1070 		next->size = start - next->base;
1071 	} else {
1072 		// the area splits the reserved range into two separate ones
1073 		// we need a new reserved area to cover this space
1074 		vm_area* reserved = create_reserved_area_struct(addressSpace,
1075 			next->protection);
1076 		if (reserved == NULL)
1077 			return B_NO_MEMORY;
1078 
1079 		atomic_add(&addressSpace->ref_count, 1);
1080 		reserved->address_space_next = next->address_space_next;
1081 		area->address_space_next = reserved;
1082 		next->address_space_next = area;
1083 
1084 		// resize regions
1085 		reserved->size = next->base + next->size - start - size;
1086 		next->size = start - next->base;
1087 		reserved->base = start + size;
1088 		reserved->cache_offset = next->cache_offset;
1089 	}
1090 
1091 	area->base = start;
1092 	area->size = size;
1093 	addressSpace->change_count++;
1094 
1095 	return B_OK;
1096 }
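
// Worked example -- an annotation added to this listing, not original code.
// Suppose a reserved range spans [0x1000, 0x8fff] (base 0x1000, size 0x8000)
// and the new area needs [0x3000, 0x4fff] (start 0x3000, size 0x2000).
// Neither end of the reserved range matches, so the split branch above runs:
//	reserved->size = next->base + next->size - start - size
//	               = 0x1000 + 0x8000 - 0x3000 - 0x2000 = 0x4000
//	next->size     = start - next->base = 0x2000  -> front piece [0x1000, 0x2fff]
//	reserved->base = start + size       = 0x5000  -> back piece  [0x5000, 0x8fff]
// leaving the new area in the middle at [0x3000, 0x4fff].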
1097 
1098 
1099 /*!	Verifies that an area with the given aligned base and size fits into
1100 	the spot defined by base and limit, and checks for overflows.
1101 */
1102 static inline bool
1103 is_valid_spot(addr_t base, addr_t alignedBase, addr_t size, addr_t limit)
1104 {
1105 	return (alignedBase >= base && alignedBase + (size - 1) > alignedBase
1106 		&& alignedBase + (size - 1) <= limit);
1107 }
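
// Worked example -- an annotation added to this listing, not original code.
// The middle term of is_valid_spot() catches address wrap-around: on a 32-bit
// addr_t, alignedBase = 0xfffff000 with size = 0x2000 yields
// alignedBase + (size - 1) = 0x00000fff, which is not greater than
// alignedBase, so the spot is rejected even though the base and limit checks
// alone might pass.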
1108 
1109 
1110 /*!	Must be called with this address space's write lock held */
1111 static status_t
1112 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start,
1113 	addr_t size, addr_t end, uint32 addressSpec, vm_area* area)
1114 {
1115 	vm_area* last = NULL;
1116 	vm_area* next;
1117 	bool foundSpot = false;
1118 
1119 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
1120 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
1121 		size, end, addressSpec, area));
1122 
1123 	// do some sanity checking
1124 	if (start < addressSpace->base || size == 0
1125 		|| end > addressSpace->base + (addressSpace->size - 1)
1126 		|| start + (size - 1) > end)
1127 		return B_BAD_ADDRESS;
1128 
1129 	if (addressSpec == B_EXACT_ADDRESS && area->id != RESERVED_AREA_ID) {
1130 		// search for a reserved area
1131 		status_t status = find_reserved_area(addressSpace, start, size, area);
1132 		if (status == B_OK || status == B_BAD_VALUE)
1133 			return status;
1134 
1135 		// There was no reserved area, and the slot doesn't seem to be used
1136 		// already
1137 		// TODO: this could be further optimized.
1138 	}
1139 
1140 	size_t alignment = B_PAGE_SIZE;
1141 	if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) {
1142 		// align the memory to the next power of two of the size
1143 		while (alignment < size)
1144 			alignment <<= 1;
1145 	}
1146 
1147 	start = ROUNDUP(start, alignment);
1148 
1149 	// walk up to the spot where we should start searching
1150 second_chance:
1151 	next = addressSpace->areas;
1152 	while (next != NULL) {
1153 		if (next->base > start + (size - 1)) {
1154 			// we have a winner
1155 			break;
1156 		}
1157 
1158 		last = next;
1159 		next = next->address_space_next;
1160 	}
1161 
1162 	// find the right spot depending on the address specification - the area
1163 	// will be inserted directly after "last" ("next" is not referenced anymore)
1164 
1165 	switch (addressSpec) {
1166 		case B_ANY_ADDRESS:
1167 		case B_ANY_KERNEL_ADDRESS:
1168 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1169 		{
1170 			// find a hole big enough for a new area
1171 			if (last == NULL) {
1172 				// see if we can build it at the beginning of the virtual map
1173 				addr_t alignedBase = ROUNDUP(addressSpace->base, alignment);
1174 				if (is_valid_spot(addressSpace->base, alignedBase, size,
1175 						next == NULL ? end : next->base)) {
1176 					foundSpot = true;
1177 					area->base = alignedBase;
1178 					break;
1179 				}
1180 
1181 				last = next;
1182 				next = next->address_space_next;
1183 			}
1184 
1185 			// keep walking
1186 			while (next != NULL) {
1187 				addr_t alignedBase = ROUNDUP(last->base + last->size, alignment);
1188 				if (is_valid_spot(last->base + (last->size - 1), alignedBase,
1189 						size, next->base)) {
1190 					foundSpot = true;
1191 					area->base = alignedBase;
1192 					break;
1193 				}
1194 
1195 				last = next;
1196 				next = next->address_space_next;
1197 			}
1198 
1199 			if (foundSpot)
1200 				break;
1201 
1202 			addr_t alignedBase = ROUNDUP(last->base + last->size, alignment);
1203 			if (is_valid_spot(last->base + (last->size - 1), alignedBase,
1204 					size, end)) {
1205 				// got a spot
1206 				foundSpot = true;
1207 				area->base = alignedBase;
1208 				break;
1209 			} else if (area->id != RESERVED_AREA_ID) {
1210 				// We didn't find a free spot - if there are any reserved areas,
1211 				// we can now test those for free space
1212 				// TODO: it would make sense to start with the biggest of them
1213 				next = addressSpace->areas;
1214 				for (last = NULL; next != NULL;
1215 						next = next->address_space_next) {
1216 					if (next->id != RESERVED_AREA_ID) {
1217 						last = next;
1218 						continue;
1219 					}
1220 
1221 					// TODO: take free space after the reserved area into
1222 					// account!
1223 					addr_t alignedBase = ROUNDUP(next->base, alignment);
1224 					if (next->base == alignedBase && next->size == size) {
1225 						// The reserved area is entirely covered, and thus,
1226 						// removed
1227 						if (last)
1228 							last->address_space_next = next->address_space_next;
1229 						else
1230 							addressSpace->areas = next->address_space_next;
1231 
1232 						foundSpot = true;
1233 						area->base = alignedBase;
1234 						free(next);
1235 						break;
1236 					}
1237 
1238 					if ((next->protection & RESERVED_AVOID_BASE) == 0
1239 						&& alignedBase == next->base && next->size >= size) {
1240 						// The new area will be placed at the beginning of the
1241 						// reserved area and the reserved area will be offset
1242 						// and resized
1243 						foundSpot = true;
1244 						next->base += size;
1245 						next->size -= size;
1246 						area->base = alignedBase;
1247 						break;
1248 					}
1249 
1250 					if (is_valid_spot(next->base, alignedBase, size,
1251 							next->base + (next->size - 1))) {
1252 						// The new area will be placed at the end of the
1253 						// reserved area, and the reserved area will be resized
1254 						// to make space
1255 						alignedBase = ROUNDDOWN(next->base + next->size - size,
1256 							alignment);
1257 
1258 						foundSpot = true;
1259 						next->size = alignedBase - next->base;
1260 						area->base = alignedBase;
1261 						last = next;
1262 						break;
1263 					}
1264 
1265 					last = next;
1266 				}
1267 			}
1268 			break;
1269 		}
1270 
1271 		case B_BASE_ADDRESS:
1272 		{
1273 			// find a hole big enough for a new area beginning with "start"
1274 			if (last == NULL) {
1275 				// see if we can build it at the specified start address
1276 				if (next == NULL || next->base > start + (size - 1)) {
1277 					foundSpot = true;
1278 					area->base = start;
1279 					break;
1280 				}
1281 
1282 				last = next;
1283 				next = next->address_space_next;
1284 			}
1285 
1286 			// keep walking
1287 			while (next != NULL) {
1288 				if (next->base - (last->base + last->size) >= size) {
1289 					// we found a spot (it'll be filled up below)
1290 					break;
1291 				}
1292 
1293 				last = next;
1294 				next = next->address_space_next;
1295 			}
1296 
1297 			addr_t lastEnd = last->base + (last->size - 1);
1298 			if (next != NULL || end - lastEnd >= size) {
1299 				// got a spot
1300 				foundSpot = true;
1301 				if (lastEnd < start)
1302 					area->base = start;
1303 				else
1304 					area->base = lastEnd + 1;
1305 				break;
1306 			}
1307 
1308 			// we didn't find a free spot in the requested range, so we'll
1309 			// try again without any restrictions
1310 			start = addressSpace->base;
1311 			addressSpec = B_ANY_ADDRESS;
1312 			last = NULL;
1313 			goto second_chance;
1314 		}
1315 
1316 		case B_EXACT_ADDRESS:
1317 			// see if we can create it exactly here
1318 			if ((last == NULL || last->base + (last->size - 1) < start)
1319 				&& (next == NULL || next->base > start + (size - 1))) {
1320 				foundSpot = true;
1321 				area->base = start;
1322 				break;
1323 			}
1324 			break;
1325 		default:
1326 			return B_BAD_VALUE;
1327 	}
1328 
1329 	if (!foundSpot)
1330 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1331 
1332 	area->size = size;
1333 	if (last) {
1334 		area->address_space_next = last->address_space_next;
1335 		last->address_space_next = area;
1336 	} else {
1337 		area->address_space_next = addressSpace->areas;
1338 		addressSpace->areas = area;
1339 	}
1340 
1341 	addressSpace->change_count++;
1342 	return B_OK;
1343 }
1344 
1345 
1346 /*!	This inserts the area you pass into the specified address space.
1347 	It will also set the "_address" argument to its base address when
1348 	the call succeeds.
1349 	You need to hold the vm_address_space write lock.
1350 */
1351 static status_t
1352 insert_area(vm_address_space* addressSpace, void** _address,
1353 	uint32 addressSpec, addr_t size, vm_area* area)
1354 {
1355 	addr_t searchBase, searchEnd;
1356 	status_t status;
1357 
1358 	switch (addressSpec) {
1359 		case B_EXACT_ADDRESS:
1360 			searchBase = (addr_t)*_address;
1361 			searchEnd = (addr_t)*_address + (size - 1);
1362 			break;
1363 
1364 		case B_BASE_ADDRESS:
1365 			searchBase = (addr_t)*_address;
1366 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1367 			break;
1368 
1369 		case B_ANY_ADDRESS:
1370 		case B_ANY_KERNEL_ADDRESS:
1371 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1372 			searchBase = addressSpace->base;
1373 			// TODO: remove this again when vm86 mode is moved into the kernel
1374 			// completely (currently needs a userland address space!)
1375 			if (searchBase == USER_BASE)
1376 				searchBase = USER_BASE_ANY;
1377 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1378 			break;
1379 
1380 		default:
1381 			return B_BAD_VALUE;
1382 	}
1383 
1384 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1385 		searchEnd, addressSpec, area);
1386 	if (status == B_OK)
1387 		*_address = (void*)area->base;
1388 
1389 	return status;
1390 }
1391 
1392 
1393 static inline void
1394 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection)
1395 {
1396 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
1397 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1398 	uint8& entry = area->page_protections[pageIndex / 2];
1399 	if (pageIndex % 2 == 0)
1400 		entry = (entry & 0xf0) | protection;
1401 	else
1402 		entry = (entry & 0x0f) | (protection << 4);
1403 }
1404 
1405 
1406 static inline uint32
1407 get_area_page_protection(vm_area* area, addr_t pageAddress)
1408 {
1409 	if (area->page_protections == NULL)
1410 		return area->protection;
1411 
1412 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1413 	uint32 protection = area->page_protections[pageIndex / 2];
1414 	if (pageIndex % 2 == 0)
1415 		protection &= 0x0f;
1416 	else
1417 		protection >>= 4;
1418 
1419 	return protection | B_KERNEL_READ_AREA
1420 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1421 }
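
// Worked example -- an annotation added to this listing, not original code.
// Per-page protections are packed two to a byte. For an area based at
// 0x10000, set_area_page_protection(area, 0x10000 + 5 * B_PAGE_SIZE,
// B_READ_AREA) computes pageIndex = 5, so byte page_protections[2] is used,
// and because the index is odd the value lands in the high nibble:
//	entry = (entry & 0x0f) | (B_READ_AREA << 4)
// get_area_page_protection() later shifts that nibble back down and always
// adds B_KERNEL_READ_AREA (plus B_KERNEL_WRITE_AREA if B_WRITE_AREA is set).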
1422 
1423 
1424 /*!	Cuts a piece out of an area. If the given cut range covers the complete
1425 	area, it is deleted. If it covers the beginning or the end, the area is
1426 	resized accordingly. If the range covers some part in the middle of the
1427 	area, it is split in two; in this case the second area is returned via
1428 	\a _secondArea (the variable is left untouched in the other cases).
1429 	The address space must be write locked.
1430 */
1431 static status_t
1432 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address,
1433 	addr_t lastAddress, vm_area** _secondArea, bool kernel)
1434 {
1435 	// Does the cut range intersect with the area at all?
1436 	addr_t areaLast = area->base + (area->size - 1);
1437 	if (area->base > lastAddress || areaLast < address)
1438 		return B_OK;
1439 
1440 	// Is the area fully covered?
1441 	if (area->base >= address && areaLast <= lastAddress) {
1442 		delete_area(addressSpace, area);
1443 		return B_OK;
1444 	}
1445 
1446 	AreaCacheLocker cacheLocker(area);
1447 	vm_cache* cache = area->cache;
1448 
1449 	// Cut the end only?
1450 	if (areaLast <= lastAddress) {
1451 		addr_t newSize = address - area->base;
1452 
1453 		// unmap pages
1454 		vm_unmap_pages(area, address, area->size - newSize, false);
1455 
1456 		// If no one else uses the area's cache, we can resize it, too.
1457 		if (cache->areas == area && area->cache_next == NULL
1458 			&& list_is_empty(&cache->consumers)) {
1459 			status_t error = cache->Resize(cache->virtual_base + newSize);
1460 			if (error != B_OK)
1461 				return error;
1462 		}
1463 
1464 		area->size = newSize;
1465 
1466 		return B_OK;
1467 	}
1468 
1469 	// Cut the beginning only?
1470 	if (area->base >= address) {
1471 		addr_t newBase = lastAddress + 1;
1472 		addr_t newSize = areaLast - lastAddress;
1473 
1474 		// unmap pages
1475 		vm_unmap_pages(area, area->base, newBase - area->base, false);
1476 
1477 		// TODO: If no one else uses the area's cache, we should resize it, too!
1478 
1479 		area->cache_offset += newBase - area->base;
1480 		area->base = newBase;
1481 		area->size = newSize;
1482 
1483 		return B_OK;
1484 	}
1485 
1486 	// The tough part -- cut a piece out of the middle of the area.
1487 	// We do that by shrinking the area to the begin section and creating a
1488 	// new area for the end section.
1489 
1490 	addr_t firstNewSize = address - area->base;
1491 	addr_t secondBase = lastAddress + 1;
1492 	addr_t secondSize = areaLast - lastAddress;
1493 
1494 	// unmap pages
1495 	vm_unmap_pages(area, address, area->size - firstNewSize, false);
1496 
1497 	// resize the area
1498 	addr_t oldSize = area->size;
1499 	area->size = firstNewSize;
1500 
1501 	// TODO: If no one else uses the area's cache, we might want to create a
1502 	// new cache for the second area, transfer the concerned pages from the
1503 	// first cache to it and resize the first cache.
1504 
1505 	// map the second area
1506 	vm_area* secondArea;
1507 	void* secondBaseAddress = (void*)secondBase;
1508 	status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress,
1509 		area->cache_offset + (secondBase - area->base), secondSize,
1510 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
1511 		&secondArea, area->name, false, kernel);
1512 	if (error != B_OK) {
1513 		area->size = oldSize;
1514 		return error;
1515 	}
1516 
1517 	// We need a cache reference for the new area.
1518 	cache->AcquireRefLocked();
1519 
1520 	if (_secondArea != NULL)
1521 		*_secondArea = secondArea;
1522 
1523 	return B_OK;
1524 }
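
// Worked example -- an annotation added to this listing, not original code.
// For an area at [0x100000, 0x10ffff] (size 0x10000), cutting
// [0x104000, 0x107fff] takes the middle branch above: the area is shrunk to
// firstNewSize = 0x4000, and map_backing_store() creates a second area at
// secondBase = 0x108000 with secondSize = 0x8000, backed by the same cache
// at cache_offset + 0x8000 and returned through _secondArea.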
1525 
1526 
1527 static inline void
1528 increment_page_wired_count(vm_page* page)
1529 {
1530 	// TODO: needs to be atomic on all platforms!
1531 	// ... but at least the check isn't. Consequently we should hold
1532 	// sMappingLock, which would allow us to even avoid atomic_add() on
1533 	// gMappedPagesCount.
1534 	if (page->wired_count++ == 0) {
1535 		if (page->mappings.IsEmpty())
1536 			atomic_add(&gMappedPagesCount, 1);
1537 	}
1538 }
1539 
1540 
1541 static inline void
1542 decrement_page_wired_count(vm_page* page)
1543 {
1544 	if (--page->wired_count == 0) {
1545 		// TODO: needs to be atomic on all platforms!
1546 		// See above!
1547 		if (page->mappings.IsEmpty())
1548 			atomic_add(&gMappedPagesCount, -1);
1549 	}
1550 }
1551 
1552 
1553 /*!	Deletes all areas in the given address range.
1554 	The address space must be write-locked.
1555 */
1556 static status_t
1557 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size,
1558 	bool kernel)
1559 {
1560 	size = PAGE_ALIGN(size);
1561 	addr_t lastAddress = address + (size - 1);
1562 
1563 	// Check, whether the caller is allowed to modify the concerned areas.
1564 	vm_area* area;
1565 	if (!kernel) {
1566 		area = addressSpace->areas;
1567 		while (area != NULL) {
1568 			vm_area* nextArea = area->address_space_next;
1569 
1570 			if (area->id != RESERVED_AREA_ID) {
1571 				addr_t areaLast = area->base + (area->size - 1);
1572 				if (area->base < lastAddress && address < areaLast) {
1573 					if ((area->protection & B_KERNEL_AREA) != 0)
1574 						return B_NOT_ALLOWED;
1575 				}
1576 			}
1577 
1578 			area = nextArea;
1579 		}
1580 	}
1581 
1582 	area = addressSpace->areas;
1583 	while (area != NULL) {
1584 		vm_area* nextArea = area->address_space_next;
1585 
1586 		if (area->id != RESERVED_AREA_ID) {
1587 			addr_t areaLast = area->base + (area->size - 1);
1588 			if (area->base < lastAddress && address < areaLast) {
1589 				status_t error = cut_area(addressSpace, area, address,
1590 					lastAddress, NULL, kernel);
1591 				if (error != B_OK)
1592 					return error;
1593 					// Failing after already messing with areas is ugly, but we
1594 					// can't do anything about it.
1595 			}
1596 		}
1597 
1598 		area = nextArea;
1599 	}
1600 
1601 	return B_OK;
1602 }
1603 
1604 
1605 /*! You need to hold the lock of the cache and the write lock of the address
1606 	space when calling this function.
1607 	Note that in case of error your cache will be temporarily unlocked.
1608 */
1609 static status_t
1610 map_backing_store(vm_address_space* addressSpace, vm_cache* cache,
1611 	void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1612 	int wiring, int protection, int mapping, vm_area** _area,
1613 	const char* areaName, bool unmapAddressRange, bool kernel)
1614 {
1615 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, "
1616 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
1617 		"'%s'\n", addressSpace, cache, *_virtualAddress, offset, size,
1618 		addressSpec, wiring, protection, _area, areaName));
1619 	cache->AssertLocked();
1620 
1621 	vm_area* area = create_area_struct(addressSpace, areaName, wiring,
1622 		protection);
1623 	if (area == NULL)
1624 		return B_NO_MEMORY;
1625 
1626 	status_t status;
1627 
1628 	// if this is a private map, we need to create a new cache
1629 	// to handle the private copies of pages as they are written to
1630 	vm_cache* sourceCache = cache;
1631 	if (mapping == REGION_PRIVATE_MAP) {
1632 		vm_cache* newCache;
1633 
1634 		// create an anonymous cache
1635 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1636 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true);
1637 		if (status != B_OK)
1638 			goto err1;
1639 
1640 		newCache->Lock();
1641 		newCache->temporary = 1;
1642 		newCache->scan_skip = cache->scan_skip;
1643 		newCache->virtual_base = offset;
1644 		newCache->virtual_end = offset + size;
1645 
1646 		cache->AddConsumer(newCache);
1647 
1648 		cache = newCache;
1649 	}
1650 
1651 	status = cache->SetMinimalCommitment(size);
1652 	if (status != B_OK)
1653 		goto err2;
1654 
1655 	// check to see if this address space has entered DELETE state
1656 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1657 		// okay, someone is trying to delete this address space now, so we can't
1658 		// insert the area, so back out
1659 		status = B_BAD_TEAM_ID;
1660 		goto err2;
1661 	}
1662 
1663 	if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) {
1664 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
1665 			size, kernel);
1666 		if (status != B_OK)
1667 			goto err2;
1668 	}
1669 
1670 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1671 	if (status != B_OK)
1672 		goto err2;
1673 
1674 	// attach the cache to the area
1675 	area->cache = cache;
1676 	area->cache_offset = offset;
1677 
1678 	// point the cache back to the area
1679 	cache->InsertAreaLocked(area);
1680 	if (mapping == REGION_PRIVATE_MAP)
1681 		cache->Unlock();
1682 
1683 	// insert the area in the global area hash table
1684 	rw_lock_write_lock(&sAreaHashLock);
1685 	hash_insert(sAreaHash, area);
1686 	rw_lock_write_unlock(&sAreaHashLock);
1687 
1688 	// grab a ref to the address space (the area holds this)
1689 	atomic_add(&addressSpace->ref_count, 1);
1690 
1691 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1692 //		cache, sourceCache, areaName, area);
1693 
1694 	*_area = area;
1695 	return B_OK;
1696 
1697 err2:
1698 	if (mapping == REGION_PRIVATE_MAP) {
1699 		// We created this cache, so we must delete it again. Note, that we
1700 		// need to temporarily unlock the source cache or we'll otherwise
1701 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1702 		sourceCache->Unlock();
1703 		cache->ReleaseRefAndUnlock();
1704 		sourceCache->Lock();
1705 	}
1706 err1:
1707 	free(area->name);
1708 	free(area);
1709 	return status;
1710 }
1711 
1712 
1713 status_t
1714 vm_block_address_range(const char* name, void* address, addr_t size)
1715 {
1716 	if (!arch_vm_supports_protection(0))
1717 		return B_NOT_SUPPORTED;
1718 
1719 	AddressSpaceWriteLocker locker;
1720 	status_t status = locker.SetTo(vm_kernel_address_space_id());
1721 	if (status != B_OK)
1722 		return status;
1723 
1724 	vm_address_space* addressSpace = locker.AddressSpace();
1725 
1726 	// create an anonymous cache
1727 	vm_cache* cache;
1728 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false);
1729 	if (status != B_OK)
1730 		return status;
1731 
1732 	cache->temporary = 1;
1733 	cache->virtual_end = size;
1734 	cache->scan_skip = 1;
1735 	cache->Lock();
1736 
1737 	vm_area* area;
1738 	void* areaAddress = address;
1739 	status = map_backing_store(addressSpace, cache, &areaAddress, 0, size,
1740 		B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name,
1741 		false, true);
1742 	if (status != B_OK) {
1743 		cache->ReleaseRefAndUnlock();
1744 		return status;
1745 	}
1746 
1747 	cache->Unlock();
1748 	area->cache_type = CACHE_TYPE_RAM;
1749 	return area->id;
1750 }
1751 
1752 
1753 status_t
1754 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1755 {
1756 	AddressSpaceWriteLocker locker(team);
1757 	if (!locker.IsLocked())
1758 		return B_BAD_TEAM_ID;
1759 
1760 	// check to see if this address space has entered DELETE state
1761 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1762 		// okay, someone is trying to delete this address space now, so we can't
1763 		// insert the area, so back out
1764 		return B_BAD_TEAM_ID;
1765 	}
1766 
1767 	// search area list and remove any matching reserved ranges
1768 
1769 	vm_area* area = locker.AddressSpace()->areas;
1770 	vm_area* last = NULL;
1771 	while (area) {
1772 		// the area must be completely part of the reserved range
1773 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1774 			&& area->base + area->size <= (addr_t)address + size) {
1775 			// remove reserved range
1776 			vm_area* reserved = area;
1777 			if (last)
1778 				last->address_space_next = reserved->address_space_next;
1779 			else
1780 				locker.AddressSpace()->areas = reserved->address_space_next;
1781 
1782 			area = reserved->address_space_next;
1783 			vm_put_address_space(locker.AddressSpace());
1784 			free(reserved);
1785 			continue;
1786 		}
1787 
1788 		last = area;
1789 		area = area->address_space_next;
1790 	}
1791 
1792 	return B_OK;
1793 }
1794 
1795 
1796 status_t
1797 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1798 	addr_t size, uint32 flags)
1799 {
1800 	if (size == 0)
1801 		return B_BAD_VALUE;
1802 
1803 	AddressSpaceWriteLocker locker(team);
1804 	if (!locker.IsLocked())
1805 		return B_BAD_TEAM_ID;
1806 
1807 	// check to see if this address space has entered DELETE state
1808 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1809 		// okay, someone is trying to delete this address space now, so we
1810 		// can't insert the area, let's back out
1811 		return B_BAD_TEAM_ID;
1812 	}
1813 
1814 	vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags);
1815 	if (area == NULL)
1816 		return B_NO_MEMORY;
1817 
1818 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1819 		size, area);
1820 	if (status != B_OK) {
1821 		free(area);
1822 		return status;
1823 	}
1824 
1825 	// the area is now reserved!
1826 
1827 	area->cache_offset = area->base;
1828 		// we cache the original base address here
1829 
1830 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1831 	return B_OK;
1832 }
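
// Illustrative sketch -- added for this listing, not part of the original
// source. It shows how an address range can be reserved and released again;
// a later B_EXACT_ADDRESS area creation inside the reserved range is carved
// out of it by find_reserved_area() instead of failing. The helper name and
// the address/size values are hypothetical.
#if 0
static status_t
example_reserve_range(team_id team)
{
	void* base = (void*)0x60000000;
	addr_t size = 16 * B_PAGE_SIZE;

	status_t status = vm_reserve_address_range(team, &base, B_EXACT_ADDRESS,
		size, RESERVED_AVOID_BASE);
	if (status != B_OK)
		return status;

	// ... create areas with B_EXACT_ADDRESS inside [base, base + size) ...

	return vm_unreserve_address_range(team, base, size);
}
#endif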
1833 
1834 
1835 area_id
1836 vm_create_anonymous_area(team_id team, const char* name, void** address,
1837 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
1838 	addr_t physicalAddress, uint32 flags, bool kernel)
1839 {
1840 	vm_area* area;
1841 	vm_cache* cache;
1842 	vm_page* page = NULL;
1843 	bool isStack = (protection & B_STACK_AREA) != 0;
1844 	page_num_t guardPages;
1845 	bool canOvercommit = false;
1846 	uint32 newPageState = (flags & CREATE_AREA_DONT_CLEAR) != 0
1847 		? PAGE_STATE_FREE : PAGE_STATE_CLEAR;
1848 
1849 	TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size));
1850 
1851 	size = PAGE_ALIGN(size);
1852 
1853 	if (size == 0)
1854 		return B_BAD_VALUE;
1855 	if (!arch_vm_supports_protection(protection))
1856 		return B_NOT_SUPPORTED;
1857 
1858 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1859 		canOvercommit = true;
1860 
1861 #ifdef DEBUG_KERNEL_STACKS
1862 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1863 		isStack = true;
1864 #endif
1865 
1866 	// check parameters
1867 	switch (addressSpec) {
1868 		case B_ANY_ADDRESS:
1869 		case B_EXACT_ADDRESS:
1870 		case B_BASE_ADDRESS:
1871 		case B_ANY_KERNEL_ADDRESS:
1872 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1873 			break;
1874 		case B_PHYSICAL_BASE_ADDRESS:
1875 			physicalAddress = (addr_t)*address;
1876 			addressSpec = B_ANY_KERNEL_ADDRESS;
1877 			break;
1878 
1879 		default:
1880 			return B_BAD_VALUE;
1881 	}
1882 
1883 	if (physicalAddress != 0)
1884 		wiring = B_CONTIGUOUS;
1885 
1886 	bool doReserveMemory = false;
1887 	switch (wiring) {
1888 		case B_NO_LOCK:
1889 			break;
1890 		case B_FULL_LOCK:
1891 		case B_LAZY_LOCK:
1892 		case B_CONTIGUOUS:
1893 			doReserveMemory = true;
1894 			break;
1895 		case B_ALREADY_WIRED:
1896 			break;
1897 		case B_LOMEM:
1898 		//case B_SLOWMEM:
1899 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1900 			wiring = B_FULL_LOCK;
1901 			doReserveMemory = true;
1902 			break;
1903 		default:
1904 			return B_BAD_VALUE;
1905 	}
1906 
1907 	// For full lock or contiguous areas we're also going to map the pages and
1908 	// thus need to reserve pages for the mapping backend upfront.
1909 	addr_t reservedMapPages = 0;
1910 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1911 		AddressSpaceWriteLocker locker;
1912 		status_t status = locker.SetTo(team);
1913 		if (status != B_OK)
1914 			return status;
1915 
1916 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
1917 		reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
1918 	}
1919 
1920 	// Reserve memory before acquiring the address space lock. This reduces the
1921 	// chances of failure, since while holding the write lock to the address
1922 	// space (if it is the kernel address space that is), the low memory handler
1923 	// won't be able to free anything for us.
1924 	addr_t reservedMemory = 0;
1925 	if (doReserveMemory) {
1926 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1927 		if (vm_try_reserve_memory(size, timeout) != B_OK)
1928 			return B_NO_MEMORY;
1929 		reservedMemory = size;
1930 		// TODO: We don't reserve the memory for the pages for the page
1931 		// directories/tables. We actually need to do so, since we currently
1932 		// don't reclaim them (and probably can't reclaim all of them anyway).
1933 		// Thus there are actually fewer physical pages than there should be,
1934 		// which can get the VM into trouble in low memory situations.
1935 	}
1936 
1937 	AddressSpaceWriteLocker locker;
1938 	vm_address_space* addressSpace;
1939 	status_t status;
1940 
1941 	// For full lock areas reserve the pages before locking the address
1942 	// space. E.g. block caches can't release their memory while we hold the
1943 	// address space lock.
1944 	page_num_t reservedPages = reservedMapPages;
1945 	if (wiring == B_FULL_LOCK)
1946 		reservedPages += size / B_PAGE_SIZE;
1947 	if (reservedPages > 0) {
1948 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1949 			if (!vm_page_try_reserve_pages(reservedPages)) {
1950 				reservedPages = 0;
1951 				status = B_WOULD_BLOCK;
1952 				goto err0;
1953 			}
1954 		} else
1955 			vm_page_reserve_pages(reservedPages);
1956 	}
1957 
1958 	status = locker.SetTo(team);
1959 	if (status != B_OK)
1960 		goto err0;
1961 
1962 	addressSpace = locker.AddressSpace();
1963 
1964 	if (wiring == B_CONTIGUOUS) {
1965 		// we try to allocate the page run here upfront as this may easily
1966 		// fail for obvious reasons
1967 		page = vm_page_allocate_page_run(newPageState, physicalAddress,
1968 			size / B_PAGE_SIZE);
1969 		if (page == NULL) {
1970 			status = B_NO_MEMORY;
1971 			goto err0;
1972 		}
1973 	}
1974 
1975 	// create an anonymous cache
1976 	// if it's a stack, make sure that two pages are available at least
1977 	// if it's a stack, make sure that at least two pages are available
1978 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1979 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1980 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1981 		wiring == B_NO_LOCK);
1982 	if (status != B_OK)
1983 		goto err1;
1984 
1985 	cache->temporary = 1;
1986 	cache->virtual_end = size;
1987 	cache->committed_size = reservedMemory;
1988 		// TODO: This should be done via a method.
1989 	reservedMemory = 0;
1990 
1991 	switch (wiring) {
1992 		case B_LAZY_LOCK:
1993 		case B_FULL_LOCK:
1994 		case B_CONTIGUOUS:
1995 		case B_ALREADY_WIRED:
1996 			cache->scan_skip = 1;
1997 			break;
1998 		case B_NO_LOCK:
1999 			cache->scan_skip = 0;
2000 			break;
2001 	}
2002 
2003 	cache->Lock();
2004 
2005 	status = map_backing_store(addressSpace, cache, address, 0, size,
2006 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
2007 		(flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel);
2008 
2009 	if (status < B_OK) {
2010 		cache->ReleaseRefAndUnlock();
2011 		goto err1;
2012 	}
2013 
2014 	locker.DegradeToReadLock();
2015 
2016 	switch (wiring) {
2017 		case B_NO_LOCK:
2018 		case B_LAZY_LOCK:
2019 			// do nothing - the pages are mapped in as needed
2020 			break;
2021 
2022 		case B_FULL_LOCK:
2023 		{
2024 			// Allocate and map all pages for this area
2025 
2026 			off_t offset = 0;
2027 			for (addr_t address = area->base;
2028 					address < area->base + (area->size - 1);
2029 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
2030 #ifdef DEBUG_KERNEL_STACKS
2031 #	ifdef STACK_GROWS_DOWNWARDS
2032 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
2033 						* B_PAGE_SIZE)
2034 #	else
2035 				if (isStack && address >= area->base + area->size
2036 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
2037 #	endif
2038 					continue;
2039 #endif
2040 				vm_page* page = vm_page_allocate_page(newPageState, true);
2041 				cache->InsertPage(page, offset);
2042 				vm_map_page(area, page, address, protection);
2043 
2044 				// Periodically unreserve pages we've already allocated, so that
2045 				// we don't unnecessarily increase the pressure on the VM.
2046 				if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) {
2047 					page_num_t toUnreserve = 128;
2048 					vm_page_unreserve_pages(toUnreserve);
2049 					reservedPages -= toUnreserve;
2050 				}
2051 			}
2052 
2053 			break;
2054 		}
2055 
2056 		case B_ALREADY_WIRED:
2057 		{
2058 			// The pages should already be mapped. This is only really useful
2059 			// during boot time. Find the appropriate vm_page objects and stick
2060 			// them in the cache object.
2061 			vm_translation_map* map = &addressSpace->translation_map;
2062 			off_t offset = 0;
2063 
2064 			if (!gKernelStartup)
2065 				panic("ALREADY_WIRED flag used outside kernel startup\n");
2066 
2067 			map->ops->lock(map);
2068 
2069 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
2070 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2071 					offset += B_PAGE_SIZE) {
2072 				addr_t physicalAddress;
2073 				uint32 flags;
2074 				status = map->ops->query(map, virtualAddress,
2075 					&physicalAddress, &flags);
2076 				if (status < B_OK) {
2077 					panic("looking up mapping failed for va 0x%lx\n",
2078 						virtualAddress);
2079 				}
2080 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2081 				if (page == NULL) {
2082 					panic("looking up page failed for pa 0x%lx\n",
2083 						physicalAddress);
2084 				}
2085 
2086 				increment_page_wired_count(page);
2087 				vm_page_set_state(page, PAGE_STATE_WIRED);
2088 				cache->InsertPage(page, offset);
2089 			}
2090 
2091 			map->ops->unlock(map);
2092 			break;
2093 		}
2094 
2095 		case B_CONTIGUOUS:
2096 		{
2097 			// We have already allocated our contiguous page run, so we can
2098 			// now just map it into the address space
2099 			vm_translation_map* map = &addressSpace->translation_map;
2100 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
2101 			addr_t virtualAddress = area->base;
2102 			off_t offset = 0;
2103 
2104 			map->ops->lock(map);
2105 
2106 			for (virtualAddress = area->base; virtualAddress < area->base
2107 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2108 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
2109 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2110 				if (page == NULL)
2111 					panic("couldn't lookup physical page just allocated\n");
2112 
2113 				status = map->ops->map(map, virtualAddress, physicalAddress,
2114 					protection);
2115 				if (status < B_OK)
2116 					panic("couldn't map physical page in page run\n");
2117 
2118 				increment_page_wired_count(page);
2119 				vm_page_set_state(page, PAGE_STATE_WIRED);
2120 				cache->InsertPage(page, offset);
2121 			}
2122 
2123 			map->ops->unlock(map);
2124 			break;
2125 		}
2126 
2127 		default:
2128 			break;
2129 	}
2130 
2131 	cache->Unlock();
2132 
2133 	if (reservedPages > 0)
2134 		vm_page_unreserve_pages(reservedPages);
2135 
2136 	TRACE(("vm_create_anonymous_area: done\n"));
2137 
2138 	area->cache_type = CACHE_TYPE_RAM;
2139 	return area->id;
2140 
2141 err1:
2142 	if (wiring == B_CONTIGUOUS) {
2143 		// free the contiguous page run we allocated upfront
2144 		addr_t pageNumber = page->physical_page_number;
2145 		int32 i;
2146 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
2147 			page = vm_lookup_page(pageNumber);
2148 			if (page == NULL)
2149 				panic("couldn't lookup physical page just allocated\n");
2150 
2151 			vm_page_set_state(page, PAGE_STATE_FREE);
2152 		}
2153 	}
2154 
2155 err0:
2156 	if (reservedPages > 0)
2157 		vm_page_unreserve_pages(reservedPages);
2158 	if (reservedMemory > 0)
2159 		vm_unreserve_memory(reservedMemory);
2160 
2161 	return status;
2162 }
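
// A minimal usage sketch, not taken from an actual caller: the area name,
// size, and protection below are illustrative assumptions. A kernel component
// could create a pageable, zero-filled area roughly like this:
//
//	void* base;
//	area_id area = vm_create_anonymous_area(vm_kernel_address_space_id(),
//		"example buffer", &base, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
//		B_NO_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, true);
//	if (area < 0)
//		return area;	// B_NO_MEMORY, B_BAD_TEAM_ID, ...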
2163 
2164 
2165 area_id
2166 vm_map_physical_memory(team_id team, const char* name, void** _address,
2167 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
2168 {
2169 	vm_area* area;
2170 	vm_cache* cache;
2171 	addr_t mapOffset;
2172 
2173 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
2174 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
2175 		name, _address, addressSpec, size, protection, physicalAddress));
2176 
2177 	if (!arch_vm_supports_protection(protection))
2178 		return B_NOT_SUPPORTED;
2179 
2180 	AddressSpaceWriteLocker locker(team);
2181 	if (!locker.IsLocked())
2182 		return B_BAD_TEAM_ID;
2183 
2184 	// if the physical address is not page aligned, move the area down to
2185 	// the previous page boundary; the offset is added back to *_address below
2186 	mapOffset = physicalAddress % B_PAGE_SIZE;
2187 	size += mapOffset;
2188 	physicalAddress -= mapOffset;
2189 
2190 	size = PAGE_ALIGN(size);
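	// For example (illustrative numbers, assuming B_PAGE_SIZE == 0x1000):
	// a request for physicalAddress 0x12345 with size 0x1000 yields
	// mapOffset 0x345; the area then covers physical 0x12000 - 0x13fff
	// (size 0x2000), and *_address is advanced by 0x345 again further below.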
2191 
2192 	// create a device cache
2193 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
2194 	if (status != B_OK)
2195 		return status;
2196 
2197 	// tell the page scanner to skip over this area, its pages are special
2198 	cache->scan_skip = 1;
2199 	cache->virtual_end = size;
2200 
2201 	cache->Lock();
2202 
2203 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2204 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2205 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2206 
2207 	if (status < B_OK)
2208 		cache->ReleaseRefLocked();
2209 
2210 	cache->Unlock();
2211 
2212 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
2213 		// set requested memory type
2214 		status = arch_vm_set_memory_type(area, physicalAddress,
2215 			addressSpec & B_MTR_MASK);
2216 		if (status < B_OK)
2217 			delete_area(locker.AddressSpace(), area);
2218 	}
2219 
2220 	if (status >= B_OK) {
2221 		// make sure our area is mapped in completely
2222 
2223 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2224 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2225 			area->base + (size - 1));
2226 
2227 		vm_page_reserve_pages(reservePages);
2228 		map->ops->lock(map);
2229 
2230 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2231 			map->ops->map(map, area->base + offset, physicalAddress + offset,
2232 				protection);
2233 		}
2234 
2235 		map->ops->unlock(map);
2236 		vm_page_unreserve_pages(reservePages);
2237 	}
2238 
2239 	if (status < B_OK)
2240 		return status;
2241 
2242 	// modify the pointer returned to be offset back into the new area
2243 	// the same way the physical address in was offset
2244 	*_address = (void*)((addr_t)*_address + mapOffset);
2245 
2246 	area->cache_type = CACHE_TYPE_DEVICE;
2247 	return area->id;
2248 }
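
// A hypothetical usage sketch (the name, size, and physical address below are
// made up for illustration): a driver running in the kernel team might map a
// frame buffer like this:
//
//	void* virtualBase;
//	area_id area = vm_map_physical_memory(vm_kernel_address_space_id(),
//		"example frame buffer", &virtualBase, B_ANY_KERNEL_ADDRESS, 0x300000,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xe0000000);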
2249 
2250 
2251 area_id
2252 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2253 	uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs,
2254 	uint32 vecCount)
2255 {
2256 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
2257 		"spec = %ld, protection = %ld, vecCount = %lu)\n", team, name,
2258 		_address, addressSpec, protection, vecCount));
2259 
2260 	if (!arch_vm_supports_protection(protection)
2261 		|| (addressSpec & B_MTR_MASK) != 0) {
2262 		return B_NOT_SUPPORTED;
2263 	}
2264 
2265 	AddressSpaceWriteLocker locker(team);
2266 	if (!locker.IsLocked())
2267 		return B_BAD_TEAM_ID;
2268 
2269 	if (vecCount == 0)
2270 		return B_BAD_VALUE;
2271 
2272 	addr_t size = 0;
2273 	for (uint32 i = 0; i < vecCount; i++) {
2274 		if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0
2275 			|| vecs[i].iov_len % B_PAGE_SIZE != 0) {
2276 			return B_BAD_VALUE;
2277 		}
2278 
2279 		size += vecs[i].iov_len;
2280 	}
2281 
2282 	// create a device cache
2283 	vm_cache* cache;
2284 	status_t result = VMCacheFactory::CreateDeviceCache(cache,
2285 		(addr_t)vecs[0].iov_base);
2286 	if (result != B_OK)
2287 		return result;
2288 
2289 	// tell the page scanner to skip over this area, its pages are special
2290 	cache->scan_skip = 1;
2291 	cache->virtual_end = size;
2292 
2293 	cache->Lock();
2294 
2295 	vm_area* area;
2296 	result = map_backing_store(locker.AddressSpace(), cache, _address,
2297 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2298 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2299 
2300 	if (result != B_OK)
2301 		cache->ReleaseRefLocked();
2302 
2303 	cache->Unlock();
2304 
2305 	if (result != B_OK)
2306 		return result;
2307 
2308 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
2309 	size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2310 		area->base + (size - 1));
2311 
2312 	vm_page_reserve_pages(reservePages);
2313 	map->ops->lock(map);
2314 
2315 	uint32 vecIndex = 0;
2316 	size_t vecOffset = 0;
2317 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2318 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].iov_len) {
2319 			vecOffset = 0;
2320 			vecIndex++;
2321 		}
2322 
2323 		if (vecIndex >= vecCount)
2324 			break;
2325 
2326 		map->ops->map(map, area->base + offset,
2327 			(addr_t)vecs[vecIndex].iov_base + vecOffset, protection);
2328 
2329 		vecOffset += B_PAGE_SIZE;
2330 	}
2331 
2332 	map->ops->unlock(map);
2333 	vm_page_unreserve_pages(reservePages);
2334 
2335 	if (_size != NULL)
2336 		*_size = size;
2337 
2338 	area->cache_type = CACHE_TYPE_DEVICE;
2339 	return area->id;
2340 }
2341 
2342 
2343 area_id
2344 vm_create_null_area(team_id team, const char* name, void** address,
2345 	uint32 addressSpec, addr_t size)
2346 {
2347 	vm_area* area;
2348 	vm_cache* cache;
2349 	status_t status;
2350 
2351 	AddressSpaceWriteLocker locker(team);
2352 	if (!locker.IsLocked())
2353 		return B_BAD_TEAM_ID;
2354 
2355 	size = PAGE_ALIGN(size);
2356 
2357 	// create a null cache
2358 	status = VMCacheFactory::CreateNullCache(cache);
2359 	if (status != B_OK)
2360 		return status;
2361 
2362 	// tell the page scanner to skip over this area, no pages will be mapped here
2363 	cache->scan_skip = 1;
2364 	cache->virtual_end = size;
2365 
2366 	cache->Lock();
2367 
2368 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
2369 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name,
2370 		false, true);
2371 
2372 	if (status < B_OK) {
2373 		cache->ReleaseRefAndUnlock();
2374 		return status;
2375 	}
2376 
2377 	cache->Unlock();
2378 
2379 	area->cache_type = CACHE_TYPE_NULL;
2380 	return area->id;
2381 }
2382 
2383 
2384 /*!	Creates the vnode cache for the specified \a vnode.
2385 	The vnode has to be marked busy when calling this function.
2386 */
2387 status_t
2388 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2389 {
2390 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2391 }
2392 
2393 
2394 /*!	\a cache must be locked. The area's address space must be read-locked.
2395 */
2396 static void
2397 pre_map_area_pages(vm_area* area, VMCache* cache)
2398 {
2399 	addr_t baseAddress = area->base;
2400 	addr_t cacheOffset = area->cache_offset;
2401 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2402 	page_num_t endPage = firstPage + area->size / B_PAGE_SIZE;
2403 
2404 	for (VMCachePagesTree::Iterator it
2405 				= cache->pages.GetIterator(firstPage, true, true);
2406 			vm_page* page = it.Next();) {
2407 		if (page->cache_offset >= endPage)
2408 			break;
2409 
2410 		// skip inactive pages
2411 		if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0)
2412 			continue;
2413 
2414 		vm_map_page(area, page,
2415 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2416 			B_READ_AREA | B_KERNEL_READ_AREA);
2417 	}
2418 }
2419 
2420 
2421 /*!	Will map the file specified by \a fd to an area in memory.
2422 	The file will be mirrored beginning at the specified \a offset. The
2423 	\a offset and \a size arguments have to be page aligned.
2424 */
2425 static area_id
2426 _vm_map_file(team_id team, const char* name, void** _address,
2427 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2428 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2429 {
2430 	// TODO: for binary files, we want to make sure that they get a snapshot
2431 	//	of the file at mapping time, i.e. later changes should not make it
2432 	//	into the mapped copy -- this will need quite some changes to be done
2433 	//	in a nice way
2434 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
2435 		fd, offset, size, mapping));
2436 
2437 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2438 	size = PAGE_ALIGN(size);
2439 
2440 	if (mapping == REGION_NO_PRIVATE_MAP)
2441 		protection |= B_SHARED_AREA;
2442 	if (addressSpec != B_EXACT_ADDRESS)
2443 		unmapAddressRange = false;
2444 
2445 	if (fd < 0) {
2446 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2447 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
2448 			B_NO_LOCK, protection, 0, flags, kernel);
2449 	}
2450 
2451 	// get the open flags of the FD
2452 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2453 	if (descriptor == NULL)
2454 		return EBADF;
2455 	int32 openMode = descriptor->open_mode;
2456 	put_fd(descriptor);
2457 
2458 	// The FD must be open for reading in any case. For a shared mapping with
2459 	// write access, the FD must additionally be open for writing.
2460 	if ((openMode & O_ACCMODE) == O_WRONLY
2461 		|| (mapping == REGION_NO_PRIVATE_MAP
2462 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2463 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2464 		return EACCES;
2465 	}
2466 
2467 	// get the vnode for the object, this also grabs a ref to it
2468 	struct vnode* vnode = NULL;
2469 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2470 	if (status < B_OK)
2471 		return status;
2472 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2473 
2474 	// If we're going to pre-map pages, we need to reserve the pages needed by
2475 	// the mapping backend upfront.
2476 	page_num_t reservedPreMapPages = 0;
2477 	if ((protection & B_READ_AREA) != 0) {
2478 		AddressSpaceWriteLocker locker;
2479 		status = locker.SetTo(team);
2480 		if (status != B_OK)
2481 			return status;
2482 
2483 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2484 		reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
2485 
2486 		locker.Unlock();
2487 
2488 		vm_page_reserve_pages(reservedPreMapPages);
2489 	}
2490 
2491 	struct PageUnreserver {
2492 		PageUnreserver(page_num_t count)
2493 			: fCount(count)
2494 		{
2495 		}
2496 
2497 		~PageUnreserver()
2498 		{
2499 			if (fCount > 0)
2500 				vm_page_unreserve_pages(fCount);
2501 		}
2502 
2503 		page_num_t	fCount;
2504 	} pageUnreserver(reservedPreMapPages);
2505 
2506 	AddressSpaceWriteLocker locker(team);
2507 	if (!locker.IsLocked())
2508 		return B_BAD_TEAM_ID;
2509 
2510 	// TODO: this only works for file systems that use the file cache
2511 	vm_cache* cache;
2512 	status = vfs_get_vnode_cache(vnode, &cache, false);
2513 	if (status < B_OK)
2514 		return status;
2515 
2516 	cache->Lock();
2517 
2518 	vm_area* area;
2519 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2520 		offset, size, addressSpec, 0, protection, mapping, &area, name,
2521 		unmapAddressRange, kernel);
2522 
2523 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2524 		// map_backing_store() cannot know we no longer need the ref
2525 		cache->ReleaseRefLocked();
2526 	}
2527 
2528 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2529 		pre_map_area_pages(area, cache);
2530 
2531 	cache->Unlock();
2532 
2533 	if (status == B_OK) {
2534 		// TODO: this probably deserves a smarter solution, ie. don't always
2535 		// prefetch stuff, and also, probably don't trigger it at this place.
2536 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2537 			// prefetches at max 10 MB starting from "offset"
2538 	}
2539 
2540 	if (status != B_OK)
2541 		return status;
2542 
2543 	area->cache_type = CACHE_TYPE_VNODE;
2544 	return area->id;
2545 }
2546 
2547 
2548 area_id
2549 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2550 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2551 	int fd, off_t offset)
2552 {
2553 	if (!arch_vm_supports_protection(protection))
2554 		return B_NOT_SUPPORTED;
2555 
2556 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2557 		mapping, unmapAddressRange, fd, offset, true);
2558 }
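
// A hypothetical usage sketch (fd, fileSize, and the area name are assumed
// placeholders, not from a real caller): mapping a file read-only into the
// kernel team, comparable to a private mmap():
//
//	void* address = NULL;
//	area_id area = vm_map_file(vm_kernel_address_space_id(), "mapped file",
//		&address, B_ANY_ADDRESS, fileSize, B_READ_AREA | B_KERNEL_READ_AREA,
//		REGION_PRIVATE_MAP, false, fd, 0);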
2559 
2560 
2561 vm_cache*
2562 vm_area_get_locked_cache(vm_area* area)
2563 {
2564 	mutex_lock(&sAreaCacheLock);
2565 
2566 	while (true) {
2567 		vm_cache* cache = area->cache;
2568 
2569 		if (!cache->SwitchLock(&sAreaCacheLock)) {
2570 			// cache has been deleted
2571 			mutex_lock(&sAreaCacheLock);
2572 			continue;
2573 		}
2574 
2575 		mutex_lock(&sAreaCacheLock);
2576 
2577 		if (cache == area->cache) {
2578 			cache->AcquireRefLocked();
2579 			mutex_unlock(&sAreaCacheLock);
2580 			return cache;
2581 		}
2582 
2583 		// the cache changed in the meantime
2584 		cache->Unlock();
2585 	}
2586 }
2587 
2588 
2589 void
2590 vm_area_put_locked_cache(vm_cache* cache)
2591 {
2592 	cache->ReleaseRefAndUnlock();
2593 }
2594 
2595 
2596 area_id
2597 vm_clone_area(team_id team, const char* name, void** address,
2598 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2599 	bool kernel)
2600 {
2601 	vm_area* newArea = NULL;
2602 	vm_area* sourceArea;
2603 
2604 	// Check whether the source area exists and is cloneable. If so, mark it
2605 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2606 	{
2607 		AddressSpaceWriteLocker locker;
2608 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2609 		if (status != B_OK)
2610 			return status;
2611 
2612 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2613 			return B_NOT_ALLOWED;
2614 
2615 		sourceArea->protection |= B_SHARED_AREA;
2616 		protection |= B_SHARED_AREA;
2617 	}
2618 
2619 	// Now lock both address spaces and actually do the cloning.
2620 
2621 	MultiAddressSpaceLocker locker;
2622 	vm_address_space* sourceAddressSpace;
2623 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2624 	if (status != B_OK)
2625 		return status;
2626 
2627 	vm_address_space* targetAddressSpace;
2628 	status = locker.AddTeam(team, true, &targetAddressSpace);
2629 	if (status != B_OK)
2630 		return status;
2631 
2632 	status = locker.Lock();
2633 	if (status != B_OK)
2634 		return status;
2635 
2636 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2637 	if (sourceArea == NULL)
2638 		return B_BAD_VALUE;
2639 
2640 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2641 		return B_NOT_ALLOWED;
2642 
2643 	vm_cache* cache = vm_area_get_locked_cache(sourceArea);
2644 
2645 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2646 	//	have been adapted. Maybe it should be part of the kernel settings,
2647 	//	anyway (so that old drivers can always work).
2648 #if 0
2649 	if (sourceArea->aspace == vm_kernel_address_space()
2650 		&& addressSpace != vm_kernel_address_space()
2651 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2652 		// kernel areas must not be cloned in userland, unless explicitly
2653 		// declared user-cloneable upon construction
2654 		status = B_NOT_ALLOWED;
2655 	} else
2656 #endif
2657 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2658 		status = B_NOT_ALLOWED;
2659 	else {
2660 		status = map_backing_store(targetAddressSpace, cache, address,
2661 			sourceArea->cache_offset, sourceArea->size, addressSpec,
2662 			sourceArea->wiring, protection, mapping, &newArea, name, false,
2663 			kernel);
2664 	}
2665 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2666 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2667 		// to create a new cache, and has therefore already acquired a reference
2668 		// to the source cache - but otherwise it has no idea that we need
2669 		// one.
2670 		cache->AcquireRefLocked();
2671 	}
2672 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2673 		// we need to map in everything at this point
2674 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2675 			// we don't have actual pages to map but a physical area
2676 			vm_translation_map* map
2677 				= &sourceArea->address_space->translation_map;
2678 			map->ops->lock(map);
2679 
2680 			addr_t physicalAddress;
2681 			uint32 oldProtection;
2682 			map->ops->query(map, sourceArea->base, &physicalAddress,
2683 				&oldProtection);
2684 
2685 			map->ops->unlock(map);
2686 
2687 			map = &targetAddressSpace->translation_map;
2688 			size_t reservePages = map->ops->map_max_pages_need(map,
2689 				newArea->base, newArea->base + (newArea->size - 1));
2690 
2691 			vm_page_reserve_pages(reservePages);
2692 			map->ops->lock(map);
2693 
2694 			for (addr_t offset = 0; offset < newArea->size;
2695 					offset += B_PAGE_SIZE) {
2696 				map->ops->map(map, newArea->base + offset,
2697 					physicalAddress + offset, protection);
2698 			}
2699 
2700 			map->ops->unlock(map);
2701 			vm_page_unreserve_pages(reservePages);
2702 		} else {
2703 			vm_translation_map* map = &targetAddressSpace->translation_map;
2704 			size_t reservePages = map->ops->map_max_pages_need(map,
2705 				newArea->base, newArea->base + (newArea->size - 1));
2706 			vm_page_reserve_pages(reservePages);
2707 
2708 			// map in all pages from source
2709 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2710 					vm_page* page  = it.Next();) {
2711 				vm_map_page(newArea, page, newArea->base
2712 					+ ((page->cache_offset << PAGE_SHIFT)
2713 					- newArea->cache_offset), protection);
2714 			}
2715 
2716 			vm_page_unreserve_pages(reservePages);
2717 		}
2718 	}
2719 	if (status == B_OK)
2720 		newArea->cache_type = sourceArea->cache_type;
2721 
2722 	vm_area_put_locked_cache(cache);
2723 
2724 	if (status < B_OK)
2725 		return status;
2726 
2727 	return newArea->id;
2728 }
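
// A hypothetical usage sketch (targetTeam and sourceArea are placeholders):
// cloning an existing area, e.g. a frame buffer area, into another team with
// a shared mapping:
//
//	void* address;
//	area_id clone = vm_clone_area(targetTeam, "frame buffer clone", &address,
//		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceArea, false);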
2729 
2730 
2731 //! The address space must be write locked at this point
2732 static void
2733 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area)
2734 {
2735 	vm_area* temp = addressSpace->areas;
2736 	vm_area* last = NULL;
2737 
2738 	while (temp != NULL) {
2739 		if (area == temp) {
2740 			if (last != NULL) {
2741 				last->address_space_next = temp->address_space_next;
2742 			} else {
2743 				addressSpace->areas = temp->address_space_next;
2744 			}
2745 			addressSpace->change_count++;
2746 			break;
2747 		}
2748 		last = temp;
2749 		temp = temp->address_space_next;
2750 	}
2751 	if (area == addressSpace->area_hint)
2752 		addressSpace->area_hint = NULL;
2753 
2754 	if (temp == NULL)
2755 		panic("remove_area_from_address_space: area not in aspace's area list\n");
2756 }
2757 
2758 
2759 static void
2760 delete_area(vm_address_space* addressSpace, vm_area* area)
2761 {
2762 	rw_lock_write_lock(&sAreaHashLock);
2763 	hash_remove(sAreaHash, area);
2764 	rw_lock_write_unlock(&sAreaHashLock);
2765 
2766 	// At this point the area is removed from the global hash table, but
2767 	// still exists in the area list.
2768 
2769 	// Unmap the virtual address space the area occupied
2770 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2771 
2772 	if (!area->cache->temporary)
2773 		area->cache->WriteModified();
2774 
2775 	arch_vm_unset_memory_type(area);
2776 	remove_area_from_address_space(addressSpace, area);
2777 	vm_put_address_space(addressSpace);
2778 
2779 	area->cache->RemoveArea(area);
2780 	area->cache->ReleaseRef();
2781 
2782 	free(area->page_protections);
2783 	free(area->name);
2784 	free(area);
2785 }
2786 
2787 
2788 status_t
2789 vm_delete_area(team_id team, area_id id, bool kernel)
2790 {
2791 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2792 
2793 	AddressSpaceWriteLocker locker;
2794 	vm_area* area;
2795 	status_t status = locker.SetFromArea(team, id, area);
2796 	if (status != B_OK)
2797 		return status;
2798 
2799 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2800 		return B_NOT_ALLOWED;
2801 
2802 	delete_area(locker.AddressSpace(), area);
2803 	return B_OK;
2804 }
2805 
2806 
2807 /*!	Creates a new cache on top of the given cache, moves all areas from
2808 	the old cache to the new one, and changes the protection of all affected
2809 	areas' pages to read-only.
2810 	Preconditions:
2811 	- The given cache must be locked.
2812 	- All of the cache's areas' address spaces must be read locked.
2813 */
2814 static status_t
2815 vm_copy_on_write_area(vm_cache* lowerCache)
2816 {
2817 	vm_cache* upperCache;
2818 
2819 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2820 
2821 	// We need to separate the cache from its areas. The cache goes one level
2822 	// deeper and we create a new cache in between.
2823 
2824 	// create an anonymous cache
2825 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2826 		0, true);
2827 	if (status != B_OK)
2828 		return status;
2829 
2830 	upperCache->Lock();
2831 
2832 	upperCache->temporary = 1;
2833 	upperCache->scan_skip = lowerCache->scan_skip;
2834 	upperCache->virtual_base = lowerCache->virtual_base;
2835 	upperCache->virtual_end = lowerCache->virtual_end;
2836 
2837 	// transfer the lower cache areas to the upper cache
2838 	mutex_lock(&sAreaCacheLock);
2839 
2840 	upperCache->areas = lowerCache->areas;
2841 	lowerCache->areas = NULL;
2842 
2843 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2844 			tempArea = tempArea->cache_next) {
2845 		tempArea->cache = upperCache;
2846 		upperCache->AcquireRefLocked();
2847 		lowerCache->ReleaseRefLocked();
2848 	}
2849 
2850 	mutex_unlock(&sAreaCacheLock);
2851 
2852 	lowerCache->AddConsumer(upperCache);
2853 
2854 	// We now need to remap all pages from all of the cache's areas read-only, so
2855 	// that a copy will be created on next write access
2856 
2857 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2858 			tempArea = tempArea->cache_next) {
2859 		// The area must be readable in the same way it was previously writable
2860 		uint32 protection = B_KERNEL_READ_AREA;
2861 		if ((tempArea->protection & B_READ_AREA) != 0)
2862 			protection |= B_READ_AREA;
2863 
2864 		vm_translation_map* map = &tempArea->address_space->translation_map;
2865 		map->ops->lock(map);
2866 		map->ops->protect(map, tempArea->base,
2867 			tempArea->base - 1 + tempArea->size, protection);
2868 		map->ops->unlock(map);
2869 	}
2870 
2871 	vm_area_put_locked_cache(upperCache);
2872 
2873 	return B_OK;
2874 }
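
// To illustrate the result schematically: before the call an area maps the
// cache directly (area -> lowerCache); afterwards the chain is
// area -> upperCache -> lowerCache, with upperCache empty and all pages
// mapped read-only. The first write access faults, and the fault handler
// copies the touched page from lowerCache into upperCache, leaving the
// original page in the lower cache untouched.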
2875 
2876 
2877 area_id
2878 vm_copy_area(team_id team, const char* name, void** _address,
2879 	uint32 addressSpec, uint32 protection, area_id sourceID)
2880 {
2881 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2882 
2883 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2884 		// set the same protection for the kernel as for userland
2885 		protection |= B_KERNEL_READ_AREA;
2886 		if (writableCopy)
2887 			protection |= B_KERNEL_WRITE_AREA;
2888 	}
2889 
2890 	// Do the locking: target address space, all address spaces associated with
2891 	// the source cache, and the cache itself.
2892 	MultiAddressSpaceLocker locker;
2893 	vm_address_space* targetAddressSpace;
2894 	vm_cache* cache;
2895 	vm_area* source;
2896 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2897 	if (status == B_OK) {
2898 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2899 			&cache);
2900 	}
2901 	if (status != B_OK)
2902 		return status;
2903 
2904 	AreaCacheLocker cacheLocker(cache);	// already locked
2905 
2906 	if (addressSpec == B_CLONE_ADDRESS) {
2907 		addressSpec = B_EXACT_ADDRESS;
2908 		*_address = (void*)source->base;
2909 	}
2910 
2911 	bool sharedArea = (source->protection & B_SHARED_AREA) != 0;
2912 
2913 	// First, create a cache on top of the source area, or reuse the existing
2914 	// one if this is a shared area.
2915 
2916 	vm_area* target;
2917 	status = map_backing_store(targetAddressSpace, cache, _address,
2918 		source->cache_offset, source->size, addressSpec, source->wiring,
2919 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2920 		&target, name, false, true);
2921 	if (status < B_OK)
2922 		return status;
2923 
2924 	if (sharedArea) {
2925 		// The new area uses the old area's cache, but map_backing_store()
2926 		// hasn't acquired a ref. So we have to do that now.
2927 		cache->AcquireRefLocked();
2928 	}
2929 
2930 	// If the source area is writable, its cache must be made copy-on-write as well
2931 
2932 	if (!sharedArea) {
2933 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2934 			// TODO: do something more useful if this fails!
2935 			if (vm_copy_on_write_area(cache) < B_OK)
2936 				panic("vm_copy_on_write_area() failed!\n");
2937 		}
2938 	}
2939 
2940 	// we return the ID of the newly created area
2941 	return target->id;
2942 }
2943 
2944 
2945 //! You need to hold the cache lock when calling this function
2946 static int32
2947 count_writable_areas(vm_cache* cache, vm_area* ignoreArea)
2948 {
2949 	struct vm_area* area = cache->areas;
2950 	uint32 count = 0;
2951 
2952 	for (; area != NULL; area = area->cache_next) {
2953 		if (area != ignoreArea
2954 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2955 			count++;
2956 	}
2957 
2958 	return count;
2959 }
2960 
2961 
2962 static status_t
2963 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2964 	bool kernel)
2965 {
2966 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2967 		"%#lx)\n", team, areaID, newProtection));
2968 
2969 	if (!arch_vm_supports_protection(newProtection))
2970 		return B_NOT_SUPPORTED;
2971 
2972 	// lock address spaces and cache
2973 	MultiAddressSpaceLocker locker;
2974 	vm_cache* cache;
2975 	vm_area* area;
2976 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2977 		&cache);
	if (status != B_OK)
		return status;
2978 	AreaCacheLocker cacheLocker(cache);	// already locked
2979 
2980 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2981 		return B_NOT_ALLOWED;
2982 
2983 	if (area->protection == newProtection)
2984 		return B_OK;
2985 
2986 	if (team != vm_kernel_address_space_id()
2987 		&& area->address_space->id != team) {
2988 		// unless you're the kernel, you are only allowed to set
2989 		// the protection of your own areas
2990 		return B_NOT_ALLOWED;
2991 	}
2992 
2993 	bool changePageProtection = true;
2994 
2995 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2996 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2997 		// writable -> !writable
2998 
2999 		if (cache->source != NULL && cache->temporary) {
3000 			if (count_writable_areas(cache, area) == 0) {
3001 				// Since this cache is now backed by the pages of its source
3002 				// cache, we can reduce its commitment to cover only the pages
3003 				// that really are in this cache.
3004 
3005 				status = cache->Commit(cache->page_count * B_PAGE_SIZE);
3006 
3007 				// TODO: we may be able to join with our source cache, if
3008 				// count == 0
3009 			}
3010 		}
3011 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
3012 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
3013 		// !writable -> writable
3014 
3015 		if (!list_is_empty(&cache->consumers)) {
3016 			// There are consumers -- we have to insert a new cache. Fortunately
3017 			// vm_copy_on_write_area() does everything that's needed.
3018 			changePageProtection = false;
3019 			status = vm_copy_on_write_area(cache);
3020 		} else {
3021 			// No consumers, so we don't need to insert a new one.
3022 			if (cache->source != NULL && cache->temporary) {
3023 				// the cache's commitment must contain all possible pages
3024 				status = cache->Commit(cache->virtual_end
3025 					- cache->virtual_base);
3026 			}
3027 
3028 			if (status == B_OK && cache->source != NULL) {
3029 				// There's a source cache, hence we can't just change all pages'
3030 				// protection or we might allow writing into pages belonging to
3031 				// a lower cache.
3032 				changePageProtection = false;
3033 
3034 				struct vm_translation_map* map
3035 					= &area->address_space->translation_map;
3036 				map->ops->lock(map);
3037 
3038 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3039 						vm_page* page = it.Next();) {
3040 					addr_t address = area->base
3041 						+ (page->cache_offset << PAGE_SHIFT);
3042 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
3043 						newProtection);
3044 				}
3045 
3046 				map->ops->unlock(map);
3047 			}
3048 		}
3049 	} else {
3050 		// we don't have anything special to do in all other cases
3051 	}
3052 
3053 	if (status == B_OK) {
3054 		// remap existing pages in this cache
3055 		struct vm_translation_map* map = &area->address_space->translation_map;
3056 
3057 		if (changePageProtection) {
3058 			map->ops->lock(map);
3059 			map->ops->protect(map, area->base, area->base - 1 + area->size,
3060 				newProtection);
3061 			map->ops->unlock(map);
3062 		}
3063 
3064 		area->protection = newProtection;
3065 	}
3066 
3067 	return status;
3068 }
3069 
3070 
3071 status_t
3072 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr)
3073 {
3074 	vm_address_space* addressSpace = vm_get_address_space(team);
3075 	if (addressSpace == NULL)
3076 		return B_BAD_TEAM_ID;
3077 
3078 	uint32 dummyFlags;
3079 	status_t status = addressSpace->translation_map.ops->query(
3080 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
3081 
3082 	vm_put_address_space(addressSpace);
3083 	return status;
3084 }
3085 
3086 
3087 static inline addr_t
3088 virtual_page_address(vm_area* area, vm_page* page)
3089 {
3090 	return area->base
3091 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
3092 }
3093 
3094 
3095 bool
3096 vm_test_map_modification(vm_page* page)
3097 {
3098 	MutexLocker locker(sMappingLock);
3099 
3100 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3101 	vm_page_mapping* mapping;
3102 	while ((mapping = iterator.Next()) != NULL) {
3103 		vm_area* area = mapping->area;
3104 		vm_translation_map* map = &area->address_space->translation_map;
3105 
3106 		addr_t physicalAddress;
3107 		uint32 flags;
3108 		map->ops->lock(map);
3109 		map->ops->query(map, virtual_page_address(area, page),
3110 			&physicalAddress, &flags);
3111 		map->ops->unlock(map);
3112 
3113 		if ((flags & PAGE_MODIFIED) != 0)
3114 			return true;
3115 	}
3116 
3117 	return false;
3118 }
3119 
3120 
3121 int32
3122 vm_test_map_activation(vm_page* page, bool* _modified)
3123 {
3124 	int32 activation = 0;
3125 	bool modified = false;
3126 
3127 	MutexLocker locker(sMappingLock);
3128 
3129 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3130 	vm_page_mapping* mapping;
3131 	while ((mapping = iterator.Next()) != NULL) {
3132 		vm_area* area = mapping->area;
3133 		vm_translation_map* map = &area->address_space->translation_map;
3134 
3135 		addr_t physicalAddress;
3136 		uint32 flags;
3137 		map->ops->lock(map);
3138 		map->ops->query(map, virtual_page_address(area, page),
3139 			&physicalAddress, &flags);
3140 		map->ops->unlock(map);
3141 
3142 		if ((flags & PAGE_ACCESSED) != 0)
3143 			activation++;
3144 		if ((flags & PAGE_MODIFIED) != 0)
3145 			modified = true;
3146 	}
3147 
3148 	if (_modified != NULL)
3149 		*_modified = modified;
3150 
3151 	return activation;
3152 }
3153 
3154 
3155 void
3156 vm_clear_map_flags(vm_page* page, uint32 flags)
3157 {
3158 	MutexLocker locker(sMappingLock);
3159 
3160 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3161 	vm_page_mapping* mapping;
3162 	while ((mapping = iterator.Next()) != NULL) {
3163 		vm_area* area = mapping->area;
3164 		vm_translation_map* map = &area->address_space->translation_map;
3165 
3166 		map->ops->lock(map);
3167 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
3168 		map->ops->unlock(map);
3169 	}
3170 }
3171 
3172 
3173 /*!	Removes all mappings from a page.
3174 	After you've called this function, the page is unmapped from memory.
3175 	The accumulated page flags of all mappings can be found in \a _flags.
3176 */
3177 void
3178 vm_remove_all_page_mappings(vm_page* page, uint32* _flags)
3179 {
3180 	uint32 accumulatedFlags = 0;
3181 	MutexLocker locker(sMappingLock);
3182 
3183 	vm_page_mappings queue;
3184 	queue.MoveFrom(&page->mappings);
3185 
3186 	vm_page_mappings::Iterator iterator = queue.GetIterator();
3187 	vm_page_mapping* mapping;
3188 	while ((mapping = iterator.Next()) != NULL) {
3189 		vm_area* area = mapping->area;
3190 		vm_translation_map* map = &area->address_space->translation_map;
3191 		addr_t physicalAddress;
3192 		uint32 flags;
3193 
3194 		map->ops->lock(map);
3195 		addr_t address = virtual_page_address(area, page);
3196 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
3197 		map->ops->flush(map);
3198 		map->ops->query(map, address, &physicalAddress, &flags);
3199 		map->ops->unlock(map);
3200 
3201 		area->mappings.Remove(mapping);
3202 
3203 		accumulatedFlags |= flags;
3204 	}
3205 
3206 	if (page->wired_count == 0 && !queue.IsEmpty())
3207 		atomic_add(&gMappedPagesCount, -1);
3208 
3209 	locker.Unlock();
3210 
3211 	// free now unused mappings
3212 
3213 	while ((mapping = queue.RemoveHead()) != NULL) {
3214 		free(mapping);
3215 	}
3216 
3217 	if (_flags != NULL)
3218 		*_flags = accumulatedFlags;
3219 }
3220 
3221 
3222 bool
3223 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified)
3224 {
3225 	vm_translation_map* map = &area->address_space->translation_map;
3226 
3227 	map->ops->lock(map);
3228 
3229 	addr_t physicalAddress;
3230 	uint32 flags;
3231 	status_t status = map->ops->query(map, virtualAddress, &physicalAddress,
3232 		&flags);
3233 	if (status < B_OK || (flags & PAGE_PRESENT) == 0) {
3234 		map->ops->unlock(map);
3235 		return false;
3236 	}
3237 	vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3238 	if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) {
3239 		panic("area %p looking up page failed for pa 0x%lx\n", area,
3240 			physicalAddress);
3241 	}
3242 
3243 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE)
3244 		decrement_page_wired_count(page);
3245 
3246 	map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1);
3247 
3248 	if (preserveModified) {
3249 		map->ops->flush(map);
3250 
3251 		status = map->ops->query(map, virtualAddress, &physicalAddress, &flags);
3252 		if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED)
3253 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
3254 	}
3255 
3256 	map->ops->unlock(map);
3257 
3258 	if (area->wiring == B_NO_LOCK) {
3259 		vm_page_mapping* mapping;
3260 
3261 		mutex_lock(&sMappingLock);
3262 		map->ops->lock(map);
3263 
3264 		vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3265 		while (iterator.HasNext()) {
3266 			mapping = iterator.Next();
3267 
3268 			if (mapping->area == area) {
3269 				area->mappings.Remove(mapping);
3270 				page->mappings.Remove(mapping);
3271 
3272 				if (page->mappings.IsEmpty() && page->wired_count == 0)
3273 					atomic_add(&gMappedPagesCount, -1);
3274 
3275 				map->ops->unlock(map);
3276 				mutex_unlock(&sMappingLock);
3277 
3278 				free(mapping);
3279 
3280 				return true;
3281 			}
3282 		}
3283 
3284 		map->ops->unlock(map);
3285 		mutex_unlock(&sMappingLock);
3286 
3287 		dprintf("vm_unmap_page: couldn't find mapping for area %p in page %p\n",
3288 			area, page);
3289 	}
3290 
3291 	return true;
3292 }
3293 
3294 
3295 status_t
3296 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified)
3297 {
3298 	vm_translation_map* map = &area->address_space->translation_map;
3299 	addr_t end = base + (size - 1);
3300 
3301 	map->ops->lock(map);
3302 
3303 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
3304 		// iterate through all pages and decrease their wired count
3305 		for (addr_t virtualAddress = base; virtualAddress < end;
3306 				virtualAddress += B_PAGE_SIZE) {
3307 			addr_t physicalAddress;
3308 			uint32 flags;
3309 			status_t status = map->ops->query(map, virtualAddress,
3310 				&physicalAddress, &flags);
3311 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3312 				continue;
3313 
3314 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3315 			if (page == NULL) {
3316 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3317 					physicalAddress);
3318 			}
3319 
3320 			decrement_page_wired_count(page);
3321 		}
3322 	}
3323 
3324 	map->ops->unmap(map, base, end);
3325 	if (preserveModified) {
3326 		map->ops->flush(map);
3327 
3328 		for (addr_t virtualAddress = base; virtualAddress < end;
3329 				virtualAddress += B_PAGE_SIZE) {
3330 			addr_t physicalAddress;
3331 			uint32 flags;
3332 			status_t status = map->ops->query(map, virtualAddress,
3333 				&physicalAddress, &flags);
3334 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3335 				continue;
3336 
3337 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3338 			if (page == NULL) {
3339 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3340 					physicalAddress);
3341 			}
3342 
3343 			if ((flags & PAGE_MODIFIED) != 0
3344 				&& page->state != PAGE_STATE_MODIFIED)
3345 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
3346 		}
3347 	}
3348 	map->ops->unlock(map);
3349 
3350 	if (area->wiring == B_NO_LOCK) {
3351 		uint32 startOffset = (area->cache_offset + base - area->base)
3352 			>> PAGE_SHIFT;
3353 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
3354 		vm_page_mapping* mapping;
3355 		vm_area_mappings queue;
3356 
3357 		mutex_lock(&sMappingLock);
3358 		map->ops->lock(map);
3359 
3360 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3361 		while (iterator.HasNext()) {
3362 			mapping = iterator.Next();
3363 
3364 			vm_page* page = mapping->page;
3365 			if (page->cache_offset < startOffset
3366 				|| page->cache_offset >= endOffset)
3367 				continue;
3368 
3369 			page->mappings.Remove(mapping);
3370 			iterator.Remove();
3371 
3372 			if (page->mappings.IsEmpty() && page->wired_count == 0)
3373 				atomic_add(&gMappedPagesCount, -1);
3374 
3375 			queue.Add(mapping);
3376 		}
3377 
3378 		map->ops->unlock(map);
3379 		mutex_unlock(&sMappingLock);
3380 
3381 		while ((mapping = queue.RemoveHead()) != NULL) {
3382 			free(mapping);
3383 		}
3384 	}
3385 
3386 	return B_OK;
3387 }
3388 
3389 
3390 /*!	When calling this function, you need to have pages reserved! */
3391 status_t
3392 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection)
3393 {
3394 	vm_translation_map* map = &area->address_space->translation_map;
3395 	vm_page_mapping* mapping = NULL;
3396 
3397 	if (area->wiring == B_NO_LOCK) {
3398 		mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping));
3399 		if (mapping == NULL)
3400 			return B_NO_MEMORY;
3401 
3402 		mapping->page = page;
3403 		mapping->area = area;
3404 	}
3405 
3406 	map->ops->lock(map);
3407 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
3408 		protection);
3409 	map->ops->unlock(map);
3410 
3411 	if (area->wiring != B_NO_LOCK) {
3412 		increment_page_wired_count(page);
3413 	} else {
3414 		// insert mapping into lists
3415 		MutexLocker locker(sMappingLock);
3416 
3417 		if (page->mappings.IsEmpty() && page->wired_count == 0)
3418 			atomic_add(&gMappedPagesCount, 1);
3419 
3420 		page->mappings.Add(mapping);
3421 		area->mappings.Add(mapping);
3422 	}
3423 
3424 	if (page->usage_count < 0)
3425 		page->usage_count = 1;
3426 
3427 	if (page->state != PAGE_STATE_MODIFIED)
3428 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
3429 
3430 	return B_OK;
3431 }
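
// A minimal sketch of the usual calling pattern (reservedPages, cache, area,
// offset, and protection are placeholders, and the page state is illustrative
// as well; compare the B_FULL_LOCK loop in vm_create_anonymous_area above):
//
//	// reserve enough pages for the page itself plus whatever the translation
//	// map may need for page tables (cf. map_max_pages_need())
//	vm_page_reserve_pages(reservedPages);
//	vm_page* page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
//	cache->InsertPage(page, offset);
//	vm_map_page(area, page, virtual_page_address(area, page), protection);
//	vm_page_unreserve_pages(reservedPages);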
3432 
3433 
3434 static int
3435 display_mem(int argc, char** argv)
3436 {
3437 	bool physical = false;
3438 	addr_t copyAddress;
3439 	int32 displayWidth;
3440 	int32 itemSize;
3441 	int32 num = -1;
3442 	addr_t address;
3443 	int i = 1, j;
3444 
3445 	if (argc > 1 && argv[1][0] == '-') {
3446 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3447 			physical = true;
3448 			i++;
3449 		} else
3450 			i = 99;
3451 	}
3452 
3453 	if (argc < i + 1 || argc > i + 2) {
3454 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3455 			"\tdl - 8 bytes\n"
3456 			"\tdw - 4 bytes\n"
3457 			"\tds - 2 bytes\n"
3458 			"\tdb - 1 byte\n"
3459 			"\tstring - a whole string\n"
3460 			"  -p or --physical only allows memory from a single page to be "
3461 			"displayed.\n");
3462 		return 0;
3463 	}
3464 
3465 	address = parse_expression(argv[i]);
3466 
3467 	if (argc > i + 1)
3468 		num = parse_expression(argv[i + 1]);
3469 
3470 	// build the format string
3471 	if (strcmp(argv[0], "db") == 0) {
3472 		itemSize = 1;
3473 		displayWidth = 16;
3474 	} else if (strcmp(argv[0], "ds") == 0) {
3475 		itemSize = 2;
3476 		displayWidth = 8;
3477 	} else if (strcmp(argv[0], "dw") == 0) {
3478 		itemSize = 4;
3479 		displayWidth = 4;
3480 	} else if (strcmp(argv[0], "dl") == 0) {
3481 		itemSize = 8;
3482 		displayWidth = 2;
3483 	} else if (strcmp(argv[0], "string") == 0) {
3484 		itemSize = 1;
3485 		displayWidth = -1;
3486 	} else {
3487 		kprintf("display_mem called in an invalid way!\n");
3488 		return 0;
3489 	}
3490 
3491 	if (num <= 0)
3492 		num = displayWidth;
3493 
3494 	void* physicalPageHandle = NULL;
3495 
3496 	if (physical) {
3497 		int32 offset = address & (B_PAGE_SIZE - 1);
3498 		if (num * itemSize + offset > B_PAGE_SIZE) {
3499 			num = (B_PAGE_SIZE - offset) / itemSize;
3500 			kprintf("NOTE: number of bytes has been cut to page size\n");
3501 		}
3502 
3503 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3504 
3505 		if (vm_get_physical_page_debug(address, &copyAddress,
3506 				&physicalPageHandle) != B_OK) {
3507 			kprintf("getting the hardware page failed.\n");
3508 			return 0;
3509 		}
3510 
3511 		address += offset;
3512 		copyAddress += offset;
3513 	} else
3514 		copyAddress = address;
3515 
3516 	if (!strcmp(argv[0], "string")) {
3517 		kprintf("%p \"", (char*)copyAddress);
3518 
3519 		// string mode
3520 		for (i = 0; true; i++) {
3521 			char c;
3522 			if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
3523 				|| c == '\0')
3524 				break;
3525 
3526 			if (c == '\n')
3527 				kprintf("\\n");
3528 			else if (c == '\t')
3529 				kprintf("\\t");
3530 			else {
3531 				if (!isprint(c))
3532 					c = '.';
3533 
3534 				kprintf("%c", c);
3535 			}
3536 		}
3537 
3538 		kprintf("\"\n");
3539 	} else {
3540 		// number mode
3541 		for (i = 0; i < num; i++) {
3542 			uint32 value;
3543 
3544 			if ((i % displayWidth) == 0) {
3545 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3546 				if (i != 0)
3547 					kprintf("\n");
3548 
3549 				kprintf("[0x%lx]  ", address + i * itemSize);
3550 
3551 				for (j = 0; j < displayed; j++) {
3552 					char c;
3553 					if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j,
3554 							1) != B_OK) {
3555 						displayed = j;
3556 						break;
3557 					}
3558 					if (!isprint(c))
3559 						c = '.';
3560 
3561 					kprintf("%c", c);
3562 				}
3563 				if (num > displayWidth) {
3564 					// make sure the spacing in the last line is correct
3565 					for (j = displayed; j < displayWidth * itemSize; j++)
3566 						kprintf(" ");
3567 				}
3568 				kprintf("  ");
3569 			}
3570 
3571 			if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize,
3572 					itemSize) != B_OK) {
3573 				kprintf("read fault");
3574 				break;
3575 			}
3576 
3577 			switch (itemSize) {
3578 				case 1:
3579 					kprintf(" %02x", *(uint8*)&value);
3580 					break;
3581 				case 2:
3582 					kprintf(" %04x", *(uint16*)&value);
3583 					break;
3584 				case 4:
3585 					kprintf(" %08lx", *(uint32*)&value);
3586 					break;
3587 				case 8:
3588 					kprintf(" %016Lx", *(uint64*)&value);
3589 					break;
3590 			}
3591 		}
3592 
3593 		kprintf("\n");
3594 	}
3595 
3596 	if (physical) {
3597 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3598 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3599 	}
3600 	return 0;
3601 }
3602 
3603 
3604 static void
3605 dump_cache_tree_recursively(vm_cache* cache, int level,
3606 	vm_cache* highlightCache)
3607 {
3608 	// print this cache
3609 	for (int i = 0; i < level; i++)
3610 		kprintf("  ");
3611 	if (cache == highlightCache)
3612 		kprintf("%p <--\n", cache);
3613 	else
3614 		kprintf("%p\n", cache);
3615 
3616 	// recursively print its consumers
3617 	vm_cache* consumer = NULL;
3618 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3619 			consumer)) != NULL) {
3620 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3621 	}
3622 }
3623 
3624 
3625 static int
3626 dump_cache_tree(int argc, char** argv)
3627 {
3628 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3629 		kprintf("usage: %s <address>\n", argv[0]);
3630 		return 0;
3631 	}
3632 
3633 	addr_t address = parse_expression(argv[1]);
3634 	if (address == 0)
3635 		return 0;
3636 
3637 	vm_cache* cache = (vm_cache*)address;
3638 	vm_cache* root = cache;
3639 
3640 	// find the root cache (the transitive source)
3641 	while (root->source != NULL)
3642 		root = root->source;
3643 
3644 	dump_cache_tree_recursively(root, 0, cache);
3645 
3646 	return 0;
3647 }
3648 
3649 
3650 static const char*
3651 cache_type_to_string(int32 type)
3652 {
3653 	switch (type) {
3654 		case CACHE_TYPE_RAM:
3655 			return "RAM";
3656 		case CACHE_TYPE_DEVICE:
3657 			return "device";
3658 		case CACHE_TYPE_VNODE:
3659 			return "vnode";
3660 		case CACHE_TYPE_NULL:
3661 			return "null";
3662 
3663 		default:
3664 			return "unknown";
3665 	}
3666 }
3667 
3668 
3669 #if DEBUG_CACHE_LIST
3670 
3671 static void
3672 update_cache_info_recursively(vm_cache* cache, cache_info& info)
3673 {
3674 	info.page_count += cache->page_count;
3675 	if (cache->type == CACHE_TYPE_RAM)
3676 		info.committed += cache->committed_size;
3677 
3678 	// recurse
3679 	vm_cache* consumer = NULL;
3680 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3681 			consumer)) != NULL) {
3682 		update_cache_info_recursively(consumer, info);
3683 	}
3684 }
3685 
3686 
3687 static int
3688 cache_info_compare_page_count(const void* _a, const void* _b)
3689 {
3690 	const cache_info* a = (const cache_info*)_a;
3691 	const cache_info* b = (const cache_info*)_b;
3692 	if (a->page_count == b->page_count)
3693 		return 0;
3694 	return a->page_count < b->page_count ? 1 : -1;
3695 }
3696 
3697 
3698 static int
3699 cache_info_compare_committed(const void* _a, const void* _b)
3700 {
3701 	const cache_info* a = (const cache_info*)_a;
3702 	const cache_info* b = (const cache_info*)_b;
3703 	if (a->committed == b->committed)
3704 		return 0;
3705 	return a->committed < b->committed ? 1 : -1;
3706 }
3707 
3708 
3709 static void
3710 dump_caches_recursively(vm_cache* cache, cache_info& info, int level)
3711 {
3712 	for (int i = 0; i < level; i++)
3713 		kprintf("  ");
3714 
3715 	kprintf("%p: type: %s, base: %lld, end: %lld, pages: %lu", cache,
3716 		cache_type_to_string(cache->type), cache->virtual_base,
3717 		cache->virtual_end, cache->page_count);
3718 
3719 	if (level == 0)
3720 		kprintf("/%lu", info.page_count);
3721 
3722 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3723 		kprintf(", committed: %lld", cache->committed_size);
3724 
3725 		if (level == 0)
3726 			kprintf("/%lu", info.committed);
3727 	}
3728 
3729 	// areas
3730 	if (cache->areas != NULL) {
3731 		vm_area* area = cache->areas;
3732 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3733 			area->address_space->id);
3734 
3735 		while (area->cache_next != NULL) {
3736 			area = area->cache_next;
3737 			kprintf(", %ld", area->id);
3738 		}
3739 	}
3740 
3741 	kputs("\n");
3742 
3743 	// recurse
3744 	vm_cache* consumer = NULL;
3745 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3746 			consumer)) != NULL) {
3747 		dump_caches_recursively(consumer, info, level + 1);
3748 	}
3749 }
3750 
3751 
3752 static int
3753 dump_caches(int argc, char** argv)
3754 {
3755 	if (sCacheInfoTable == NULL) {
3756 		kprintf("No cache info table!\n");
3757 		return 0;
3758 	}
3759 
3760 	bool sortByPageCount = true;
3761 
3762 	for (int32 i = 1; i < argc; i++) {
3763 		if (strcmp(argv[i], "-c") == 0) {
3764 			sortByPageCount = false;
3765 		} else {
3766 			print_debugger_command_usage(argv[0]);
3767 			return 0;
3768 		}
3769 	}
3770 
3771 	uint32 totalCount = 0;
3772 	uint32 rootCount = 0;
3773 	off_t totalCommitted = 0;
3774 	page_num_t totalPages = 0;
3775 
3776 	vm_cache* cache = gDebugCacheList;
3777 	while (cache) {
3778 		totalCount++;
3779 		if (cache->source == NULL) {
3780 			cache_info stackInfo;
3781 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3782 				? sCacheInfoTable[rootCount] : stackInfo;
3783 			rootCount++;
3784 			info.cache = cache;
3785 			info.page_count = 0;
3786 			info.committed = 0;
3787 			update_cache_info_recursively(cache, info);
3788 			totalCommitted += info.committed;
3789 			totalPages += info.page_count;
3790 		}
3791 
3792 		cache = cache->debug_next;
3793 	}
3794 
3795 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3796 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3797 			sortByPageCount
3798 				? &cache_info_compare_page_count
3799 				: &cache_info_compare_committed);
3800 	}
3801 
3802 	kprintf("total committed memory: %lld, total used pages: %lu\n",
3803 		totalCommitted, totalPages);
3804 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3805 		"tree...\n\n", totalCount, rootCount,
3806 		sortByPageCount ? "page count" : "committed size");
3807 
3808 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3809 		for (uint32 i = 0; i < rootCount; i++) {
3810 			cache_info& info = sCacheInfoTable[i];
3811 			dump_caches_recursively(info.cache, info, 0);
3812 		}
3813 	} else
3814 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3815 
3816 	return 0;
3817 }
3818 
3819 #endif	// DEBUG_CACHE_LIST
3820 
3821 
3822 static int
3823 dump_cache(int argc, char** argv)
3824 {
3825 	vm_cache* cache;
3826 	bool showPages = false;
3827 	int i = 1;
3828 
3829 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3830 		kprintf("usage: %s [-ps] <address>\n"
3831 			"  if -p is specified, all of the cache's pages are shown;\n"
3832 			"  if -s is specified, only the cache info is shown.\n", argv[0]);
3833 		return 0;
3834 	}
3835 	while (argv[i][0] == '-') {
3836 		char* arg = argv[i] + 1;
3837 		while (arg[0]) {
3838 			if (arg[0] == 'p')
3839 				showPages = true;
3840 			arg++;
3841 		}
3842 		i++;
3843 	}
3844 	if (argv[i] == NULL) {
3845 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3846 		return 0;
3847 	}
3848 
3849 	addr_t address = parse_expression(argv[i]);
3850 	if (address == 0)
3851 		return 0;
3852 
3853 	cache = (vm_cache*)address;
3854 
3855 	kprintf("CACHE %p:\n", cache);
3856 	kprintf("  ref_count:    %ld\n", cache->RefCount());
3857 	kprintf("  source:       %p\n", cache->source);
3858 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
3859 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
3860 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
3861 	kprintf("  temporary:    %ld\n", cache->temporary);
3862 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
3863 	kprintf("  lock:         %p\n", cache->GetLock());
3864 #if KDEBUG
3865 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
3866 #endif
3867 	kprintf("  areas:\n");
3868 
3869 	for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) {
3870 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
3871 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
3872 		kprintf("\tprotection: 0x%lx\n", area->protection);
3873 		kprintf("\towner:      0x%lx\n", area->address_space->id);
3874 	}
3875 
3876 	kprintf("  consumers:\n");
3877 	vm_cache* consumer = NULL;
3878 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3879 				consumer)) != NULL) {
3880 		kprintf("\t%p\n", consumer);
3881 	}
3882 
3883 	kprintf("  pages:\n");
3884 	if (showPages) {
3885 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3886 				vm_page* page = it.Next();) {
3887 			if (page->type == PAGE_TYPE_PHYSICAL) {
3888 				kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) "
3889 					"wired_count %u\n", page, page->physical_page_number,
3890 					page->cache_offset, page->type, page->state,
3891 					page_state_to_string(page->state), page->wired_count);
3892 			} else if (page->type == PAGE_TYPE_DUMMY) {
3893 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3894 					page, page->state, page_state_to_string(page->state));
3895 			} else
3896 				kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
3897 		}
3898 	} else
3899 		kprintf("\t%ld in cache\n", cache->page_count);
3900 
3901 	return 0;
3902 }
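
// Example KDL invocation (illustrative address): "cache -p 0x80345678" dumps
// the vm_cache at that address including each of its pages; without -p only
// the cache info and the page count are printed.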
3903 
3904 
3905 static void
3906 dump_area_struct(vm_area* area, bool mappings)
3907 {
3908 	kprintf("AREA: %p\n", area);
3909 	kprintf("name:\t\t'%s'\n", area->name);
3910 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
3911 	kprintf("id:\t\t0x%lx\n", area->id);
3912 	kprintf("base:\t\t0x%lx\n", area->base);
3913 	kprintf("size:\t\t0x%lx\n", area->size);
3914 	kprintf("protection:\t0x%lx\n", area->protection);
3915 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3916 	kprintf("memory_type:\t0x%x\n", area->memory_type);
3917 	kprintf("cache:\t\t%p\n", area->cache);
3918 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3919 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3920 	kprintf("cache_next:\t%p\n", area->cache_next);
3921 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3922 
3923 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3924 	if (mappings) {
3925 		kprintf("page mappings:\n");
3926 		while (iterator.HasNext()) {
3927 			vm_page_mapping* mapping = iterator.Next();
3928 			kprintf("  %p", mapping->page);
3929 		}
3930 		kprintf("\n");
3931 	} else {
3932 		uint32 count = 0;
3933 		while (iterator.Next() != NULL) {
3934 			count++;
3935 		}
3936 		kprintf("page mappings:\t%lu\n", count);
3937 	}
3938 }
3939 
3940 
3941 static int
3942 dump_area(int argc, char** argv)
3943 {
3944 	bool mappings = false;
3945 	bool found = false;
3946 	int32 index = 1;
3947 	vm_area* area;
3948 	addr_t num;
3949 
3950 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3951 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3952 			"All areas matching the given id/address/name are listed. You can\n"
3953 			"restrict the search to a specific attribute by prefixing the\n"
3954 			"specifier with one of the id/contains/address/name keywords.\n"
3955 			"-m shows the area's mappings as well.\n");
3956 		return 0;
3957 	}
3958 
3959 	if (!strcmp(argv[1], "-m")) {
3960 		mappings = true;
3961 		index++;
3962 	}
3963 
3964 	int32 mode = 0xf;
3965 	if (!strcmp(argv[index], "id"))
3966 		mode = 1;
3967 	else if (!strcmp(argv[index], "contains"))
3968 		mode = 2;
3969 	else if (!strcmp(argv[index], "name"))
3970 		mode = 4;
3971 	else if (!strcmp(argv[index], "address"))
3972 		mode = 0;
3973 	if (mode != 0xf)
3974 		index++;
3975 
3976 	if (index >= argc) {
3977 		kprintf("No area specifier given.\n");
3978 		return 0;
3979 	}
3980 
3981 	num = parse_expression(argv[index]);
3982 
3983 	if (mode == 0) {
3984 		dump_area_struct((struct vm_area*)num, mappings);
3985 	} else {
3986 		// walk through the area list, looking for the arguments as a name
3987 		struct hash_iterator iter;
3988 
3989 		hash_open(sAreaHash, &iter);
3990 		while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
3991 			if (((mode & 4) != 0 && area->name != NULL
3992 					&& !strcmp(argv[index], area->name))
3993 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3994 					|| (((mode & 2) != 0 && area->base <= num
3995 						&& area->base + area->size > num))))) {
3996 				dump_area_struct(area, mappings);
3997 				found = true;
3998 			}
3999 		}
4000 
4001 		if (!found)
4002 			kprintf("could not find area %s (%ld)\n", argv[index], num);
4003 	}
4004 
4005 	return 0;
4006 }
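
// Example KDL invocations (illustrative values): "area id 0x1234" matches by
// area ID only, "area -m contains 0x80123456" dumps the area covering that
// address together with its page mappings.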
4007 
4008 
4009 static int
4010 dump_area_list(int argc, char** argv)
4011 {
4012 	vm_area* area;
4013 	struct hash_iterator iter;
4014 	const char* name = NULL;
4015 	int32 id = 0;
4016 
4017 	if (argc > 1) {
4018 		id = parse_expression(argv[1]);
4019 		if (id == 0)
4020 			name = argv[1];
4021 	}
4022 
4023 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
4024 
4025 	hash_open(sAreaHash, &iter);
4026 	while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
4027 		if ((id != 0 && area->address_space->id != id)
4028 			|| (name != NULL && strstr(area->name, name) == NULL))
4029 			continue;
4030 
4031 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
4032 			(void*)area->base, (void*)area->size, area->protection, area->wiring,
4033 			area->name);
4034 	}
4035 	hash_close(sAreaHash, &iter, false);
4036 	return 0;
4037 }
4038 
4039 
4040 static int
4041 dump_available_memory(int argc, char** argv)
4042 {
4043 	kprintf("Available memory: %Ld/%lu bytes\n",
4044 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
4045 	return 0;
4046 }
4047 
4048 
4049 status_t
4050 vm_delete_areas(struct vm_address_space* addressSpace)
4051 {
4052 	vm_area* area;
4053 	vm_area* next;
4054 	vm_area* last = NULL;
4055 
4056 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
4057 		addressSpace->id));
4058 
4059 	rw_lock_write_lock(&addressSpace->lock);
4060 
4061 	// remove all reserved areas in this address space
4062 
4063 	for (area = addressSpace->areas; area; area = next) {
4064 		next = area->address_space_next;
4065 
4066 		if (area->id == RESERVED_AREA_ID) {
4067 			// just remove it
4068 			if (last)
4069 				last->address_space_next = area->address_space_next;
4070 			else
4071 				addressSpace->areas = area->address_space_next;
4072 
4073 			vm_put_address_space(addressSpace);
4074 			free(area);
4075 			continue;
4076 		}
4077 
4078 		last = area;
4079 	}
4080 
4081 	// delete all the areas in this address space
4082 
4083 	for (area = addressSpace->areas; area; area = next) {
4084 		next = area->address_space_next;
4085 		delete_area(addressSpace, area);
4086 	}
4087 
4088 	rw_lock_write_unlock(&addressSpace->lock);
4089 	return B_OK;
4090 }
4091 
4092 
4093 static area_id
4094 vm_area_for(addr_t address, bool kernel)
4095 {
4096 	team_id team;
4097 	if (IS_USER_ADDRESS(address)) {
4098 		// we try the user team address space, if any
4099 		team = vm_current_user_address_space_id();
4100 		if (team < 0)
4101 			return team;
4102 	} else
4103 		team = vm_kernel_address_space_id();
4104 
4105 	AddressSpaceReadLocker locker(team);
4106 	if (!locker.IsLocked())
4107 		return B_BAD_TEAM_ID;
4108 
4109 	vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
4110 	if (area != NULL) {
4111 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
4112 			return B_ERROR;
4113 
4114 		return area->id;
4115 	}
4116 
4117 	return B_ERROR;
4118 }
4119 
4120 
4121 /*!	Frees physical pages that were used during the boot process.
4122 */
4123 static void
4124 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end)
4125 {
4126 	// free all physical pages in the specified range
4127 
4128 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
4129 		addr_t physicalAddress;
4130 		uint32 flags;
4131 
4132 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
4133 			vm_page* page = vm_lookup_page(current / B_PAGE_SIZE);
4134 			if (page != NULL)
4135 				vm_page_set_state(page, PAGE_STATE_FREE);
4136 		}
4137 	}
4138 
4139 	// unmap the memory
4140 	map->ops->unmap(map, start, end - 1);
4141 }
4142 
4143 
4144 void
4145 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4146 {
4147 	vm_translation_map* map = &vm_kernel_address_space()->translation_map;
4148 	addr_t end = start + size;
4149 	addr_t lastEnd = start;
4150 	vm_area* area;
4151 
4152 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4153 		(void*)start, (void*)end));
4154 
4155 	// The areas are sorted in virtual address space order, so
4156 	// we just have to find the holes between them that fall
4157 	// into the range we are supposed to dispose of
4158 
4159 	map->ops->lock(map);
4160 
4161 	for (area = vm_kernel_address_space()->areas; area != NULL;
4162 			area = area->address_space_next) {
4163 		addr_t areaStart = area->base;
4164 		addr_t areaEnd = areaStart + area->size;
4165 
4166 		if (area->id == RESERVED_AREA_ID)
4167 			continue;
4168 
4169 		if (areaEnd >= end) {
4170 			// we are done, the areas are already beyond what we have to free
4171 			lastEnd = end;
4172 			break;
4173 		}
4174 
4175 		if (areaStart > lastEnd) {
4176 			// this is something we can free
4177 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4178 				(void*)areaStart));
4179 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
4180 		}
4181 
4182 		lastEnd = areaEnd;
4183 	}
4184 
4185 	if (lastEnd < end) {
4186 		// we can also get rid of some space at the end of the area
4187 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4188 			(void*)end));
4189 		unmap_and_free_physical_pages(map, lastEnd, end);
4190 	}
4191 
4192 	map->ops->unlock(map);
4193 }
4194 
4195 
4196 static void
4197 create_preloaded_image_areas(struct preloaded_image* image)
4198 {
4199 	char name[B_OS_NAME_LENGTH];
4200 	void* address;
4201 	int32 length;
4202 
4203 	// use file name to create a good area name
4204 	char* fileName = strrchr(image->name, '/');
4205 	if (fileName == NULL)
4206 		fileName = image->name;
4207 	else
4208 		fileName++;
4209 
4210 	length = strlen(fileName);
4211 	// make sure there is enough space for the suffix
4212 	if (length > 25)
4213 		length = 25;
4214 
4215 	memcpy(name, fileName, length);
4216 	strcpy(name + length, "_text");
4217 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4218 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4219 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4220 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4221 		// this will later be remapped read-only/executable by the
4222 		// ELF initialization code
4223 
4224 	strcpy(name + length, "_data");
4225 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4226 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4227 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4228 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4229 }
4230 
4231 
4232 /*!	Frees all previously allocated kernel arguments areas from the
4233 	kernel_args structure. Any boot loader resources contained in those
4234 	arguments must not be accessed anymore past this point.
4235 */
4236 void
4237 vm_free_kernel_args(kernel_args* args)
4238 {
4239 	uint32 i;
4240 
4241 	TRACE(("vm_free_kernel_args()\n"));
4242 
4243 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4244 		area_id area = area_for((void*)args->kernel_args_range[i].start);
4245 		if (area >= B_OK)
4246 			delete_area(area);
4247 	}
4248 }
4249 
4250 
4251 static void
4252 allocate_kernel_args(kernel_args* args)
4253 {
4254 	TRACE(("allocate_kernel_args()\n"));
4255 
4256 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4257 		void* address = (void*)args->kernel_args_range[i].start;
4258 
4259 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4260 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4261 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4262 	}
4263 }
4264 
4265 
4266 static void
4267 unreserve_boot_loader_ranges(kernel_args* args)
4268 {
4269 	TRACE(("unreserve_boot_loader_ranges()\n"));
4270 
4271 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4272 		vm_unreserve_address_range(vm_kernel_address_space_id(),
4273 			(void*)args->virtual_allocated_range[i].start,
4274 			args->virtual_allocated_range[i].size);
4275 	}
4276 }
4277 
4278 
4279 static void
4280 reserve_boot_loader_ranges(kernel_args* args)
4281 {
4282 	TRACE(("reserve_boot_loader_ranges()\n"));
4283 
4284 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4285 		void* address = (void*)args->virtual_allocated_range[i].start;
4286 
4287 		// If the address is not a kernel address, we just skip it. The
4288 		// architecture specific code has to deal with it.
4289 		if (!IS_KERNEL_ADDRESS(address)) {
4290 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
4291 				address, args->virtual_allocated_range[i].size);
4292 			continue;
4293 		}
4294 
4295 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(),
4296 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4297 		if (status < B_OK)
4298 			panic("could not reserve boot loader ranges\n");
4299 	}
4300 }
4301 
4302 
4303 static addr_t
4304 allocate_early_virtual(kernel_args* args, size_t size)
4305 {
4306 	addr_t spot = 0;
4307 	uint32 i;
4308 	int last_valloc_entry = 0;
4309 
4310 	size = PAGE_ALIGN(size);
4311 	// find a slot in the virtual allocation addr range
4312 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
4313 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4314 			+ args->virtual_allocated_range[i - 1].size;
4315 		last_valloc_entry = i;
4316 		// check to see if the space between this one and the last is big enough
4317 		if (previousRangeEnd >= KERNEL_BASE
4318 			&& args->virtual_allocated_range[i].start
4319 				- previousRangeEnd >= size) {
4320 			spot = previousRangeEnd;
4321 			args->virtual_allocated_range[i - 1].size += size;
4322 			goto out;
4323 		}
4324 	}
4325 	if (spot == 0) {
4326 		// we didn't find one between the allocation ranges. this is ok.
4327 		// see if there's a gap after the last one
4328 		addr_t lastRangeEnd
4329 			= args->virtual_allocated_range[last_valloc_entry].start
4330 				+ args->virtual_allocated_range[last_valloc_entry].size;
4331 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
4332 			spot = lastRangeEnd;
4333 			args->virtual_allocated_range[last_valloc_entry].size += size;
4334 			goto out;
4335 		}
4336 		// see if there's a gap before the first one
4337 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
4338 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
4339 				args->virtual_allocated_range[0].start -= size;
4340 				spot = args->virtual_allocated_range[0].start;
4341 				goto out;
4342 			}
4343 		}
4344 	}
4345 
4346 out:
4347 	return spot;
4348 }
4349 
4350 
4351 static bool
4352 is_page_in_physical_memory_range(kernel_args* args, addr_t address)
4353 {
4354 	// TODO: horrible brute-force method of determining if the page can be
4355 	// allocated
4356 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4357 		if (address >= args->physical_memory_range[i].start
4358 			&& address < args->physical_memory_range[i].start
4359 				+ args->physical_memory_range[i].size)
4360 			return true;
4361 	}
4362 	return false;
4363 }
4364 
4365 
4366 static addr_t
4367 allocate_early_physical_page(kernel_args* args)
4368 {
4369 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4370 		addr_t nextPage;
4371 
4372 		nextPage = args->physical_allocated_range[i].start
4373 			+ args->physical_allocated_range[i].size;
4374 		// see if the page after the next allocated paddr run can be allocated
4375 		if (i + 1 < args->num_physical_allocated_ranges
4376 			&& args->physical_allocated_range[i + 1].size != 0) {
4377 			// see if the next page will collide with the next allocated range
4378 			if (nextPage >= args->physical_allocated_range[i+1].start)
4379 				continue;
4380 		}
4381 		// see if the next physical page fits in the memory block
4382 		if (is_page_in_physical_memory_range(args, nextPage)) {
4383 			// we got one!
4384 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4385 			return nextPage / B_PAGE_SIZE;
4386 		}
4387 	}
4388 
4389 	return 0;
4390 		// could not allocate a block
4391 }
4392 
4393 
4394 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4395 	allocate some pages before the VM is completely up.
4396 */
4397 addr_t
4398 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4399 	uint32 attributes)
4400 {
4401 	if (physicalSize > virtualSize)
4402 		physicalSize = virtualSize;
4403 
4404 	// find the vaddr to allocate at
4405 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
4406 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
4407 
4408 	// map the pages
4409 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4410 		addr_t physicalAddress = allocate_early_physical_page(args);
4411 		if (physicalAddress == 0)
4412 			panic("error allocating early page!\n");
4413 
4414 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4415 
4416 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4417 			physicalAddress * B_PAGE_SIZE, attributes,
4418 			&allocate_early_physical_page);
4419 	}
4420 
4421 	return virtualBase;
4422 }
4423 
4424 
4425 /*!	The main entrance point to initialize the VM. */
4426 status_t
4427 vm_init(kernel_args* args)
4428 {
4429 	struct preloaded_image* image;
4430 	void* address;
4431 	status_t err = 0;
4432 	uint32 i;
4433 
4434 	TRACE(("vm_init: entry\n"));
4435 	err = arch_vm_translation_map_init(args);
4436 	err = arch_vm_init(args);
4437 
4438 	// initialize some globals
4439 	sNextAreaID = 1;
4440 
4441 	vm_page_init_num_pages(args);
4442 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4443 
4444 	size_t heapSize = INITIAL_HEAP_SIZE;
4445 	// try to accomodate low memory systems
4446 	// try to accommodate low memory systems
4447 		heapSize /= 2;
4448 	if (heapSize < 1024 * 1024)
4449 		panic("vm_init: go buy some RAM please.");
4450 
4451 	// map in the new heap and initialize it
4452 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4453 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4454 	TRACE(("heap at 0x%lx\n", heapBase));
4455 	heap_init(heapBase, heapSize);
4456 
4457 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
4458 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
4459 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4460 	slab_init(args, slabInitialBase, slabInitialSize);
4461 
4462 	// initialize the free page list and physical page mapper
4463 	vm_page_init(args);
4464 
4465 	// initialize the hash table that stores the pages mapped to caches
4466 	vm_cache_init(args);
4467 
4468 	{
4469 		vm_area* area;
4470 		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
4471 			(addr_t)&area->hash_next - (addr_t)area,
4472 			&area_compare, &area_hash);
4473 		if (sAreaHash == NULL)
4474 			panic("vm_init: error creating area hash table\n");
4475 	}
4476 
4477 	vm_address_space_init();
4478 	reserve_boot_loader_ranges(args);
4479 
4480 	// Do any further initialization that the architecture dependent layers may
4481 	// need now
4482 	arch_vm_translation_map_init_post_area(args);
4483 	arch_vm_init_post_area(args);
4484 	vm_page_init_post_area(args);
4485 
4486 	// allocate areas to represent stuff that already exists
4487 
4488 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4489 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4490 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4491 
4492 	address = (void*)ROUNDDOWN(slabInitialBase, B_PAGE_SIZE);
4493 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
4494 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
4495 		| B_KERNEL_WRITE_AREA);
4496 
4497 	allocate_kernel_args(args);
4498 
4499 	create_preloaded_image_areas(&args->kernel_image);
4500 
4501 	// allocate areas for preloaded images
4502 	for (image = args->preloaded_images; image != NULL; image = image->next) {
4503 		create_preloaded_image_areas(image);
4504 	}
4505 
4506 	// allocate kernel stacks
4507 	for (i = 0; i < args->num_cpus; i++) {
4508 		char name[64];
4509 
4510 		sprintf(name, "idle thread %lu kstack", i + 1);
4511 		address = (void*)args->cpu_kstack[i].start;
4512 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4513 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4514 	}
4515 
4516 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4517 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4518 
4519 #if DEBUG_CACHE_LIST
4520 	create_area("cache info table", (void**)&sCacheInfoTable,
4521 		B_ANY_KERNEL_ADDRESS,
4522 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4523 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4524 #endif	// DEBUG_CACHE_LIST
4525 
4526 	// add some debugger commands
4527 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4528 	add_debugger_command("area", &dump_area,
4529 		"Dump info about a particular area");
4530 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
4531 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
4532 #if DEBUG_CACHE_LIST
4533 	add_debugger_command_etc("caches", &dump_caches,
4534 		"List all vm_cache trees",
4535 		"[ \"-c\" ]\n"
4536 		"All cache trees are listed sorted in decreasing order by number of\n"
4537 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
4538 		0);
4539 #endif
4540 	add_debugger_command("avail", &dump_available_memory,
4541 		"Dump available memory");
4542 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4543 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4544 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4545 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4546 	add_debugger_command("string", &display_mem, "dump strings");
4547 
4548 	TRACE(("vm_init: exit\n"));
4549 
4550 	return err;
4551 }
4552 
4553 
4554 status_t
4555 vm_init_post_sem(kernel_args* args)
4556 {
4557 	// This frees all unused boot loader resources and makes their space
4558 	// available again
4559 	arch_vm_init_end(args);
4560 	unreserve_boot_loader_ranges(args);
4561 
4562 	// fill in all of the semaphores that were not allocated before;
4563 	// since we're still single threaded and only the kernel address space
4564 	// exists, it isn't that hard to find all of the ones we need to create
4565 
4566 	arch_vm_translation_map_init_post_sem(args);
4567 	vm_address_space_init_post_sem();
4568 
4569 	slab_init_post_sem();
4570 	return heap_init_post_sem();
4571 }
4572 
4573 
4574 status_t
4575 vm_init_post_thread(kernel_args* args)
4576 {
4577 	vm_page_init_post_thread(args);
4578 	vm_daemon_init();
4579 	slab_init_post_thread();
4580 	return heap_init_post_thread();
4581 }
4582 
4583 
4584 status_t
4585 vm_init_post_modules(kernel_args* args)
4586 {
4587 	return arch_vm_init_post_modules(args);
4588 }
4589 
4590 
4591 void
4592 permit_page_faults(void)
4593 {
4594 	struct thread* thread = thread_get_current_thread();
4595 	if (thread != NULL)
4596 		atomic_add(&thread->page_faults_allowed, 1);
4597 }
4598 
4599 
4600 void
4601 forbid_page_faults(void)
4602 {
4603 	struct thread* thread = thread_get_current_thread();
4604 	if (thread != NULL)
4605 		atomic_add(&thread->page_faults_allowed, -1);
4606 }
4607 
4608 
4609 status_t
4610 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
4611 	addr_t* newIP)
4612 {
4613 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4614 		faultAddress));
4615 
4616 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4617 
4618 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4619 	vm_address_space* addressSpace = NULL;
4620 
4621 	status_t status = B_OK;
4622 	*newIP = 0;
4623 	atomic_add((int32*)&sPageFaults, 1);
4624 
4625 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4626 		addressSpace = vm_get_kernel_address_space();
4627 	} else if (IS_USER_ADDRESS(pageAddress)) {
4628 		addressSpace = vm_get_current_user_address_space();
4629 		if (addressSpace == NULL) {
4630 			if (!isUser) {
4631 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4632 					"memory!\n");
4633 				status = B_BAD_ADDRESS;
4634 				TPF(PageFaultError(-1,
4635 					VMPageFaultTracing
4636 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4637 			} else {
4638 				// XXX weird state.
4639 				panic("vm_page_fault: non kernel thread accessing user memory "
4640 					"that doesn't exist!\n");
4641 				status = B_BAD_ADDRESS;
4642 			}
4643 		}
4644 	} else {
4645 		// the hit was probably in the 64k DMZ between kernel and user space;
4646 		// this keeps a user space thread from passing a buffer that crosses
4647 		// into kernel space
4648 		status = B_BAD_ADDRESS;
4649 		TPF(PageFaultError(-1,
4650 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4651 	}
4652 
4653 	if (status == B_OK)
4654 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser);
4655 
4656 	if (status < B_OK) {
4657 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4658 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4659 			strerror(status), address, faultAddress, isWrite, isUser,
4660 			thread_get_current_thread_id());
4661 		if (!isUser) {
4662 			struct thread* thread = thread_get_current_thread();
4663 			if (thread != NULL && thread->fault_handler != 0) {
4664 				// this will cause the arch dependent page fault handler to
4665 				// modify the IP on the interrupt frame or whatever to return
4666 				// to this address
4667 				*newIP = thread->fault_handler;
4668 			} else {
4669 				// unhandled page fault in the kernel
4670 				panic("vm_page_fault: unhandled page fault in kernel space at "
4671 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4672 			}
4673 		} else {
4674 #if 1
4675 			rw_lock_read_lock(&addressSpace->lock);
4676 
4677 			// TODO: remove me once we have proper userland debugging support
4678 			// (and tools)
4679 			vm_area* area = vm_area_lookup(addressSpace, faultAddress);
4680 
4681 			struct thread* thread = thread_get_current_thread();
4682 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4683 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4684 				thread->name, thread->id, thread->team->name, thread->team->id,
4685 				isWrite ? "write" : "read", address, faultAddress,
4686 				area ? area->name : "???",
4687 				faultAddress - (area ? area->base : 0x0));
4688 
4689 			// We can print a stack trace of the userland thread here.
4690 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4691 // fault and someone is already waiting for a write lock on the same address
4692 // space. This thread will then try to acquire the lock again and will
4693 // be queued after the writer.
4694 #	if 0
4695 			if (area) {
4696 				struct stack_frame {
4697 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4698 						struct stack_frame*	previous;
4699 						void*				return_address;
4700 					#else
4701 						// ...
4702 					#warning writeme
4703 					#endif
4704 				} frame;
4705 #		ifdef __INTEL__
4706 				struct iframe* iframe = i386_get_user_iframe();
4707 				if (iframe == NULL)
4708 					panic("iframe is NULL!");
4709 
4710 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4711 					sizeof(struct stack_frame));
4712 #		elif defined(__POWERPC__)
4713 				struct iframe* iframe = ppc_get_user_iframe();
4714 				if (iframe == NULL)
4715 					panic("iframe is NULL!");
4716 
4717 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4718 					sizeof(struct stack_frame));
4719 #		else
4720 #			warning "vm_page_fault() stack trace won't work"
4721 				status = B_ERROR;
4722 #		endif
4723 
4724 				dprintf("stack trace:\n");
4725 				int32 maxFrames = 50;
4726 				while (status == B_OK && --maxFrames >= 0
4727 						&& frame.return_address != NULL) {
4728 					dprintf("  %p", frame.return_address);
4729 					area = vm_area_lookup(addressSpace,
4730 						(addr_t)frame.return_address);
4731 					if (area) {
4732 						dprintf(" (%s + %#lx)", area->name,
4733 							(addr_t)frame.return_address - area->base);
4734 					}
4735 					dprintf("\n");
4736 
4737 					status = user_memcpy(&frame, frame.previous,
4738 						sizeof(struct stack_frame));
4739 				}
4740 			}
4741 #	endif	// 0 (stack trace)
4742 
4743 			rw_lock_read_unlock(&addressSpace->lock);
4744 #endif
4745 
4746 			// TODO: the fault_callback is a temporary solution for vm86
4747 			if (thread->fault_callback == NULL
4748 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4749 				// If the thread has a signal handler for SIGSEGV, we simply
4750 				// send it the signal. Otherwise we notify the user debugger
4751 				// first.
4752 				struct sigaction action;
4753 				if (sigaction(SIGSEGV, NULL, &action) == 0
4754 					&& action.sa_handler != SIG_DFL
4755 					&& action.sa_handler != SIG_IGN) {
4756 					send_signal(thread->id, SIGSEGV);
4757 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4758 						SIGSEGV)) {
4759 					send_signal(thread->id, SIGSEGV);
4760 				}
4761 			}
4762 		}
4763 	}
4764 
4765 	if (addressSpace != NULL)
4766 		vm_put_address_space(addressSpace);
4767 
4768 	return B_HANDLED_INTERRUPT;
4769 }
4770 
4771 
4772 class VMCacheChainLocker {
4773 public:
4774 	VMCacheChainLocker()
4775 		:
4776 		fTopCache(NULL),
4777 		fBottomCache(NULL)
4778 	{
4779 	}
4780 
4781 	void SetTo(VMCache* topCache)
4782 	{
4783 		fTopCache = topCache;
4784 		fBottomCache = topCache;
4785 	}
4786 
4787 	VMCache* LockSourceCache()
4788 	{
4789 		if (fBottomCache == NULL || fBottomCache->source == NULL)
4790 			return NULL;
4791 
4792 		fBottomCache = fBottomCache->source;
4793 		fBottomCache->Lock();
4794 		fBottomCache->AcquireRefLocked();
4795 
4796 		return fBottomCache;
4797 	}
4798 
4799 	void Unlock()
4800 	{
4801 		if (fTopCache == NULL)
4802 			return;
4803 
4804 		VMCache* cache = fTopCache;
4805 		while (cache != NULL) {
4806 			VMCache* nextCache = cache->source;
4807 			cache->ReleaseRefAndUnlock();
4808 
4809 			if (cache == fBottomCache)
4810 				break;
4811 
4812 			cache = nextCache;
4813 		}
4814 
4815 		fTopCache = NULL;
4816 		fBottomCache = NULL;
4817 	}
4818 
4819 private:
4820 	VMCache*	fTopCache;
4821 	VMCache*	fBottomCache;
4822 };
4823 
4824 
4825 struct PageFaultContext {
4826 	AddressSpaceReadLocker	addressSpaceLocker;
4827 	VMCacheChainLocker		cacheChainLocker;
4828 
4829 	vm_translation_map*		map;
4830 	vm_cache*				topCache;
4831 	off_t					cacheOffset;
4832 	bool					isWrite;
4833 
4834 	// return values
4835 	vm_page*				page;
4836 	bool					restart;
4837 
4838 
4839 	PageFaultContext(vm_address_space* addressSpace, bool isWrite)
4840 		:
4841 		addressSpaceLocker(addressSpace, true),
4842 		map(&addressSpace->translation_map),
4843 		isWrite(isWrite)
4844 	{
4845 	}
4846 
4847 	~PageFaultContext()
4848 	{
4849 		UnlockAll();
4850 	}
4851 
4852 	void Prepare(VMCache* topCache, off_t cacheOffset)
4853 	{
4854 		this->topCache = topCache;
4855 		this->cacheOffset = cacheOffset;
4856 		page = NULL;
4857 		restart = false;
4858 
4859 		cacheChainLocker.SetTo(topCache);
4860 	}
4861 
4862 	void UnlockAll()
4863 	{
4864 		topCache = NULL;
4865 		addressSpaceLocker.Unlock();
4866 		cacheChainLocker.Unlock();
4867 	}
4868 };
4869 
4870 
4871 /*!	Gets the page that should be mapped into the area.
4872 	Returns an error code other than \c B_OK, if the page couldn't be found or
4873 	paged in. The locking state of the address space and the caches is undefined
4874 	in that case.
4875 	Returns \c B_OK with \c context.restart set to \c true, if the function
4876 	had to unlock the address space and all caches and is supposed to be called
4877 	again.
4878 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4879 	found. It is returned in \c context.page. The address space will still be
4880 	locked as well as all caches starting from the top cache to at least the
4881 	cache the page lives in.
4882 */
4883 static inline status_t
4884 fault_get_page(PageFaultContext& context)
4885 {
4886 	vm_cache* cache = context.topCache;
4887 	vm_cache* lastCache = NULL;
4888 	vm_page* page = NULL;
4889 
4890 	while (cache != NULL) {
4891 		// We already hold the lock of the cache at this point.
4892 
4893 		lastCache = cache;
4894 
4895 		for (;;) {
4896 			page = cache->LookupPage(context.cacheOffset);
4897 			if (page == NULL || page->state != PAGE_STATE_BUSY) {
4898 				// Either there is no page or there is one and it is not busy.
4899 				break;
4900 			}
4901 
4902 			// page must be busy -- wait for it to become unbusy
4903 			ConditionVariableEntry entry;
4904 			entry.Add(page);
4905 			context.UnlockAll();
4906 			entry.Wait();
4907 
4908 			// restart the whole process
4909 			context.restart = true;
4910 			return B_OK;
4911 		}
4912 
4913 		if (page != NULL)
4914 			break;
4915 
4916 		// The current cache does not contain the page we're looking for.
4917 
4918 		// see if the backing store has it
4919 		if (cache->HasPage(context.cacheOffset)) {
4920 			// insert a fresh page and mark it busy -- we're going to read it in
4921 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4922 			cache->InsertPage(page, context.cacheOffset);
4923 
4924 			ConditionVariable busyCondition;
4925 			busyCondition.Publish(page, "page");
4926 
4927 			// We need to unlock all caches and the address space while reading
4928 			// the page in. Keep a reference to the cache around.
4929 			cache->AcquireRefLocked();
4930 			context.UnlockAll();
4931 
4932 			// read the page in
4933 			iovec vec;
4934 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
4935 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
4936 
4937 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4938 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4939 
4940 			cache->Lock();
4941 
4942 			if (status < B_OK) {
4943 				// on error remove and free the page
4944 				dprintf("reading page from cache %p returned: %s!\n",
4945 					cache, strerror(status));
4946 
4947 				busyCondition.Unpublish();
4948 				cache->RemovePage(page);
4949 				vm_page_set_state(page, PAGE_STATE_FREE);
4950 
4951 				cache->ReleaseRefAndUnlock();
4952 				return status;
4953 			}
4954 
4955 			// mark the page unbusy again
4956 			page->state = PAGE_STATE_ACTIVE;
4957 			busyCondition.Unpublish();
4958 
4959 			// Since we needed to unlock everything temporarily, the area
4960 			// situation might have changed. So we need to restart the whole
4961 			// process.
4962 			cache->ReleaseRefAndUnlock();
4963 			context.restart = true;
4964 			return B_OK;
4965 		}
4966 
4967 		cache = context.cacheChainLocker.LockSourceCache();
4968 	}
4969 
4970 	if (page == NULL) {
4971 		// There was no adequate page, determine the cache for a clean one.
4972 		// Read-only pages belong in the deepest cache; only the topmost cache
4973 		// may have direct write access.
4974 		cache = context.isWrite ? context.topCache : lastCache;
4975 
4976 		// allocate a clean page
4977 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4978 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4979 			page->physical_page_number));
4980 
4981 		// insert the new page into our cache
4982 		cache->InsertPage(page, context.cacheOffset);
4983 
4984 	} else if (page->cache != context.topCache && context.isWrite) {
4985 		// We have a page that has the data we want, but in the wrong cache
4986 		// object so we need to copy it and stick it into the top cache.
4987 		vm_page* sourcePage = page;
4988 
4989 		// TODO: If memory is low, it might be a good idea to steal the page
4990 		// from our source cache -- if possible, that is.
4991 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4992 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4993 
4994 		// copy the page
4995 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4996 			sourcePage->physical_page_number * B_PAGE_SIZE);
4997 
4998 		// insert the new page into our cache
4999 		context.topCache->InsertPage(page, context.cacheOffset);
5000 	}
5001 
5002 	context.page = page;
5003 	return B_OK;
5004 }
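
// Caller-side sketch of the restart protocol described above (this is
// essentially what vm_soft_fault() below does):
//
//	while (true) {
//		...
//		context.Prepare(vm_area_get_locked_cache(area), cacheOffset);
//		if (fault_get_page(context) != B_OK)
//			break;			// error -- locking state is undefined
//		if (context.restart)
//			continue;		// everything was unlocked -- start over
//		// success: context.page is set, the caches are still locked
//		break;
//	}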
5005 
5006 
5007 static status_t
5008 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress,
5009 	bool isWrite, bool isUser)
5010 {
5011 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
5012 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
5013 
5014 	PageFaultContext context(addressSpace, isWrite);
5015 
5016 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
5017 	status_t status = B_OK;
5018 
5019 	atomic_add(&addressSpace->fault_count, 1);
5020 
5021 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
5022 	// the pages upfront makes sure we don't have any cache locked, so that the
5023 	// page daemon/thief can do their job without problems.
5024 	size_t reservePages = 2 + context.map->ops->map_max_pages_need(context.map,
5025 		originalAddress, originalAddress);
5026 	context.addressSpaceLocker.Unlock();
5027 	vm_page_reserve_pages(reservePages);
5028 
5029 	while (true) {
5030 		context.addressSpaceLocker.Lock();
5031 
5032 		// get the area the fault was in
5033 		vm_area* area = vm_area_lookup(addressSpace, address);
5034 		if (area == NULL) {
5035 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
5036 				"space\n", originalAddress);
5037 			TPF(PageFaultError(-1,
5038 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
5039 			status = B_BAD_ADDRESS;
5040 			break;
5041 		}
5042 
5043 		// check permissions
5044 		uint32 protection = get_area_page_protection(area, address);
5045 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
5046 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
5047 				(void*)originalAddress);
5048 			TPF(PageFaultError(area->id,
5049 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
5050 			status = B_PERMISSION_DENIED;
5051 			break;
5052 		}
5053 		if (isWrite && (protection
5054 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
5055 			dprintf("write access attempted on write-protected area 0x%lx at"
5056 				" %p\n", area->id, (void*)originalAddress);
5057 			TPF(PageFaultError(area->id,
5058 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
5059 			status = B_PERMISSION_DENIED;
5060 			break;
5061 		} else if (!isWrite && (protection
5062 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
5063 			dprintf("read access attempted on read-protected area 0x%lx at"
5064 				" %p\n", area->id, (void*)originalAddress);
5065 			TPF(PageFaultError(area->id,
5066 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
5067 			status = B_PERMISSION_DENIED;
5068 			break;
5069 		}
5070 
5071 		// We have the area, it was a valid access, so let's try to resolve the
5072 		// page fault now.
5073 		// First, the topmost cache of the area is investigated.
5074 
5075 		context.Prepare(vm_area_get_locked_cache(area),
5076 			address - area->base + area->cache_offset);
5077 
5078 		// See if this cache has a fault handler -- this will do all the work
5079 		// for us.
5080 		{
5081 			// Note, since the page fault is resolved with interrupts enabled,
5082 			// the fault handler could be called more than once for the same
5083 			// reason -- the store must take this into account.
5084 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
5085 			if (status != B_BAD_HANDLER)
5086 				break;
5087 		}
5088 
5089 		// The topmost cache has no fault handler, so let's see if the cache or
5090 		// its sources already have the page we're searching for (we're going
5091 		// from top to bottom).
5092 		status = fault_get_page(context);
5093 		if (status != B_OK) {
5094 			TPF(PageFaultError(area->id, status));
5095 			break;
5096 		}
5097 
5098 		if (context.restart)
5099 			continue;
5100 
5101 		// All went fine, all there is left to do is to map the page into the
5102 		// address space.
5103 		TPF(PageFaultDone(area->id, context.topCache, context.page->cache,
5104 			context.page));
5105 
5106 		// If the page doesn't reside in the area's cache, we need to make sure
5107 		// it's mapped read-only, so that we cannot overwrite someone else's
5108 		// data (copy-on-write)
5109 		uint32 newProtection = protection;
5110 		if (context.page->cache != context.topCache && !isWrite)
5111 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
5112 
5113 		bool unmapPage = false;
5114 		bool mapPage = true;
5115 
5116 		// check whether there's already a page mapped at the address
5117 		context.map->ops->lock(context.map);
5118 
5119 		addr_t physicalAddress;
5120 		uint32 flags;
5121 		vm_page* mappedPage;
5122 		if (context.map->ops->query(context.map, address, &physicalAddress,
5123 				&flags) == B_OK
5124 			&& (flags & PAGE_PRESENT) != 0
5125 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5126 				!= NULL) {
5127 			// Yep there's already a page. If it's ours, we can simply adjust
5128 			// its protection. Otherwise we have to unmap it.
5129 			if (mappedPage == context.page) {
5130 				context.map->ops->protect(context.map, address,
5131 					address + (B_PAGE_SIZE - 1), newProtection);
5132 
5133 				mapPage = false;
5134 			} else
5135 				unmapPage = true;
5136 		}
5137 
5138 		context.map->ops->unlock(context.map);
5139 
5140 		if (unmapPage)
5141 			vm_unmap_page(area, address, true);
5142 
5143 		if (mapPage)
5144 			vm_map_page(area, context.page, address, newProtection);
5145 
5146 		break;
5147 	}
5148 
5149 	vm_page_unreserve_pages(reservePages);
5150 
5151 	return status;
5152 }
5153 
5154 
5155 /*! You must have the address space's lock held (read or write). */
5156 vm_area*
5157 vm_area_lookup(vm_address_space* addressSpace, addr_t address)
5158 {
5159 	vm_area* area;
5160 
5161 	// check the area hint first
5162 	area = addressSpace->area_hint;
5163 	if (area != NULL
5164 		&& area->base <= address
5165 		&& area->base + (area->size - 1) >= address)
5166 		goto found;
5167 
5168 	for (area = addressSpace->areas; area != NULL;
5169 			area = area->address_space_next) {
5170 		if (area->id == RESERVED_AREA_ID)
5171 			continue;
5172 
5173 		if (area->base <= address && area->base + (area->size - 1) >= address)
5174 			break;
5175 	}
5176 
5177 found:
5178 	if (area)
5179 		addressSpace->area_hint = area;
5180 
5181 	return area;
5182 }
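
// Illustrative lookup under the required lock, following the pattern of
// vm_area_for() above:
//
//	AddressSpaceReadLocker locker(team);
//	if (locker.IsLocked()) {
//		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
//		// only use "area" while the locker is still held
//	}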
5183 
5184 
5185 status_t
5186 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
5187 {
5188 	return vm_kernel_address_space()->translation_map.ops->get_physical_page(
5189 		paddr, _vaddr, _handle);
5190 }
5191 
5192 status_t
5193 vm_put_physical_page(addr_t vaddr, void* handle)
5194 {
5195 	return vm_kernel_address_space()->translation_map.ops->put_physical_page(
5196 		vaddr, handle);
5197 }
5198 
5199 
5200 status_t
5201 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
5202 {
5203 	return vm_kernel_address_space()->translation_map.ops
5204 		->get_physical_page_current_cpu(paddr, _vaddr, _handle);
5205 }
5206 
5207 status_t
5208 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5209 {
5210 	return vm_kernel_address_space()->translation_map.ops
5211 		->put_physical_page_current_cpu(vaddr, handle);
5212 }
5213 
5214 
5215 status_t
5216 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
5217 {
5218 	return vm_kernel_address_space()->translation_map.ops
5219 		->get_physical_page_debug(paddr, _vaddr, _handle);
5220 }
5221 
5222 status_t
5223 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5224 {
5225 	return vm_kernel_address_space()->translation_map.ops
5226 		->put_physical_page_debug(vaddr, handle);
5227 }
5228 
5229 
5230 void
5231 vm_get_info(system_memory_info* info)
5232 {
5233 	swap_get_info(info);
5234 
5235 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
5236 	info->page_faults = sPageFaults;
5237 
5238 	MutexLocker locker(sAvailableMemoryLock);
5239 	info->free_memory = sAvailableMemory;
5240 	info->needed_memory = sNeededMemory;
5241 }
5242 
5243 
5244 uint32
5245 vm_num_page_faults(void)
5246 {
5247 	return sPageFaults;
5248 }
5249 
5250 
5251 off_t
5252 vm_available_memory(void)
5253 {
5254 	MutexLocker locker(sAvailableMemoryLock);
5255 	return sAvailableMemory;
5256 }
5257 
5258 
5259 off_t
5260 vm_available_not_needed_memory(void)
5261 {
5262 	MutexLocker locker(sAvailableMemoryLock);
5263 	return sAvailableMemory - sNeededMemory;
5264 }
5265 
5266 
5267 void
5268 vm_unreserve_memory(size_t amount)
5269 {
5270 	mutex_lock(&sAvailableMemoryLock);
5271 
5272 	sAvailableMemory += amount;
5273 
5274 	mutex_unlock(&sAvailableMemoryLock);
5275 }
5276 
5277 
5278 status_t
5279 vm_try_reserve_memory(size_t amount, bigtime_t timeout)
5280 {
5281 	MutexLocker locker(sAvailableMemoryLock);
5282 
5283 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5284 
5285 	if (sAvailableMemory >= amount) {
5286 		sAvailableMemory -= amount;
5287 		return B_OK;
5288 	}
5289 
5290 	if (timeout <= 0)
5291 		return B_NO_MEMORY;
5292 
5293 	// turn timeout into an absolute timeout
5294 	timeout += system_time();
5295 
5296 	// loop until we've got the memory or the timeout occurs
5297 	do {
5298 		sNeededMemory += amount;
5299 
5300 		// call the low resource manager
5301 		locker.Unlock();
5302 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5303 			B_ABSOLUTE_TIMEOUT, timeout);
5304 		locker.Lock();
5305 
5306 		sNeededMemory -= amount;
5307 
5308 		if (sAvailableMemory >= amount) {
5309 			sAvailableMemory -= amount;
5310 			return B_OK;
5311 		}
5312 	} while (timeout > system_time());
5313 
5314 	return B_NO_MEMORY;
5315 }
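
// Typical pairing (illustrative): a caller that wants to commit "size" bytes
// waits up to one second for the low resource manager to free some memory and
// returns the commitment via vm_unreserve_memory() when done with it:
//
//	if (vm_try_reserve_memory(size, 1000000) != B_OK)
//		return B_NO_MEMORY;
//	...
//	vm_unreserve_memory(size);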
5316 
5317 
5318 status_t
5319 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
5320 {
5321 	AddressSpaceReadLocker locker;
5322 	vm_area* area;
5323 	status_t status = locker.SetFromArea(id, area);
5324 	if (status != B_OK)
5325 		return status;
5326 
5327 	return arch_vm_set_memory_type(area, physicalBase, type);
5328 }
5329 
5330 
5331 /*!	This function enforces some protection properties:
5332 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5333 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5334 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5335 	   and B_KERNEL_WRITE_AREA.
5336 */
5337 static void
5338 fix_protection(uint32* protection)
5339 {
5340 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5341 		if ((*protection & B_USER_PROTECTION) == 0
5342 			|| (*protection & B_WRITE_AREA) != 0)
5343 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5344 		else
5345 			*protection |= B_KERNEL_READ_AREA;
5346 	}
5347 }
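
// For example, following the rules above: 0 becomes
// B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, B_READ_AREA becomes
// B_READ_AREA | B_KERNEL_READ_AREA, and B_READ_AREA | B_WRITE_AREA becomes
// B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.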
5348 
5349 
5350 static void
5351 fill_area_info(struct vm_area* area, area_info* info, size_t size)
5352 {
5353 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5354 	info->area = area->id;
5355 	info->address = (void*)area->base;
5356 	info->size = area->size;
5357 	info->protection = area->protection;
5358 	info->lock = B_FULL_LOCK;
5359 	info->team = area->address_space->id;
5360 	info->copy_count = 0;
5361 	info->in_count = 0;
5362 	info->out_count = 0;
5363 		// TODO: retrieve real values here!
5364 
5365 	vm_cache* cache = vm_area_get_locked_cache(area);
5366 
5367 	// Note, this is a simplification; the cache could be larger than this area
5368 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5369 
5370 	vm_area_put_locked_cache(cache);
5371 }
5372 
5373 
5374 /*!
5375 	Tests whether the area that contains the specified address actually
5376 	exists and whether it needs any kind of locking.
5377 	Used by both lock_memory() and unlock_memory().
5378 */
5379 static status_t
5380 test_lock_memory(vm_address_space* addressSpace, addr_t address,
5381 	bool& needsLocking)
5382 {
5383 	rw_lock_read_lock(&addressSpace->lock);
5384 
5385 	vm_area* area = vm_area_lookup(addressSpace, address);
5386 	if (area != NULL) {
5387 		// This determines if we need to lock the memory at all
5388 		needsLocking = area->cache_type != CACHE_TYPE_NULL
5389 			&& area->cache_type != CACHE_TYPE_DEVICE
5390 			&& area->wiring != B_FULL_LOCK
5391 			&& area->wiring != B_CONTIGUOUS;
5392 	}
5393 
5394 	rw_lock_read_unlock(&addressSpace->lock);
5395 
5396 	if (area == NULL)
5397 		return B_BAD_ADDRESS;
5398 
5399 	return B_OK;
5400 }
5401 
5402 
5403 static status_t
5404 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5405 {
5406 	// is newSize a multiple of B_PAGE_SIZE?
5407 	if (newSize & (B_PAGE_SIZE - 1))
5408 		return B_BAD_VALUE;
5409 
5410 	// lock all affected address spaces and the cache
5411 	vm_area* area;
5412 	vm_cache* cache;
5413 
5414 	MultiAddressSpaceLocker locker;
5415 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
5416 		&cache);
5417 	if (status != B_OK)
5418 		return status;
5419 	AreaCacheLocker cacheLocker(cache);	// already locked
5420 
5421 	// enforce restrictions
5422 	if (!kernel) {
5423 		if ((area->protection & B_KERNEL_AREA) != 0)
5424 			return B_NOT_ALLOWED;
5425 		// TODO: Enforce all restrictions (team, etc.)!
5426 	}
5427 
5428 	size_t oldSize = area->size;
5429 	if (newSize == oldSize)
5430 		return B_OK;
5431 
5432 	// Resize all areas of this area's cache
5433 
5434 	if (cache->type != CACHE_TYPE_RAM)
5435 		return B_NOT_ALLOWED;
5436 
5437 	if (oldSize < newSize) {
5438 		// We need to check if all areas of this cache can be resized
5439 
5440 		for (vm_area* current = cache->areas; current != NULL;
5441 				current = current->cache_next) {
5442 			vm_area* next = current->address_space_next;
5443 			if (next != NULL && next->base <= (current->base + newSize)) {
5444 				// If the area was created inside a reserved area, it can
5445 				// also be resized in that area
5446 				// TODO: if there is free space after the reserved area, it could
5447 				// be used as well...
5448 				if (next->id == RESERVED_AREA_ID
5449 					&& next->cache_offset <= current->base
5450 					&& next->base - 1 + next->size
5451 						>= current->base - 1 + newSize)
5452 					continue;
5453 
5454 				return B_ERROR;
5455 			}
5456 		}
5457 	}
5458 
5459 	// Okay, looks good so far, so let's do it
5460 
5461 	if (oldSize < newSize) {
5462 		// Growing the cache can fail, so we do it first.
5463 		status = cache->Resize(cache->virtual_base + newSize);
5464 		if (status != B_OK)
5465 			return status;
5466 	}
5467 
5468 	for (vm_area* current = cache->areas; current != NULL;
5469 			current = current->cache_next) {
5470 		vm_area* next = current->address_space_next;
5471 		if (next != NULL && next->base <= (current->base + newSize)) {
5472 			if (next->id == RESERVED_AREA_ID
5473 				&& next->cache_offset <= current->base
5474 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
5475 				// resize reserved area
5476 				addr_t offset = current->base + newSize - next->base;
5477 				if (next->size <= offset) {
5478 					current->address_space_next = next->address_space_next;
5479 					free(next);
5480 				} else {
5481 					next->size -= offset;
5482 					next->base += offset;
5483 				}
5484 			} else {
5485 				panic("resize situation for area %p has changed although we "
5486 					"should have the address space lock", current);
5487 				status = B_ERROR;
5488 				break;
5489 			}
5490 		}
5491 
5492 		current->size = newSize;
5493 
5494 		// We also need to unmap all pages beyond the new size, if the area
5495 		// has shrunk
5496 		if (newSize < oldSize) {
5497 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
5498 				false);
5499 		}
5500 	}
5501 
5502 	// shrinking the cache can't fail, so we do it now
5503 	if (status == B_OK && newSize < oldSize)
5504 		status = cache->Resize(cache->virtual_base + newSize);
5505 
5506 	if (status < B_OK) {
5507 		// This shouldn't really be possible, but hey, who knows
5508 		for (vm_area* current = cache->areas; current != NULL;
5509 				current = current->cache_next) {
5510 			current->size = oldSize;
5511 		}
5512 
5513 		cache->Resize(cache->virtual_base + oldSize);
5514 	}
5515 
5516 	// TODO: we must honour the lock restrictions of this area
5517 	return status;
5518 }
5519 
5520 
5521 status_t
5522 vm_memset_physical(addr_t address, int value, size_t length)
5523 {
5524 	return vm_kernel_address_space()->translation_map.ops->memset_physical(
5525 		address, value, length);
5526 }
5527 
5528 
5529 status_t
5530 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
5531 {
5532 	return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical(
5533 		to, from, length, user);
5534 }
5535 
5536 
5537 status_t
5538 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
5539 {
5540 	return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical(
5541 		to, _from, length, user);
5542 }
5543 
5544 
5545 void
5546 vm_memcpy_physical_page(addr_t to, addr_t from)
5547 {
5548 	return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page(
5549 		to, from);
5550 }
5551 
5552 
5553 //	#pragma mark - kernel public API
5554 
5555 
5556 status_t
5557 user_memcpy(void* to, const void* from, size_t size)
5558 {
5559 	// don't allow address overflows
5560 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5561 		return B_BAD_ADDRESS;
5562 
5563 	if (arch_cpu_user_memcpy(to, from, size,
5564 			&thread_get_current_thread()->fault_handler) < B_OK)
5565 		return B_BAD_ADDRESS;
5566 
5567 	return B_OK;
5568 }
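
// Illustrative syscall-side use, assuming "userInfo" is an unchecked pointer
// coming from userland:
//
//	area_info info;
//	if (!IS_USER_ADDRESS(userInfo)
//		|| user_memcpy(&info, userInfo, sizeof(area_info)) != B_OK)
//		return B_BAD_ADDRESS;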
5569 
5570 
5571 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5572 	the string in \a to, NULL-terminating the result.
5573 
5574 	\param to Pointer to the destination C-string.
5575 	\param from Pointer to the source C-string.
5576 	\param size Size in bytes of the string buffer pointed to by \a to.
5577 
5578 	\return strlen(\a from), or \c B_BAD_ADDRESS on an address violation.
5579 */
5580 ssize_t
5581 user_strlcpy(char* to, const char* from, size_t size)
5582 {
5583 	if (size == 0)
5584 		return 0;
5585 	if (from == NULL || to == NULL)
5586 		return B_BAD_ADDRESS;
5587 
5588 	// limit size to avoid address overflows
5589 	size_t maxSize = std::min(size,
5590 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5591 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5592 		// the source address might still overflow.
5593 
5594 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5595 		&thread_get_current_thread()->fault_handler);
5596 
5597 	// If we hit the address overflow boundary, fail.
5598 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
5599 		return B_BAD_ADDRESS;
5600 
5601 	return result;
5602 }
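
// Illustrative caller-side truncation check, assuming "userPath" is a user
// pointer:
//
//	char path[B_PATH_NAME_LENGTH];
//	ssize_t length = user_strlcpy(path, userPath, sizeof(path));
//	if (length < 0)
//		return B_BAD_ADDRESS;
//	if ((size_t)length >= sizeof(path))
//		return B_NAME_TOO_LONG;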
5603 
5604 
5605 status_t
5606 user_memset(void* s, char c, size_t count)
5607 {
5608 	// don't allow address overflows
5609 	if ((addr_t)s + count < (addr_t)s)
5610 		return B_BAD_ADDRESS;
5611 
5612 	if (arch_cpu_user_memset(s, c, count,
5613 			&thread_get_current_thread()->fault_handler) < B_OK)
5614 		return B_BAD_ADDRESS;
5615 
5616 	return B_OK;
5617 }
5618 
5619 
5620 status_t
5621 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5622 {
5623 	vm_address_space* addressSpace = NULL;
5624 	struct vm_translation_map* map;
5625 	addr_t unalignedBase = (addr_t)address;
5626 	addr_t end = unalignedBase + numBytes;
5627 	addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
5628 	bool isUser = IS_USER_ADDRESS(address);
5629 	bool needsLocking = true;
5630 
5631 	if (isUser) {
5632 		if (team == B_CURRENT_TEAM)
5633 			addressSpace = vm_get_current_user_address_space();
5634 		else
5635 			addressSpace = vm_get_address_space(team);
5636 	} else
5637 		addressSpace = vm_get_kernel_address_space();
5638 	if (addressSpace == NULL)
5639 		return B_ERROR;
5640 
5641 	// test if we're on an area that allows faults at all
5642 
5643 	map = &addressSpace->translation_map;
5644 
5645 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5646 	if (status < B_OK)
5647 		goto out;
5648 	if (!needsLocking)
5649 		goto out;
5650 
5651 	for (; base < end; base += B_PAGE_SIZE) {
5652 		addr_t physicalAddress;
5653 		uint32 protection;
5654 		// no local "status" here; errors must reach the "out" label below
5655 
5656 		map->ops->lock(map);
5657 		status = map->ops->query(map, base, &physicalAddress, &protection);
5658 		map->ops->unlock(map);
5659 
5660 		if (status < B_OK)
5661 			goto out;
5662 
5663 		if ((protection & PAGE_PRESENT) != 0) {
5664 			// if B_READ_DEVICE is set, the caller intends to write to the locked
5665 			// memory, so if it hasn't been mapped writable, we'll try the soft
5666 			// fault anyway
5667 			if ((flags & B_READ_DEVICE) == 0
5668 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
5669 				// update wiring
5670 				vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5671 				if (page == NULL)
5672 					panic("couldn't lookup physical page\n");
5673 
5674 				increment_page_wired_count(page);
5675 				continue;
5676 			}
5677 		}
5678 
5679 		status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
5680 			isUser);
5681 		if (status != B_OK)	{
5682 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) "
5683 				"failed: %s\n", (void*)unalignedBase, numBytes, flags,
5684 				strerror(status));
5685 			goto out;
5686 		}
5687 
5688 		// TODO: Here's a race condition. We should probably add a parameter
5689 		// to vm_soft_fault() that would cause the page's wired count to be
5690 		// incremented immediately.
5691 		// TODO: After memory has been locked in an area, we need to prevent the
5692 		// area from being deleted, resized, cut, etc. That could be done using
5693 		// a "locked pages" count in vm_area, and maybe a condition variable, if
5694 		// we want to allow waiting for the area to become eligible for these
5695 		// operations again.
5696 
5697 		map->ops->lock(map);
5698 		status = map->ops->query(map, base, &physicalAddress, &protection);
5699 		map->ops->unlock(map);
5700 
5701 		if (status < B_OK)
5702 			goto out;
5703 
5704 		// update wiring
5705 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5706 		if (page == NULL)
5707 			panic("couldn't lookup physical page");
5708 
5709 		increment_page_wired_count(page);
5710 			// TODO: needs to be atomic on all platforms!
5711 	}
5712 
5713 out:
5714 	vm_put_address_space(addressSpace);
5715 	return status;
5716 }
5717 
5718 
5719 status_t
5720 lock_memory(void* address, size_t numBytes, uint32 flags)
5721 {
5722 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5723 }
5724 
5725 
5726 status_t
5727 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5728 {
5729 	vm_address_space* addressSpace = NULL;
5730 	struct vm_translation_map* map;
5731 	addr_t unalignedBase = (addr_t)address;
5732 	addr_t end = unalignedBase + numBytes;
5733 	addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
5734 	bool needsLocking = true;
5735 
5736 	if (IS_USER_ADDRESS(address)) {
5737 		if (team == B_CURRENT_TEAM)
5738 			addressSpace = vm_get_current_user_address_space();
5739 		else
5740 			addressSpace = vm_get_address_space(team);
5741 	} else
5742 		addressSpace = vm_get_kernel_address_space();
5743 	if (addressSpace == NULL)
5744 		return B_ERROR;
5745 
5746 	map = &addressSpace->translation_map;
5747 
5748 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5749 	if (status < B_OK)
5750 		goto out;
5751 	if (!needsLocking)
5752 		goto out;
5753 
5754 	for (; base < end; base += B_PAGE_SIZE) {
5755 		map->ops->lock(map);
5756 
5757 		addr_t physicalAddress;
5758 		uint32 protection;
5759 		status = map->ops->query(map, base, &physicalAddress,
5760 			&protection);
5761 
5762 		map->ops->unlock(map);
5763 
5764 		if (status < B_OK)
5765 			goto out;
5766 		if ((protection & PAGE_PRESENT) == 0)
5767 			panic("calling unlock_memory() on unmapped memory!");
5768 
5769 		// update wiring
5770 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5771 		if (page == NULL)
5772 			panic("couldn't lookup physical page");
5773 
5774 		decrement_page_wired_count(page);
5775 	}
5776 
5777 out:
5778 	vm_put_address_space(addressSpace);
5779 	return status;
5780 }
5781 
5782 
5783 status_t
5784 unlock_memory(void* address, size_t numBytes, uint32 flags)
5785 {
5786 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5787 }
5788 
5789 
5790 /*!	Similar to get_memory_map(), but also allows to specify the address space
5791 	for the memory in question and has saner semantics.
5792 	Returns \c B_OK when the complete range could be translated, or
5793 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5794 	case the actual number of entries is written to \c *_numEntries. Any other
5795 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5796 	in this case.
5797 */
5798 status_t
5799 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5800 	physical_entry* table, uint32* _numEntries)
5801 {
5802 	uint32 numEntries = *_numEntries;
5803 	*_numEntries = 0;
5804 
5805 	vm_address_space* addressSpace;
5806 	addr_t virtualAddress = (addr_t)address;
5807 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5808 	addr_t physicalAddress;
5809 	status_t status = B_OK;
5810 	int32 index = -1;
5811 	addr_t offset = 0;
5812 	bool interrupts = are_interrupts_enabled();
5813 
5814 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5815 		address, numBytes, numEntries));
5816 
5817 	if (numEntries == 0 || numBytes == 0)
5818 		return B_BAD_VALUE;
5819 
5820 	// in which address space is the address to be found?
5821 	if (IS_USER_ADDRESS(virtualAddress)) {
5822 		if (team == B_CURRENT_TEAM)
5823 			addressSpace = vm_get_current_user_address_space();
5824 		else
5825 			addressSpace = vm_get_address_space(team);
5826 	} else
5827 		addressSpace = vm_get_kernel_address_space();
5828 
5829 	if (addressSpace == NULL)
5830 		return B_ERROR;
5831 
5832 	vm_translation_map* map = &addressSpace->translation_map;
5833 
5834 	if (interrupts)
5835 		map->ops->lock(map);
5836 
5837 	while (offset < numBytes) {
5838 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5839 		uint32 flags;
5840 
5841 		if (interrupts) {
5842 			status = map->ops->query(map, (addr_t)address + offset,
5843 				&physicalAddress, &flags);
5844 		} else {
5845 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
5846 				&physicalAddress, &flags);
5847 		}
5848 		if (status < B_OK)
5849 			break;
5850 		if ((flags & PAGE_PRESENT) == 0) {
5851 			panic("get_memory_map() called on unmapped memory!");
5852 			return B_BAD_ADDRESS;
5853 		}
5854 
5855 		if (index < 0 && pageOffset > 0) {
5856 			physicalAddress += pageOffset;
5857 			if (bytes > B_PAGE_SIZE - pageOffset)
5858 				bytes = B_PAGE_SIZE - pageOffset;
5859 		}
5860 
5861 		// need to switch to the next physical_entry?
5862 		if (index < 0 || (addr_t)table[index].address
5863 				!= physicalAddress - table[index].size) {
5864 			if ((uint32)++index + 1 > numEntries) {
5865 				// table too small
5866 				status = B_BUFFER_OVERFLOW;
5867 				break;
5868 			}
5869 			table[index].address = (void*)physicalAddress;
5870 			table[index].size = bytes;
5871 		} else {
5872 			// page does fit in current entry
5873 			table[index].size += bytes;
5874 		}
5875 
5876 		offset += bytes;
5877 	}
5878 
5879 	if (interrupts)
5880 		map->ops->unlock(map);
5881 
5882 	if (status != B_OK)
5883 		return status;
5884 
5885 	if ((uint32)index + 1 > numEntries) {
5886 		*_numEntries = index;
5887 		return B_BUFFER_OVERFLOW;
5888 	}
5889 
5890 	*_numEntries = index + 1;
5891 	return B_OK;
5892 }
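
/*	Illustrative usage sketch (hypothetical caller; "buffer" and "bufferSize"
	are assumed variables): translating a buffer into its physical runs and
	accepting a partial result when the table is too small:

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer,
			bufferSize, entries, &count);
		if (status == B_OK || status == B_BUFFER_OVERFLOW) {
			// "count" entries are valid; with B_BUFFER_OVERFLOW they only
			// cover the beginning of the range
		}
*/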
5893 
5894 
5895 /*!	According to the BeBook, this function should always succeed.
5896 	This is no longer the case.
5897 */
5898 long
5899 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
5900 	long numEntries)
5901 {
5902 	uint32 entriesRead = numEntries;
5903 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5904 		table, &entriesRead);
5905 	if (error != B_OK)
5906 		return error;
5907 
5908 	// close the entry list
5909 
5910 	// if it's only one entry, we will silently accept the missing ending
5911 	if (numEntries == 1)
5912 		return B_OK;
5913 
5914 	if (entriesRead + 1 > (uint32)numEntries)
5915 		return B_BUFFER_OVERFLOW;
5916 
5917 	table[entriesRead].address = NULL;
5918 	table[entriesRead].size = 0;
5919 
5920 	return B_OK;
5921 }
5922 
5923 
5924 area_id
5925 area_for(void* address)
5926 {
5927 	return vm_area_for((addr_t)address, true);
5928 }
5929 
5930 
5931 area_id
5932 find_area(const char* name)
5933 {
5934 	rw_lock_read_lock(&sAreaHashLock);
5935 	struct hash_iterator iterator;
5936 	hash_open(sAreaHash, &iterator);
5937 
5938 	vm_area* area;
5939 	area_id id = B_NAME_NOT_FOUND;
5940 	while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) {
5941 		if (area->id == RESERVED_AREA_ID)
5942 			continue;
5943 
5944 		if (!strcmp(area->name, name)) {
5945 			id = area->id;
5946 			break;
5947 		}
5948 	}
5949 
5950 	hash_close(sAreaHash, &iterator, false);
5951 	rw_lock_read_unlock(&sAreaHashLock);
5952 
5953 	return id;
5954 }
5955 
5956 
5957 status_t
5958 _get_area_info(area_id id, area_info* info, size_t size)
5959 {
5960 	if (size != sizeof(area_info) || info == NULL)
5961 		return B_BAD_VALUE;
5962 
5963 	AddressSpaceReadLocker locker;
5964 	vm_area* area;
5965 	status_t status = locker.SetFromArea(id, area);
5966 	if (status != B_OK)
5967 		return status;
5968 
5969 	fill_area_info(area, info, size);
5970 	return B_OK;
5971 }
5972 
5973 
5974 status_t
5975 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5976 {
5977 	addr_t nextBase = *(addr_t*)cookie;
5978 
5979 	// we're already through the list
5980 	if (nextBase == (addr_t)-1)
5981 		return B_ENTRY_NOT_FOUND;
5982 
5983 	if (team == B_CURRENT_TEAM)
5984 		team = team_get_current_team_id();
5985 
5986 	AddressSpaceReadLocker locker(team);
5987 	if (!locker.IsLocked())
5988 		return B_BAD_TEAM_ID;
5989 
5990 	vm_area* area;
5991 	for (area = locker.AddressSpace()->areas; area != NULL;
5992 			area = area->address_space_next) {
5993 		if (area->id == RESERVED_AREA_ID)
5994 			continue;
5995 
5996 		if (area->base > nextBase)
5997 			break;
5998 	}
5999 
6000 	if (area == NULL) {
6001 		*cookie = -1;	// so the next call returns B_ENTRY_NOT_FOUND early
6002 		return B_ENTRY_NOT_FOUND;
6003 	}
6004 
6005 	fill_area_info(area, info, size);
6006 	*cookie = (int32)(area->base);
6007 
6008 	return B_OK;
6009 }
6010 
6011 
6012 status_t
6013 set_area_protection(area_id area, uint32 newProtection)
6014 {
6015 	fix_protection(&newProtection);
6016 
6017 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
6018 		newProtection, true);
6019 }
6020 
6021 
6022 status_t
6023 resize_area(area_id areaID, size_t newSize)
6024 {
6025 	return vm_resize_area(areaID, newSize, true);
6026 }
6027 
6028 
6029 /*!	Transfers the specified area to a new team. The caller must be the owner
6030 	of the area (not yet enforced but probably should be).
6031 */
6032 area_id
6033 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6034 	bool kernel)
6035 {
6036 	area_info info;
6037 	status_t status = get_area_info(id, &info);
6038 	if (status != B_OK)
6039 		return status;
6040 
6041 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6042 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6043 	if (clonedArea < 0)
6044 		return clonedArea;
6045 
6046 	status = vm_delete_area(info.team, id, kernel);
6047 	if (status != B_OK) {
6048 		vm_delete_area(target, clonedArea, kernel);
6049 		return status;
6050 	}
6051 
6052 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6053 
6054 	return clonedArea;
6055 }
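
/*	Illustrative usage sketch (hypothetical caller; "areaID" and "targetTeam"
	are assumed variables): handing an area over to another team and letting
	the target address space choose the location:

		void* address = NULL;
		area_id newArea = transfer_area(areaID, &address, B_ANY_ADDRESS,
			targetTeam, true);
		if (newArea < 0)
			return newArea;
				// on failure the original area is left untouched

	On success the original area is gone and "newArea" lives in "targetTeam"
	at "address" (as a B_SHARED_AREA clone, see the TODO above).
*/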
6056 
6057 
6058 area_id
6059 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
6060 	uint32 addressSpec, uint32 protection, void** _virtualAddress)
6061 {
6062 	if (!arch_vm_supports_protection(protection))
6063 		return B_NOT_SUPPORTED;
6064 
6065 	fix_protection(&protection);
6066 
6067 	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
6068 		_virtualAddress, addressSpec, numBytes, protection,
6069 		(addr_t)physicalAddress);
6070 }
6071 
6072 
6073 area_id
6074 clone_area(const char* name, void** _address, uint32 addressSpec,
6075 	uint32 protection, area_id source)
6076 {
6077 	if ((protection & B_KERNEL_PROTECTION) == 0)
6078 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6079 
6080 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
6081 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6082 }
6083 
6084 
6085 area_id
6086 create_area_etc(team_id team, const char* name, void** address,
6087 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
6088 	addr_t physicalAddress, uint32 flags)
6089 {
6090 	fix_protection(&protection);
6091 
6092 	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
6093 		size, lock, protection, physicalAddress, flags, true);
6094 }
6095 
6096 
6097 area_id
6098 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
6099 	uint32 lock, uint32 protection)
6100 {
6101 	fix_protection(&protection);
6102 
6103 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name,
6104 		_address, addressSpec, size, lock, protection, 0, 0, true);
6105 }
6106 
6107 
6108 status_t
6109 delete_area(area_id area)
6110 {
6111 	return vm_delete_area(vm_kernel_address_space_id(), area, true);
6112 }
6113 
6114 
6115 //	#pragma mark - Userland syscalls
6116 
6117 
6118 status_t
6119 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6120 	addr_t size)
6121 {
6122 	// filter out some unavailable values (for userland)
6123 	switch (addressSpec) {
6124 		case B_ANY_KERNEL_ADDRESS:
6125 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6126 			return B_BAD_VALUE;
6127 	}
6128 
6129 	addr_t address;
6130 
6131 	if (!IS_USER_ADDRESS(userAddress)
6132 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6133 		return B_BAD_ADDRESS;
6134 
6135 	status_t status = vm_reserve_address_range(
6136 		vm_current_user_address_space_id(), (void**)&address, addressSpec, size,
6137 		RESERVED_AVOID_BASE);
6138 	if (status != B_OK)
6139 		return status;
6140 
6141 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6142 		vm_unreserve_address_range(vm_current_user_address_space_id(),
6143 			(void*)address, size);
6144 		return B_BAD_ADDRESS;
6145 	}
6146 
6147 	return B_OK;
6148 }
6149 
6150 
6151 status_t
6152 _user_unreserve_address_range(addr_t address, addr_t size)
6153 {
6154 	return vm_unreserve_address_range(vm_current_user_address_space_id(),
6155 		(void*)address, size);
6156 }
6157 
6158 
6159 area_id
6160 _user_area_for(void* address)
6161 {
6162 	return vm_area_for((addr_t)address, false);
6163 }
6164 
6165 
6166 area_id
6167 _user_find_area(const char* userName)
6168 {
6169 	char name[B_OS_NAME_LENGTH];
6170 
6171 	if (!IS_USER_ADDRESS(userName)
6172 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6173 		return B_BAD_ADDRESS;
6174 
6175 	return find_area(name);
6176 }
6177 
6178 
6179 status_t
6180 _user_get_area_info(area_id area, area_info* userInfo)
6181 {
6182 	if (!IS_USER_ADDRESS(userInfo))
6183 		return B_BAD_ADDRESS;
6184 
6185 	area_info info;
6186 	status_t status = get_area_info(area, &info);
6187 	if (status < B_OK)
6188 		return status;
6189 
6190 	// TODO: do we want to prevent userland from seeing kernel protections?
6191 	//info.protection &= B_USER_PROTECTION;
6192 
6193 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6194 		return B_BAD_ADDRESS;
6195 
6196 	return status;
6197 }
6198 
6199 
6200 status_t
6201 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
6202 {
6203 	int32 cookie;
6204 
6205 	if (!IS_USER_ADDRESS(userCookie)
6206 		|| !IS_USER_ADDRESS(userInfo)
6207 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
6208 		return B_BAD_ADDRESS;
6209 
6210 	area_info info;
6211 	status_t status = _get_next_area_info(team, &cookie, &info,
6212 		sizeof(area_info));
6213 	if (status != B_OK)
6214 		return status;
6215 
6216 	//info.protection &= B_USER_PROTECTION;
6217 
6218 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
6219 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6220 		return B_BAD_ADDRESS;
6221 
6222 	return status;
6223 }
6224 
6225 
6226 status_t
6227 _user_set_area_protection(area_id area, uint32 newProtection)
6228 {
6229 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6230 		return B_BAD_VALUE;
6231 
6232 	fix_protection(&newProtection);
6233 
6234 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
6235 		newProtection, false);
6236 }
6237 
6238 
6239 status_t
6240 _user_resize_area(area_id area, size_t newSize)
6241 {
6242 	// TODO: Since we restrict deleting of areas to those owned by the team,
6243 	// we should also do that for resizing (check other functions, too).
6244 	return vm_resize_area(area, newSize, false);
6245 }
6246 
6247 
6248 area_id
6249 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6250 	team_id target)
6251 {
6252 	// filter out some unavailable values (for userland)
6253 	switch (addressSpec) {
6254 		case B_ANY_KERNEL_ADDRESS:
6255 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6256 			return B_BAD_VALUE;
6257 	}
6258 
6259 	void* address;
6260 	if (!IS_USER_ADDRESS(userAddress)
6261 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6262 		return B_BAD_ADDRESS;
6263 
6264 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6265 	if (newArea < B_OK)
6266 		return newArea;
6267 
6268 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6269 		return B_BAD_ADDRESS;
6270 
6271 	return newArea;
6272 }
6273 
6274 
6275 area_id
6276 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6277 	uint32 protection, area_id sourceArea)
6278 {
6279 	char name[B_OS_NAME_LENGTH];
6280 	void* address;
6281 
6282 	// filter out some unavailable values (for userland)
6283 	switch (addressSpec) {
6284 		case B_ANY_KERNEL_ADDRESS:
6285 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6286 			return B_BAD_VALUE;
6287 	}
6288 	if ((protection & ~B_USER_PROTECTION) != 0)
6289 		return B_BAD_VALUE;
6290 
6291 	if (!IS_USER_ADDRESS(userName)
6292 		|| !IS_USER_ADDRESS(userAddress)
6293 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6294 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6295 		return B_BAD_ADDRESS;
6296 
6297 	fix_protection(&protection);
6298 
6299 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
6300 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6301 		false);
6302 	if (clonedArea < B_OK)
6303 		return clonedArea;
6304 
6305 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6306 		delete_area(clonedArea);
6307 		return B_BAD_ADDRESS;
6308 	}
6309 
6310 	return clonedArea;
6311 }
6312 
6313 
6314 area_id
6315 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6316 	size_t size, uint32 lock, uint32 protection)
6317 {
6318 	char name[B_OS_NAME_LENGTH];
6319 	void* address;
6320 
6321 	// filter out some unavailable values (for userland)
6322 	switch (addressSpec) {
6323 		case B_ANY_KERNEL_ADDRESS:
6324 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6325 			return B_BAD_VALUE;
6326 	}
6327 	if ((protection & ~B_USER_PROTECTION) != 0)
6328 		return B_BAD_VALUE;
6329 
6330 	if (!IS_USER_ADDRESS(userName)
6331 		|| !IS_USER_ADDRESS(userAddress)
6332 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6333 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6334 		return B_BAD_ADDRESS;
6335 
6336 	if (addressSpec == B_EXACT_ADDRESS
6337 		&& IS_KERNEL_ADDRESS(address))
6338 		return B_BAD_VALUE;
6339 
6340 	fix_protection(&protection);
6341 
6342 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
6343 		(char*)name, &address, addressSpec, size, lock, protection, 0, 0,
6344 		false);
6345 
6346 	if (area >= B_OK
6347 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6348 		delete_area(area);
6349 		return B_BAD_ADDRESS;
6350 	}
6351 
6352 	return area;
6353 }
6354 
6355 
6356 status_t
6357 _user_delete_area(area_id area)
6358 {
6359 	// Unlike the BeOS implementation, you can now only delete areas
6360 	// that you have created yourself from userland.
6361 	// The documentation to delete_area() explicitly states that this
6362 	// will be restricted in the future, and so it will.
6363 	return vm_delete_area(vm_current_user_address_space_id(), area, false);
6364 }
6365 
6366 
6367 // TODO: create a BeOS style call for this!
6368 
6369 area_id
6370 _user_map_file(const char* userName, void** userAddress, int addressSpec,
6371 	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
6372 	off_t offset)
6373 {
6374 	char name[B_OS_NAME_LENGTH];
6375 	void* address;
6376 	area_id area;
6377 
6378 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6379 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6380 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6381 		return B_BAD_ADDRESS;
6382 
6383 	if (addressSpec == B_EXACT_ADDRESS) {
6384 		if ((addr_t)address + size < (addr_t)address)
6385 			return B_BAD_VALUE;
6386 		if (!IS_USER_ADDRESS(address)
6387 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6388 			return B_BAD_ADDRESS;
6389 		}
6390 	}
6391 
6392 	// userland-created areas can always be accessed by the kernel
6393 	protection |= B_KERNEL_READ_AREA
6394 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6395 
6396 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
6397 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6398 		false);
6399 	if (area < B_OK)
6400 		return area;
6401 
6402 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6403 		return B_BAD_ADDRESS;
6404 
6405 	return area;
6406 }
6407 
6408 
6409 status_t
6410 _user_unmap_memory(void* _address, size_t size)
6411 {
6412 	addr_t address = (addr_t)_address;
6413 
6414 	// check params
6415 	if (size == 0 || (addr_t)address + size < (addr_t)address)
6416 		return B_BAD_VALUE;
6417 
6418 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6419 		return B_BAD_ADDRESS;
6420 
6421 	// write lock the address space
6422 	AddressSpaceWriteLocker locker;
6423 	status_t status = locker.SetTo(team_get_current_team_id());
6424 	if (status != B_OK)
6425 		return status;
6426 
6427 	// unmap
6428 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6429 }
6430 
6431 
6432 status_t
6433 _user_set_memory_protection(void* _address, size_t size, int protection)
6434 {
6435 	// check address range
6436 	addr_t address = (addr_t)_address;
6437 	size = PAGE_ALIGN(size);
6438 
6439 	if ((address % B_PAGE_SIZE) != 0)
6440 		return B_BAD_VALUE;
6441 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6442 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6443 		// weird error code required by POSIX
6444 		return ENOMEM;
6445 	}
6446 
6447 	// extend and check protection
6448 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
6449 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
6450 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6451 
6452 	if (!arch_vm_supports_protection(actualProtection))
6453 		return B_NOT_SUPPORTED;
6454 
6455 	// We need to write lock the address space, since we're going to play with
6456 	// the areas.
6457 	AddressSpaceWriteLocker locker;
6458 	status_t status = locker.SetTo(team_get_current_team_id());
6459 	if (status != B_OK)
6460 		return status;
6461 
6462 	// First round: Check whether the whole range is covered by areas and we are
6463 	// allowed to modify them.
6464 	addr_t currentAddress = address;
6465 	size_t sizeLeft = size;
6466 	while (sizeLeft > 0) {
6467 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6468 		if (area == NULL)
6469 			return B_NO_MEMORY;
6470 
6471 		if ((area->protection & B_KERNEL_AREA) != 0)
6472 			return B_NOT_ALLOWED;
6473 
6474 		// TODO: For (shared) mapped files we should check whether the new
6475 		// protections are compatible with the file permissions. We don't have
6476 		// a way to do that yet, though.
6477 
6478 		addr_t offset = currentAddress - area->base;
6479 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6480 
6481 		currentAddress += rangeSize;
6482 		sizeLeft -= rangeSize;
6483 	}
6484 
6485 	// Second round: If the new protections differ from those of the area,
6486 	// create a page protection array and re-map the mapped pages.
6487 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
6488 	currentAddress = address;
6489 	sizeLeft = size;
6490 	while (sizeLeft > 0) {
6491 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6492 		if (area == NULL)
6493 			return B_NO_MEMORY;
6494 
6495 		addr_t offset = currentAddress - area->base;
6496 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6497 
6498 		currentAddress += rangeSize;
6499 		sizeLeft -= rangeSize;
6500 
6501 		if (area->page_protections == NULL) {
6502 			if (area->protection == actualProtection)
6503 				continue;
6504 
6505 			// In the page protections we store only the three user protections,
6506 			// so we use 4 bits per page.
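			// Layout sketch (assuming the usual two-entries-per-byte
			// packing): page i of the area lands in byte i / 2, and
			// set_area_page_protection() below stores the value into the
			// matching nibble. An area of N pages thus needs (N + 1) / 2
			// bytes, the "+ 1" rounding up when N is odd.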
6507 			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
6508 			area->page_protections = (uint8*)malloc(bytes);
6509 			if (area->page_protections == NULL)
6510 				return B_NO_MEMORY;
6511 
6512 			// init the page protections for all pages to that of the area
6513 			uint32 areaProtection = area->protection
6514 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6515 			memset(area->page_protections,
6516 				areaProtection | (areaProtection << 4), bytes);
6517 		}
6518 
6519 		for (addr_t pageAddress = area->base + offset;
6520 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6521 			map->ops->lock(map);
6522 
6523 			set_area_page_protection(area, pageAddress, protection);
6524 
6525 			addr_t physicalAddress;
6526 			uint32 flags;
6527 
6528 			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
6529 				&flags);
6530 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6531 				map->ops->unlock(map);
6532 				continue;
6533 			}
6534 
6535 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6536 			if (page == NULL) {
6537 				panic("area %p looking up page failed for pa 0x%lx\n", area,
6538 					physicalAddress);
6539 				map->ops->unlock(map);
6540 				return B_ERROR;
6541 			}
6542 
6543 			// If the page is not in the topmost cache and write access is
6544 			// requested, we have to unmap it. Otherwise we can re-map it with
6545 			// the new protection.
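			// (Rationale sketch: leaving a lower-cache page mapped writable
			// would bypass copy-on-write; unmapping it forces a write fault
			// that first copies the page into the area's topmost cache.)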
6546 			bool unmapPage = page->cache != area->cache
6547 				&& (protection & B_WRITE_AREA) != 0;
6548 
6549 			if (!unmapPage) {
6550 				map->ops->unmap(map, pageAddress,
6551 					pageAddress + B_PAGE_SIZE - 1);
6552 				map->ops->map(map, pageAddress, physicalAddress,
6553 					actualProtection);
6554 			}
6555 
6556 			map->ops->unlock(map);
6557 
6558 			if (unmapPage)
6559 				vm_unmap_page(area, pageAddress, true);
6560 		}
6561 	}
6562 
6563 	return B_OK;
6564 }
6565 
6566 
6567 status_t
6568 _user_sync_memory(void* _address, size_t size, int flags)
6569 {
6570 	addr_t address = (addr_t)_address;
6571 	size = PAGE_ALIGN(size);
6572 
6573 	// check params
6574 	if ((address % B_PAGE_SIZE) != 0)
6575 		return B_BAD_VALUE;
6576 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6577 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6578 		// weird error code required by POSIX
6579 		return ENOMEM;
6580 	}
6581 
6582 	bool writeSync = (flags & MS_SYNC) != 0;
6583 	bool writeAsync = (flags & MS_ASYNC) != 0;
6584 	if (writeSync && writeAsync)
6585 		return B_BAD_VALUE;
6586 
6587 	if (size == 0 || (!writeSync && !writeAsync))
6588 		return B_OK;
6589 
6590 	// iterate through the range and sync all concerned areas
6591 	while (size > 0) {
6592 		// read lock the address space
6593 		AddressSpaceReadLocker locker;
6594 		status_t error = locker.SetTo(team_get_current_team_id());
6595 		if (error != B_OK)
6596 			return error;
6597 
6598 		// get the first area
6599 		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
6600 		if (area == NULL)
6601 			return B_NO_MEMORY;
6602 
6603 		uint32 offset = address - area->base;
6604 		size_t rangeSize = min_c(area->size - offset, size);
6605 		offset += area->cache_offset;
6606 
6607 		// lock the cache
6608 		AreaCacheLocker cacheLocker(area);
6609 		if (!cacheLocker)
6610 			return B_BAD_VALUE;
6611 		vm_cache* cache = area->cache;
6612 
6613 		locker.Unlock();
6614 
6615 		uint32 firstPage = offset >> PAGE_SHIFT;
6616 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6617 
6618 		// write the pages
6619 		if (cache->type == CACHE_TYPE_VNODE) {
6620 			if (writeSync) {
6621 				// synchronous
6622 				error = vm_page_write_modified_page_range(cache, firstPage,
6623 					endPage);
6624 				if (error != B_OK)
6625 					return error;
6626 			} else {
6627 				// asynchronous
6628 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6629 				// TODO: This is probably not quite what is supposed to happen.
6630 				// Especially when a lot has to be written, it might take ages
6631 				// until it really hits the disk.
6632 			}
6633 		}
6634 
6635 		address += rangeSize;
6636 		size -= rangeSize;
6637 	}
6638 
6639 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6640 	// synchronize multiple mappings of the same file. In our VM they never get
6641 	// out of sync, though, so we don't have to do anything.
6642 
6643 	return B_OK;
6644 }
6645 
6646 
6647 status_t
6648 _user_memory_advice(void* address, size_t size, int advice)
6649 {
6650 	// TODO: Implement!
6651 	return B_OK;
6652 }
6653