xref: /haiku/src/system/kernel/vm/vm.cpp (revision 1b80286772b529a3d6de3bbeb0720c62e6a32fed)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <vm.h>
11 
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 
18 #include <OS.h>
19 #include <KernelExport.h>
20 
21 #include <AutoDeleter.h>
22 
23 #include <arch/cpu.h>
24 #include <arch/vm.h>
25 #include <boot/elf.h>
26 #include <boot/stage2.h>
27 #include <condition_variable.h>
28 #include <console.h>
29 #include <debug.h>
30 #include <file_cache.h>
31 #include <fs/fd.h>
32 #include <heap.h>
33 #include <int.h>
34 #include <lock.h>
35 #include <low_resource_manager.h>
36 #include <smp.h>
37 #include <system_info.h>
38 #include <thread.h>
39 #include <team.h>
40 #include <tracing.h>
41 #include <util/AutoLock.h>
42 #include <util/khash.h>
43 #include <vm_address_space.h>
44 #include <vm_cache.h>
45 #include <vm_page.h>
46 #include <vm_priv.h>
47 
48 #include "VMAnonymousCache.h"
49 #include "IORequest.h"
50 
51 
52 //#define TRACE_VM
53 //#define TRACE_FAULTS
54 #ifdef TRACE_VM
55 #	define TRACE(x) dprintf x
56 #else
57 #	define TRACE(x) ;
58 #endif
59 #ifdef TRACE_FAULTS
60 #	define FTRACE(x) dprintf x
61 #else
62 #	define FTRACE(x) ;
63 #endif
64 
65 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
66 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
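// For illustration: with a 4 KB B_PAGE_SIZE, ROUNDUP(0x1234, B_PAGE_SIZE)
// yields 0x2000 and ROUNDOWN(0x1234, B_PAGE_SIZE) yields 0x1000. Note that
// ROUNDUP() requires "b" to be a power of two (it masks instead of dividing),
// while ROUNDOWN() works for any non-zero "b".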
67 
68 
69 class AddressSpaceReadLocker {
70 public:
71 	AddressSpaceReadLocker(team_id team);
72 	AddressSpaceReadLocker(vm_address_space* space, bool getNewReference);
73 	AddressSpaceReadLocker();
74 	~AddressSpaceReadLocker();
75 
76 	status_t SetTo(team_id team);
77 	void SetTo(vm_address_space* space, bool getNewReference);
78 	status_t SetFromArea(area_id areaID, vm_area*& area);
79 
80 	bool IsLocked() const { return fLocked; }
81 	void Unlock();
82 
83 	void Unset();
84 
85 	vm_address_space* AddressSpace() { return fSpace; }
86 
87 private:
88 	vm_address_space* fSpace;
89 	bool	fLocked;
90 };
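
// A minimal usage sketch for AddressSpaceReadLocker (illustrative only; the
// helper function below is hypothetical and not part of the original source):
#if 0
static status_t
example_inspect_address_space(team_id team)
{
	AddressSpaceReadLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	vm_address_space* addressSpace = locker.AddressSpace();
	// ... read-only use of addressSpace while the read lock is held ...
	(void)addressSpace;

	return B_OK;
		// the destructor drops the read lock and the address space reference
}
#endif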
91 
92 class AddressSpaceWriteLocker {
93 public:
94 	AddressSpaceWriteLocker(team_id team);
95 	AddressSpaceWriteLocker();
96 	~AddressSpaceWriteLocker();
97 
98 	status_t SetTo(team_id team);
99 	status_t SetFromArea(area_id areaID, vm_area*& area);
100 	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
101 		vm_area*& area);
102 	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);
103 
104 	bool IsLocked() const { return fLocked; }
105 	void Unlock();
106 
107 	void DegradeToReadLock();
108 	void Unset();
109 
110 	vm_address_space* AddressSpace() { return fSpace; }
111 
112 private:
113 	vm_address_space* fSpace;
114 	bool	fLocked;
115 	bool	fDegraded;
116 };
117 
118 class MultiAddressSpaceLocker {
119 public:
120 	MultiAddressSpaceLocker();
121 	~MultiAddressSpaceLocker();
122 
123 	inline status_t AddTeam(team_id team, bool writeLock,
124 		vm_address_space** _space = NULL);
125 	inline status_t AddArea(area_id area, bool writeLock,
126 		vm_address_space** _space = NULL);
127 
128 	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
129 		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL,
130 		bool checkNoCacheChange = false);
131 
132 	status_t Lock();
133 	void Unlock();
134 	bool IsLocked() const { return fLocked; }
135 
136 	void Unset();
137 
138 private:
139 	struct lock_item {
140 		vm_address_space*	space;
141 		bool				write_lock;
142 	};
143 
144 	bool _ResizeIfNeeded();
145 	int32 _IndexOfAddressSpace(vm_address_space* space) const;
146 	status_t _AddAddressSpace(vm_address_space* space, bool writeLock,
147 		vm_address_space** _space);
148 
149 	static int _CompareItems(const void* _a, const void* _b);
150 
151 	lock_item*	fItems;
152 	int32		fCapacity;
153 	int32		fCount;
154 	bool		fLocked;
155 };
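
// Illustrative sketch of the MultiAddressSpaceLocker protocol (hypothetical
// helper, not part of the original source): address spaces are added first
// and then locked in one go -- Lock() sorts the entries by address space ID
// so that concurrent users acquire the locks in a consistent order.
#if 0
static status_t
example_lock_two_teams(team_id teamA, team_id teamB)
{
	MultiAddressSpaceLocker locker;
	status_t status = locker.AddTeam(teamA, true);
	if (status == B_OK)
		status = locker.AddTeam(teamB, true);
	if (status == B_OK)
		status = locker.Lock();
	if (status != B_OK)
		return status;

	// ... both address spaces are write locked here ...

	return B_OK;
		// the destructor unlocks and releases the references
}
#endif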
156 
157 
158 class AreaCacheLocking {
159 public:
160 	inline bool Lock(vm_cache* lockable)
161 	{
162 		return false;
163 	}
164 
165 	inline void Unlock(vm_cache* lockable)
166 	{
167 		vm_area_put_locked_cache(lockable);
168 	}
169 };
170 
171 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> {
172 public:
173 	inline AreaCacheLocker(vm_cache* cache = NULL)
174 		: AutoLocker<vm_cache, AreaCacheLocking>(cache, true)
175 	{
176 	}
177 
178 	inline AreaCacheLocker(vm_area* area)
179 		: AutoLocker<vm_cache, AreaCacheLocking>()
180 	{
181 		SetTo(area);
182 	}
183 
184 	inline void SetTo(vm_area* area)
185 	{
186 		return AutoLocker<vm_cache, AreaCacheLocking>::SetTo(
187 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
188 	}
189 };
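
// Illustrative sketch (hypothetical helper, not from the original source):
// with the area's address space locked, as the callers later in this file do,
// AreaCacheLocker locks the area's current cache via
// vm_area_get_locked_cache() and releases it again on destruction.
#if 0
static void
example_with_area_cache(vm_area* area)
{
	AreaCacheLocker cacheLocker(area);
	vm_cache* cache = area->cache;

	// ... work with the locked cache ...
	(void)cache;

	// ~AreaCacheLocker() calls vm_area_put_locked_cache()
}
#endif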
190 
191 
192 #define AREA_HASH_TABLE_SIZE 1024
193 static area_id sNextAreaID = 1;
194 static hash_table* sAreaHash;
195 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash");
196 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings");
197 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache");
198 
199 static off_t sAvailableMemory;
200 static off_t sNeededMemory;
201 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
202 static uint32 sPageFaults;
203 
204 #if DEBUG_CACHE_LIST
205 
206 struct cache_info {
207 	vm_cache*	cache;
208 	addr_t		page_count;
209 	addr_t		committed;
210 };
211 
212 static const int kCacheInfoTableCount = 100 * 1024;
213 static cache_info* sCacheInfoTable;
214 
215 #endif	// DEBUG_CACHE_LIST
216 
217 
218 // function declarations
219 static void delete_area(vm_address_space* addressSpace, vm_area* area);
220 static vm_address_space* get_address_space_by_area_id(area_id id);
221 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address,
222 	bool isWrite, bool isUser);
223 static status_t map_backing_store(vm_address_space* addressSpace,
224 	vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size,
225 	uint32 addressSpec, int wiring, int protection, int mapping,
226 	vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel);
227 
228 
229 //	#pragma mark -
230 
231 
232 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team)
233 	:
234 	fSpace(NULL),
235 	fLocked(false)
236 {
237 	SetTo(team);
238 }
239 
240 
241 /*! Takes over the reference of the address space, if \a getNewReference is
242 	\c false.
243 */
244 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space,
245 		bool getNewReference)
246 	:
247 	fSpace(NULL),
248 	fLocked(false)
249 {
250 	SetTo(space, getNewReference);
251 }
252 
253 
254 AddressSpaceReadLocker::AddressSpaceReadLocker()
255 	:
256 	fSpace(NULL),
257 	fLocked(false)
258 {
259 }
260 
261 
262 AddressSpaceReadLocker::~AddressSpaceReadLocker()
263 {
264 	Unset();
265 }
266 
267 
268 void
269 AddressSpaceReadLocker::Unset()
270 {
271 	Unlock();
272 	if (fSpace != NULL)
273 		vm_put_address_space(fSpace);
274 }
275 
276 
277 status_t
278 AddressSpaceReadLocker::SetTo(team_id team)
279 {
280 	fSpace = vm_get_address_space(team);
281 	if (fSpace == NULL)
282 		return B_BAD_TEAM_ID;
283 
284 	rw_lock_read_lock(&fSpace->lock);
285 	fLocked = true;
286 	return B_OK;
287 }
288 
289 
290 /*! Takes over the reference of the address space, if \a getNewReference is
291 	\c false.
292 */
293 void
294 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference)
295 {
296 	fSpace = space;
297 
298 	if (getNewReference)
299 		atomic_add(&fSpace->ref_count, 1);
300 
301 	rw_lock_read_lock(&fSpace->lock);
302 	fLocked = true;
303 }
304 
305 
306 status_t
307 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area)
308 {
309 	fSpace = get_address_space_by_area_id(areaID);
310 	if (fSpace == NULL)
311 		return B_BAD_TEAM_ID;
312 
313 	rw_lock_read_lock(&fSpace->lock);
314 
315 	rw_lock_read_lock(&sAreaHashLock);
316 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
317 	rw_lock_read_unlock(&sAreaHashLock);
318 
319 	if (area == NULL || area->address_space != fSpace) {
320 		rw_lock_read_unlock(&fSpace->lock);
321 		return B_BAD_VALUE;
322 	}
323 
324 	fLocked = true;
325 	return B_OK;
326 }
327 
328 
329 void
330 AddressSpaceReadLocker::Unlock()
331 {
332 	if (fLocked) {
333 		rw_lock_read_unlock(&fSpace->lock);
334 		fLocked = false;
335 	}
336 }
337 
338 
339 //	#pragma mark -
340 
341 
342 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team)
343 	:
344 	fSpace(NULL),
345 	fLocked(false),
346 	fDegraded(false)
347 {
348 	SetTo(team);
349 }
350 
351 
352 AddressSpaceWriteLocker::AddressSpaceWriteLocker()
353 	:
354 	fSpace(NULL),
355 	fLocked(false),
356 	fDegraded(false)
357 {
358 }
359 
360 
361 AddressSpaceWriteLocker::~AddressSpaceWriteLocker()
362 {
363 	Unset();
364 }
365 
366 
367 void
368 AddressSpaceWriteLocker::Unset()
369 {
370 	Unlock();
371 	if (fSpace != NULL)
372 		vm_put_address_space(fSpace);
373 }
374 
375 
376 status_t
377 AddressSpaceWriteLocker::SetTo(team_id team)
378 {
379 	fSpace = vm_get_address_space(team);
380 	if (fSpace == NULL)
381 		return B_BAD_TEAM_ID;
382 
383 	rw_lock_write_lock(&fSpace->lock);
384 	fLocked = true;
385 	return B_OK;
386 }
387 
388 
389 status_t
390 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area)
391 {
392 	fSpace = get_address_space_by_area_id(areaID);
393 	if (fSpace == NULL)
394 		return B_BAD_VALUE;
395 
396 	rw_lock_write_lock(&fSpace->lock);
397 
398 	rw_lock_read_lock(&sAreaHashLock);
399 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
400 	rw_lock_read_unlock(&sAreaHashLock);
401 
402 	if (area == NULL || area->address_space != fSpace) {
403 		rw_lock_write_unlock(&fSpace->lock);
404 		return B_BAD_VALUE;
405 	}
406 
407 	fLocked = true;
408 	return B_OK;
409 }
410 
411 
412 status_t
413 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
414 	bool allowKernel, vm_area*& area)
415 {
416 	rw_lock_read_lock(&sAreaHashLock);
417 
418 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
419 	if (area != NULL
420 		&& (area->address_space->id == team
421 			|| (allowKernel && team == vm_kernel_address_space_id()))) {
422 		fSpace = area->address_space;
423 		atomic_add(&fSpace->ref_count, 1);
424 	}
425 
426 	rw_lock_read_unlock(&sAreaHashLock);
427 
428 	if (fSpace == NULL)
429 		return B_BAD_VALUE;
430 
431 	// Second try to get the area -- this time with the address space
432 	// write lock held
433 
434 	rw_lock_write_lock(&fSpace->lock);
435 
436 	rw_lock_read_lock(&sAreaHashLock);
437 	area = (vm_area*)hash_lookup(sAreaHash, &areaID);
438 	rw_lock_read_unlock(&sAreaHashLock);
439 
440 	if (area == NULL) {
441 		rw_lock_write_unlock(&fSpace->lock);
442 		return B_BAD_VALUE;
443 	}
444 
445 	fLocked = true;
446 	return B_OK;
447 }
448 
449 
450 status_t
451 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID,
452 	vm_area*& area)
453 {
454 	return SetFromArea(team, areaID, false, area);
455 }
456 
457 
458 void
459 AddressSpaceWriteLocker::Unlock()
460 {
461 	if (fLocked) {
462 		if (fDegraded)
463 			rw_lock_read_unlock(&fSpace->lock);
464 		else
465 			rw_lock_write_unlock(&fSpace->lock);
466 		fLocked = false;
467 		fDegraded = false;
468 	}
469 }
470 
471 
472 void
473 AddressSpaceWriteLocker::DegradeToReadLock()
474 {
475 	// TODO: the current R/W lock implementation just keeps the write lock here
476 	rw_lock_read_lock(&fSpace->lock);
477 	rw_lock_write_unlock(&fSpace->lock);
478 	fDegraded = true;
479 }
480 
481 
482 //	#pragma mark -
483 
484 
485 MultiAddressSpaceLocker::MultiAddressSpaceLocker()
486 	:
487 	fItems(NULL),
488 	fCapacity(0),
489 	fCount(0),
490 	fLocked(false)
491 {
492 }
493 
494 
495 MultiAddressSpaceLocker::~MultiAddressSpaceLocker()
496 {
497 	Unset();
498 	free(fItems);
499 }
500 
501 
502 /*static*/ int
503 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b)
504 {
505 	lock_item* a = (lock_item*)_a;
506 	lock_item* b = (lock_item*)_b;
507 	return a->space->id - b->space->id;
508 }
509 
510 
511 bool
512 MultiAddressSpaceLocker::_ResizeIfNeeded()
513 {
514 	if (fCount == fCapacity) {
515 		lock_item* items = (lock_item*)realloc(fItems,
516 			(fCapacity + 4) * sizeof(lock_item));
517 		if (items == NULL)
518 			return false;
519 
520 		fCapacity += 4;
521 		fItems = items;
522 	}
523 
524 	return true;
525 }
526 
527 
528 int32
529 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const
530 {
531 	for (int32 i = 0; i < fCount; i++) {
532 		if (fItems[i].space == space)
533 			return i;
534 	}
535 
536 	return -1;
537 }
538 
539 
540 status_t
541 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space,
542 	bool writeLock, vm_address_space** _space)
543 {
544 	if (!space)
545 		return B_BAD_VALUE;
546 
547 	int32 index = _IndexOfAddressSpace(space);
548 	if (index < 0) {
549 		if (!_ResizeIfNeeded()) {
550 			vm_put_address_space(space);
551 			return B_NO_MEMORY;
552 		}
553 
554 		lock_item& item = fItems[fCount++];
555 		item.space = space;
556 		item.write_lock = writeLock;
557 	} else {
558 
559 		// one reference is enough
560 		vm_put_address_space(space);
561 
562 		fItems[index].write_lock |= writeLock;
563 	}
564 
565 	if (_space != NULL)
566 		*_space = space;
567 
568 	return B_OK;
569 }
570 
571 
572 inline status_t
573 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock,
574 	vm_address_space** _space)
575 {
576 	return _AddAddressSpace(vm_get_address_space(team), writeLock,
577 		_space);
578 }
579 
580 
581 inline status_t
582 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock,
583 	vm_address_space** _space)
584 {
585 	return _AddAddressSpace(get_address_space_by_area_id(area), writeLock,
586 		_space);
587 }
588 
589 
590 void
591 MultiAddressSpaceLocker::Unset()
592 {
593 	Unlock();
594 
595 	for (int32 i = 0; i < fCount; i++)
596 		vm_put_address_space(fItems[i].space);
597 
598 	fCount = 0;
599 }
600 
601 
602 status_t
603 MultiAddressSpaceLocker::Lock()
604 {
605 	ASSERT(!fLocked);
606 
607 	qsort(fItems, fCount, sizeof(lock_item), &_CompareItems);
608 
609 	for (int32 i = 0; i < fCount; i++) {
610 		status_t status;
611 		if (fItems[i].write_lock)
612 			status = rw_lock_write_lock(&fItems[i].space->lock);
613 		else
614 			status = rw_lock_read_lock(&fItems[i].space->lock);
615 
616 		if (status < B_OK) {
617 			while (--i >= 0) {
618 				if (fItems[i].write_lock)
619 					rw_lock_write_unlock(&fItems[i].space->lock);
620 				else
621 					rw_lock_read_unlock(&fItems[i].space->lock);
622 			}
623 			return status;
624 		}
625 	}
626 
627 	fLocked = true;
628 	return B_OK;
629 }
630 
631 
632 void
633 MultiAddressSpaceLocker::Unlock()
634 {
635 	if (!fLocked)
636 		return;
637 
638 	for (int32 i = 0; i < fCount; i++) {
639 		if (fItems[i].write_lock)
640 			rw_lock_write_unlock(&fItems[i].space->lock);
641 		else
642 			rw_lock_read_unlock(&fItems[i].space->lock);
643 	}
644 
645 	fLocked = false;
646 }
647 
648 
649 /*!	Adds all address spaces of the areas associated with the given area's cache,
650 	locks them, and locks the cache (including a reference to it). It retries
651 	until the situation is stable (i.e. neither the cache nor its areas have
652 	changed) or an error occurs. If \c checkNoCacheChange is \c true, it does
653 	not return until all areas' \c no_cache_change flags are clear.
654 */
655 status_t
656 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
657 	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
658 	vm_cache** _cache, bool checkNoCacheChange)
659 {
660 	// remember the original state
661 	int originalCount = fCount;
662 	lock_item* originalItems = NULL;
663 	if (fCount > 0) {
664 		originalItems = new(nothrow) lock_item[fCount];
665 		if (originalItems == NULL)
666 			return B_NO_MEMORY;
667 		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
668 	}
669 	ArrayDeleter<lock_item> _(originalItems);
670 
671 	// get the cache
672 	vm_cache* cache;
673 	vm_area* area;
674 	status_t error;
675 	{
676 		AddressSpaceReadLocker locker;
677 		error = locker.SetFromArea(areaID, area);
678 		if (error != B_OK)
679 			return error;
680 
681 		cache = vm_area_get_locked_cache(area);
682 	}
683 
684 	while (true) {
685 		// add all areas
686 		vm_area* firstArea = cache->areas;
687 		for (vm_area* current = firstArea; current;
688 				current = current->cache_next) {
689 			error = AddArea(current->id,
690 				current == area ? writeLockThisOne : writeLockOthers);
691 			if (error != B_OK) {
692 				vm_area_put_locked_cache(cache);
693 				return error;
694 			}
695 		}
696 
697 		// unlock the cache and attempt to lock the address spaces
698 		vm_area_put_locked_cache(cache);
699 
700 		error = Lock();
701 		if (error != B_OK)
702 			return error;
703 
704 		// lock the cache again and check whether anything has changed
705 
706 		// check whether the area is gone in the meantime
707 		rw_lock_read_lock(&sAreaHashLock);
708 		area = (vm_area*)hash_lookup(sAreaHash, &areaID);
709 		rw_lock_read_unlock(&sAreaHashLock);
710 
711 		if (area == NULL) {
712 			Unlock();
713 			return B_BAD_VALUE;
714 		}
715 
716 		// lock the cache
717 		vm_cache* oldCache = cache;
718 		cache = vm_area_get_locked_cache(area);
719 
720 		// If neither the area's cache nor its area list has changed, we're
721 		// done...
722 		bool done = (cache == oldCache || firstArea == cache->areas);
723 
724 		// ... unless we're supposed to check the areas' "no_cache_change" flag
725 		bool yield = false;
726 		if (done && checkNoCacheChange) {
727 			for (vm_area* tempArea = cache->areas; tempArea != NULL;
728 					tempArea = tempArea->cache_next) {
729 				if (tempArea->no_cache_change) {
730 					done = false;
731 					yield = true;
732 					break;
733 				}
734 			}
735 		}
736 
737 		// If everything looks dandy, return the values.
738 		if (done) {
739 			_area = area;
740 			if (_cache != NULL)
741 				*_cache = cache;
742 			return B_OK;
743 		}
744 
745 		// Restore the original state and try again.
746 
747 		// Unlock the address spaces, but keep the cache locked for the next
748 		// iteration.
749 		Unlock();
750 
751 		// Get an additional reference to the original address spaces.
752 		for (int32 i = 0; i < originalCount; i++)
753 			atomic_add(&originalItems[i].space->ref_count, 1);
754 
755 		// Release all references to the current address spaces.
756 		for (int32 i = 0; i < fCount; i++)
757 			vm_put_address_space(fItems[i].space);
758 
759 		// Copy over the original state.
760 		fCount = originalCount;
761 		if (originalItems != NULL)
762 			memcpy(fItems, originalItems, fCount * sizeof(lock_item));
763 
764 		if (yield)
765 			thread_yield(true);
766 	}
767 }
768 
769 
770 //	#pragma mark -
771 
772 
773 #if VM_PAGE_FAULT_TRACING
774 
775 namespace VMPageFaultTracing {
776 
777 class PageFaultStart : public AbstractTraceEntry {
778 public:
779 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
780 		:
781 		fAddress(address),
782 		fPC(pc),
783 		fWrite(write),
784 		fUser(user)
785 	{
786 		Initialized();
787 	}
788 
789 	virtual void AddDump(TraceOutput& out)
790 	{
791 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
792 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
793 	}
794 
795 private:
796 	addr_t	fAddress;
797 	addr_t	fPC;
798 	bool	fWrite;
799 	bool	fUser;
800 };
801 
802 
803 // page fault errors
804 enum {
805 	PAGE_FAULT_ERROR_NO_AREA		= 0,
806 	PAGE_FAULT_ERROR_KERNEL_ONLY,
807 	PAGE_FAULT_ERROR_READ_ONLY,
808 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
809 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
810 };
811 
812 
813 class PageFaultError : public AbstractTraceEntry {
814 public:
815 	PageFaultError(area_id area, status_t error)
816 		:
817 		fArea(area),
818 		fError(error)
819 	{
820 		Initialized();
821 	}
822 
823 	virtual void AddDump(TraceOutput& out)
824 	{
825 		switch (fError) {
826 			case PAGE_FAULT_ERROR_NO_AREA:
827 				out.Print("page fault error: no area");
828 				break;
829 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
830 				out.Print("page fault error: area: %ld, kernel only", fArea);
831 				break;
832 			case PAGE_FAULT_ERROR_READ_ONLY:
833 				out.Print("page fault error: area: %ld, read only", fArea);
834 				break;
835 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
836 				out.Print("page fault error: kernel touching bad user memory");
837 				break;
838 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
839 				out.Print("page fault error: no address space");
840 				break;
841 			default:
842 				out.Print("page fault error: area: %ld, error: %s", fArea,
843 					strerror(fError));
844 				break;
845 		}
846 	}
847 
848 private:
849 	area_id		fArea;
850 	status_t	fError;
851 };
852 
853 
854 class PageFaultDone : public AbstractTraceEntry {
855 public:
856 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
857 			vm_page* page)
858 		:
859 		fArea(area),
860 		fTopCache(topCache),
861 		fCache(cache),
862 		fPage(page)
863 	{
864 		Initialized();
865 	}
866 
867 	virtual void AddDump(TraceOutput& out)
868 	{
869 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
870 			"page: %p", fArea, fTopCache, fCache, fPage);
871 	}
872 
873 private:
874 	area_id		fArea;
875 	VMCache*	fTopCache;
876 	VMCache*	fCache;
877 	vm_page*	fPage;
878 };
879 
880 }	// namespace VMPageFaultTracing
881 
882 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
883 #else
884 #	define TPF(x) ;
885 #endif	// VM_PAGE_FAULT_TRACING
886 
887 
888 //	#pragma mark -
889 
890 
891 static int
892 area_compare(void* _area, const void* key)
893 {
894 	vm_area* area = (vm_area*)_area;
895 	const area_id* id = (const area_id*)key;
896 
897 	if (area->id == *id)
898 		return 0;
899 
900 	return -1;
901 }
902 
903 
904 static uint32
905 area_hash(void* _area, const void* key, uint32 range)
906 {
907 	vm_area* area = (vm_area*)_area;
908 	const area_id* id = (const area_id*)key;
909 
910 	if (area != NULL)
911 		return area->id % range;
912 
913 	return (uint32)*id % range;
914 }
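
// How these hash callbacks are presumably wired up during VM initialization
// (a sketch only; the exact hash_init() call is an assumption based on the
// <util/khash.h> API and is not part of this excerpt):
#if 0
static status_t
example_init_area_hash()
{
	sAreaHash = hash_init(AREA_HASH_TABLE_SIZE, offsetof(vm_area, hash_next),
		&area_compare, &area_hash);
	return sAreaHash != NULL ? B_OK : B_NO_MEMORY;
}
#endif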
915 
916 
917 static vm_address_space*
918 get_address_space_by_area_id(area_id id)
919 {
920 	vm_address_space* addressSpace = NULL;
921 
922 	rw_lock_read_lock(&sAreaHashLock);
923 
924 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
925 	if (area != NULL) {
926 		addressSpace = area->address_space;
927 		atomic_add(&addressSpace->ref_count, 1);
928 	}
929 
930 	rw_lock_read_unlock(&sAreaHashLock);
931 
932 	return addressSpace;
933 }
934 
935 
936 //! You need to have the address space locked when calling this function
937 static vm_area*
938 lookup_area(vm_address_space* addressSpace, area_id id)
939 {
940 	rw_lock_read_lock(&sAreaHashLock);
941 
942 	vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id);
943 	if (area != NULL && area->address_space != addressSpace)
944 		area = NULL;
945 
946 	rw_lock_read_unlock(&sAreaHashLock);
947 
948 	return area;
949 }
950 
951 
952 static vm_area*
953 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags)
954 {
955 	vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area));
956 	if (reserved == NULL)
957 		return NULL;
958 
959 	memset(reserved, 0, sizeof(vm_area));
960 	reserved->id = RESERVED_AREA_ID;
961 		// this marks it as reserved space
962 	reserved->protection = flags;
963 	reserved->address_space = addressSpace;
964 
965 	return reserved;
966 }
967 
968 
969 static vm_area*
970 create_area_struct(vm_address_space* addressSpace, const char* name,
971 	uint32 wiring, uint32 protection)
972 {
973 	// restrict the area name to B_OS_NAME_LENGTH
974 	size_t length = strlen(name) + 1;
975 	if (length > B_OS_NAME_LENGTH)
976 		length = B_OS_NAME_LENGTH;
977 
978 	vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area));
979 	if (area == NULL)
980 		return NULL;
981 
982 	area->name = (char*)malloc_nogrow(length);
983 	if (area->name == NULL) {
984 		free(area);
985 		return NULL;
986 	}
987 	strlcpy(area->name, name, length);
988 
989 	area->id = atomic_add(&sNextAreaID, 1);
990 	area->base = 0;
991 	area->size = 0;
992 	area->protection = protection;
993 	area->wiring = wiring;
994 	area->memory_type = 0;
995 
996 	area->cache = NULL;
997 	area->no_cache_change = 0;
998 	area->cache_offset = 0;
999 
1000 	area->address_space = addressSpace;
1001 	area->address_space_next = NULL;
1002 	area->cache_next = area->cache_prev = NULL;
1003 	area->hash_next = NULL;
1004 	new (&area->mappings) vm_area_mappings;
1005 	area->page_protections = NULL;
1006 
1007 	return area;
1008 }
1009 
1010 
1011 /*!	Finds a reserved area that covers the region spanned by \a start and
1012 	\a size, inserts the \a area into that region and makes sure that
1013 	there are reserved regions for the remaining parts.
1014 */
1015 static status_t
1016 find_reserved_area(vm_address_space* addressSpace, addr_t start,
1017 	addr_t size, vm_area* area)
1018 {
1019 	vm_area* last = NULL;
1020 	vm_area* next;
1021 
1022 	next = addressSpace->areas;
1023 	while (next) {
1024 		if (next->base <= start && next->base + next->size >= start + size) {
1025 			// this area covers the requested range
1026 			if (next->id != RESERVED_AREA_ID) {
1027 				// but it's not reserved space, it's a real area
1028 				return B_BAD_VALUE;
1029 			}
1030 
1031 			break;
1032 		}
1033 		last = next;
1034 		next = next->address_space_next;
1035 	}
1036 	if (next == NULL)
1037 		return B_ENTRY_NOT_FOUND;
1038 
1039 	// now we have to transfer the requested part of the reserved
1040 	// range to the new area - and remove, resize or split the old
1041 	// reserved area.
1042 
1043 	if (start == next->base) {
1044 		// the area starts at the beginning of the reserved range
1045 		if (last)
1046 			last->address_space_next = area;
1047 		else
1048 			addressSpace->areas = area;
1049 
1050 		if (size == next->size) {
1051 			// the new area fully covers the reserved range
1052 			area->address_space_next = next->address_space_next;
1053 			vm_put_address_space(addressSpace);
1054 			free(next);
1055 		} else {
1056 			// resize the reserved range behind the area
1057 			area->address_space_next = next;
1058 			next->base += size;
1059 			next->size -= size;
1060 		}
1061 	} else if (start + size == next->base + next->size) {
1062 		// the area is at the end of the reserved range
1063 		area->address_space_next = next->address_space_next;
1064 		next->address_space_next = area;
1065 
1066 		// resize the reserved range before the area
1067 		next->size = start - next->base;
1068 	} else {
1069 		// the area splits the reserved range into two separate ones
1070 		// we need a new reserved area to cover this space
1071 		vm_area* reserved = create_reserved_area_struct(addressSpace,
1072 			next->protection);
1073 		if (reserved == NULL)
1074 			return B_NO_MEMORY;
1075 
1076 		atomic_add(&addressSpace->ref_count, 1);
1077 		reserved->address_space_next = next->address_space_next;
1078 		area->address_space_next = reserved;
1079 		next->address_space_next = area;
1080 
1081 		// resize regions
1082 		reserved->size = next->base + next->size - start - size;
1083 		next->size = start - next->base;
1084 		reserved->base = start + size;
1085 		reserved->cache_offset = next->cache_offset;
1086 	}
1087 
1088 	area->base = start;
1089 	area->size = size;
1090 	addressSpace->change_count++;
1091 
1092 	return B_OK;
1093 }
1094 
1095 
1096 /*!	Must be called with this address space's sem held */
1097 static status_t
1098 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start,
1099 	addr_t size, addr_t end, uint32 addressSpec, vm_area* area)
1100 {
1101 	vm_area* last = NULL;
1102 	vm_area* next;
1103 	bool foundSpot = false;
1104 
1105 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
1106 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
1107 		size, end, addressSpec, area));
1108 
1109 	// do some sanity checking
1110 	if (start < addressSpace->base || size == 0
1111 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
1112 		|| start + size > end)
1113 		return B_BAD_ADDRESS;
1114 
1115 	if (addressSpec == B_EXACT_ADDRESS) {
1116 		// search for a reserved area
1117 		status_t status = find_reserved_area(addressSpace, start, size, area);
1118 		if (status == B_OK || status == B_BAD_VALUE)
1119 			return status;
1120 
1121 		// There was no reserved area, and the slot doesn't seem to be used
1122 		// already
1123 		// TODO: this could be further optimized.
1124 	}
1125 
1126 	size_t alignment = B_PAGE_SIZE;
1127 	if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) {
1128 		// align the memory to the next power of two of the size
1129 		while (alignment < size)
1130 			alignment <<= 1;
1131 	}
1132 
1133 	start = ROUNDUP(start, alignment);
1134 
1135 	// walk up to the spot where we should start searching
1136 second_chance:
1137 	next = addressSpace->areas;
1138 	while (next) {
1139 		if (next->base >= start + size) {
1140 			// we have a winner
1141 			break;
1142 		}
1143 		last = next;
1144 		next = next->address_space_next;
1145 	}
1146 
1147 	// find the right spot depending on the address specification - the area
1148 	// will be inserted directly after "last" ("next" is not referenced anymore)
1149 
1150 	switch (addressSpec) {
1151 		case B_ANY_ADDRESS:
1152 		case B_ANY_KERNEL_ADDRESS:
1153 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1154 			// find a hole big enough for a new area
1155 			if (!last) {
1156 				// see if we can build it at the beginning of the virtual map
1157 				if (!next || (next->base >= ROUNDUP(addressSpace->base,
1158 						alignment) + size)) {
1159 					foundSpot = true;
1160 					area->base = ROUNDUP(addressSpace->base, alignment);
1161 					break;
1162 				}
1163 				last = next;
1164 				next = next->address_space_next;
1165 			}
1166 			// keep walking
1167 			while (next) {
1168 				if (next->base >= ROUNDUP(last->base + last->size, alignment)
1169 						+ size) {
1170 					// we found a spot (it'll be filled up below)
1171 					break;
1172 				}
1173 				last = next;
1174 				next = next->address_space_next;
1175 			}
1176 
1177 			if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP(
1178 					last->base + last->size, alignment) + (size - 1))) {
1179 				// got a spot
1180 				foundSpot = true;
1181 				area->base = ROUNDUP(last->base + last->size, alignment);
1182 				break;
1183 			} else {
1184 				// We didn't find a free spot - if there were any reserved areas
1185 				// with the RESERVED_AVOID_BASE flag set, we can now test those
1186 				// for free space
1187 				// TODO: it would make sense to start with the biggest of them
1188 				next = addressSpace->areas;
1189 				last = NULL;
1190 				for (last = NULL; next; next = next->address_space_next) {
1191 					if (next->id != RESERVED_AREA_ID) {
1192 						last = next;
1193 						continue;
1194 					}
1195 
1196 					// TODO: take free space after the reserved area into
1197 					// account!
1198 					if (next->base == ROUNDUP(next->base, alignment)
1199 						&& next->size == size) {
1200 						// The reserved area is entirely covered, and thus,
1201 						// removed
1202 						if (last)
1203 							last->address_space_next = next->address_space_next;
1204 						else
1205 							addressSpace->areas = next->address_space_next;
1206 
1207 						foundSpot = true;
1208 						area->base = next->base;
1209 						free(next);
1210 						break;
1211 					}
1212 					if (next->size - (ROUNDUP(next->base, alignment)
1213 							- next->base) >= size) {
1214 						// The new area will be placed at the end of the
1215 						// reserved area, and the reserved area will be resized
1216 						// to make space
1217 						foundSpot = true;
1218 						next->size -= size;
1219 						last = next;
1220 						area->base = next->base + next->size;
1221 						break;
1222 					}
1223 
1224 					last = next;
1225 				}
1226 			}
1227 			break;
1228 
1229 		case B_BASE_ADDRESS:
1230 			// find a hole big enough for a new area beginning with "start"
1231 			if (!last) {
1232 				// see if we can build the area at the specified start address
1233 				if (!next || (next->base >= start + size)) {
1234 					foundSpot = true;
1235 					area->base = start;
1236 					break;
1237 				}
1238 				last = next;
1239 				next = next->address_space_next;
1240 			}
1241 			// keep walking
1242 			while (next) {
1243 				if (next->base >= last->base + last->size + size) {
1244 					// we found a spot (it'll be filled up below)
1245 					break;
1246 				}
1247 				last = next;
1248 				next = next->address_space_next;
1249 			}
1250 
1251 			if ((addressSpace->base + (addressSpace->size - 1))
1252 					>= (last->base + last->size + (size - 1))) {
1253 				// got a spot
1254 				foundSpot = true;
1255 				if (last->base + last->size <= start)
1256 					area->base = start;
1257 				else
1258 					area->base = last->base + last->size;
1259 				break;
1260 			}
1261 			// we didn't find a free spot in the requested range, so we'll
1262 			// try again without any restrictions
1263 			start = addressSpace->base;
1264 			addressSpec = B_ANY_ADDRESS;
1265 			last = NULL;
1266 			goto second_chance;
1267 
1268 		case B_EXACT_ADDRESS:
1269 			// see if we can create it exactly here
1270 			if (!last) {
1271 				if (!next || (next->base >= start + size)) {
1272 					foundSpot = true;
1273 					area->base = start;
1274 					break;
1275 				}
1276 			} else {
1277 				if (next) {
1278 					if (last->base + last->size <= start
1279 						&& next->base >= start + size) {
1280 						foundSpot = true;
1281 						area->base = start;
1282 						break;
1283 					}
1284 				} else {
1285 					if ((last->base + (last->size - 1)) <= start - 1) {
1286 						foundSpot = true;
1287 						area->base = start;
1288 					}
1289 				}
1290 			}
1291 			break;
1292 		default:
1293 			return B_BAD_VALUE;
1294 	}
1295 
1296 	if (!foundSpot)
1297 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
1298 
1299 	area->size = size;
1300 	if (last) {
1301 		area->address_space_next = last->address_space_next;
1302 		last->address_space_next = area;
1303 	} else {
1304 		area->address_space_next = addressSpace->areas;
1305 		addressSpace->areas = area;
1306 	}
1307 	addressSpace->change_count++;
1308 	return B_OK;
1309 }
1310 
1311 
1312 /*!	This inserts the area you pass into the specified address space.
1313 	It will also set the "_address" argument to its base address when
1314 	the call succeeds.
1315 	You need to hold the vm_address_space semaphore.
1316 */
1317 static status_t
1318 insert_area(vm_address_space* addressSpace, void** _address,
1319 	uint32 addressSpec, addr_t size, vm_area* area)
1320 {
1321 	addr_t searchBase, searchEnd;
1322 	status_t status;
1323 
1324 	switch (addressSpec) {
1325 		case B_EXACT_ADDRESS:
1326 			searchBase = (addr_t)*_address;
1327 			searchEnd = (addr_t)*_address + size;
1328 			break;
1329 
1330 		case B_BASE_ADDRESS:
1331 			searchBase = (addr_t)*_address;
1332 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1333 			break;
1334 
1335 		case B_ANY_ADDRESS:
1336 		case B_ANY_KERNEL_ADDRESS:
1337 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1338 			searchBase = addressSpace->base;
1339 			// TODO: remove this again when vm86 mode is moved into the kernel
1340 			// completely (currently needs a userland address space!)
1341 			if (searchBase == USER_BASE)
1342 				searchBase = USER_BASE_ANY;
1343 			searchEnd = addressSpace->base + (addressSpace->size - 1);
1344 			break;
1345 
1346 		default:
1347 			return B_BAD_VALUE;
1348 	}
1349 
1350 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
1351 		searchEnd, addressSpec, area);
1352 	if (status == B_OK) {
1353 		// TODO: do we have to do anything about B_ANY_KERNEL_ADDRESS
1354 		// vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
1355 		*_address = (void*)area->base;
1356 	}
1357 
1358 	return status;
1359 }
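
// For example (illustrative values): with addressSpec == B_BASE_ADDRESS and
// *_address == (void*)0x20000000, the search starts at that address but may
// place the area higher up -- or, failing that, anywhere in the address space
// (see the B_BASE_ADDRESS fallback in find_and_insert_area_slot()); the base
// that was actually chosen is written back through _address.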
1360 
1361 
1362 static inline void
1363 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection)
1364 {
1365 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
1366 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1367 	uint8& entry = area->page_protections[pageIndex / 2];
1368 	if (pageIndex % 2 == 0)
1369 		entry = (entry & 0xf0) | protection;
1370 	else
1371 		entry = (entry & 0x0f) | (protection << 4);
1372 }
1373 
1374 
1375 static inline uint32
1376 get_area_page_protection(vm_area* area, addr_t pageAddress)
1377 {
1378 	if (area->page_protections == NULL)
1379 		return area->protection;
1380 
1381 	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
1382 	uint32 protection = area->page_protections[pageIndex / 2];
1383 	if (pageIndex % 2 == 0)
1384 		protection &= 0x0f;
1385 	else
1386 		protection >>= 4;
1387 
1388 	return protection | B_KERNEL_READ_AREA
1389 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1390 }
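
// Worked example (assuming B_READ_AREA == 1 and B_WRITE_AREA == 2 as defined
// in OS.h): page_protections packs two 4-bit entries per byte, so pages 2 and
// 3 of an area share page_protections[1] -- page 2 in the low nibble, page 3
// in the high nibble. If both pages are B_READ_AREA | B_WRITE_AREA, that byte
// reads 0x33. get_area_page_protection() then adds B_KERNEL_READ_AREA (and
// B_KERNEL_WRITE_AREA for writable pages) on top of the stored nibble.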
1391 
1392 
1393 /*!	Cuts a piece out of an area. If the given cut range covers the complete
1394 	area, it is deleted. If it covers the beginning or the end, the area is
1395 	resized accordingly. If the range covers some part in the middle of the
1396 	area, it is split in two; in this case the second area is returned via
1397 	\a _secondArea (the variable is left untouched in the other cases).
1398 	The address space must be write locked.
1399 */
1400 static status_t
1401 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address,
1402 	addr_t lastAddress, vm_area** _secondArea, bool kernel)
1403 {
1404 	// Does the cut range intersect with the area at all?
1405 	addr_t areaLast = area->base + (area->size - 1);
1406 	if (area->base > lastAddress || areaLast < address)
1407 		return B_OK;
1408 
1409 	// Is the area fully covered?
1410 	if (area->base >= address && areaLast <= lastAddress) {
1411 		delete_area(addressSpace, area);
1412 		return B_OK;
1413 	}
1414 
1415 	AreaCacheLocker cacheLocker(area);
1416 	vm_cache* cache = area->cache;
1417 
1418 	// Cut the end only?
1419 	if (areaLast <= lastAddress) {
1420 		addr_t newSize = address - area->base;
1421 
1422 		// unmap pages
1423 		vm_unmap_pages(area, address, area->size - newSize, false);
1424 
1425 		// If no one else uses the area's cache, we can resize it, too.
1426 		if (cache->areas == area && area->cache_next == NULL
1427 			&& list_is_empty(&cache->consumers)) {
1428 			status_t error = cache->Resize(cache->virtual_base + newSize);
1429 			if (error != B_OK)
1430 				return error;
1431 		}
1432 
1433 		area->size = newSize;
1434 
1435 		return B_OK;
1436 	}
1437 
1438 	// Cut the beginning only?
1439 	if (area->base >= address) {
1440 		addr_t newBase = lastAddress + 1;
1441 		addr_t newSize = areaLast - lastAddress;
1442 
1443 		// unmap pages
1444 		vm_unmap_pages(area, area->base, newBase - area->base, false);
1445 
1446 		// TODO: If no one else uses the area's cache, we should resize it, too!
1447 
1448 		area->cache_offset += newBase - area->base;
1449 		area->base = newBase;
1450 		area->size = newSize;
1451 
1452 		return B_OK;
1453 	}
1454 
1455 	// The tough part -- cut a piece out of the middle of the area.
1456 	// We do that by shrinking the area to the begin section and creating a
1457 	// new area for the end section.
1458 
1459 	addr_t firstNewSize = address - area->base;
1460 	addr_t secondBase = lastAddress + 1;
1461 	addr_t secondSize = areaLast - lastAddress;
1462 
1463 	// unmap pages
1464 	vm_unmap_pages(area, address, area->size - firstNewSize, false);
1465 
1466 	// resize the area
1467 	addr_t oldSize = area->size;
1468 	area->size = firstNewSize;
1469 
1470 	// TODO: If no one else uses the area's cache, we might want to create a
1471 	// new cache for the second area, transfer the concerned pages from the
1472 	// first cache to it and resize the first cache.
1473 
1474 	// map the second area
1475 	vm_area* secondArea;
1476 	void* secondBaseAddress = (void*)secondBase;
1477 	status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress,
1478 		area->cache_offset + (secondBase - area->base), secondSize,
1479 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
1480 		&secondArea, area->name, false, kernel);
1481 	if (error != B_OK) {
1482 		area->size = oldSize;
1483 		return error;
1484 	}
1485 
1486 	// We need a cache reference for the new area.
1487 	cache->AcquireRefLocked();
1488 
1489 	if (_secondArea != NULL)
1490 		*_secondArea = secondArea;
1491 
1492 	return B_OK;
1493 }
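
// Example (illustrative numbers): cutting [0x3000, 0x4fff] out of an area
// spanning [0x1000, 0x8fff] shrinks the original area to [0x1000, 0x2fff] and
// returns a new area for [0x5000, 0x8fff] through _secondArea; both areas
// keep mapping the same (now additionally referenced) cache.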
1494 
1495 
1496 static inline void
1497 increment_page_wired_count(vm_page* page)
1498 {
1499 	// TODO: needs to be atomic on all platforms!
1500 	// ... but at least the check isn't. Consequently we should hold
1501 	// sMappingLock, which would allow us to even avoid atomic_add() on
1502 	// gMappedPagesCount.
1503 	if (page->wired_count++ == 0) {
1504 		if (page->mappings.IsEmpty())
1505 			atomic_add(&gMappedPagesCount, 1);
1506 	}
1507 }
1508 
1509 
1510 static inline void
1511 decrement_page_wired_count(vm_page* page)
1512 {
1513 	if (--page->wired_count == 0) {
1514 		// TODO: needs to be atomic on all platforms!
1515 		// See above!
1516 		if (page->mappings.IsEmpty())
1517 			atomic_add(&gMappedPagesCount, -1);
1518 	}
1519 }
1520 
1521 
1522 /*!	Deletes all areas in the given address range.
1523 	The address space must be write-locked.
1524 */
1525 static status_t
1526 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size,
1527 	bool kernel)
1528 {
1529 	size = PAGE_ALIGN(size);
1530 	addr_t lastAddress = address + (size - 1);
1531 
1532 	// Check whether the caller is allowed to modify the concerned areas.
1533 	vm_area* area;
1534 	if (!kernel) {
1535 		area = addressSpace->areas;
1536 		while (area != NULL) {
1537 			vm_area* nextArea = area->address_space_next;
1538 
1539 			if (area->id != RESERVED_AREA_ID) {
1540 				addr_t areaLast = area->base + (area->size - 1);
1541 				if (area->base < lastAddress && address < areaLast) {
1542 					if ((area->protection & B_KERNEL_AREA) != 0)
1543 						return B_NOT_ALLOWED;
1544 				}
1545 			}
1546 
1547 			area = nextArea;
1548 		}
1549 	}
1550 
1551 	area = addressSpace->areas;
1552 	while (area != NULL) {
1553 		vm_area* nextArea = area->address_space_next;
1554 
1555 		if (area->id != RESERVED_AREA_ID) {
1556 			addr_t areaLast = area->base + (area->size - 1);
1557 			if (area->base < lastAddress && address < areaLast) {
1558 				status_t error = cut_area(addressSpace, area, address,
1559 					lastAddress, NULL, kernel);
1560 				if (error != B_OK)
1561 					return error;
1562 					// Failing after already messing with areas is ugly, but we
1563 					// can't do anything about it.
1564 			}
1565 		}
1566 
1567 		area = nextArea;
1568 	}
1569 
1570 	return B_OK;
1571 }
1572 
1573 
1574 /*! You need to hold the lock of the cache and the write lock of the address
1575 	space when calling this function.
1576 	Note that in case of error your cache will be temporarily unlocked.
1577 */
1578 static status_t
1579 map_backing_store(vm_address_space* addressSpace, vm_cache* cache,
1580 	void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
1581 	int wiring, int protection, int mapping, vm_area** _area,
1582 	const char* areaName, bool unmapAddressRange, bool kernel)
1583 {
1584 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, "
1585 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
1586 		"'%s'\n", addressSpace, cache, *_virtualAddress, offset, size,
1587 		addressSpec, wiring, protection, _area, areaName));
1588 	cache->AssertLocked();
1589 
1590 	vm_area* area = create_area_struct(addressSpace, areaName, wiring,
1591 		protection);
1592 	if (area == NULL)
1593 		return B_NO_MEMORY;
1594 
1595 	status_t status;
1596 
1597 	// if this is a private map, we need to create a new cache
1598 	// to handle the private copies of pages as they are written to
1599 	vm_cache* sourceCache = cache;
1600 	if (mapping == REGION_PRIVATE_MAP) {
1601 		vm_cache* newCache;
1602 
1603 		// create an anonymous cache
1604 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1605 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true);
1606 		if (status != B_OK)
1607 			goto err1;
1608 
1609 		newCache->Lock();
1610 		newCache->temporary = 1;
1611 		newCache->scan_skip = cache->scan_skip;
1612 		newCache->virtual_base = offset;
1613 		newCache->virtual_end = offset + size;
1614 
1615 		cache->AddConsumer(newCache);
1616 
1617 		cache = newCache;
1618 	}
1619 
1620 	status = cache->SetMinimalCommitment(size);
1621 	if (status != B_OK)
1622 		goto err2;
1623 
1624 	// check to see if this address space has entered DELETE state
1625 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
1626 		// okay, someone is trying to delete this address space now, so we can't
1627 		// insert the area, so back out
1628 		status = B_BAD_TEAM_ID;
1629 		goto err2;
1630 	}
1631 
1632 	if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) {
1633 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
1634 			size, kernel);
1635 		if (status != B_OK)
1636 			goto err2;
1637 	}
1638 
1639 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
1640 	if (status < B_OK)
1641 		goto err2;
1642 
1643 	// attach the cache to the area
1644 	area->cache = cache;
1645 	area->cache_offset = offset;
1646 
1647 	// point the cache back to the area
1648 	cache->InsertAreaLocked(area);
1649 	if (mapping == REGION_PRIVATE_MAP)
1650 		cache->Unlock();
1651 
1652 	// insert the area in the global area hash table
1653 	rw_lock_write_lock(&sAreaHashLock);
1654 	hash_insert(sAreaHash, area);
1655 	rw_lock_write_unlock(&sAreaHashLock);
1656 
1657 	// grab a ref to the address space (the area holds this)
1658 	atomic_add(&addressSpace->ref_count, 1);
1659 
1660 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1661 //		cache, sourceCache, areaName, area);
1662 
1663 	*_area = area;
1664 	return B_OK;
1665 
1666 err2:
1667 	if (mapping == REGION_PRIVATE_MAP) {
1668 		// We created this cache, so we must delete it again. Note that we
1669 		// need to temporarily unlock the source cache or we'll otherwise
1670 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1671 		sourceCache->Unlock();
1672 		cache->ReleaseRefAndUnlock();
1673 		sourceCache->Lock();
1674 	}
1675 err1:
1676 	free(area->name);
1677 	free(area);
1678 	return status;
1679 }
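
// A minimal call sketch for map_backing_store() (hypothetical helper and
// values; vm_create_anonymous_area() below shows a real caller). Per the
// comment above, the cache lock and the address space write lock must be held:
#if 0
static status_t
example_map_cache(vm_address_space* addressSpace, vm_cache* cache, addr_t size)
{
	void* address = NULL;
	vm_area* area;
	return map_backing_store(addressSpace, cache, &address, 0, size,
		B_ANY_KERNEL_ADDRESS, B_NO_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		&area, "example area", false, true);
}
#endif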
1680 
1681 
1682 status_t
1683 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1684 {
1685 	AddressSpaceWriteLocker locker(team);
1686 	if (!locker.IsLocked())
1687 		return B_BAD_TEAM_ID;
1688 
1689 	// check to see if this address space has entered DELETE state
1690 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1691 		// okay, someone is trying to delete this address space now, so we can't
1692 		// insert the area, so back out
1693 		return B_BAD_TEAM_ID;
1694 	}
1695 
1696 	// search area list and remove any matching reserved ranges
1697 
1698 	vm_area* area = locker.AddressSpace()->areas;
1699 	vm_area* last = NULL;
1700 	while (area) {
1701 		// the area must be completely part of the reserved range
1702 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
1703 			&& area->base + area->size <= (addr_t)address + size) {
1704 			// remove reserved range
1705 			vm_area* reserved = area;
1706 			if (last)
1707 				last->address_space_next = reserved->address_space_next;
1708 			else
1709 				locker.AddressSpace()->areas = reserved->address_space_next;
1710 
1711 			area = reserved->address_space_next;
1712 			vm_put_address_space(locker.AddressSpace());
1713 			free(reserved);
1714 			continue;
1715 		}
1716 
1717 		last = area;
1718 		area = area->address_space_next;
1719 	}
1720 
1721 	return B_OK;
1722 }
1723 
1724 
1725 status_t
1726 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1727 	addr_t size, uint32 flags)
1728 {
1729 	if (size == 0)
1730 		return B_BAD_VALUE;
1731 
1732 	AddressSpaceWriteLocker locker(team);
1733 	if (!locker.IsLocked())
1734 		return B_BAD_TEAM_ID;
1735 
1736 	// check to see if this address space has entered DELETE state
1737 	if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) {
1738 		// okay, someone is trying to delete this address space now, so we
1739 		// can't insert the area, let's back out
1740 		return B_BAD_TEAM_ID;
1741 	}
1742 
1743 	vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags);
1744 	if (area == NULL)
1745 		return B_NO_MEMORY;
1746 
1747 	status_t status = insert_area(locker.AddressSpace(), _address, addressSpec,
1748 		size, area);
1749 	if (status < B_OK) {
1750 		free(area);
1751 		return status;
1752 	}
1753 
1754 	// the area is now reserved!
1755 
1756 	area->cache_offset = area->base;
1757 		// we cache the original base address here
1758 
1759 	atomic_add(&locker.AddressSpace()->ref_count, 1);
1760 	return B_OK;
1761 }
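
// Illustrative sketch (hypothetical helper and size): reserving kernel
// address space up front; a later B_EXACT_ADDRESS area creation inside the
// range is then satisfied from the reservation via find_reserved_area().
#if 0
static status_t
example_reserve_kernel_range(void** _base)
{
	return vm_reserve_address_range(vm_kernel_address_space_id(), _base,
		B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, 0);
}
#endif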
1762 
1763 
1764 area_id
1765 vm_create_anonymous_area(team_id team, const char* name, void** address,
1766 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
1767 	uint32 flags, bool kernel)
1768 {
1769 	vm_area* area;
1770 	vm_cache* cache;
1771 	vm_page* page = NULL;
1772 	bool isStack = (protection & B_STACK_AREA) != 0;
1773 	page_num_t guardPages;
1774 	bool canOvercommit = false;
1775 	addr_t physicalBase = 0;
1776 
1777 	TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size));
1778 
1779 	size = PAGE_ALIGN(size);
1780 
1781 	if (size == 0)
1782 		return B_BAD_VALUE;
1783 	if (!arch_vm_supports_protection(protection))
1784 		return B_NOT_SUPPORTED;
1785 
1786 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1787 		canOvercommit = true;
1788 
1789 #ifdef DEBUG_KERNEL_STACKS
1790 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1791 		isStack = true;
1792 #endif
1793 
1794 	// check parameters
1795 	switch (addressSpec) {
1796 		case B_ANY_ADDRESS:
1797 		case B_EXACT_ADDRESS:
1798 		case B_BASE_ADDRESS:
1799 		case B_ANY_KERNEL_ADDRESS:
1800 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1801 			break;
1802 		case B_PHYSICAL_BASE_ADDRESS:
1803 			physicalBase = (addr_t)*address;
1804 			addressSpec = B_ANY_KERNEL_ADDRESS;
1805 			break;
1806 
1807 		default:
1808 			return B_BAD_VALUE;
1809 	}
1810 
1811 	bool doReserveMemory = false;
1812 	switch (wiring) {
1813 		case B_NO_LOCK:
1814 			break;
1815 		case B_FULL_LOCK:
1816 		case B_LAZY_LOCK:
1817 		case B_CONTIGUOUS:
1818 			doReserveMemory = true;
1819 			break;
1820 		case B_ALREADY_WIRED:
1821 			break;
1822 		case B_LOMEM:
1823 		//case B_SLOWMEM:
1824 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1825 			wiring = B_FULL_LOCK;
1826 			doReserveMemory = true;
1827 			break;
1828 		default:
1829 			return B_BAD_VALUE;
1830 	}
1831 
1832 	// For full lock or contiguous areas we're also going to map the pages and
1833 	// thus need to reserve pages for the mapping backend upfront.
1834 	addr_t reservedMapPages = 0;
1835 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1836 		AddressSpaceWriteLocker locker;
1837 		status_t status = locker.SetTo(team);
1838 		if (status != B_OK)
1839 			return status;
1840 
1841 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
1842 		reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
1843 	}
1844 
1845 	// Reserve memory before acquiring the address space lock. This reduces the
1846 	// chances of failure, since while holding the write lock to the address
1847 	// space (if it is the kernel address space that is), the low memory handler
1848 	// won't be able to free anything for us.
1849 	addr_t reservedMemory = 0;
1850 	if (doReserveMemory) {
1851 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1852 		if (vm_try_reserve_memory(size, timeout) != B_OK)
1853 			return B_NO_MEMORY;
1854 		reservedMemory = size;
1855 		// TODO: We don't reserve the memory for the pages for the page
1856 		// directories/tables. We actually need to, since we currently don't
1857 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1858 		// there are actually fewer physical pages than there should be, which
1859 		// can get the VM into trouble in low memory situations.
1860 	}
1861 
1862 	AddressSpaceWriteLocker locker;
1863 	vm_address_space* addressSpace;
1864 	status_t status;
1865 
1866 	// For full lock areas reserve the pages before locking the address
1867 	// space. E.g. block caches can't release their memory while we hold the
1868 	// address space lock.
1869 	page_num_t reservedPages = reservedMapPages;
1870 	if (wiring == B_FULL_LOCK)
1871 		reservedPages += size / B_PAGE_SIZE;
1872 	if (reservedPages > 0) {
1873 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1874 			if (!vm_page_try_reserve_pages(reservedPages)) {
1875 				reservedPages = 0;
1876 				status = B_WOULD_BLOCK;
1877 				goto err0;
1878 			}
1879 		} else
1880 			vm_page_reserve_pages(reservedPages);
1881 	}
1882 
1883 	status = locker.SetTo(team);
1884 	if (status != B_OK)
1885 		goto err0;
1886 
1887 	addressSpace = locker.AddressSpace();
1888 
1889 	if (wiring == B_CONTIGUOUS) {
1890 		// we try to allocate the page run here upfront as this may easily
1891 		// fail for obvious reasons
1892 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, physicalBase,
1893 			size / B_PAGE_SIZE);
1894 		if (page == NULL) {
1895 			status = B_NO_MEMORY;
1896 			goto err0;
1897 		}
1898 	}
1899 
1900 	// create an anonymous cache
1901 	// if it's a stack, make sure that two pages are available at least
1902 	// if it's a stack, make sure that at least two pages are available
1903 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1904 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1905 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1906 		wiring == B_NO_LOCK);
1907 	if (status != B_OK)
1908 		goto err1;
1909 
1910 	cache->temporary = 1;
1911 	cache->virtual_end = size;
1912 	cache->committed_size = reservedMemory;
1913 		// TODO: This should be done via a method.
1914 	reservedMemory = 0;
1915 
1916 	switch (wiring) {
1917 		case B_LAZY_LOCK:
1918 		case B_FULL_LOCK:
1919 		case B_CONTIGUOUS:
1920 		case B_ALREADY_WIRED:
1921 			cache->scan_skip = 1;
1922 			break;
1923 		case B_NO_LOCK:
1924 			cache->scan_skip = 0;
1925 			break;
1926 	}
1927 
1928 	cache->Lock();
1929 
1930 	status = map_backing_store(addressSpace, cache, address, 0, size,
1931 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
1932 		(flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel);
1933 
1934 	if (status < B_OK) {
1935 		cache->ReleaseRefAndUnlock();
1936 		goto err1;
1937 	}
1938 
1939 	locker.DegradeToReadLock();
1940 
1941 	switch (wiring) {
1942 		case B_NO_LOCK:
1943 		case B_LAZY_LOCK:
1944 			// do nothing - the pages are mapped in as needed
1945 			break;
1946 
1947 		case B_FULL_LOCK:
1948 		{
1949 			// Allocate and map all pages for this area
1950 
1951 			off_t offset = 0;
1952 			for (addr_t address = area->base;
1953 					address < area->base + (area->size - 1);
1954 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1955 #ifdef DEBUG_KERNEL_STACKS
1956 #	ifdef STACK_GROWS_DOWNWARDS
1957 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
1958 						* B_PAGE_SIZE)
1959 #	else
1960 				if (isStack && address >= area->base + area->size
1961 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1962 #	endif
1963 					continue;
1964 #endif
1965 				vm_page* page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
1966 				cache->InsertPage(page, offset);
1967 				vm_map_page(area, page, address, protection);
1968 
1969 				// Periodically unreserve pages we've already allocated, so that
1970 				// we don't unnecessarily increase the pressure on the VM.
1971 				if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) {
1972 					page_num_t toUnreserve = 128;
1973 					vm_page_unreserve_pages(toUnreserve);
1974 					reservedPages -= toUnreserve;
1975 				}
1976 			}
1977 
1978 			break;
1979 		}
1980 
1981 		case B_ALREADY_WIRED:
1982 		{
1983 			// The pages should already be mapped. This is only really useful
1984 			// during boot time. Find the appropriate vm_page objects and stick
1985 			// them in the cache object.
1986 			vm_translation_map* map = &addressSpace->translation_map;
1987 			off_t offset = 0;
1988 
1989 			if (!gKernelStartup)
1990 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1991 
1992 			map->ops->lock(map);
1993 
1994 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
1995 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
1996 					offset += B_PAGE_SIZE) {
1997 				addr_t physicalAddress;
1998 				uint32 flags;
1999 				status = map->ops->query(map, virtualAddress,
2000 					&physicalAddress, &flags);
2001 				if (status < B_OK) {
2002 					panic("looking up mapping failed for va 0x%lx\n",
2003 						virtualAddress);
2004 				}
2005 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2006 				if (page == NULL) {
2007 					panic("looking up page failed for pa 0x%lx\n",
2008 						physicalAddress);
2009 				}
2010 
2011 				increment_page_wired_count(page);
2012 				vm_page_set_state(page, PAGE_STATE_WIRED);
2013 				cache->InsertPage(page, offset);
2014 			}
2015 
2016 			map->ops->unlock(map);
2017 			break;
2018 		}
2019 
2020 		case B_CONTIGUOUS:
2021 		{
2022 			// We have already allocated our contiguous page run, so we can now
2023 			// just map it into the address space
2024 			vm_translation_map* map = &addressSpace->translation_map;
2025 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
2026 			addr_t virtualAddress = area->base;
2027 			off_t offset = 0;
2028 
2029 			map->ops->lock(map);
2030 
2031 			for (virtualAddress = area->base; virtualAddress < area->base
2032 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
2033 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
2034 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2035 				if (page == NULL)
2036 					panic("couldn't lookup physical page just allocated\n");
2037 
2038 				status = map->ops->map(map, virtualAddress, physicalAddress,
2039 					protection);
2040 				if (status < B_OK)
2041 					panic("couldn't map physical page in page run\n");
2042 
2043 				increment_page_wired_count(page);
2044 				vm_page_set_state(page, PAGE_STATE_WIRED);
2045 				cache->InsertPage(page, offset);
2046 			}
2047 
2048 			map->ops->unlock(map);
2049 			break;
2050 		}
2051 
2052 		default:
2053 			break;
2054 	}
2055 
2056 	cache->Unlock();
2057 
2058 	if (reservedPages > 0)
2059 		vm_page_unreserve_pages(reservedPages);
2060 
2061 	TRACE(("vm_create_anonymous_area: done\n"));
2062 
2063 	area->cache_type = CACHE_TYPE_RAM;
2064 	return area->id;
2065 
2066 err1:
2067 	if (wiring == B_CONTIGUOUS) {
2068 		// we had allocated the contiguous page run upfront -- free it again
2069 		addr_t pageNumber = page->physical_page_number;
2070 		int32 i;
2071 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
2072 			page = vm_lookup_page(pageNumber);
2073 			if (page == NULL)
2074 				panic("couldn't lookup physical page just allocated\n");
2075 
2076 			vm_page_set_state(page, PAGE_STATE_FREE);
2077 		}
2078 	}
2079 
2080 err0:
2081 	if (reservedPages > 0)
2082 		vm_page_unreserve_pages(reservedPages);
2083 	if (reservedMemory > 0)
2084 		vm_unreserve_memory(reservedMemory);
2085 
2086 	return status;
2087 }
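/*	A minimal usage sketch (illustrative only -- the area name, size, and
	error handling are placeholders; the argument order follows the call site
	in _vm_map_file() below): creating a wired, physically contiguous kernel
	buffer. The B_CONTIGUOUS path above allocates the whole page run upfront,
	so on success the buffer occupies one contiguous run of pages.

		void* address = NULL;
		area_id area = vm_create_anonymous_area(vm_kernel_address_space_id(),
			"dma buffer", &address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
			B_CONTIGUOUS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, true);
		// area < B_OK indicates failure (e.g. B_NO_MEMORY from the page run
		// allocation above)
*/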
2088 
2089 
2090 area_id
2091 vm_map_physical_memory(team_id team, const char* name, void** _address,
2092 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
2093 {
2094 	vm_area* area;
2095 	vm_cache* cache;
2096 	addr_t mapOffset;
2097 
2098 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
2099 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
2100 		name, _address, addressSpec, size, protection, physicalAddress));
2101 
2102 	if (!arch_vm_supports_protection(protection))
2103 		return B_NOT_SUPPORTED;
2104 
2105 	AddressSpaceWriteLocker locker(team);
2106 	if (!locker.IsLocked())
2107 		return B_BAD_TEAM_ID;
2108 
2109 	// if the physical address is not page aligned,
2110 	// move the area down so that it starts on a page boundary
2111 	mapOffset = physicalAddress % B_PAGE_SIZE;
2112 	size += mapOffset;
2113 	physicalAddress -= mapOffset;
2114 
2115 	size = PAGE_ALIGN(size);
2116 
2117 	// create a device cache
2118 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
2119 	if (status != B_OK)
2120 		return status;
2121 
2122 	// tell the page scanner to skip over this area; its pages are special
2123 	cache->scan_skip = 1;
2124 	cache->virtual_end = size;
2125 
2126 	cache->Lock();
2127 
2128 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2129 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
2130 		REGION_NO_PRIVATE_MAP, &area, name, false, true);
2131 
2132 	if (status < B_OK)
2133 		cache->ReleaseRefLocked();
2134 
2135 	cache->Unlock();
2136 
2137 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
2138 		// set requested memory type
2139 		status = arch_vm_set_memory_type(area, physicalAddress,
2140 			addressSpec & B_MTR_MASK);
2141 		if (status < B_OK)
2142 			delete_area(locker.AddressSpace(), area);
2143 	}
2144 
2145 	if (status >= B_OK) {
2146 		// make sure our area is mapped in completely
2147 
2148 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2149 		size_t reservePages = map->ops->map_max_pages_need(map, area->base,
2150 			area->base + (size - 1));
2151 
2152 		vm_page_reserve_pages(reservePages);
2153 		map->ops->lock(map);
2154 
2155 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2156 			map->ops->map(map, area->base + offset, physicalAddress + offset,
2157 				protection);
2158 		}
2159 
2160 		map->ops->unlock(map);
2161 		vm_page_unreserve_pages(reservePages);
2162 	}
2163 
2164 	if (status < B_OK)
2165 		return status;
2166 
2167 	// modify the returned pointer to be offset into the new area
2168 	// the same way the physical address passed in was offset
2169 	*_address = (void*)((addr_t)*_address + mapOffset);
2170 
2171 	area->cache_type = CACHE_TYPE_DEVICE;
2172 	return area->id;
2173 }
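/*	A minimal usage sketch (illustrative only -- frameBufferSize and
	physicalFrameBuffer are placeholders a driver would obtain from its
	hardware): mapping a device's memory range into the kernel. Note that the
	returned pointer is already adjusted above if the physical address is not
	page aligned.

		void* virtualAddress = NULL;
		area_id area = vm_map_physical_memory(vm_kernel_address_space_id(),
			"frame buffer", &virtualAddress, B_ANY_KERNEL_ADDRESS,
			frameBufferSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
			physicalFrameBuffer);
*/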
2174 
2175 
2176 area_id
2177 vm_create_null_area(team_id team, const char* name, void** address,
2178 	uint32 addressSpec, addr_t size)
2179 {
2180 	vm_area* area;
2181 	vm_cache* cache;
2182 	status_t status;
2183 
2184 	AddressSpaceWriteLocker locker(team);
2185 	if (!locker.IsLocked())
2186 		return B_BAD_TEAM_ID;
2187 
2188 	size = PAGE_ALIGN(size);
2189 
2190 	// create a null cache
2191 	status = VMCacheFactory::CreateNullCache(cache);
2192 	if (status != B_OK)
2193 		return status;
2194 
2195 	// tell the page scanner to skip over this area; no pages will be mapped here
2196 	cache->scan_skip = 1;
2197 	cache->virtual_end = size;
2198 
2199 	cache->Lock();
2200 
2201 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
2202 		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name,
2203 		false, true);
2204 
2205 	if (status < B_OK) {
2206 		cache->ReleaseRefAndUnlock();
2207 		return status;
2208 	}
2209 
2210 	cache->Unlock();
2211 
2212 	area->cache_type = CACHE_TYPE_NULL;
2213 	return area->id;
2214 }
2215 
2216 
2217 /*!	Creates the vnode cache for the specified \a vnode.
2218 	The vnode has to be marked busy when calling this function.
2219 */
2220 status_t
2221 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2222 {
2223 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2224 }
2225 
2226 
2227 /*!	\a cache must be locked. The area's address space must be read-locked.
2228 */
2229 static void
2230 pre_map_area_pages(vm_area* area, VMCache* cache)
2231 {
2232 	addr_t baseAddress = area->base;
2233 	addr_t cacheOffset = area->cache_offset;
2234 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2235 	page_num_t endPage = firstPage + area->size / B_PAGE_SIZE;
2236 
2237 	for (VMCachePagesTree::Iterator it
2238 				= cache->pages.GetIterator(firstPage, true, true);
2239 			vm_page* page = it.Next();) {
2240 		if (page->cache_offset >= endPage)
2241 			break;
2242 
2243 		// skip inactive pages
2244 		if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0)
2245 			continue;
2246 
2247 		vm_map_page(area, page,
2248 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2249 			B_READ_AREA | B_KERNEL_READ_AREA);
2250 	}
2251 }
2252 
2253 
2254 /*!	Will map the file specified by \a fd to an area in memory.
2255 	The file will be mirrored beginning at the specified \a offset. The
2256 	\a offset and \a size arguments have to be page aligned.
2257 */
2258 static area_id
2259 _vm_map_file(team_id team, const char* name, void** _address, uint32 addressSpec,
2260 	size_t size, uint32 protection, uint32 mapping, int fd, off_t offset,
2261 	bool kernel)
2262 {
2263 	// TODO: for binary files, we want to make sure that they get a
2264 	//	snapshot of the file at mapping time, i.e. later changes should not
2265 	//	make it into the mapped copy -- doing this in a nice way will need
2266 	//	quite some changes
2267 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
2268 		fd, offset, size, mapping));
2269 
2270 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
2271 	size = PAGE_ALIGN(size);
2272 
2273 	if (mapping == REGION_NO_PRIVATE_MAP)
2274 		protection |= B_SHARED_AREA;
2275 
2276 	if (fd < 0) {
2277 		uint32 flags = addressSpec == B_EXACT_ADDRESS
2278 			? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2279 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
2280 			B_NO_LOCK, protection, flags, kernel);
2281 	}
2282 
2283 	// get the open flags of the FD
2284 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2285 	if (descriptor == NULL)
2286 		return EBADF;
2287 	int32 openMode = descriptor->open_mode;
2288 	put_fd(descriptor);
2289 
2290 	// The FD must be open for reading in any case. For a shared mapping with
2291 	// write access, the FD must additionally be open for writing.
2292 	if ((openMode & O_ACCMODE) == O_WRONLY
2293 		|| (mapping == REGION_NO_PRIVATE_MAP
2294 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2295 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2296 		return EACCES;
2297 	}
2298 
2299 	// get the vnode for the object, this also grabs a ref to it
2300 	struct vnode* vnode = NULL;
2301 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2302 	if (status < B_OK)
2303 		return status;
2304 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2305 
2306 	// If we're going to pre-map pages, we need to reserve the pages needed by
2307 	// the mapping backend upfront.
2308 	page_num_t reservedPreMapPages = 0;
2309 	if ((protection & B_READ_AREA) != 0) {
2310 		AddressSpaceWriteLocker locker;
2311 		status = locker.SetTo(team);
2312 		if (status != B_OK)
2313 			return status;
2314 
2315 		vm_translation_map* map = &locker.AddressSpace()->translation_map;
2316 		reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
2317 
2318 		locker.Unlock();
2319 
2320 		vm_page_reserve_pages(reservedPreMapPages);
2321 	}
2322 
2323 	struct PageUnreserver {
2324 		PageUnreserver(page_num_t count)
2325 			: fCount(count)
2326 		{
2327 		}
2328 
2329 		~PageUnreserver()
2330 		{
2331 			if (fCount > 0)
2332 				vm_page_unreserve_pages(fCount);
2333 		}
2334 
2335 		page_num_t	fCount;
2336 	} pageUnreserver(reservedPreMapPages);
2337 
2338 	AddressSpaceWriteLocker locker(team);
2339 	if (!locker.IsLocked())
2340 		return B_BAD_TEAM_ID;
2341 
2342 	// TODO: this only works for file systems that use the file cache
2343 	vm_cache* cache;
2344 	status = vfs_get_vnode_cache(vnode, &cache, false);
2345 	if (status < B_OK)
2346 		return status;
2347 
2348 	cache->Lock();
2349 
2350 	vm_area* area;
2351 	status = map_backing_store(locker.AddressSpace(), cache, _address,
2352 		offset, size, addressSpec, 0, protection, mapping, &area, name,
2353 		addressSpec == B_EXACT_ADDRESS, kernel);
2354 
2355 	if (status < B_OK || mapping == REGION_PRIVATE_MAP) {
2356 		// map_backing_store() cannot know we no longer need the ref
2357 		cache->ReleaseRefLocked();
2358 	}
2359 
2360 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2361 		pre_map_area_pages(area, cache);
2362 
2363 	cache->Unlock();
2364 
2365 	if (status < B_OK)
2366 		return status;
2367 
2368 	area->cache_type = CACHE_TYPE_VNODE;
2369 	return area->id;
2370 }
2371 
2372 
2373 area_id
2374 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2375 	addr_t size, uint32 protection, uint32 mapping, int fd, off_t offset)
2376 {
2377 	if (!arch_vm_supports_protection(protection))
2378 		return B_NOT_SUPPORTED;
2379 
2380 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2381 		mapping, fd, offset, true);
2382 }
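/*	A minimal usage sketch (illustrative only -- fd is a placeholder for an
	already opened descriptor): mapping the first 16 pages of a file read-only
	into the kernel team. With REGION_NO_PRIVATE_MAP the area shares the
	vnode's cache, so it sees the pages kept by the file cache.

		void* address = NULL;
		area_id area = vm_map_file(vm_kernel_address_space_id(), "mapped file",
			&address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
			B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, fd, 0);
*/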
2383 
2384 
2385 vm_cache*
2386 vm_area_get_locked_cache(vm_area* area)
2387 {
2388 	mutex_lock(&sAreaCacheLock);
2389 
2390 	while (true) {
2391 		vm_cache* cache = area->cache;
2392 
2393 		if (!cache->SwitchLock(&sAreaCacheLock)) {
2394 			// cache has been deleted
2395 			mutex_lock(&sAreaCacheLock);
2396 			continue;
2397 		}
2398 
2399 		mutex_lock(&sAreaCacheLock);
2400 
2401 		if (cache == area->cache) {
2402 			cache->AcquireRefLocked();
2403 			mutex_unlock(&sAreaCacheLock);
2404 			return cache;
2405 		}
2406 
2407 		// the cache changed in the meantime
2408 		cache->Unlock();
2409 	}
2410 }
2411 
2412 
2413 void
2414 vm_area_put_locked_cache(vm_cache* cache)
2415 {
2416 	cache->ReleaseRefAndUnlock();
2417 }
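/*	These two functions are used as a pair, as for example in vm_clone_area()
	below: get the area's cache locked and referenced, work on it, then
	release the reference and the lock again.

		vm_cache* cache = vm_area_get_locked_cache(area);
		// ... inspect or modify the cache while it is locked ...
		vm_area_put_locked_cache(cache);
*/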
2418 
2419 
2420 area_id
2421 vm_clone_area(team_id team, const char* name, void** address,
2422 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2423 	bool kernel)
2424 {
2425 	vm_area* newArea = NULL;
2426 	vm_area* sourceArea;
2427 
2428 	// Check whether the source area exists and is cloneable. If so, mark it
2429 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2430 	{
2431 		AddressSpaceWriteLocker locker;
2432 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2433 		if (status != B_OK)
2434 			return status;
2435 
2436 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2437 			return B_NOT_ALLOWED;
2438 
2439 		sourceArea->protection |= B_SHARED_AREA;
2440 		protection |= B_SHARED_AREA;
2441 	}
2442 
2443 	// Now lock both address spaces and actually do the cloning.
2444 
2445 	MultiAddressSpaceLocker locker;
2446 	vm_address_space* sourceAddressSpace;
2447 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2448 	if (status != B_OK)
2449 		return status;
2450 
2451 	vm_address_space* targetAddressSpace;
2452 	status = locker.AddTeam(team, true, &targetAddressSpace);
2453 	if (status != B_OK)
2454 		return status;
2455 
2456 	status = locker.Lock();
2457 	if (status != B_OK)
2458 		return status;
2459 
2460 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2461 	if (sourceArea == NULL)
2462 		return B_BAD_VALUE;
2463 
2464 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2465 		return B_NOT_ALLOWED;
2466 
2467 	vm_cache* cache = vm_area_get_locked_cache(sourceArea);
2468 
2469 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2470 	//	have been adapted. Maybe it should be part of the kernel settings,
2471 	//	anyway (so that old drivers can always work).
2472 #if 0
2473 	if (sourceArea->aspace == vm_kernel_address_space()
2474 		&& addressSpace != vm_kernel_address_space()
2475 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2476 		// kernel areas must not be cloned in userland, unless explicitly
2477 		// declared user-cloneable upon construction
2478 		status = B_NOT_ALLOWED;
2479 	} else
2480 #endif
2481 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2482 		status = B_NOT_ALLOWED;
2483 	else {
2484 		status = map_backing_store(targetAddressSpace, cache, address,
2485 			sourceArea->cache_offset, sourceArea->size, addressSpec,
2486 			sourceArea->wiring, protection, mapping, &newArea, name, false,
2487 			kernel);
2488 	}
2489 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2490 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2491 		// to create a new cache, and has therefore already acquired a reference
2492 		// to the source cache - but otherwise it has no idea that we need
2493 		// one.
2494 		cache->AcquireRefLocked();
2495 	}
2496 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2497 		// we need to map in everything at this point
2498 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2499 			// we don't have actual pages to map but a physical area
2500 			vm_translation_map* map
2501 				= &sourceArea->address_space->translation_map;
2502 			map->ops->lock(map);
2503 
2504 			addr_t physicalAddress;
2505 			uint32 oldProtection;
2506 			map->ops->query(map, sourceArea->base, &physicalAddress,
2507 				&oldProtection);
2508 
2509 			map->ops->unlock(map);
2510 
2511 			map = &targetAddressSpace->translation_map;
2512 			size_t reservePages = map->ops->map_max_pages_need(map,
2513 				newArea->base, newArea->base + (newArea->size - 1));
2514 
2515 			vm_page_reserve_pages(reservePages);
2516 			map->ops->lock(map);
2517 
2518 			for (addr_t offset = 0; offset < newArea->size;
2519 					offset += B_PAGE_SIZE) {
2520 				map->ops->map(map, newArea->base + offset,
2521 					physicalAddress + offset, protection);
2522 			}
2523 
2524 			map->ops->unlock(map);
2525 			vm_page_unreserve_pages(reservePages);
2526 		} else {
2527 			vm_translation_map* map = &targetAddressSpace->translation_map;
2528 			size_t reservePages = map->ops->map_max_pages_need(map,
2529 				newArea->base, newArea->base + (newArea->size - 1));
2530 			vm_page_reserve_pages(reservePages);
2531 
2532 			// map in all pages from source
2533 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2534 					vm_page* page  = it.Next();) {
2535 				vm_map_page(newArea, page, newArea->base
2536 					+ ((page->cache_offset << PAGE_SHIFT)
2537 					- newArea->cache_offset), protection);
2538 			}
2539 
2540 			vm_page_unreserve_pages(reservePages);
2541 		}
2542 	}
2543 	if (status == B_OK)
2544 		newArea->cache_type = sourceArea->cache_type;
2545 
2546 	vm_area_put_locked_cache(cache);
2547 
2548 	if (status < B_OK)
2549 		return status;
2550 
2551 	return newArea->id;
2552 }
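/*	A minimal usage sketch (illustrative only -- targetTeam and sourceArea are
	placeholders): cloning an existing area into another team as a shared
	mapping, so that both areas use the same cache and thus the same physical
	pages.

		void* address = NULL;
		area_id clone = vm_clone_area(targetTeam, "cloned area", &address,
			B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
			sourceArea, false);
*/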
2553 
2554 
2555 //! The address space must be write locked at this point
2556 static void
2557 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area)
2558 {
2559 	vm_area* temp = addressSpace->areas;
2560 	vm_area* last = NULL;
2561 
2562 	while (temp != NULL) {
2563 		if (area == temp) {
2564 			if (last != NULL) {
2565 				last->address_space_next = temp->address_space_next;
2566 			} else {
2567 				addressSpace->areas = temp->address_space_next;
2568 			}
2569 			addressSpace->change_count++;
2570 			break;
2571 		}
2572 		last = temp;
2573 		temp = temp->address_space_next;
2574 	}
2575 	if (area == addressSpace->area_hint)
2576 		addressSpace->area_hint = NULL;
2577 
2578 	if (temp == NULL)
2579 		panic("remove_area_from_address_space: area not found in the address space's area list\n");
2580 }
2581 
2582 
2583 static void
2584 delete_area(vm_address_space* addressSpace, vm_area* area)
2585 {
2586 	rw_lock_write_lock(&sAreaHashLock);
2587 	hash_remove(sAreaHash, area);
2588 	rw_lock_write_unlock(&sAreaHashLock);
2589 
2590 	// At this point the area is removed from the global hash table, but
2591 	// still exists in the area list.
2592 
2593 	// Unmap the virtual address space the area occupied
2594 	vm_unmap_pages(area, area->base, area->size, !area->cache->temporary);
2595 
2596 	if (!area->cache->temporary)
2597 		area->cache->WriteModified();
2598 
2599 	arch_vm_unset_memory_type(area);
2600 	remove_area_from_address_space(addressSpace, area);
2601 	vm_put_address_space(addressSpace);
2602 
2603 	area->cache->RemoveArea(area);
2604 	area->cache->ReleaseRef();
2605 
2606 	free(area->page_protections);
2607 	free(area->name);
2608 	free(area);
2609 }
2610 
2611 
2612 status_t
2613 vm_delete_area(team_id team, area_id id, bool kernel)
2614 {
2615 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2616 
2617 	AddressSpaceWriteLocker locker;
2618 	vm_area* area;
2619 	status_t status = locker.SetFromArea(team, id, area);
2620 	if (status < B_OK)
2621 		return status;
2622 
2623 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2624 		return B_NOT_ALLOWED;
2625 
2626 	delete_area(locker.AddressSpace(), area);
2627 	return B_OK;
2628 }
2629 
2630 
2631 /*!	Creates a new cache on top of given cache, moves all areas from
2632 	the old cache to the new one, and changes the protection of all affected
2633 	areas' pages to read-only.
2634 	Preconditions:
2635 	- The given cache must be locked.
2636 	- All of the cache's areas' address spaces must be read locked.
2637 	- All of the cache's areas must have their \c no_cache_change flag cleared.
2638 */
2639 static status_t
2640 vm_copy_on_write_area(vm_cache* lowerCache)
2641 {
2642 	vm_cache* upperCache;
2643 
2644 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2645 
2646 	// We need to separate the cache from its areas. The cache goes one level
2647 	// deeper, and we create a new cache in between.
2648 
2649 	// create an anonymous cache
2650 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2651 		0, true);
2652 	if (status != B_OK)
2653 		return status;
2654 
2655 	upperCache->Lock();
2656 
2657 	upperCache->temporary = 1;
2658 	upperCache->scan_skip = lowerCache->scan_skip;
2659 	upperCache->virtual_base = lowerCache->virtual_base;
2660 	upperCache->virtual_end = lowerCache->virtual_end;
2661 
2662 	// transfer the lower cache areas to the upper cache
2663 	mutex_lock(&sAreaCacheLock);
2664 
2665 	upperCache->areas = lowerCache->areas;
2666 	lowerCache->areas = NULL;
2667 
2668 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2669 			tempArea = tempArea->cache_next) {
2670 		ASSERT(!tempArea->no_cache_change);
2671 
2672 		tempArea->cache = upperCache;
2673 		upperCache->AcquireRefLocked();
2674 		lowerCache->ReleaseRefLocked();
2675 	}
2676 
2677 	mutex_unlock(&sAreaCacheLock);
2678 
2679 	lowerCache->AddConsumer(upperCache);
2680 
2681 	// We now need to remap all pages from all of the cache's areas read-only,
2682 	// so that a copy will be created on the next write access
2683 
2684 	for (vm_area* tempArea = upperCache->areas; tempArea != NULL;
2685 			tempArea = tempArea->cache_next) {
2686 		// The area must be readable in the same way it was previously writable
2687 		uint32 protection = B_KERNEL_READ_AREA;
2688 		if ((tempArea->protection & B_READ_AREA) != 0)
2689 			protection |= B_READ_AREA;
2690 
2691 		vm_translation_map* map = &tempArea->address_space->translation_map;
2692 		map->ops->lock(map);
2693 		map->ops->protect(map, tempArea->base,
2694 			tempArea->base - 1 + tempArea->size, protection);
2695 		map->ops->unlock(map);
2696 	}
2697 
2698 	vm_area_put_locked_cache(upperCache);
2699 
2700 	return B_OK;
2701 }
2702 
2703 
2704 area_id
2705 vm_copy_area(team_id team, const char* name, void** _address,
2706 	uint32 addressSpec, uint32 protection, area_id sourceID)
2707 {
2708 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2709 
2710 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2711 		// set the same protection for the kernel as for userland
2712 		protection |= B_KERNEL_READ_AREA;
2713 		if (writableCopy)
2714 			protection |= B_KERNEL_WRITE_AREA;
2715 	}
2716 
2717 	// Do the locking: target address space, all address spaces associated with
2718 	// the source cache, and the cache itself.
2719 	MultiAddressSpaceLocker locker;
2720 	vm_address_space* targetAddressSpace;
2721 	vm_cache* cache;
2722 	vm_area* source;
2723 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2724 	if (status == B_OK) {
2725 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2726 			&cache, true);
2727 	}
2728 	if (status != B_OK)
2729 		return status;
2730 
2731 	AreaCacheLocker cacheLocker(cache);	// already locked
2732 
2733 	if (addressSpec == B_CLONE_ADDRESS) {
2734 		addressSpec = B_EXACT_ADDRESS;
2735 		*_address = (void*)source->base;
2736 	}
2737 
2738 	bool sharedArea = (source->protection & B_SHARED_AREA) != 0;
2739 
2740 	// First, create a cache on top of the source area, or use the existing
2741 	// one if this is a shared area.
2742 
2743 	vm_area* target;
2744 	status = map_backing_store(targetAddressSpace, cache, _address,
2745 		source->cache_offset, source->size, addressSpec, source->wiring,
2746 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2747 		&target, name, false, true);
2748 	if (status < B_OK)
2749 		return status;
2750 
2751 	if (sharedArea) {
2752 		// The new area uses the old area's cache, but map_backing_store()
2753 		// hasn't acquired a ref. So we have to do that now.
2754 		cache->AcquireRefLocked();
2755 	}
2756 
2757 	// If the source area is writable, we need to move it one layer up as well
2758 
2759 	if (!sharedArea) {
2760 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2761 			// TODO: do something more useful if this fails!
2762 			if (vm_copy_on_write_area(cache) < B_OK)
2763 				panic("vm_copy_on_write_area() failed!\n");
2764 		}
2765 	}
2766 
2767 	// we return the ID of the newly created area
2768 	return target->id;
2769 }
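/*	A minimal usage sketch (illustrative only -- targetTeam and sourceArea are
	placeholders): copying an area into another team at the same virtual
	address. For a non-shared, writable source this triggers the copy-on-write
	setup in vm_copy_on_write_area() above.

		void* address = NULL;
		area_id copy = vm_copy_area(targetTeam, "area copy", &address,
			B_CLONE_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceArea);
*/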
2770 
2771 
2772 //! You need to hold the cache lock when calling this function
2773 static int32
2774 count_writable_areas(vm_cache* cache, vm_area* ignoreArea)
2775 {
2776 	struct vm_area* area = cache->areas;
2777 	uint32 count = 0;
2778 
2779 	for (; area != NULL; area = area->cache_next) {
2780 		if (area != ignoreArea
2781 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2782 			count++;
2783 	}
2784 
2785 	return count;
2786 }
2787 
2788 
2789 static status_t
2790 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2791 	bool kernel)
2792 {
2793 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2794 		"%#lx)\n", team, areaID, newProtection));
2795 
2796 	if (!arch_vm_supports_protection(newProtection))
2797 		return B_NOT_SUPPORTED;
2798 
2799 	// lock address spaces and cache
2800 	MultiAddressSpaceLocker locker;
2801 	vm_cache* cache;
2802 	vm_area* area;
2803 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2804 		&cache, true);
2805 	AreaCacheLocker cacheLocker(cache);	// already locked
2806 
2807 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2808 		return B_NOT_ALLOWED;
2809 
2810 	if (area->protection == newProtection)
2811 		return B_OK;
2812 
2813 	if (team != vm_kernel_address_space_id()
2814 		&& area->address_space->id != team) {
2815 		// unless you're the kernel, you are only allowed to set
2816 		// the protection of your own areas
2817 		return B_NOT_ALLOWED;
2818 	}
2819 
2820 	bool changePageProtection = true;
2821 
2822 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2823 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2824 		// writable -> !writable
2825 
2826 		if (cache->source != NULL && cache->temporary) {
2827 			if (count_writable_areas(cache, area) == 0) {
2828 				// Since this cache is now backed by the pages of its source
2829 				// cache, we can reduce the cache's commitment to cover only the
2830 				// pages that are actually in this cache.
2831 
2832 				status = cache->Commit(cache->page_count * B_PAGE_SIZE);
2833 
2834 				// TODO: we may be able to join with our source cache, if
2835 				// count == 0
2836 			}
2837 		}
2838 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
2839 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
2840 		// !writable -> writable
2841 
2842 		if (!list_is_empty(&cache->consumers)) {
2843 			// There are consumers -- we have to insert a new cache. Fortunately
2844 			// vm_copy_on_write_area() does everything that's needed.
2845 			changePageProtection = false;
2846 			status = vm_copy_on_write_area(cache);
2847 		} else {
2848 			// No consumers, so we don't need to insert a new one.
2849 			if (cache->source != NULL && cache->temporary) {
2850 				// the cache's commitment must contain all possible pages
2851 				status = cache->Commit(cache->virtual_end
2852 					- cache->virtual_base);
2853 			}
2854 
2855 			if (status == B_OK && cache->source != NULL) {
2856 				// There's a source cache, hence we can't just change all pages'
2857 				// protection or we might allow writing into pages belonging to
2858 				// a lower cache.
2859 				changePageProtection = false;
2860 
2861 				struct vm_translation_map* map
2862 					= &area->address_space->translation_map;
2863 				map->ops->lock(map);
2864 
2865 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2866 						vm_page* page = it.Next();) {
2867 					addr_t address = area->base
2868 						+ (page->cache_offset << PAGE_SHIFT);
2869 					map->ops->protect(map, address, address - 1 + B_PAGE_SIZE,
2870 						newProtection);
2871 				}
2872 
2873 				map->ops->unlock(map);
2874 			}
2875 		}
2876 	} else {
2877 		// we don't have anything special to do in all other cases
2878 	}
2879 
2880 	if (status == B_OK) {
2881 		// remap existing pages in this cache
2882 		struct vm_translation_map* map = &area->address_space->translation_map;
2883 
2884 		if (changePageProtection) {
2885 			map->ops->lock(map);
2886 			map->ops->protect(map, area->base, area->base - 1 + area->size,
2887 				newProtection);
2888 			map->ops->unlock(map);
2889 		}
2890 
2891 		area->protection = newProtection;
2892 	}
2893 
2894 	return status;
2895 }
2896 
2897 
2898 status_t
2899 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr)
2900 {
2901 	vm_address_space* addressSpace = vm_get_address_space(team);
2902 	if (addressSpace == NULL)
2903 		return B_BAD_TEAM_ID;
2904 
2905 	uint32 dummyFlags;
2906 	status_t status = addressSpace->translation_map.ops->query(
2907 		&addressSpace->translation_map, vaddr, paddr, &dummyFlags);
2908 
2909 	vm_put_address_space(addressSpace);
2910 	return status;
2911 }
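/*	A minimal usage sketch (illustrative only): looking up the physical
	address a team's virtual address is currently mapped to.

		addr_t physicalAddress;
		if (vm_get_page_mapping(team, virtualAddress, &physicalAddress)
				== B_OK) {
			// physicalAddress now holds what the translation map reported
			// for virtualAddress
		}
*/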
2912 
2913 
2914 static inline addr_t
2915 virtual_page_address(vm_area* area, vm_page* page)
2916 {
2917 	return area->base
2918 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
2919 }
2920 
2921 
2922 bool
2923 vm_test_map_modification(vm_page* page)
2924 {
2925 	MutexLocker locker(sMappingLock);
2926 
2927 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2928 	vm_page_mapping* mapping;
2929 	while ((mapping = iterator.Next()) != NULL) {
2930 		vm_area* area = mapping->area;
2931 		vm_translation_map* map = &area->address_space->translation_map;
2932 
2933 		addr_t physicalAddress;
2934 		uint32 flags;
2935 		map->ops->lock(map);
2936 		map->ops->query(map, virtual_page_address(area, page),
2937 			&physicalAddress, &flags);
2938 		map->ops->unlock(map);
2939 
2940 		if ((flags & PAGE_MODIFIED) != 0)
2941 			return true;
2942 	}
2943 
2944 	return false;
2945 }
2946 
2947 
2948 int32
2949 vm_test_map_activation(vm_page* page, bool* _modified)
2950 {
2951 	int32 activation = 0;
2952 	bool modified = false;
2953 
2954 	MutexLocker locker(sMappingLock);
2955 
2956 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2957 	vm_page_mapping* mapping;
2958 	while ((mapping = iterator.Next()) != NULL) {
2959 		vm_area* area = mapping->area;
2960 		vm_translation_map* map = &area->address_space->translation_map;
2961 
2962 		addr_t physicalAddress;
2963 		uint32 flags;
2964 		map->ops->lock(map);
2965 		map->ops->query(map, virtual_page_address(area, page),
2966 			&physicalAddress, &flags);
2967 		map->ops->unlock(map);
2968 
2969 		if ((flags & PAGE_ACCESSED) != 0)
2970 			activation++;
2971 		if ((flags & PAGE_MODIFIED) != 0)
2972 			modified = true;
2973 	}
2974 
2975 	if (_modified != NULL)
2976 		*_modified = modified;
2977 
2978 	return activation;
2979 }
2980 
2981 
2982 void
2983 vm_clear_map_flags(vm_page* page, uint32 flags)
2984 {
2985 	MutexLocker locker(sMappingLock);
2986 
2987 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2988 	vm_page_mapping* mapping;
2989 	while ((mapping = iterator.Next()) != NULL) {
2990 		vm_area* area = mapping->area;
2991 		vm_translation_map* map = &area->address_space->translation_map;
2992 
2993 		map->ops->lock(map);
2994 		map->ops->clear_flags(map, virtual_page_address(area, page), flags);
2995 		map->ops->unlock(map);
2996 	}
2997 }
2998 
2999 
3000 /*!	Removes all mappings from a page.
3001 	After you've called this function, the page is unmapped from memory.
3002 	The accumulated page flags of all mappings can be found in \a _flags.
3003 */
3004 void
3005 vm_remove_all_page_mappings(vm_page* page, uint32* _flags)
3006 {
3007 	uint32 accumulatedFlags = 0;
3008 	MutexLocker locker(sMappingLock);
3009 
3010 	vm_page_mappings queue;
3011 	queue.MoveFrom(&page->mappings);
3012 
3013 	vm_page_mappings::Iterator iterator = queue.GetIterator();
3014 	vm_page_mapping* mapping;
3015 	while ((mapping = iterator.Next()) != NULL) {
3016 		vm_area* area = mapping->area;
3017 		vm_translation_map* map = &area->address_space->translation_map;
3018 		addr_t physicalAddress;
3019 		uint32 flags;
3020 
3021 		map->ops->lock(map);
3022 		addr_t address = virtual_page_address(area, page);
3023 		map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1));
3024 		map->ops->flush(map);
3025 		map->ops->query(map, address, &physicalAddress, &flags);
3026 		map->ops->unlock(map);
3027 
3028 		area->mappings.Remove(mapping);
3029 
3030 		accumulatedFlags |= flags;
3031 	}
3032 
3033 	if (page->wired_count == 0 && !queue.IsEmpty())
3034 		atomic_add(&gMappedPagesCount, -1);
3035 
3036 	locker.Unlock();
3037 
3038 	// free now unused mappings
3039 
3040 	while ((mapping = queue.RemoveHead()) != NULL) {
3041 		free(mapping);
3042 	}
3043 
3044 	if (_flags != NULL)
3045 		*_flags = accumulatedFlags;
3046 }
3047 
3048 
3049 bool
3050 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified)
3051 {
3052 	vm_translation_map* map = &area->address_space->translation_map;
3053 
3054 	map->ops->lock(map);
3055 
3056 	addr_t physicalAddress;
3057 	uint32 flags;
3058 	status_t status = map->ops->query(map, virtualAddress, &physicalAddress,
3059 		&flags);
3060 	if (status < B_OK || (flags & PAGE_PRESENT) == 0) {
3061 		map->ops->unlock(map);
3062 		return false;
3063 	}
3064 	vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3065 	if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) {
3066 		panic("area %p looking up page failed for pa 0x%lx\n", area,
3067 			physicalAddress);
3068 	}
3069 
3070 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE)
3071 		decrement_page_wired_count(page);
3072 
3073 	map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1);
3074 
3075 	if (preserveModified) {
3076 		map->ops->flush(map);
3077 
3078 		status = map->ops->query(map, virtualAddress, &physicalAddress, &flags);
3079 		if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED)
3080 			vm_page_set_state(page, PAGE_STATE_MODIFIED);
3081 	}
3082 
3083 	map->ops->unlock(map);
3084 
3085 	if (area->wiring == B_NO_LOCK) {
3086 		vm_page_mapping* mapping;
3087 
3088 		mutex_lock(&sMappingLock);
3089 		map->ops->lock(map);
3090 
3091 		vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3092 		while (iterator.HasNext()) {
3093 			mapping = iterator.Next();
3094 
3095 			if (mapping->area == area) {
3096 				area->mappings.Remove(mapping);
3097 				page->mappings.Remove(mapping);
3098 
3099 				if (page->mappings.IsEmpty() && page->wired_count == 0)
3100 					atomic_add(&gMappedPagesCount, -1);
3101 
3102 				map->ops->unlock(map);
3103 				mutex_unlock(&sMappingLock);
3104 
3105 				free(mapping);
3106 
3107 				return true;
3108 			}
3109 		}
3110 
3111 		map->ops->unlock(map);
3112 		mutex_unlock(&sMappingLock);
3113 
3114 		dprintf("vm_unmap_page: couldn't find mapping for area %p in page %p\n",
3115 			area, page);
3116 	}
3117 
3118 	return true;
3119 }
3120 
3121 
3122 status_t
3123 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified)
3124 {
3125 	vm_translation_map* map = &area->address_space->translation_map;
3126 	addr_t end = base + (size - 1);
3127 
3128 	map->ops->lock(map);
3129 
3130 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
3131 		// iterate through all pages and decrease their wired count
3132 		for (addr_t virtualAddress = base; virtualAddress < end;
3133 				virtualAddress += B_PAGE_SIZE) {
3134 			addr_t physicalAddress;
3135 			uint32 flags;
3136 			status_t status = map->ops->query(map, virtualAddress,
3137 				&physicalAddress, &flags);
3138 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3139 				continue;
3140 
3141 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3142 			if (page == NULL) {
3143 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3144 					physicalAddress);
3145 			}
3146 
3147 			decrement_page_wired_count(page);
3148 		}
3149 	}
3150 
3151 	map->ops->unmap(map, base, end);
3152 	if (preserveModified) {
3153 		map->ops->flush(map);
3154 
3155 		for (addr_t virtualAddress = base; virtualAddress < end;
3156 				virtualAddress += B_PAGE_SIZE) {
3157 			addr_t physicalAddress;
3158 			uint32 flags;
3159 			status_t status = map->ops->query(map, virtualAddress,
3160 				&physicalAddress, &flags);
3161 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
3162 				continue;
3163 
3164 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3165 			if (page == NULL) {
3166 				panic("area %p looking up page failed for pa 0x%lx\n", area,
3167 					physicalAddress);
3168 			}
3169 
3170 			if ((flags & PAGE_MODIFIED) != 0
3171 				&& page->state != PAGE_STATE_MODIFIED)
3172 				vm_page_set_state(page, PAGE_STATE_MODIFIED);
3173 		}
3174 	}
3175 	map->ops->unlock(map);
3176 
3177 	if (area->wiring == B_NO_LOCK) {
3178 		uint32 startOffset = (area->cache_offset + base - area->base)
3179 			>> PAGE_SHIFT;
3180 		uint32 endOffset = startOffset + (size >> PAGE_SHIFT);
3181 		vm_page_mapping* mapping;
3182 		vm_area_mappings queue;
3183 
3184 		mutex_lock(&sMappingLock);
3185 		map->ops->lock(map);
3186 
3187 		vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3188 		while (iterator.HasNext()) {
3189 			mapping = iterator.Next();
3190 
3191 			vm_page* page = mapping->page;
3192 			if (page->cache_offset < startOffset
3193 				|| page->cache_offset >= endOffset)
3194 				continue;
3195 
3196 			page->mappings.Remove(mapping);
3197 			iterator.Remove();
3198 
3199 			if (page->mappings.IsEmpty() && page->wired_count == 0)
3200 				atomic_add(&gMappedPagesCount, -1);
3201 
3202 			queue.Add(mapping);
3203 		}
3204 
3205 		map->ops->unlock(map);
3206 		mutex_unlock(&sMappingLock);
3207 
3208 		while ((mapping = queue.RemoveHead()) != NULL) {
3209 			free(mapping);
3210 		}
3211 	}
3212 
3213 	return B_OK;
3214 }
3215 
3216 
3217 /*!	When calling this function, you need to have pages reserved! */
3218 status_t
3219 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection)
3220 {
3221 	vm_translation_map* map = &area->address_space->translation_map;
3222 	vm_page_mapping* mapping = NULL;
3223 
3224 	if (area->wiring == B_NO_LOCK) {
3225 		mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping));
3226 		if (mapping == NULL)
3227 			return B_NO_MEMORY;
3228 
3229 		mapping->page = page;
3230 		mapping->area = area;
3231 	}
3232 
3233 	map->ops->lock(map);
3234 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
3235 		protection);
3236 	map->ops->unlock(map);
3237 
3238 	if (area->wiring != B_NO_LOCK) {
3239 		increment_page_wired_count(page);
3240 	} else {
3241 		// insert mapping into lists
3242 		MutexLocker locker(sMappingLock);
3243 
3244 		if (page->mappings.IsEmpty() && page->wired_count == 0)
3245 			atomic_add(&gMappedPagesCount, 1);
3246 
3247 		page->mappings.Add(mapping);
3248 		area->mappings.Add(mapping);
3249 	}
3250 
3251 	if (page->usage_count < 0)
3252 		page->usage_count = 1;
3253 
3254 	if (page->state != PAGE_STATE_MODIFIED)
3255 		vm_page_set_state(page, PAGE_STATE_ACTIVE);
3256 
3257 	return B_OK;
3258 }
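/*	A minimal usage sketch (illustrative only): as noted above, pages have to
	be reserved before calling vm_map_page(). The callers in this file follow
	the reserve/map/unreserve pattern, with the count typically obtained from
	map->ops->map_max_pages_need():

		vm_page_reserve_pages(reservePages);
		// ... allocate or look up the page to map ...
		vm_map_page(area, page, address, protection);
		vm_page_unreserve_pages(reservePages);
*/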
3259 
3260 
3261 static int
3262 display_mem(int argc, char** argv)
3263 {
3264 	bool physical = false;
3265 	addr_t copyAddress;
3266 	int32 displayWidth;
3267 	int32 itemSize;
3268 	int32 num = -1;
3269 	addr_t address;
3270 	int i = 1, j;
3271 
3272 	if (argc > 1 && argv[1][0] == '-') {
3273 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3274 			physical = true;
3275 			i++;
3276 		} else
3277 			i = 99;
3278 	}
3279 
3280 	if (argc < i + 1 || argc > i + 2) {
3281 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3282 			"\tdl - 8 bytes\n"
3283 			"\tdw - 4 bytes\n"
3284 			"\tds - 2 bytes\n"
3285 			"\tdb - 1 byte\n"
3286 			"\tstring - a whole string\n"
3287 			"  -p or --physical only allows memory from a single page to be "
3288 			"displayed.\n");
3289 		return 0;
3290 	}
3291 
3292 	address = parse_expression(argv[i]);
3293 
3294 	if (argc > i + 1)
3295 		num = parse_expression(argv[i + 1]);
3296 
3297 	// build the format string
3298 	if (strcmp(argv[0], "db") == 0) {
3299 		itemSize = 1;
3300 		displayWidth = 16;
3301 	} else if (strcmp(argv[0], "ds") == 0) {
3302 		itemSize = 2;
3303 		displayWidth = 8;
3304 	} else if (strcmp(argv[0], "dw") == 0) {
3305 		itemSize = 4;
3306 		displayWidth = 4;
3307 	} else if (strcmp(argv[0], "dl") == 0) {
3308 		itemSize = 8;
3309 		displayWidth = 2;
3310 	} else if (strcmp(argv[0], "string") == 0) {
3311 		itemSize = 1;
3312 		displayWidth = -1;
3313 	} else {
3314 		kprintf("display_mem called in an invalid way!\n");
3315 		return 0;
3316 	}
3317 
3318 	if (num <= 0)
3319 		num = displayWidth;
3320 
3321 	void* physicalPageHandle = NULL;
3322 
3323 	if (physical) {
3324 		int32 offset = address & (B_PAGE_SIZE - 1);
3325 		if (num * itemSize + offset > B_PAGE_SIZE) {
3326 			num = (B_PAGE_SIZE - offset) / itemSize;
3327 			kprintf("NOTE: number of bytes has been cut to page size\n");
3328 		}
3329 
3330 		address = ROUNDOWN(address, B_PAGE_SIZE);
3331 
3332 		if (vm_get_physical_page_debug(address, &copyAddress,
3333 				&physicalPageHandle) != B_OK) {
3334 			kprintf("getting the hardware page failed.");
3335 			return 0;
3336 		}
3337 
3338 		address += offset;
3339 		copyAddress += offset;
3340 	} else
3341 		copyAddress = address;
3342 
3343 	if (!strcmp(argv[0], "string")) {
3344 		kprintf("%p \"", (char*)copyAddress);
3345 
3346 		// string mode
3347 		for (i = 0; true; i++) {
3348 			char c;
3349 			if (user_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
3350 				|| c == '\0')
3351 				break;
3352 
3353 			if (c == '\n')
3354 				kprintf("\\n");
3355 			else if (c == '\t')
3356 				kprintf("\\t");
3357 			else {
3358 				if (!isprint(c))
3359 					c = '.';
3360 
3361 				kprintf("%c", c);
3362 			}
3363 		}
3364 
3365 		kprintf("\"\n");
3366 	} else {
3367 		// number mode
3368 		for (i = 0; i < num; i++) {
3369 			uint32 value;
3370 
3371 			if ((i % displayWidth) == 0) {
3372 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3373 				if (i != 0)
3374 					kprintf("\n");
3375 
3376 				kprintf("[0x%lx]  ", address + i * itemSize);
3377 
3378 				for (j = 0; j < displayed; j++) {
3379 					char c;
3380 					if (user_memcpy(&c, (char*)copyAddress + i * itemSize + j,
3381 							1) != B_OK) {
3382 						displayed = j;
3383 						break;
3384 					}
3385 					if (!isprint(c))
3386 						c = '.';
3387 
3388 					kprintf("%c", c);
3389 				}
3390 				if (num > displayWidth) {
3391 					// make sure the spacing in the last line is correct
3392 					for (j = displayed; j < displayWidth * itemSize; j++)
3393 						kprintf(" ");
3394 				}
3395 				kprintf("  ");
3396 			}
3397 
3398 			if (user_memcpy(&value, (uint8*)copyAddress + i * itemSize,
3399 					itemSize) != B_OK) {
3400 				kprintf("read fault");
3401 				break;
3402 			}
3403 
3404 			switch (itemSize) {
3405 				case 1:
3406 					kprintf(" %02x", *(uint8*)&value);
3407 					break;
3408 				case 2:
3409 					kprintf(" %04x", *(uint16*)&value);
3410 					break;
3411 				case 4:
3412 					kprintf(" %08lx", *(uint32*)&value);
3413 					break;
3414 				case 8:
3415 					kprintf(" %016Lx", *(uint64*)&value);
3416 					break;
3417 			}
3418 		}
3419 
3420 		kprintf("\n");
3421 	}
3422 
3423 	if (physical) {
3424 		copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE);
3425 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3426 	}
3427 	return 0;
3428 }
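/*	Example invocations from the kernel debugger, matching the usage text
	above (the addresses are placeholders):

		dw 0x80001234 8        dump eight 4-byte values starting at the address
		db -p 0xb8000 16       dump 16 bytes within a single physical page
		string 0x80001234      print the zero-terminated string at the address
*/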
3429 
3430 
3431 static void
3432 dump_cache_tree_recursively(vm_cache* cache, int level,
3433 	vm_cache* highlightCache)
3434 {
3435 	// print this cache
3436 	for (int i = 0; i < level; i++)
3437 		kprintf("  ");
3438 	if (cache == highlightCache)
3439 		kprintf("%p <--\n", cache);
3440 	else
3441 		kprintf("%p\n", cache);
3442 
3443 	// recursively print its consumers
3444 	vm_cache* consumer = NULL;
3445 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3446 			consumer)) != NULL) {
3447 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3448 	}
3449 }
3450 
3451 
3452 static int
3453 dump_cache_tree(int argc, char** argv)
3454 {
3455 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3456 		kprintf("usage: %s <address>\n", argv[0]);
3457 		return 0;
3458 	}
3459 
3460 	addr_t address = parse_expression(argv[1]);
3461 	if (address == 0)
3462 		return 0;
3463 
3464 	vm_cache* cache = (vm_cache*)address;
3465 	vm_cache* root = cache;
3466 
3467 	// find the root cache (the transitive source)
3468 	while (root->source != NULL)
3469 		root = root->source;
3470 
3471 	dump_cache_tree_recursively(root, 0, cache);
3472 
3473 	return 0;
3474 }
3475 
3476 
3477 static const char*
3478 cache_type_to_string(int32 type)
3479 {
3480 	switch (type) {
3481 		case CACHE_TYPE_RAM:
3482 			return "RAM";
3483 		case CACHE_TYPE_DEVICE:
3484 			return "device";
3485 		case CACHE_TYPE_VNODE:
3486 			return "vnode";
3487 		case CACHE_TYPE_NULL:
3488 			return "null";
3489 
3490 		default:
3491 			return "unknown";
3492 	}
3493 }
3494 
3495 
3496 #if DEBUG_CACHE_LIST
3497 
3498 static void
3499 update_cache_info_recursively(vm_cache* cache, cache_info& info)
3500 {
3501 	info.page_count += cache->page_count;
3502 	if (cache->type == CACHE_TYPE_RAM)
3503 		info.committed += cache->committed_size;
3504 
3505 	// recurse
3506 	vm_cache* consumer = NULL;
3507 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3508 			consumer)) != NULL) {
3509 		update_cache_info_recursively(consumer, info);
3510 	}
3511 }
3512 
3513 
3514 static int
3515 cache_info_compare_page_count(const void* _a, const void* _b)
3516 {
3517 	const cache_info* a = (const cache_info*)_a;
3518 	const cache_info* b = (const cache_info*)_b;
3519 	if (a->page_count == b->page_count)
3520 		return 0;
3521 	return a->page_count < b->page_count ? 1 : -1;
3522 }
3523 
3524 
3525 static int
3526 cache_info_compare_committed(const void* _a, const void* _b)
3527 {
3528 	const cache_info* a = (const cache_info*)_a;
3529 	const cache_info* b = (const cache_info*)_b;
3530 	if (a->committed == b->committed)
3531 		return 0;
3532 	return a->committed < b->committed ? 1 : -1;
3533 }
3534 
3535 
3536 static void
3537 dump_caches_recursively(vm_cache* cache, cache_info& info, int level)
3538 {
3539 	for (int i = 0; i < level; i++)
3540 		kprintf("  ");
3541 
3542 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3543 		cache_type_to_string(cache->type), cache->virtual_base,
3544 		cache->virtual_end, cache->page_count);
3545 
3546 	if (level == 0)
3547 		kprintf("/%lu", info.page_count);
3548 
3549 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3550 		kprintf(", committed: %lld", cache->committed_size);
3551 
3552 		if (level == 0)
3553 			kprintf("/%lu", info.committed);
3554 	}
3555 
3556 	// areas
3557 	if (cache->areas != NULL) {
3558 		vm_area* area = cache->areas;
3559 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3560 			area->address_space->id);
3561 
3562 		while (area->cache_next != NULL) {
3563 			area = area->cache_next;
3564 			kprintf(", %ld", area->id);
3565 		}
3566 	}
3567 
3568 	kputs("\n");
3569 
3570 	// recurse
3571 	vm_cache* consumer = NULL;
3572 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3573 			consumer)) != NULL) {
3574 		dump_caches_recursively(consumer, info, level + 1);
3575 	}
3576 }
3577 
3578 
3579 static int
3580 dump_caches(int argc, char** argv)
3581 {
3582 	if (sCacheInfoTable == NULL) {
3583 		kprintf("No cache info table!\n");
3584 		return 0;
3585 	}
3586 
3587 	bool sortByPageCount = true;
3588 
3589 	for (int32 i = 1; i < argc; i++) {
3590 		if (strcmp(argv[i], "-c") == 0) {
3591 			sortByPageCount = false;
3592 		} else {
3593 			print_debugger_command_usage(argv[0]);
3594 			return 0;
3595 		}
3596 	}
3597 
3598 	uint32 totalCount = 0;
3599 	uint32 rootCount = 0;
3600 	off_t totalCommitted = 0;
3601 	page_num_t totalPages = 0;
3602 
3603 	vm_cache* cache = gDebugCacheList;
3604 	while (cache) {
3605 		totalCount++;
3606 		if (cache->source == NULL) {
3607 			cache_info stackInfo;
3608 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3609 				? sCacheInfoTable[rootCount] : stackInfo;
3610 			rootCount++;
3611 			info.cache = cache;
3612 			info.page_count = 0;
3613 			info.committed = 0;
3614 			update_cache_info_recursively(cache, info);
3615 			totalCommitted += info.committed;
3616 			totalPages += info.page_count;
3617 		}
3618 
3619 		cache = cache->debug_next;
3620 	}
3621 
3622 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3623 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3624 			sortByPageCount
3625 				? &cache_info_compare_page_count
3626 				: &cache_info_compare_committed);
3627 	}
3628 
3629 	kprintf("total committed memory: %lld, total used pages: %lu\n",
3630 		totalCommitted, totalPages);
3631 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3632 		"tree...\n\n", totalCount, rootCount,
3633 		sortByPageCount ? "page count" : "committed size");
3634 
3635 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3636 		for (uint32 i = 0; i < rootCount; i++) {
3637 			cache_info& info = sCacheInfoTable[i];
3638 			dump_caches_recursively(info.cache, info, 0);
3639 		}
3640 	} else
3641 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3642 
3643 	return 0;
3644 }
3645 
3646 #endif	// DEBUG_CACHE_LIST
3647 
3648 
3649 static int
3650 dump_cache(int argc, char** argv)
3651 {
3652 	vm_cache* cache;
3653 	bool showPages = false;
3654 	int i = 1;
3655 
3656 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3657 		kprintf("usage: %s [-ps] <address>\n"
3658 			"  if -p is specified, all pages are shown; if -s is used,\n"
3659 			"  only the cache info is shown.\n", argv[0]);
3660 		return 0;
3661 	}
3662 	while (argv[i][0] == '-') {
3663 		char* arg = argv[i] + 1;
3664 		while (arg[0]) {
3665 			if (arg[0] == 'p')
3666 				showPages = true;
3667 			arg++;
3668 		}
3669 		i++;
3670 	}
3671 	if (argv[i] == NULL) {
3672 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3673 		return 0;
3674 	}
3675 
3676 	addr_t address = parse_expression(argv[i]);
3677 	if (address == 0)
3678 		return 0;
3679 
3680 	cache = (vm_cache*)address;
3681 
3682 	kprintf("CACHE %p:\n", cache);
3683 	kprintf("  ref_count:    %ld\n", cache->RefCount());
3684 	kprintf("  source:       %p\n", cache->source);
3685 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
3686 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
3687 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
3688 	kprintf("  temporary:    %ld\n", cache->temporary);
3689 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
3690 	kprintf("  lock:         %p\n", cache->GetLock());
3691 #if KDEBUG
3692 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
3693 #endif
3694 	kprintf("  areas:\n");
3695 
3696 	for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) {
3697 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
3698 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
3699 		kprintf("\tprotection: 0x%lx\n", area->protection);
3700 		kprintf("\towner:      0x%lx\n", area->address_space->id);
3701 	}
3702 
3703 	kprintf("  consumers:\n");
3704 	vm_cache* consumer = NULL;
3705 	while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers,
3706 				consumer)) != NULL) {
3707 		kprintf("\t%p\n", consumer);
3708 	}
3709 
3710 	kprintf("  pages:\n");
3711 	if (showPages) {
3712 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3713 				vm_page* page = it.Next();) {
3714 			if (page->type == PAGE_TYPE_PHYSICAL) {
3715 				kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) "
3716 					"wired_count %u\n", page, page->physical_page_number,
3717 					page->cache_offset, page->type, page->state,
3718 					page_state_to_string(page->state), page->wired_count);
3719 			} else if (page->type == PAGE_TYPE_DUMMY) {
3720 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3721 					page, page->state, page_state_to_string(page->state));
3722 			} else
3723 				kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
3724 		}
3725 	} else
3726 		kprintf("\t%ld in cache\n", cache->page_count);
3727 
3728 	return 0;
3729 }
3730 
3731 
3732 static void
3733 dump_area_struct(vm_area* area, bool mappings)
3734 {
3735 	kprintf("AREA: %p\n", area);
3736 	kprintf("name:\t\t'%s'\n", area->name);
3737 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
3738 	kprintf("id:\t\t0x%lx\n", area->id);
3739 	kprintf("base:\t\t0x%lx\n", area->base);
3740 	kprintf("size:\t\t0x%lx\n", area->size);
3741 	kprintf("protection:\t0x%lx\n", area->protection);
3742 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3743 	kprintf("memory_type:\t0x%x\n", area->memory_type);
3744 	kprintf("cache:\t\t%p\n", area->cache);
3745 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3746 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3747 	kprintf("cache_next:\t%p\n", area->cache_next);
3748 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3749 
3750 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
3751 	if (mappings) {
3752 		kprintf("page mappings:\n");
3753 		while (iterator.HasNext()) {
3754 			vm_page_mapping* mapping = iterator.Next();
3755 			kprintf("  %p", mapping->page);
3756 		}
3757 		kprintf("\n");
3758 	} else {
3759 		uint32 count = 0;
3760 		while (iterator.Next() != NULL) {
3761 			count++;
3762 		}
3763 		kprintf("page mappings:\t%lu\n", count);
3764 	}
3765 }
3766 
3767 
3768 static int
3769 dump_area(int argc, char** argv)
3770 {
3771 	bool mappings = false;
3772 	bool found = false;
3773 	int32 index = 1;
3774 	vm_area* area;
3775 	addr_t num;
3776 
3777 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3778 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3779 			"All areas matching either id/address/name are listed. You can\n"
3780 			"restrict the check to a specific attribute by prefixing the\n"
3781 			"specifier with one of the id/contains/address/name keywords.\n"
3782 			"-m shows the area's mappings as well.\n");
3783 		return 0;
3784 	}
3785 
3786 	if (!strcmp(argv[1], "-m")) {
3787 		mappings = true;
3788 		index++;
3789 	}
3790 
3791 	int32 mode = 0xf;
3792 	if (!strcmp(argv[index], "id"))
3793 		mode = 1;
3794 	else if (!strcmp(argv[index], "contains"))
3795 		mode = 2;
3796 	else if (!strcmp(argv[index], "name"))
3797 		mode = 4;
3798 	else if (!strcmp(argv[index], "address"))
3799 		mode = 0;
3800 	if (mode != 0xf)
3801 		index++;
3802 
3803 	if (index >= argc) {
3804 		kprintf("No area specifier given.\n");
3805 		return 0;
3806 	}
3807 
3808 	num = parse_expression(argv[index]);
3809 
3810 	if (mode == 0) {
3811 		dump_area_struct((struct vm_area*)num, mappings);
3812 	} else {
3813 		// walk through the area list, looking for the arguments as a name
3814 		struct hash_iterator iter;
3815 
3816 		hash_open(sAreaHash, &iter);
3817 		while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
3818 			if (((mode & 4) != 0 && area->name != NULL
3819 					&& !strcmp(argv[index], area->name))
3820 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3821 					|| (((mode & 2) != 0 && area->base <= num
3822 						&& area->base + area->size > num))))) {
3823 				dump_area_struct(area, mappings);
3824 				found = true;
3825 			}
3826 		}
3827 
3828 		if (!found)
3829 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3830 	}
3831 
3832 	return 0;
3833 }
3834 
3835 
3836 static int
3837 dump_area_list(int argc, char** argv)
3838 {
3839 	vm_area* area;
3840 	struct hash_iterator iter;
3841 	const char* name = NULL;
3842 	int32 id = 0;
3843 
3844 	if (argc > 1) {
3845 		id = parse_expression(argv[1]);
3846 		if (id == 0)
3847 			name = argv[1];
3848 	}
3849 
3850 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3851 
3852 	hash_open(sAreaHash, &iter);
3853 	while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) {
3854 		if ((id != 0 && area->address_space->id != id)
3855 			|| (name != NULL && strstr(area->name, name) == NULL))
3856 			continue;
3857 
3858 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3859 			(void*)area->base, (void*)area->size, area->protection, area->wiring,
3860 			area->name);
3861 	}
3862 	hash_close(sAreaHash, &iter, false);
3863 	return 0;
3864 }
3865 
3866 
3867 static int
3868 dump_available_memory(int argc, char** argv)
3869 {
3870 	kprintf("Available memory: %Ld/%lu bytes\n",
3871 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
3872 	return 0;
3873 }
3874 
3875 
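/*!	Deletes all areas of the given address space: reserved areas are simply
	unlinked and freed, all remaining areas are torn down via delete_area().
	The address space is write locked while doing so.
*/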
3876 status_t
3877 vm_delete_areas(struct vm_address_space* addressSpace)
3878 {
3879 	vm_area* area;
3880 	vm_area* next;
3881 	vm_area* last = NULL;
3882 
3883 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3884 		addressSpace->id));
3885 
3886 	rw_lock_write_lock(&addressSpace->lock);
3887 
3888 	// remove all reserved areas in this address space
3889 
3890 	for (area = addressSpace->areas; area; area = next) {
3891 		next = area->address_space_next;
3892 
3893 		if (area->id == RESERVED_AREA_ID) {
3894 			// just remove it
3895 			if (last)
3896 				last->address_space_next = area->address_space_next;
3897 			else
3898 				addressSpace->areas = area->address_space_next;
3899 
3900 			vm_put_address_space(addressSpace);
3901 			free(area);
3902 			continue;
3903 		}
3904 
3905 		last = area;
3906 	}
3907 
3908 	// delete all the areas in this address space
3909 
3910 	for (area = addressSpace->areas; area; area = next) {
3911 		next = area->address_space_next;
3912 		delete_area(addressSpace, area);
3913 	}
3914 
3915 	rw_lock_write_unlock(&addressSpace->lock);
3916 	return B_OK;
3917 }
3918 
3919 
3920 static area_id
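/*!	Returns the ID of the area that contains the given address in the
	team's address space, \c B_BAD_TEAM_ID if the team's address space could
	not be locked, or \c B_ERROR if no area covers the address.
*/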
3921 vm_area_for(team_id team, addr_t address)
3922 {
3923 	AddressSpaceReadLocker locker(team);
3924 	if (!locker.IsLocked())
3925 		return B_BAD_TEAM_ID;
3926 
3927 	vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
3928 	if (area != NULL)
3929 		return area->id;
3930 
3931 	return B_ERROR;
3932 }
3933 
3934 
3935 /*!	Frees physical pages that were used during the boot process.
3936 */
3937 static void
3938 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end)
3939 {
3940 	// free all physical pages in the specified range
3941 
3942 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3943 		addr_t physicalAddress;
3944 		uint32 flags;
3945 
3946 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
3947 			vm_page* page = vm_lookup_page(current / B_PAGE_SIZE);
3948 			if (page != NULL)
3949 				vm_page_set_state(page, PAGE_STATE_FREE);
3950 		}
3951 	}
3952 
3953 	// unmap the memory
3954 	map->ops->unmap(map, start, end - 1);
3955 }
3956 
3957 
3958 void
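/*!	Unmaps and frees the physical pages in those parts of the given kernel
	address range that are not covered by any area, i.e. memory that was only
	needed during the boot process.
*/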
3959 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3960 {
3961 	vm_translation_map* map = &vm_kernel_address_space()->translation_map;
3962 	addr_t end = start + size;
3963 	addr_t lastEnd = start;
3964 	vm_area* area;
3965 
3966 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3967 		(void*)start, (void*)end));
3968 
3969 	// The areas are sorted in virtual address space order, so
3970 	// we just have to find the holes between them that fall
3971 	// into the range we should dispose of
3972 
3973 	map->ops->lock(map);
3974 
3975 	for (area = vm_kernel_address_space()->areas; area != NULL;
3976 			area = area->address_space_next) {
3977 		addr_t areaStart = area->base;
3978 		addr_t areaEnd = areaStart + area->size;
3979 
3980 		if (area->id == RESERVED_AREA_ID)
3981 			continue;
3982 
3983 		if (areaEnd >= end) {
3984 			// we are done, the areas are already beyond what we have to free
3985 			lastEnd = end;
3986 			break;
3987 		}
3988 
3989 		if (areaStart > lastEnd) {
3990 			// this is something we can free
3991 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3992 				(void*)areaStart));
3993 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
3994 		}
3995 
3996 		lastEnd = areaEnd;
3997 	}
3998 
3999 	if (lastEnd < end) {
4000 		// we can also get rid of some space at the end of the area
4001 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4002 			(void*)end));
4003 		unmap_and_free_physical_pages(map, lastEnd, end);
4004 	}
4005 
4006 	map->ops->unlock(map);
4007 }
4008 
4009 
4010 static void
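/*!	Creates wired "<name>_text" and "<name>_data" areas for the text and data
	regions of a preloaded image, deriving the area names from the image's
	file name.
*/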
4011 create_preloaded_image_areas(struct preloaded_image* image)
4012 {
4013 	char name[B_OS_NAME_LENGTH];
4014 	void* address;
4015 	int32 length;
4016 
4017 	// use file name to create a good area name
4018 	char* fileName = strrchr(image->name, '/');
4019 	if (fileName == NULL)
4020 		fileName = image->name;
4021 	else
4022 		fileName++;
4023 
4024 	length = strlen(fileName);
4025 	// make sure there is enough space for the suffix
4026 	if (length > 25)
4027 		length = 25;
4028 
4029 	memcpy(name, fileName, length);
4030 	strcpy(name + length, "_text");
4031 	address = (void*)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
4032 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4033 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4034 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4035 		// this will later be remapped read-only/executable by the
4036 		// ELF initialization code
4037 
4038 	strcpy(name + length, "_data");
4039 	address = (void*)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
4040 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4041 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4042 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4043 }
4044 
4045 
4046 /*!	Frees the kernel args areas that were previously created for the ranges
4047 	recorded in the kernel_args structure. Any boot loader resources contained
4048 	in those arguments must not be accessed anymore past this point.
4049 */
4050 void
4051 vm_free_kernel_args(kernel_args* args)
4052 {
4053 	uint32 i;
4054 
4055 	TRACE(("vm_free_kernel_args()\n"));
4056 
4057 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4058 		area_id area = area_for((void*)args->kernel_args_range[i].start);
4059 		if (area >= B_OK)
4060 			delete_area(area);
4061 	}
4062 }
4063 
4064 
4065 static void
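/*!	Creates wired areas covering the memory ranges in which the boot loader
	stored the kernel_args.
*/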
4066 allocate_kernel_args(kernel_args* args)
4067 {
4068 	TRACE(("allocate_kernel_args()\n"));
4069 
4070 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4071 		void* address = (void*)args->kernel_args_range[i].start;
4072 
4073 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4074 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4075 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4076 	}
4077 }
4078 
4079 
4080 static void
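/*!	Releases the address ranges reserved by reserve_boot_loader_ranges()
	again.
*/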
4081 unreserve_boot_loader_ranges(kernel_args* args)
4082 {
4083 	TRACE(("unreserve_boot_loader_ranges()\n"));
4084 
4085 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4086 		vm_unreserve_address_range(vm_kernel_address_space_id(),
4087 			(void*)args->virtual_allocated_range[i].start,
4088 			args->virtual_allocated_range[i].size);
4089 	}
4090 }
4091 
4092 
4093 static void
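/*!	Reserves the kernel address ranges that the boot loader has already
	allocated, so that they won't be handed out again before the kernel has
	taken them over. Ranges outside the kernel address space are left to the
	architecture specific code.
*/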
4094 reserve_boot_loader_ranges(kernel_args* args)
4095 {
4096 	TRACE(("reserve_boot_loader_ranges()\n"));
4097 
4098 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4099 		void* address = (void*)args->virtual_allocated_range[i].start;
4100 
4101 		// If the address is not a kernel address, we just skip it. The
4102 		// architecture specific code has to deal with it.
4103 		if (!IS_KERNEL_ADDRESS(address)) {
4104 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
4105 				address, args->virtual_allocated_range[i].size);
4106 			continue;
4107 		}
4108 
4109 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(),
4110 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4111 		if (status < B_OK)
4112 			panic("could not reserve boot loader ranges\n");
4113 	}
4114 }
4115 
4116 
4117 static addr_t
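/*!	Finds a spot of the given size (rounded up to whole pages) in the
	kernel's virtual address space by looking for gaps between and around the
	ranges recorded in \a args->virtual_allocated_range[], and extends the
	neighboring range to cover it. Returns 0 on failure.
*/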
4118 allocate_early_virtual(kernel_args* args, size_t size)
4119 {
4120 	addr_t spot = 0;
4121 	uint32 i;
4122 	int last_valloc_entry = 0;
4123 
4124 	size = PAGE_ALIGN(size);
4125 	// find a slot in the virtual allocation addr range
4126 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
4127 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4128 			+ args->virtual_allocated_range[i - 1].size;
4129 		last_valloc_entry = i;
4130 		// check to see if the space between this one and the last is big enough
4131 		if (previousRangeEnd >= KERNEL_BASE
4132 			&& args->virtual_allocated_range[i].start
4133 				- previousRangeEnd >= size) {
4134 			spot = previousRangeEnd;
4135 			args->virtual_allocated_range[i - 1].size += size;
4136 			goto out;
4137 		}
4138 	}
4139 	if (spot == 0) {
4140 		// We haven't found a gap between the allocation ranges. That's okay;
4141 		// see if there's a gap after the last one
4142 		addr_t lastRangeEnd
4143 			= args->virtual_allocated_range[last_valloc_entry].start
4144 				+ args->virtual_allocated_range[last_valloc_entry].size;
4145 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
4146 			spot = lastRangeEnd;
4147 			args->virtual_allocated_range[last_valloc_entry].size += size;
4148 			goto out;
4149 		}
4150 		// see if there's a gap before the first one
4151 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
4152 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
4153 				args->virtual_allocated_range[0].start -= size;
4154 				spot = args->virtual_allocated_range[0].start;
4155 				goto out;
4156 			}
4157 		}
4158 	}
4159 
4160 out:
4161 	return spot;
4162 }
4163 
4164 
4165 static bool
4166 is_page_in_physical_memory_range(kernel_args* args, addr_t address)
4167 {
4168 	// TODO: horrible brute-force method of determining if the page can be
4169 	// allocated
4170 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4171 		if (address >= args->physical_memory_range[i].start
4172 			&& address < args->physical_memory_range[i].start
4173 				+ args->physical_memory_range[i].size)
4174 			return true;
4175 	}
4176 	return false;
4177 }
4178 
4179 
4180 static addr_t
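/*!	Allocates the physical page directly following one of the already
	allocated physical ranges (growing that range accordingly) and returns
	its page number, or 0 if no suitable page could be found.
*/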
4181 allocate_early_physical_page(kernel_args* args)
4182 {
4183 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4184 		addr_t nextPage;
4185 
4186 		nextPage = args->physical_allocated_range[i].start
4187 			+ args->physical_allocated_range[i].size;
4188 		// see if the page after the next allocated paddr run can be allocated
4189 		if (i + 1 < args->num_physical_allocated_ranges
4190 			&& args->physical_allocated_range[i + 1].size != 0) {
4191 			// see if the next page will collide with the next allocated range
4192 			if (nextPage >= args->physical_allocated_range[i+1].start)
4193 				continue;
4194 		}
4195 		// see if the next physical page fits in the memory block
4196 		if (is_page_in_physical_memory_range(args, nextPage)) {
4197 			// we got one!
4198 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4199 			return nextPage / B_PAGE_SIZE;
4200 		}
4201 	}
4202 
4203 	return 0;
4204 		// could not allocate a page
4205 }
4206 
4207 
4208 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4209 	allocate some pages before the VM is completely up.
4210 */
4211 addr_t
4212 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4213 	uint32 attributes)
4214 {
4215 	if (physicalSize > virtualSize)
4216 		physicalSize = virtualSize;
4217 
4218 	// find the vaddr to allocate at
4219 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
4220 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
4221 
4222 	// map the pages
4223 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4224 		addr_t physicalAddress = allocate_early_physical_page(args);
4225 		if (physicalAddress == 0)
4226 			panic("error allocating early page!\n");
4227 
4228 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4229 
4230 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4231 			physicalAddress * B_PAGE_SIZE, attributes,
4232 			&allocate_early_physical_page);
4233 	}
4234 
4235 	return virtualBase;
4236 }
4237 
4238 
4239 /*!	The main entry point for initializing the VM. */
4240 status_t
4241 vm_init(kernel_args* args)
4242 {
4243 	struct preloaded_image* image;
4244 	void* address;
4245 	status_t err = 0;
4246 	uint32 i;
4247 
4248 	TRACE(("vm_init: entry\n"));
4249 	err = arch_vm_translation_map_init(args);
4250 	err = arch_vm_init(args);
4251 
4252 	// initialize some globals
4253 	sNextAreaID = 1;
4254 
4255 	vm_page_init_num_pages(args);
4256 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4257 
4258 	size_t heapSize = INITIAL_HEAP_SIZE;
4259 	// try to accommodate low memory systems
4260 	while (heapSize > sAvailableMemory / 8)
4261 		heapSize /= 2;
4262 	if (heapSize < 1024 * 1024)
4263 		panic("vm_init: go buy some RAM please.");
4264 
4265 	// map in the new heap and initialize it
4266 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4267 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4268 	TRACE(("heap at 0x%lx\n", heapBase));
4269 	heap_init(heapBase, heapSize);
4270 
4271 	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
4272 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
4273 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4274 	slab_init(args, slabInitialBase, slabInitialSize);
4275 
4276 	// initialize the free page list and physical page mapper
4277 	vm_page_init(args);
4278 
4279 	// initialize the hash table that stores the pages mapped to caches
4280 	vm_cache_init(args);
4281 
4282 	{
4283 		vm_area* area;
4284 		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
4285 			(addr_t)&area->hash_next - (addr_t)area,
4286 			&area_compare, &area_hash);
4287 		if (sAreaHash == NULL)
4288 			panic("vm_init: error creating area hash table\n");
4289 	}
4290 
4291 	vm_address_space_init();
4292 	reserve_boot_loader_ranges(args);
4293 
4294 	// Do any further initialization that the architecture dependent layers may
4295 	// need now
4296 	arch_vm_translation_map_init_post_area(args);
4297 	arch_vm_init_post_area(args);
4298 	vm_page_init_post_area(args);
4299 
4300 	// allocate areas to represent stuff that already exists
4301 
4302 	address = (void*)ROUNDOWN(heapBase, B_PAGE_SIZE);
4303 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4304 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4305 
4306 	address = (void*)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
4307 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
4308 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
4309 		| B_KERNEL_WRITE_AREA);
4310 
4311 	allocate_kernel_args(args);
4312 
4313 	create_preloaded_image_areas(&args->kernel_image);
4314 
4315 	// allocate areas for preloaded images
4316 	for (image = args->preloaded_images; image != NULL; image = image->next) {
4317 		create_preloaded_image_areas(image);
4318 	}
4319 
4320 	// allocate kernel stacks
4321 	for (i = 0; i < args->num_cpus; i++) {
4322 		char name[64];
4323 
4324 		sprintf(name, "idle thread %lu kstack", i + 1);
4325 		address = (void*)args->cpu_kstack[i].start;
4326 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4327 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4328 	}
4329 
4330 #if DEBUG_CACHE_LIST
4331 	create_area("cache info table", (void**)&sCacheInfoTable,
4332 		B_ANY_KERNEL_ADDRESS,
4333 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4334 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4335 #endif	// DEBUG_CACHE_LIST
4336 
4337 	// add some debugger commands
4338 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4339 	add_debugger_command("area", &dump_area,
4340 		"Dump info about a particular area");
4341 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
4342 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree");
4343 #if DEBUG_CACHE_LIST
4344 	add_debugger_command_etc("caches", &dump_caches,
4345 		"List all vm_cache trees",
4346 		"[ \"-c\" ]\n"
4347 		"All cache trees are listed sorted in decreasing order by number of\n"
4348 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
4349 		0);
4350 #endif
4351 	add_debugger_command("avail", &dump_available_memory,
4352 		"Dump available memory");
4353 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4354 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4355 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4356 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4357 	add_debugger_command("string", &display_mem, "dump strings");
4358 
4359 	TRACE(("vm_init: exit\n"));
4360 
4361 	return err;
4362 }
4363 
4364 
4365 status_t
4366 vm_init_post_sem(kernel_args* args)
4367 {
4368 	// This frees all unused boot loader resources and makes their space
4369 	// available again
4370 	arch_vm_init_end(args);
4371 	unreserve_boot_loader_ranges(args);
4372 
4373 	// Fill in all of the semaphores that were not allocated before.
4374 	// Since we're still single threaded and only the kernel address space
4375 	// exists, it isn't that hard to find all of the ones we need to create.
4376 
4377 	arch_vm_translation_map_init_post_sem(args);
4378 	vm_address_space_init_post_sem();
4379 
4380 	slab_init_post_sem();
4381 	return heap_init_post_sem();
4382 }
4383 
4384 
4385 status_t
4386 vm_init_post_thread(kernel_args* args)
4387 {
4388 	vm_page_init_post_thread(args);
4389 	vm_daemon_init();
4390 	slab_init_post_thread();
4391 	return heap_init_post_thread();
4392 }
4393 
4394 
4395 status_t
4396 vm_init_post_modules(kernel_args* args)
4397 {
4398 	return arch_vm_init_post_modules(args);
4399 }
4400 
4401 
4402 void
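/*!	Increments the calling thread's page_faults_allowed counter; its
	counterpart is forbid_page_faults() below.
*/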
4403 permit_page_faults(void)
4404 {
4405 	struct thread* thread = thread_get_current_thread();
4406 	if (thread != NULL)
4407 		atomic_add(&thread->page_faults_allowed, 1);
4408 }
4409 
4410 
4411 void
4412 forbid_page_faults(void)
4413 {
4414 	struct thread* thread = thread_get_current_thread();
4415 	if (thread != NULL)
4416 		atomic_add(&thread->page_faults_allowed, -1);
4417 }
4418 
4419 
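/*!	The architecture independent page fault handler. It determines the
	address space the fault occurred in and lets vm_soft_fault() resolve the
	fault. On failure it either redirects the kernel to the thread's fault
	handler, panics, or notifies the user debugger and sends the userland
	thread a SIGSEGV.
*/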
4420 status_t
4421 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
4422 	addr_t* newIP)
4423 {
4424 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4425 		faultAddress));
4426 
4427 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4428 
4429 	addr_t pageAddress = ROUNDOWN(address, B_PAGE_SIZE);
4430 	vm_address_space* addressSpace = NULL;
4431 
4432 	status_t status = B_OK;
4433 	*newIP = 0;
4434 	atomic_add((int32*)&sPageFaults, 1);
4435 
4436 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4437 		addressSpace = vm_get_kernel_address_space();
4438 	} else if (IS_USER_ADDRESS(pageAddress)) {
4439 		addressSpace = vm_get_current_user_address_space();
4440 		if (addressSpace == NULL) {
4441 			if (!isUser) {
4442 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4443 					"memory!\n");
4444 				status = B_BAD_ADDRESS;
4445 				TPF(PageFaultError(-1,
4446 					VMPageFaultTracing
4447 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4448 			} else {
4449 				// XXX weird state.
4450 				panic("vm_page_fault: non kernel thread accessing user memory "
4451 					"that doesn't exist!\n");
4452 				status = B_BAD_ADDRESS;
4453 			}
4454 		}
4455 	} else {
4456 		// The hit was probably in the 64k DMZ between kernel and user space;
4457 		// this keeps a user space thread from passing a buffer that crosses
4458 		// into kernel space.
4459 		status = B_BAD_ADDRESS;
4460 		TPF(PageFaultError(-1,
4461 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4462 	}
4463 
4464 	if (status == B_OK)
4465 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser);
4466 
4467 	if (status < B_OK) {
4468 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4469 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4470 			strerror(status), address, faultAddress, isWrite, isUser,
4471 			thread_get_current_thread_id());
4472 		if (!isUser) {
4473 			struct thread* thread = thread_get_current_thread();
4474 			if (thread != NULL && thread->fault_handler != 0) {
4475 				// this will cause the arch dependent page fault handler to
4476 				// modify the IP on the interrupt frame or whatever to return
4477 				// to this address
4478 				*newIP = thread->fault_handler;
4479 			} else {
4480 				// unhandled page fault in the kernel
4481 				panic("vm_page_fault: unhandled page fault in kernel space at "
4482 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4483 			}
4484 		} else {
4485 #if 1
4486 			rw_lock_read_lock(&addressSpace->lock);
4487 
4488 			// TODO: remove me once we have proper userland debugging support
4489 			// (and tools)
4490 			vm_area* area = vm_area_lookup(addressSpace, faultAddress);
4491 
4492 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4493 // fault and someone is already waiting for a write lock on the same address
4494 // space. This thread will then try to acquire the semaphore again and will
4495 // be queued after the writer.
4496 			struct thread* thread = thread_get_current_thread();
4497 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4498 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4499 				thread->name, thread->id, thread->team->name, thread->team->id,
4500 				isWrite ? "write" : "read", address, faultAddress,
4501 				area ? area->name : "???",
4502 				faultAddress - (area ? area->base : 0x0));
4503 
4504 			// We can print a stack trace of the userland thread here.
4505 #if 1
4506 			if (area) {
4507 				struct stack_frame {
4508 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4509 						struct stack_frame*	previous;
4510 						void*				return_address;
4511 					#else
4512 						// ...
4513 					#warning writeme
4514 					#endif
4515 				} frame;
4516 #ifdef __INTEL__
4517 				struct iframe* iframe = i386_get_user_iframe();
4518 				if (iframe == NULL)
4519 					panic("iframe is NULL!");
4520 
4521 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4522 					sizeof(struct stack_frame));
4523 #elif defined(__POWERPC__)
4524 				struct iframe* iframe = ppc_get_user_iframe();
4525 				if (iframe == NULL)
4526 					panic("iframe is NULL!");
4527 
4528 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4529 					sizeof(struct stack_frame));
4530 #else
4531 #	warning "vm_page_fault() stack trace won't work"
4532 				status = B_ERROR;
4533 #endif
4534 
4535 				dprintf("stack trace:\n");
4536 				int32 maxFrames = 50;
4537 				while (status == B_OK && --maxFrames >= 0
4538 						&& frame.return_address != NULL) {
4539 					dprintf("  %p", frame.return_address);
4540 					area = vm_area_lookup(addressSpace,
4541 						(addr_t)frame.return_address);
4542 					if (area) {
4543 						dprintf(" (%s + %#lx)", area->name,
4544 							(addr_t)frame.return_address - area->base);
4545 					}
4546 					dprintf("\n");
4547 
4548 					status = user_memcpy(&frame, frame.previous,
4549 						sizeof(struct stack_frame));
4550 				}
4551 			}
4552 #endif	// 1 (stack trace)
4553 
4554 			rw_lock_read_unlock(&addressSpace->lock);
4555 #endif
4556 
4557 			// TODO: the fault_callback is a temporary solution for vm86
4558 			if (thread->fault_callback == NULL
4559 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4560 				// If the thread has a signal handler for SIGSEGV, we simply
4561 				// send it the signal. Otherwise we notify the user debugger
4562 				// first.
4563 				struct sigaction action;
4564 				if (sigaction(SIGSEGV, NULL, &action) == 0
4565 					&& action.sa_handler != SIG_DFL
4566 					&& action.sa_handler != SIG_IGN) {
4567 					send_signal(thread->id, SIGSEGV);
4568 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4569 						SIGSEGV)) {
4570 					send_signal(thread->id, SIGSEGV);
4571 				}
4572 			}
4573 		}
4574 	}
4575 
4576 	if (addressSpace != NULL)
4577 		vm_put_address_space(addressSpace);
4578 
4579 	return B_HANDLED_INTERRUPT;
4580 }
4581 
4582 
4583 static inline status_t
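/*!	Acquires a reference to and locks the source of the given cache; returns
	\c B_ERROR if the cache has no source.
*/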
4584 fault_acquire_locked_source(vm_cache* cache, vm_cache** _source)
4585 {
4586 	vm_cache* source = cache->source;
4587 	if (source == NULL)
4588 		return B_ERROR;
4589 
4590 	source->Lock();
4591 	source->AcquireRefLocked();
4592 
4593 	*_source = source;
4594 	return B_OK;
4595 }
4596 
4597 
4598 /*!	Inserts a busy dummy page into a cache, and makes sure the cache won't go
4599 	away by grabbing a reference to it.
4600 */
4601 static inline void
4602 fault_insert_dummy_page(vm_cache* cache, vm_dummy_page& dummyPage,
4603 	off_t cacheOffset)
4604 {
4605 	dummyPage.state = PAGE_STATE_BUSY;
4606 	cache->AcquireRefLocked();
4607 	cache->InsertPage(&dummyPage, cacheOffset);
4608 	dummyPage.busy_condition.Publish(&dummyPage, "page");
4609 }
4610 
4611 
4612 /*!	Removes the busy dummy page from a cache, and releases its reference to
4613 	the cache.
4614 */
4615 static inline void
4616 fault_remove_dummy_page(vm_dummy_page& dummyPage, bool isLocked)
4617 {
4618 	vm_cache* cache = dummyPage.cache;
4619 	if (!isLocked)
4620 		cache->Lock();
4621 
4622 	if (dummyPage.state == PAGE_STATE_BUSY) {
4623 		cache->RemovePage(&dummyPage);
4624 		dummyPage.state = PAGE_STATE_INACTIVE;
4625 		dummyPage.busy_condition.Unpublish();
4626 	}
4627 
4628 	cache->ReleaseRefLocked();
4629 
4630 	if (!isLocked)
4631 		cache->Unlock();
4632 }
4633 
4634 
4635 /*!	Finds a page at the specified \a cacheOffset in either the \a topCache
4636 	or in its source chain. It will also page in a missing page in case there
4637 	is a cache whose backing store has the page.
4638 	If it couldn't find a page, it will return the vm_cache that should get it,
4639 	otherwise, it will return the vm_cache that contains the page.
4640 	It always grabs a reference to the vm_cache that it returns, and also locks
4641 	it.
4642 */
4643 static inline status_t
4644 fault_find_page(vm_translation_map* map, vm_cache* topCache,
4645 	off_t cacheOffset, bool isWrite, vm_dummy_page& dummyPage,
4646 	vm_cache** _pageCache, vm_page** _page, bool* _restart)
4647 {
4648 	*_restart = false;
4649 	vm_cache* cache = topCache;
4650 	vm_cache* lastCache = NULL;
4651 	vm_page* page = NULL;
4652 
4653 	cache->Lock();
4654 	cache->AcquireRefLocked();
4655 		// we release this later in the loop
4656 
4657 	while (cache != NULL) {
4658 		if (lastCache != NULL)
4659 			lastCache->ReleaseRefAndUnlock();
4660 
4661 		// we hold the lock of the cache at this point
4662 
4663 		lastCache = cache;
4664 
4665 		for (;;) {
4666 			page = cache->LookupPage(cacheOffset);
4667 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
4668 				// we found the page
4669 				break;
4670 			}
4671 			if (page == NULL || page == &dummyPage)
4672 				break;
4673 
4674 			// page must be busy -- wait for it to become unbusy
4675 			{
4676 				ConditionVariableEntry entry;
4677 				entry.Add(page);
4678 				cache->Unlock();
4679 				entry.Wait();
4680 				cache->Lock();
4681 			}
4682 		}
4683 
4684 		if (page != NULL && page != &dummyPage)
4685 			break;
4686 
4687 		// The current cache does not contain the page we're looking for
4688 
4689 		// see if the backing store has it
4690 		if (cache->HasPage(cacheOffset)) {
4691 			// insert a fresh page and mark it busy -- we're going to read it in
4692 			page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4693 			cache->InsertPage(page, cacheOffset);
4694 
4695 			ConditionVariable busyCondition;
4696 			busyCondition.Publish(page, "page");
4697 
4698 			cache->Unlock();
4699 
4700 			// get a virtual address for the page
4701 			iovec vec;
4702 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
4703 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
4704 
4705 			// read it in
4706 			status_t status = cache->Read(cacheOffset, &vec, 1,
4707 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4708 
4709 			cache->Lock();
4710 
4711 			if (status < B_OK) {
4712 				// on error remove and free the page
4713 				dprintf("reading page from cache %p returned: %s!\n",
4714 					cache, strerror(status));
4715 
4716 				busyCondition.Unpublish();
4717 				cache->RemovePage(page);
4718 				vm_page_set_state(page, PAGE_STATE_FREE);
4719 
4720 				cache->ReleaseRefAndUnlock();
4721 				return status;
4722 			}
4723 
4724 			// mark the page unbusy again
4725 			page->state = PAGE_STATE_ACTIVE;
4726 			busyCondition.Unpublish();
4727 			break;
4728 		}
4729 
4730 		// If we're at the top most cache, insert the dummy page here to keep
4731 		// other threads from faulting on the same address and chasing us up the
4732 		// cache chain
4733 		if (cache == topCache && dummyPage.state != PAGE_STATE_BUSY)
4734 			fault_insert_dummy_page(cache, dummyPage, cacheOffset);
4735 
4736 		vm_cache* nextCache;
4737 		status_t status = fault_acquire_locked_source(cache, &nextCache);
4738 		if (status < B_OK)
4739 			nextCache = NULL;
4740 
4741 		// at this point, we still hold a ref to this cache
4742 		// (through lastCache)
4743 
4744 		cache = nextCache;
4745 	}
4746 
4747 	if (page == &dummyPage)
4748 		page = NULL;
4749 
4750 	if (page == NULL) {
4751 		// there was no adequate page, determine the cache for a clean one
4752 
4753 		ASSERT(cache == NULL);
4754 
4755 		// We rolled off the end of the cache chain, so we need to decide which
4756 		// cache will get the new page we're about to create.
4757 		cache = isWrite ? topCache : lastCache;
4758 			// Read-only pages come in the deepest cache - only the
4759 			// top most cache may have direct write access.
4760 		if (cache != lastCache) {
4761 			lastCache->ReleaseRefAndUnlock();
4762 			cache->Lock();
4763 			cache->AcquireRefLocked();
4764 		}
4765 
4766 		vm_page* newPage = cache->LookupPage(cacheOffset);
4767 		if (newPage && newPage != &dummyPage) {
4768 			// A new page turned up. It could be the one we're looking
4769 			// for, but it could as well be a dummy page from someone
4770 			// else or an otherwise busy page. We can't really handle
4771 			// that here. Hence we completely restart this function.
4772 			cache->ReleaseRefAndUnlock();
4773 			*_restart = true;
4774 		}
4775 	} else {
4776 		// we still own reference and lock to the cache
4777 	}
4778 
4779 	*_pageCache = cache;
4780 	*_page = page;
4781 	return B_OK;
4782 }
4783 
4784 
4785 /*!	Returns the page that should be mapped into the area that got the fault.
4786 	It returns the owner of the page in \a sourceCache - it keeps a reference
4787 	to it, and has also locked it on exit.
4788 */
4789 static inline status_t
4790 fault_get_page(vm_translation_map* map, vm_cache* topCache, off_t cacheOffset,
4791 	bool isWrite, vm_dummy_page& dummyPage, vm_cache** _sourceCache,
4792 	vm_cache** _copiedSource, vm_page** _page)
4793 {
4794 	vm_cache* cache;
4795 	vm_page* page;
4796 	bool restart;
4797 	for (;;) {
4798 		status_t status = fault_find_page(map, topCache, cacheOffset, isWrite,
4799 			dummyPage, &cache, &page, &restart);
4800 		if (status != B_OK)
4801 			return status;
4802 
4803 		if (!restart)
4804 			break;
4805 
4806 		// Remove the dummy page, if it has been inserted.
4807 		topCache->Lock();
4808 
4809 		if (dummyPage.state == PAGE_STATE_BUSY) {
4810 			ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4811 				&dummyPage);
4812 			fault_remove_dummy_page(dummyPage, true);
4813 		}
4814 
4815 		topCache->Unlock();
4816 	}
4817 
4818 	if (page == NULL) {
4819 		// we still haven't found a page, so we allocate a clean one
4820 
4821 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
4822 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4823 			page->physical_page_number));
4824 
4825 		// Insert the new page into our cache, replacing the dummy page
4826 		// if necessary
4827 
4828 		// If we inserted a dummy page into this cache (i.e. if it is the top
4829 		// cache), we have to remove it now
4830 		if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cache) {
4831 #if DEBUG_PAGE_CACHE_TRANSITIONS
4832 			page->debug_flags = dummyPage.debug_flags | 0x8;
4833 			if (dummyPage.collided_page != NULL) {
4834 				dummyPage.collided_page->collided_page = page;
4835 				page->collided_page = dummyPage.collided_page;
4836 			}
4837 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4838 
4839 			fault_remove_dummy_page(dummyPage, true);
4840 		}
4841 
4842 		cache->InsertPage(page, cacheOffset);
4843 
4844 		if (dummyPage.state == PAGE_STATE_BUSY) {
4845 #if DEBUG_PAGE_CACHE_TRANSITIONS
4846 			page->debug_flags = dummyPage.debug_flags | 0x10;
4847 			if (dummyPage.collided_page != NULL) {
4848 				dummyPage.collided_page->collided_page = page;
4849 				page->collided_page = dummyPage.collided_page;
4850 			}
4851 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
4852 
4853 			// This is not the top cache into which we inserted the dummy page,
4854 			// so let's remove it from there. We need to temporarily unlock our
4855 			// cache to comply with the cache locking policy.
4856 			cache->Unlock();
4857 			fault_remove_dummy_page(dummyPage, false);
4858 			cache->Lock();
4859 		}
4860 	}
4861 
4862 	// We now have the page and a cache it belongs to - we now need to make
4863 	// sure that the area's cache can access it, too, and sees the correct data
4864 
4865 	if (page->cache != topCache && isWrite) {
4866 		// Now we have a page that has the data we want, but in the wrong cache
4867 		// object, so we need to copy it and stick it into the top cache.
4868 		// Note that this and the "if" before are mutually exclusive. If
4869 		// fault_find_page() didn't find the page, it would return the top cache
4870 		// for write faults.
4871 		vm_page* sourcePage = page;
4872 
4873 		// TODO: if memory is low, it might be a good idea to steal the page
4874 		// from our source cache - if possible, that is
4875 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4876 		page = vm_page_allocate_page(PAGE_STATE_FREE, true);
4877 
4878 		// copy the page
4879 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4880 			sourcePage->physical_page_number * B_PAGE_SIZE);
4881 
4882 		if (sourcePage->state != PAGE_STATE_MODIFIED)
4883 			vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE);
4884 
4885 		cache->Unlock();
4886 		topCache->Lock();
4887 
4888 		// Since the top cache has been unlocked for a while, someone else
4889 		// (RemoveConsumer()) might have replaced our dummy page.
4890 		vm_page* newPage = NULL;
4891 		for (;;) {
4892 			newPage = topCache->LookupPage(cacheOffset);
4893 			if (newPage == NULL || newPage == &dummyPage) {
4894 				newPage = NULL;
4895 				break;
4896 			}
4897 
4898 			if (newPage->state != PAGE_STATE_BUSY)
4899 				break;
4900 
4901 			// The page is busy, wait till it becomes unbusy.
4902 			ConditionVariableEntry entry;
4903 			entry.Add(newPage);
4904 			topCache->Unlock();
4905 			entry.Wait();
4906 			topCache->Lock();
4907 		}
4908 
4909 		if (newPage) {
4910 			// Indeed someone else threw in a page. We free ours and are happy.
4911 			vm_page_set_state(page, PAGE_STATE_FREE);
4912 			page = newPage;
4913 		} else {
4914 			// Insert the new page into our cache and remove the dummy page, if
4915 			// necessary.
4916 
4917 			// if we inserted a dummy page into this cache, we have to remove it
4918 			// now
4919 			if (dummyPage.state == PAGE_STATE_BUSY) {
4920 				ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n",
4921 					&dummyPage);
4922 				fault_remove_dummy_page(dummyPage, true);
4923 			}
4924 
4925 			topCache->InsertPage(page, cacheOffset);
4926 		}
4927 
4928 		*_copiedSource = cache;
4929 
4930 		cache = topCache;
4931 		cache->AcquireRefLocked();
4932 	}
4933 
4934 	*_sourceCache = cache;
4935 	*_page = page;
4936 	return B_OK;
4937 }
4938 
4939 
4940 static status_t
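/*!	Resolves a fault at the given address: looks up the area it belongs to,
	checks the access permissions and, unless the top cache's Fault() hook
	handles the fault itself, lets fault_get_page() deliver the page and maps
	it into the address space. Pages that don't belong to the area's top
	cache are mapped read-only to preserve copy-on-write semantics.
*/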
4941 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress,
4942 	bool isWrite, bool isUser)
4943 {
4944 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4945 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4946 
4947 	AddressSpaceReadLocker locker(addressSpace, true);
4948 
4949 	atomic_add(&addressSpace->fault_count, 1);
4950 
4951 	// Get the area the fault was in
4952 
4953 	addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
4954 
4955 	vm_area* area = vm_area_lookup(addressSpace, address);
4956 	if (area == NULL) {
4957 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n",
4958 			originalAddress);
4959 		TPF(PageFaultError(-1, VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4960 		return B_BAD_ADDRESS;
4961 	}
4962 
4963 	// check permissions
4964 	uint32 protection = get_area_page_protection(area, address);
4965 	if (isUser && (protection & B_USER_PROTECTION) == 0) {
4966 		dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4967 			(void*)originalAddress);
4968 		TPF(PageFaultError(area->id,
4969 			VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4970 		return B_PERMISSION_DENIED;
4971 	}
4972 	if (isWrite && (protection
4973 			& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4974 		dprintf("write access attempted on read-only area 0x%lx at %p\n",
4975 			area->id, (void*)originalAddress);
4976 		TPF(PageFaultError(area->id,
4977 			VMPageFaultTracing::PAGE_FAULT_ERROR_READ_ONLY));
4978 		return B_PERMISSION_DENIED;
4979 	}
4980 
4981 	// We have the area, it was a valid access, so let's try to resolve the page
4982 	// fault now.
4983 	// At first, the top most cache from the area is investigated
4984 
4985 	vm_cache* topCache = vm_area_get_locked_cache(area);
4986 	off_t cacheOffset = address - area->base + area->cache_offset;
4987 
4988 	atomic_add(&area->no_cache_change, 1);
4989 		// make sure the area's cache isn't replaced during the page fault
4990 
4991 	// See if this cache has a fault handler - this will do all the work for us
4992 	{
4993 		// Note, since the page fault is resolved with interrupts enabled, the
4994 		// fault handler could be called more than once for the same reason -
4995 		// the store must take this into account
4996 		status_t status = topCache->Fault(addressSpace, cacheOffset);
4997 		if (status != B_BAD_HANDLER) {
4998 			vm_area_put_locked_cache(topCache);
4999 			return status;
5000 		}
5001 	}
5002 
5003 	topCache->Unlock();
5004 
5005 	// The top most cache has no fault handler, so let's see if the cache or its
5006 	// sources already have the page we're searching for (we're going from top to
5007 	// bottom)
5008 
5009 	vm_translation_map* map = &addressSpace->translation_map;
5010 	size_t reservePages = 2 + map->ops->map_max_pages_need(map,
5011 		originalAddress, originalAddress);
5012 	vm_page_reserve_pages(reservePages);
5013 		// we may need up to 2 pages - reserving them upfront makes sure
5014 		// we don't have any cache locked, so that the page daemon/thief
5015 		// can do their job without problems
5016 
5017 	vm_dummy_page dummyPage;
5018 	dummyPage.cache = NULL;
5019 	dummyPage.state = PAGE_STATE_INACTIVE;
5020 	dummyPage.type = PAGE_TYPE_DUMMY;
5021 	dummyPage.wired_count = 0;
5022 #if DEBUG_PAGE_CACHE_TRANSITIONS
5023 	dummyPage.debug_flags = 0;
5024 	dummyPage.collided_page = NULL;
5025 #endif	// DEBUG_PAGE_CACHE_TRANSITIONS
5026 
5027 	vm_cache* copiedPageSource = NULL;
5028 	vm_cache* pageSource;
5029 	vm_page* page;
5030 	// TODO: We keep the address space read lock during the whole operation
5031 	// which might be rather expensive depending on where the data has to
5032 	// be retrieved from.
5033 	status_t status = fault_get_page(map, topCache, cacheOffset, isWrite,
5034 		dummyPage, &pageSource, &copiedPageSource, &page);
5035 
5036 	if (status == B_OK) {
5037 		// All went fine, all there is left to do is to map the page into the
5038 		// address space
5039 		TPF(PageFaultDone(area->id, topCache, page->cache, page));
5040 
5041 		// In case this is a copy-on-write page, we need to unmap it from the
5042 		// area now
5043 		if (isWrite && page->cache == topCache)
5044 			vm_unmap_page(area, address, true);
5045 
5046 		// TODO: there is currently no mechanism to prevent a page being mapped
5047 		// more than once in case of a second page fault!
5048 
5049 		// If the page doesn't reside in the area's cache, we need to make sure
5050 		// it's mapped in read-only, so that we cannot overwrite someone else's
5051 		// data (copy-on-write)
5052 		uint32 newProtection = protection;
5053 		if (page->cache != topCache && !isWrite)
5054 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
5055 
5056 		vm_map_page(area, page, address, newProtection);
5057 
5058 		pageSource->ReleaseRefAndUnlock();
5059 	} else
5060 		TPF(PageFaultError(area->id, status));
5061 
5062 	atomic_add(&area->no_cache_change, -1);
5063 
5064 	if (copiedPageSource)
5065 		copiedPageSource->ReleaseRef();
5066 
5067 	if (dummyPage.state == PAGE_STATE_BUSY) {
5068 		// We still have the dummy page in the cache - that happens if we didn't
5069 		// need to allocate a new page before, but could use one in another cache
5070 		fault_remove_dummy_page(dummyPage, false);
5071 	}
5072 
5073 	topCache->ReleaseRef();
5074 	vm_page_unreserve_pages(reservePages);
5075 
5076 	return status;
5077 }
5078 
5079 
5080 /*!	The address space must be at least read locked when calling this. */
5081 vm_area*
5082 vm_area_lookup(vm_address_space* addressSpace, addr_t address)
5083 {
5084 	vm_area* area;
5085 
5086 	// check the areas list first
5087 	area = addressSpace->area_hint;
5088 	if (area != NULL
5089 		&& area->base <= address
5090 		&& area->base + (area->size - 1) >= address)
5091 		goto found;
5092 
5093 	for (area = addressSpace->areas; area != NULL;
5094 			area = area->address_space_next) {
5095 		if (area->id == RESERVED_AREA_ID)
5096 			continue;
5097 
5098 		if (area->base <= address && area->base + (area->size - 1) >= address)
5099 			break;
5100 	}
5101 
5102 found:
5103 	if (area)
5104 		addressSpace->area_hint = area;
5105 
5106 	return area;
5107 }
5108 
5109 
5110 status_t
5111 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
5112 {
5113 	return vm_kernel_address_space()->translation_map.ops->get_physical_page(
5114 		paddr, _vaddr, _handle);
5115 }
5116 
5117 status_t
5118 vm_put_physical_page(addr_t vaddr, void* handle)
5119 {
5120 	return vm_kernel_address_space()->translation_map.ops->put_physical_page(
5121 		vaddr, handle);
5122 }
5123 
5124 
5125 status_t
5126 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
5127 {
5128 	return vm_kernel_address_space()->translation_map.ops
5129 		->get_physical_page_current_cpu(paddr, _vaddr, _handle);
5130 }
5131 
5132 status_t
5133 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5134 {
5135 	return vm_kernel_address_space()->translation_map.ops
5136 		->put_physical_page_current_cpu(vaddr, handle);
5137 }
5138 
5139 
5140 status_t
5141 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
5142 {
5143 	return vm_kernel_address_space()->translation_map.ops
5144 		->get_physical_page_debug(paddr, _vaddr, _handle);
5145 }
5146 
5147 status_t
5148 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5149 {
5150 	return vm_kernel_address_space()->translation_map.ops
5151 		->put_physical_page_debug(vaddr, handle);
5152 }
5153 
5154 
5155 void
5156 vm_get_info(system_memory_info* info)
5157 {
5158 	swap_get_info(info);
5159 
5160 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
5161 	info->page_faults = sPageFaults;
5162 
5163 	MutexLocker locker(sAvailableMemoryLock);
5164 	info->free_memory = sAvailableMemory;
5165 	info->needed_memory = sNeededMemory;
5166 }
5167 
5168 
5169 uint32
5170 vm_num_page_faults(void)
5171 {
5172 	return sPageFaults;
5173 }
5174 
5175 
5176 off_t
5177 vm_available_memory(void)
5178 {
5179 	MutexLocker locker(sAvailableMemoryLock);
5180 	return sAvailableMemory;
5181 }
5182 
5183 
5184 off_t
5185 vm_available_not_needed_memory(void)
5186 {
5187 	MutexLocker locker(sAvailableMemoryLock);
5188 	return sAvailableMemory - sNeededMemory;
5189 }
5190 
5191 
5192 void
5193 vm_unreserve_memory(size_t amount)
5194 {
5195 	mutex_lock(&sAvailableMemoryLock);
5196 
5197 	sAvailableMemory += amount;
5198 
5199 	mutex_unlock(&sAvailableMemoryLock);
5200 }
5201 
5202 
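/*!	Tries to reserve \a amount bytes of the available memory, waiting at
	most \a timeout microseconds and asking the low resource manager to free
	up memory in the meantime, if necessary.
*/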
5203 status_t
5204 vm_try_reserve_memory(size_t amount, bigtime_t timeout)
5205 {
5206 	MutexLocker locker(sAvailableMemoryLock);
5207 
5208 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5209 
5210 	if (sAvailableMemory >= amount) {
5211 		sAvailableMemory -= amount;
5212 		return B_OK;
5213 	}
5214 
5215 	if (timeout <= 0)
5216 		return B_NO_MEMORY;
5217 
5218 	// turn timeout into an absolute timeout
5219 	timeout += system_time();
5220 
5221 	// loop until we've got the memory or the timeout occurs
5222 	do {
5223 		sNeededMemory += amount;
5224 
5225 		// call the low resource manager
5226 		locker.Unlock();
5227 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5228 			B_ABSOLUTE_TIMEOUT, timeout);
5229 		locker.Lock();
5230 
5231 		sNeededMemory -= amount;
5232 
5233 		if (sAvailableMemory >= amount) {
5234 			sAvailableMemory -= amount;
5235 			return B_OK;
5236 		}
5237 	} while (timeout > system_time());
5238 
5239 	return B_NO_MEMORY;
5240 }
5241 
5242 
5243 status_t
5244 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
5245 {
5246 	AddressSpaceReadLocker locker;
5247 	vm_area* area;
5248 	status_t status = locker.SetFromArea(id, area);
5249 	if (status != B_OK)
5250 		return status;
5251 
5252 	return arch_vm_set_memory_type(area, physicalBase, type);
5253 }
5254 
5255 
5256 /*!	This function enforces some protection properties:
5257 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5258 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5259 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5260 	   and B_KERNEL_WRITE_AREA.
5261 */
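// For example, a caller passing just B_READ_AREA | B_WRITE_AREA ends up
// with B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
// after the call.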
5262 static void
5263 fix_protection(uint32* protection)
5264 {
5265 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5266 		if ((*protection & B_USER_PROTECTION) == 0
5267 			|| (*protection & B_WRITE_AREA) != 0)
5268 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5269 		else
5270 			*protection |= B_KERNEL_READ_AREA;
5271 	}
5272 }
5273 
5274 
5275 static void
5276 fill_area_info(struct vm_area* area, area_info* info, size_t size)
5277 {
5278 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5279 	info->area = area->id;
5280 	info->address = (void*)area->base;
5281 	info->size = area->size;
5282 	info->protection = area->protection;
5283 	info->lock = B_FULL_LOCK;
5284 	info->team = area->address_space->id;
5285 	info->copy_count = 0;
5286 	info->in_count = 0;
5287 	info->out_count = 0;
5288 		// TODO: retrieve real values here!
5289 
5290 	vm_cache* cache = vm_area_get_locked_cache(area);
5291 
5292 	// Note, this is a simplification; the cache could be larger than this area
5293 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5294 
5295 	vm_area_put_locked_cache(cache);
5296 }
5297 
5298 
5299 /*!
5300 	Tests whether the area that contains the specified address actually
5301 	exists, and whether its memory needs any kind of locking.
5302 	Used by both lock_memory() and unlock_memory().
5303 */
5304 static status_t
5305 test_lock_memory(vm_address_space* addressSpace, addr_t address,
5306 	bool& needsLocking)
5307 {
5308 	rw_lock_read_lock(&addressSpace->lock);
5309 
5310 	vm_area* area = vm_area_lookup(addressSpace, address);
5311 	if (area != NULL) {
5312 		// This determines if we need to lock the memory at all
5313 		needsLocking = area->cache_type != CACHE_TYPE_NULL
5314 			&& area->cache_type != CACHE_TYPE_DEVICE
5315 			&& area->wiring != B_FULL_LOCK
5316 			&& area->wiring != B_CONTIGUOUS;
5317 	}
5318 
5319 	rw_lock_read_unlock(&addressSpace->lock);
5320 
5321 	if (area == NULL)
5322 		return B_BAD_ADDRESS;
5323 
5324 	return B_OK;
5325 }
5326 
5327 
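/*!	Resizes all areas attached to the cache of the given area to \a newSize.
	Growing only succeeds if the space following each of these areas is
	either free or belongs to a reserved range large enough to absorb the
	growth; shrinking additionally unmaps the pages beyond the new size.
*/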
5328 static status_t
5329 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5330 {
5331 	// is newSize a multiple of B_PAGE_SIZE?
5332 	if (newSize & (B_PAGE_SIZE - 1))
5333 		return B_BAD_VALUE;
5334 
5335 	// lock all affected address spaces and the cache
5336 	vm_area* area;
5337 	vm_cache* cache;
5338 
5339 	MultiAddressSpaceLocker locker;
5340 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
5341 		&cache);
5342 	if (status != B_OK)
5343 		return status;
5344 	AreaCacheLocker cacheLocker(cache);	// already locked
5345 
5346 	// enforce restrictions
5347 	if (!kernel) {
5348 		if ((area->protection & B_KERNEL_AREA) != 0)
5349 			return B_NOT_ALLOWED;
5350 		// TODO: Enforce all restrictions (team, etc.)!
5351 	}
5352 
5353 	size_t oldSize = area->size;
5354 	if (newSize == oldSize)
5355 		return B_OK;
5356 
5357 	// Resize all areas of this area's cache
5358 
5359 	if (cache->type != CACHE_TYPE_RAM)
5360 		return B_NOT_ALLOWED;
5361 
5362 	if (oldSize < newSize) {
5363 		// We need to check if all areas of this cache can be resized
5364 
5365 		for (vm_area* current = cache->areas; current != NULL;
5366 				current = current->cache_next) {
5367 			vm_area* next = current->address_space_next;
5368 			if (next != NULL && next->base <= (current->base + newSize)) {
5369 				// If the area was created inside a reserved area, it can
5370 				// also be resized in that area
5371 				// TODO: if there is free space after the reserved area, it could
5372 				// be used as well...
5373 				if (next->id == RESERVED_AREA_ID
5374 					&& next->cache_offset <= current->base
5375 					&& next->base - 1 + next->size
5376 						>= current->base - 1 + newSize)
5377 					continue;
5378 
5379 				return B_ERROR;
5380 			}
5381 		}
5382 	}
5383 
5384 	// Okay, looks good so far, so let's do it
5385 
5386 	if (oldSize < newSize) {
5387 		// Growing the cache can fail, so we do it first.
5388 		status = cache->Resize(cache->virtual_base + newSize);
5389 		if (status != B_OK)
5390 			return status;
5391 	}
5392 
5393 	for (vm_area* current = cache->areas; current != NULL;
5394 			current = current->cache_next) {
5395 		vm_area* next = current->address_space_next;
5396 		if (next != NULL && next->base <= (current->base + newSize)) {
5397 			if (next->id == RESERVED_AREA_ID
5398 				&& next->cache_offset <= current->base
5399 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
5400 				// resize reserved area
5401 				addr_t offset = current->base + newSize - next->base;
5402 				if (next->size <= offset) {
5403 					current->address_space_next = next->address_space_next;
5404 					free(next);
5405 				} else {
5406 					next->size -= offset;
5407 					next->base += offset;
5408 				}
5409 			} else {
5410 				panic("resize situation for area %p has changed although we "
5411 					"should have the address space lock", current);
5412 				status = B_ERROR;
5413 				break;
5414 			}
5415 		}
5416 
5417 		current->size = newSize;
5418 
5419 		// We also need to unmap all pages beyond the new size, if the area
5420 		// has shrunk
5421 		if (newSize < oldSize) {
5422 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize,
5423 				false);
5424 		}
5425 	}
5426 
5427 	// shrinking the cache can't fail, so we do it now
5428 	if (status == B_OK && newSize < oldSize)
5429 		status = cache->Resize(cache->virtual_base + newSize);
5430 
5431 	if (status < B_OK) {
5432 		// This shouldn't really be possible, but hey, who knows
5433 		for (vm_area* current = cache->areas; current != NULL;
5434 				current = current->cache_next) {
5435 			current->size = oldSize;
5436 		}
5437 
5438 		cache->Resize(cache->virtual_base + oldSize);
5439 	}
5440 
5441 	// TODO: we must honour the lock restrictions of this area
5442 	return status;
5443 }
5444 
5445 
5446 status_t
5447 vm_memset_physical(addr_t address, int value, size_t length)
5448 {
5449 	return vm_kernel_address_space()->translation_map.ops->memset_physical(
5450 		address, value, length);
5451 }
5452 
5453 
5454 status_t
5455 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
5456 {
5457 	return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical(
5458 		to, from, length, user);
5459 }
5460 
5461 
5462 status_t
5463 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
5464 {
5465 	return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical(
5466 		to, _from, length, user);
5467 }
5468 
5469 
5470 void
5471 vm_memcpy_physical_page(addr_t to, addr_t from)
5472 {
5473 	return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page(
5474 		to, from);
5475 }
5476 
5477 
5478 //	#pragma mark - kernel public API
5479 
5480 
5481 status_t
5482 user_memcpy(void* to, const void* from, size_t size)
5483 {
5484 	if (arch_cpu_user_memcpy(to, from, size,
5485 			&thread_get_current_thread()->fault_handler) < B_OK)
5486 		return B_BAD_ADDRESS;
5487 
5488 	return B_OK;
5489 }
5490 
5491 
5492 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5493 	the string in \a to, NULL-terminating the result.
5494 
5495 	\param to Pointer to the destination C-string.
5496 	\param from Pointer to the source C-string.
5497 	\param size Size in bytes of the string buffer pointed to by \a to.
5498 
5499 	\return strlen(\a from).
5500 */
5501 ssize_t
5502 user_strlcpy(char* to, const char* from, size_t size)
5503 {
5504 	return arch_cpu_user_strlcpy(to, from, size,
5505 		&thread_get_current_thread()->fault_handler);
5506 }
5507 
5508 
5509 status_t
5510 user_memset(void* s, char c, size_t count)
5511 {
5512 	if (arch_cpu_user_memset(s, c, count,
5513 			&thread_get_current_thread()->fault_handler) < B_OK)
5514 		return B_BAD_ADDRESS;
5515 
5516 	return B_OK;
5517 }
5518 
5519 
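/*!	Wires the pages covering the given memory range in the specified team's
	address space: pages that aren't mapped yet (or aren't mapped writable
	although \c B_READ_DEVICE requests write access) are soft-faulted in
	first, then each page's wired count is incremented.
*/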
5520 status_t
5521 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5522 {
5523 	vm_address_space* addressSpace = NULL;
5524 	struct vm_translation_map* map;
5525 	addr_t unalignedBase = (addr_t)address;
5526 	addr_t end = unalignedBase + numBytes;
5527 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
5528 	bool isUser = IS_USER_ADDRESS(address);
5529 	bool needsLocking = true;
5530 
5531 	if (isUser) {
5532 		if (team == B_CURRENT_TEAM)
5533 			addressSpace = vm_get_current_user_address_space();
5534 		else
5535 			addressSpace = vm_get_address_space(team);
5536 	} else
5537 		addressSpace = vm_get_kernel_address_space();
5538 	if (addressSpace == NULL)
5539 		return B_ERROR;
5540 
5541 	// test if we're on an area that allows faults at all
5542 
5543 	map = &addressSpace->translation_map;
5544 
5545 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5546 	if (status < B_OK)
5547 		goto out;
5548 	if (!needsLocking)
5549 		goto out;
5550 
5551 	for (; base < end; base += B_PAGE_SIZE) {
5552 		addr_t physicalAddress;
5553 		uint32 protection;
5555 
5556 		map->ops->lock(map);
5557 		status = map->ops->query(map, base, &physicalAddress, &protection);
5558 		map->ops->unlock(map);
5559 
5560 		if (status < B_OK)
5561 			goto out;
5562 
5563 		if ((protection & PAGE_PRESENT) != 0) {
5564 			// if B_READ_DEVICE is set, the caller intends to write to the locked
5565 			// memory, so if it hasn't been mapped writable, we'll try the soft
5566 			// fault anyway
5567 			if ((flags & B_READ_DEVICE) == 0
5568 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
5569 				// update wiring
5570 				vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5571 				if (page == NULL)
5572 					panic("couldn't lookup physical page just allocated\n");
5573 
5574 				increment_page_wired_count(page);
5575 				continue;
5576 			}
5577 		}
5578 
5579 		status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
5580 			isUser);
5581 		if (status != B_OK) {
5582 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) "
5583 				"failed: %s\n", (void*)unalignedBase, numBytes, flags,
5584 				strerror(status));
5585 			goto out;
5586 		}
5587 
5588 		// TODO: Here's a race condition. We should probably add a parameter
5589 		// to vm_soft_fault() that would cause the page's wired count to be
5590 		// incremented immediately.
5591 		// TODO: After memory has been locked in an area, we need to prevent the
5592 		// area from being deleted, resized, cut, etc. That could be done using
5593 		// a "locked pages" count in vm_area, and maybe a condition variable, if
5594 		// we want to allow waiting for the area to become eligible for these
5595 		// operations again.
5596 
5597 		map->ops->lock(map);
5598 		status = map->ops->query(map, base, &physicalAddress, &protection);
5599 		map->ops->unlock(map);
5600 
5601 		if (status < B_OK)
5602 			goto out;
5603 
5604 		// update wiring
5605 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5606 		if (page == NULL)
5607 			panic("couldn't lookup physical page");
5608 
5609 		increment_page_wired_count(page);
5610 			// TODO: needs to be atomic on all platforms!
5611 	}
5612 
5613 out:
5614 	vm_put_address_space(addressSpace);
5615 	return status;
5616 }
5617 
5618 
5619 status_t
5620 lock_memory(void* address, size_t numBytes, uint32 flags)
5621 {
5622 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5623 }
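
/*	Illustrative sketch of the usual wiring pattern for device I/O; "buffer"
	and "length" are hypothetical. The pages are wired before the transfer and
	unwired afterwards with unlock_memory(); B_READ_DEVICE is set here because
	the transfer writes into the buffer (cf. the soft fault handling above).

		status_t status = lock_memory(buffer, length,
			B_DMA_IO | B_READ_DEVICE);
		if (status != B_OK)
			return status;

		// ... set up and run the DMA transfer into the buffer ...

		unlock_memory(buffer, length, B_DMA_IO | B_READ_DEVICE);
*/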
5624 
5625 
5626 status_t
5627 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5628 {
5629 	vm_address_space* addressSpace = NULL;
5630 	struct vm_translation_map* map;
5631 	addr_t unalignedBase = (addr_t)address;
5632 	addr_t end = unalignedBase + numBytes;
5633 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
5634 	bool needsLocking = true;
5635 
5636 	if (IS_USER_ADDRESS(address)) {
5637 		if (team == B_CURRENT_TEAM)
5638 			addressSpace = vm_get_current_user_address_space();
5639 		else
5640 			addressSpace = vm_get_address_space(team);
5641 	} else
5642 		addressSpace = vm_get_kernel_address_space();
5643 	if (addressSpace == NULL)
5644 		return B_ERROR;
5645 
5646 	map = &addressSpace->translation_map;
5647 
5648 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
5649 	if (status < B_OK)
5650 		goto out;
5651 	if (!needsLocking)
5652 		goto out;
5653 
5654 	for (; base < end; base += B_PAGE_SIZE) {
5655 		map->ops->lock(map);
5656 
5657 		addr_t physicalAddress;
5658 		uint32 protection;
5659 		status = map->ops->query(map, base, &physicalAddress,
5660 			&protection);
5661 
5662 		map->ops->unlock(map);
5663 
5664 		if (status < B_OK)
5665 			goto out;
5666 		if ((protection & PAGE_PRESENT) == 0)
5667 			panic("calling unlock_memory() on unmapped memory!");
5668 
5669 		// update wiring
5670 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5671 		if (page == NULL)
5672 			panic("couldn't lookup physical page");
5673 
5674 		decrement_page_wired_count(page);
5675 	}
5676 
5677 out:
5678 	vm_put_address_space(addressSpace);
5679 	return status;
5680 }
5681 
5682 
5683 status_t
5684 unlock_memory(void* address, size_t numBytes, uint32 flags)
5685 {
5686 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5687 }
5688 
5689 
5690 /*!	Similar to get_memory_map(), but also allows specifying the address space
5691 	for the memory in question and has saner semantics.
5692 	Returns \c B_OK when the complete range could be translated or
5693 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5694 	case the actual number of entries is written to \c *_numEntries. Any other
5695 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5696 	in this case.
5697 */
5698 status_t
5699 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5700 	physical_entry* table, uint32* _numEntries)
5701 {
5702 	uint32 numEntries = *_numEntries;
5703 	*_numEntries = 0;
5704 
5705 	vm_address_space* addressSpace;
5706 	addr_t virtualAddress = (addr_t)address;
5707 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5708 	addr_t physicalAddress;
5709 	status_t status = B_OK;
5710 	int32 index = -1;
5711 	addr_t offset = 0;
5712 	bool interrupts = are_interrupts_enabled();
5713 
5714 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5715 		address, numBytes, numEntries));
5716 
5717 	if (numEntries == 0 || numBytes == 0)
5718 		return B_BAD_VALUE;
5719 
5720 	// in which address space is the address to be found?
5721 	if (IS_USER_ADDRESS(virtualAddress)) {
5722 		if (team == B_CURRENT_TEAM)
5723 			addressSpace = vm_get_current_user_address_space();
5724 		else
5725 			addressSpace = vm_get_address_space(team);
5726 	} else
5727 		addressSpace = vm_get_kernel_address_space();
5728 
5729 	if (addressSpace == NULL)
5730 		return B_ERROR;
5731 
5732 	vm_translation_map* map = &addressSpace->translation_map;
5733 
5734 	if (interrupts)
5735 		map->ops->lock(map);
5736 
5737 	while (offset < numBytes) {
5738 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5739 		uint32 flags;
5740 
5741 		if (interrupts) {
5742 			status = map->ops->query(map, (addr_t)address + offset,
5743 				&physicalAddress, &flags);
5744 		} else {
5745 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
5746 				&physicalAddress, &flags);
5747 		}
5748 		if (status < B_OK)
5749 			break;
5750 		if ((flags & PAGE_PRESENT) == 0) {
5751 			panic("get_memory_map() called on unmapped memory!");
5752 			return B_BAD_ADDRESS;
5753 		}
5754 
5755 		if (index < 0 && pageOffset > 0) {
5756 			physicalAddress += pageOffset;
5757 			if (bytes > B_PAGE_SIZE - pageOffset)
5758 				bytes = B_PAGE_SIZE - pageOffset;
5759 		}
5760 
5761 		// need to switch to the next physical_entry?
5762 		if (index < 0 || (addr_t)table[index].address
5763 				!= physicalAddress - table[index].size) {
5764 			if ((uint32)++index + 1 > numEntries) {
5765 				// table too small
5766 				status = B_BUFFER_OVERFLOW;
5767 				break;
5768 			}
5769 			table[index].address = (void*)physicalAddress;
5770 			table[index].size = bytes;
5771 		} else {
5772 			// the page is physically contiguous with the current entry
5773 			table[index].size += bytes;
5774 		}
5775 
5776 		offset += bytes;
5777 	}
5778 
5779 	if (interrupts)
5780 		map->ops->unlock(map);
5781 
5782 	if (status != B_OK)
5783 		return status;
5784 
5785 	if ((uint32)index + 1 > numEntries) {
5786 		*_numEntries = index;
5787 		return B_BUFFER_OVERFLOW;
5788 	}
5789 
5790 	*_numEntries = index + 1;
5791 	return B_OK;
5792 }
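
/*	A sketch of how get_memory_map_etc() might typically be called; "buffer"
	and "length" are hypothetical. On B_BUFFER_OVERFLOW the entries that were
	filled in are still usable; their number is returned via the in/out count.

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (status != B_OK && status != B_BUFFER_OVERFLOW)
			return status;

		for (uint32 i = 0; i < count; i++) {
			dprintf("run %lu: %p, %lu bytes\n", i, entries[i].address,
				entries[i].size);
		}
*/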
5793 
5794 
5795 /*!	According to the BeBook, this function should always succeed.
5796 	This is no longer the case.
5797 */
5798 long
5799 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
5800 	long numEntries)
5801 {
5802 	uint32 entriesRead = numEntries;
5803 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5804 		table, &entriesRead);
5805 	if (error != B_OK)
5806 		return error;
5807 
5808 	// close the entry list
5809 
5810 	// if it's only one entry, we will silently accept the missing ending
5811 	if (numEntries == 1)
5812 		return B_OK;
5813 
5814 	if (entriesRead + 1 > (uint32)numEntries)
5815 		return B_BUFFER_OVERFLOW;
5816 
5817 	table[entriesRead].address = NULL;
5818 	table[entriesRead].size = 0;
5819 
5820 	return B_OK;
5821 }
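
/*	Sketch of the traditional BeOS-style iteration over the table filled in by
	get_memory_map(); the terminating entry appended above has a size of 0.
	"buffer" and "length" are again hypothetical.

		physical_entry table[8];
		if (get_memory_map(buffer, length, table, 8) == B_OK) {
			for (long i = 0; table[i].size != 0; i++) {
				dprintf("run %ld: %p, %lu bytes\n", i, table[i].address,
					table[i].size);
			}
		}
*/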
5822 
5823 
5824 area_id
5825 area_for(void* address)
5826 {
5827 	team_id space;
5828 
5829 	if (IS_USER_ADDRESS(address)) {
5830 		// we try the user team address space, if any
5831 		space = vm_current_user_address_space_id();
5832 		if (space < B_OK)
5833 			return space;
5834 	} else
5835 		space = vm_kernel_address_space_id();
5836 
5837 	return vm_area_for(space, (addr_t)address);
5838 }
5839 
5840 
5841 area_id
5842 find_area(const char* name)
5843 {
5844 	rw_lock_read_lock(&sAreaHashLock);
5845 	struct hash_iterator iterator;
5846 	hash_open(sAreaHash, &iterator);
5847 
5848 	vm_area* area;
5849 	area_id id = B_NAME_NOT_FOUND;
5850 	while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) {
5851 		if (area->id == RESERVED_AREA_ID)
5852 			continue;
5853 
5854 		if (!strcmp(area->name, name)) {
5855 			id = area->id;
5856 			break;
5857 		}
5858 	}
5859 
5860 	hash_close(sAreaHash, &iterator, false);
5861 	rw_lock_read_unlock(&sAreaHashLock);
5862 
5863 	return id;
5864 }
5865 
5866 
5867 status_t
5868 _get_area_info(area_id id, area_info* info, size_t size)
5869 {
5870 	if (size != sizeof(area_info) || info == NULL)
5871 		return B_BAD_VALUE;
5872 
5873 	AddressSpaceReadLocker locker;
5874 	vm_area* area;
5875 	status_t status = locker.SetFromArea(id, area);
5876 	if (status != B_OK)
5877 		return status;
5878 
5879 	fill_area_info(area, info, size);
5880 	return B_OK;
5881 }
5882 
5883 
5884 status_t
5885 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5886 {
5887 	addr_t nextBase = *(addr_t*)cookie;
5888 
5889 	// we're already through the list
5890 	if (nextBase == (addr_t)-1)
5891 		return B_ENTRY_NOT_FOUND;
5892 
5893 	if (team == B_CURRENT_TEAM)
5894 		team = team_get_current_team_id();
5895 
5896 	AddressSpaceReadLocker locker(team);
5897 	if (!locker.IsLocked())
5898 		return B_BAD_TEAM_ID;
5899 
5900 	vm_area* area;
5901 	for (area = locker.AddressSpace()->areas; area != NULL;
5902 			area = area->address_space_next) {
5903 		if (area->id == RESERVED_AREA_ID)
5904 			continue;
5905 
5906 		if (area->base > nextBase)
5907 			break;
5908 	}
5909 
5910 	if (area == NULL) {
5911 		*(addr_t*)cookie = (addr_t)-1;
5912 		return B_ENTRY_NOT_FOUND;
5913 	}
5914 
5915 	fill_area_info(area, info, size);
5916 	*cookie = (int32)(area->base);
5917 
5918 	return B_OK;
5919 }
5920 
5921 
5922 status_t
5923 set_area_protection(area_id area, uint32 newProtection)
5924 {
5925 	fix_protection(&newProtection);
5926 
5927 	return vm_set_area_protection(vm_kernel_address_space_id(), area,
5928 		newProtection, true);
5929 }
5930 
5931 
5932 status_t
5933 resize_area(area_id areaID, size_t newSize)
5934 {
5935 	return vm_resize_area(areaID, newSize, true);
5936 }
5937 
5938 
5939 /*!	Transfers the specified area to a new team. The caller must be the owner
5940 	of the area (not yet enforced but probably should be).
5941 	This function is currently not exported to the kernel namespace, but is
5942 	only accessible using the _kern_transfer_area() syscall.
5943 */
5944 static area_id
5945 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5946 	bool kernel)
5947 {
5948 	area_info info;
5949 	status_t status = get_area_info(id, &info);
5950 	if (status < B_OK)
5951 		return status;
5952 
5953 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5954 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5955 	if (clonedArea < B_OK)
5956 		return clonedArea;
5957 
5958 	status = vm_delete_area(info.team, id, kernel);
5959 	if (status < B_OK) {
5960 		vm_delete_area(target, clonedArea, kernel);
5961 		return status;
5962 	}
5963 
5964 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5965 
5966 	return clonedArea;
5967 }
5968 
5969 
5970 area_id
5971 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
5972 	uint32 addressSpec, uint32 protection, void** _virtualAddress)
5973 {
5974 	if (!arch_vm_supports_protection(protection))
5975 		return B_NOT_SUPPORTED;
5976 
5977 	fix_protection(&protection);
5978 
5979 	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
5980 		_virtualAddress, addressSpec, numBytes, protection,
5981 		(addr_t)physicalAddress);
5982 }
5983 
5984 
5985 area_id
5986 clone_area(const char* name, void** _address, uint32 addressSpec,
5987 	uint32 protection, area_id source)
5988 {
5989 	if ((protection & B_KERNEL_PROTECTION) == 0)
5990 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5991 
5992 	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
5993 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5994 }
5995 
5996 
5997 area_id
5998 create_area_etc(team_id team, const char* name, void** address,
5999 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
6000 	uint32 flags)
6001 {
6002 	fix_protection(&protection);
6003 
6004 	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
6005 		size, lock, protection, flags, true);
6006 }
6007 
6008 
6009 area_id
6010 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
6011 	uint32 lock, uint32 protection)
6012 {
6013 	fix_protection(&protection);
6014 
6015 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name,
6016 		_address, addressSpec, size, lock, protection, 0, true);
6017 }
6018 
6019 
6020 status_t
6021 delete_area(area_id area)
6022 {
6023 	return vm_delete_area(vm_kernel_address_space_id(), area, true);
6024 }
6025 
6026 
6027 //	#pragma mark - Userland syscalls
6028 
6029 
6030 status_t
6031 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec,
6032 	addr_t size)
6033 {
6034 	// filter out some unavailable values (for userland)
6035 	switch (addressSpec) {
6036 		case B_ANY_KERNEL_ADDRESS:
6037 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6038 			return B_BAD_VALUE;
6039 	}
6040 
6041 	addr_t address;
6042 
6043 	if (!IS_USER_ADDRESS(userAddress)
6044 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6045 		return B_BAD_ADDRESS;
6046 
6047 	status_t status = vm_reserve_address_range(
6048 		vm_current_user_address_space_id(), (void**)&address, addressSpec, size,
6049 		RESERVED_AVOID_BASE);
6050 	if (status < B_OK)
6051 		return status;
6052 
6053 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6054 		vm_unreserve_address_range(vm_current_user_address_space_id(),
6055 			(void*)address, size);
6056 		return B_BAD_ADDRESS;
6057 	}
6058 
6059 	return B_OK;
6060 }
6061 
6062 
6063 area_id
6064 _user_area_for(void* address)
6065 {
6066 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
6067 }
6068 
6069 
6070 area_id
6071 _user_find_area(const char* userName)
6072 {
6073 	char name[B_OS_NAME_LENGTH];
6074 
6075 	if (!IS_USER_ADDRESS(userName)
6076 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6077 		return B_BAD_ADDRESS;
6078 
6079 	return find_area(name);
6080 }
6081 
6082 
6083 status_t
6084 _user_get_area_info(area_id area, area_info* userInfo)
6085 {
6086 	if (!IS_USER_ADDRESS(userInfo))
6087 		return B_BAD_ADDRESS;
6088 
6089 	area_info info;
6090 	status_t status = get_area_info(area, &info);
6091 	if (status < B_OK)
6092 		return status;
6093 
6094 	// TODO: do we want to prevent userland from seeing kernel protections?
6095 	//info.protection &= B_USER_PROTECTION;
6096 
6097 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6098 		return B_BAD_ADDRESS;
6099 
6100 	return status;
6101 }
6102 
6103 
6104 status_t
6105 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
6106 {
6107 	int32 cookie;
6108 
6109 	if (!IS_USER_ADDRESS(userCookie)
6110 		|| !IS_USER_ADDRESS(userInfo)
6111 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
6112 		return B_BAD_ADDRESS;
6113 
6114 	area_info info;
6115 	status_t status = _get_next_area_info(team, &cookie, &info,
6116 		sizeof(area_info));
6117 	if (status != B_OK)
6118 		return status;
6119 
6120 	//info.protection &= B_USER_PROTECTION;
6121 
6122 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
6123 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6124 		return B_BAD_ADDRESS;
6125 
6126 	return status;
6127 }
6128 
6129 
6130 status_t
6131 _user_set_area_protection(area_id area, uint32 newProtection)
6132 {
6133 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6134 		return B_BAD_VALUE;
6135 
6136 	fix_protection(&newProtection);
6137 
6138 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
6139 		newProtection, false);
6140 }
6141 
6142 
6143 status_t
6144 _user_resize_area(area_id area, size_t newSize)
6145 {
6146 	// TODO: Since we restrict deletion of areas to those owned by the team,
6147 	// we should also do that for resizing (check other functions, too).
6148 	return vm_resize_area(area, newSize, false);
6149 }
6150 
6151 
6152 area_id
6153 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6154 	team_id target)
6155 {
6156 	// filter out some unavailable values (for userland)
6157 	switch (addressSpec) {
6158 		case B_ANY_KERNEL_ADDRESS:
6159 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6160 			return B_BAD_VALUE;
6161 	}
6162 
6163 	void* address;
6164 	if (!IS_USER_ADDRESS(userAddress)
6165 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6166 		return B_BAD_ADDRESS;
6167 
6168 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6169 	if (newArea < B_OK)
6170 		return newArea;
6171 
6172 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6173 		return B_BAD_ADDRESS;
6174 
6175 	return newArea;
6176 }
6177 
6178 
6179 area_id
6180 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6181 	uint32 protection, area_id sourceArea)
6182 {
6183 	char name[B_OS_NAME_LENGTH];
6184 	void* address;
6185 
6186 	// filter out some unavailable values (for userland)
6187 	switch (addressSpec) {
6188 		case B_ANY_KERNEL_ADDRESS:
6189 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6190 			return B_BAD_VALUE;
6191 	}
6192 	if ((protection & ~B_USER_PROTECTION) != 0)
6193 		return B_BAD_VALUE;
6194 
6195 	if (!IS_USER_ADDRESS(userName)
6196 		|| !IS_USER_ADDRESS(userAddress)
6197 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6198 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6199 		return B_BAD_ADDRESS;
6200 
6201 	fix_protection(&protection);
6202 
6203 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
6204 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6205 		false);
6206 	if (clonedArea < B_OK)
6207 		return clonedArea;
6208 
6209 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6210 		delete_area(clonedArea);
6211 		return B_BAD_ADDRESS;
6212 	}
6213 
6214 	return clonedArea;
6215 }
6216 
6217 
6218 area_id
6219 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6220 	size_t size, uint32 lock, uint32 protection)
6221 {
6222 	char name[B_OS_NAME_LENGTH];
6223 	void* address;
6224 
6225 	// filter out some unavailable values (for userland)
6226 	switch (addressSpec) {
6227 		case B_ANY_KERNEL_ADDRESS:
6228 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6229 			return B_BAD_VALUE;
6230 	}
6231 	if ((protection & ~B_USER_PROTECTION) != 0)
6232 		return B_BAD_VALUE;
6233 
6234 	if (!IS_USER_ADDRESS(userName)
6235 		|| !IS_USER_ADDRESS(userAddress)
6236 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6237 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6238 		return B_BAD_ADDRESS;
6239 
6240 	if (addressSpec == B_EXACT_ADDRESS
6241 		&& IS_KERNEL_ADDRESS(address))
6242 		return B_BAD_VALUE;
6243 
6244 	fix_protection(&protection);
6245 
6246 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
6247 		(char*)name, &address, addressSpec, size, lock, protection, 0, false);
6248 
6249 	if (area >= B_OK
6250 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6251 		delete_area(area);
6252 		return B_BAD_ADDRESS;
6253 	}
6254 
6255 	return area;
6256 }
6257 
6258 
6259 status_t
6260 _user_delete_area(area_id area)
6261 {
6262 	// Unlike the BeOS implementation, you can now only delete areas
6263 	// that you have created yourself from userland.
6264 	// The documentation to delete_area() explicitly states that this
6265 	// will be restricted in the future, and so it will.
6266 	return vm_delete_area(vm_current_user_address_space_id(), area, false);
6267 }
6268 
6269 
6270 // TODO: create a BeOS style call for this!
6271 
6272 area_id
6273 _user_map_file(const char* userName, void** userAddress, int addressSpec,
6274 	size_t size, int protection, int mapping, int fd, off_t offset)
6275 {
6276 	char name[B_OS_NAME_LENGTH];
6277 	void* address;
6278 	area_id area;
6279 
6280 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6281 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6282 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6283 		return B_BAD_ADDRESS;
6284 
6285 	if (addressSpec == B_EXACT_ADDRESS) {
6286 		if ((addr_t)address + size < (addr_t)address)
6287 			return B_BAD_VALUE;
6288 		if (!IS_USER_ADDRESS(address)
6289 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6290 			return B_BAD_ADDRESS;
6291 		}
6292 	}
6293 
6294 	// userland-created areas can always be accessed by the kernel
6295 	protection |= B_KERNEL_READ_AREA
6296 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6297 
6298 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
6299 		addressSpec, size, protection, mapping, fd, offset, false);
6300 	if (area < B_OK)
6301 		return area;
6302 
6303 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6304 		return B_BAD_ADDRESS;
6305 
6306 	return area;
6307 }
6308 
6309 
6310 status_t
6311 _user_unmap_memory(void* _address, size_t size)
6312 {
6313 	addr_t address = (addr_t)_address;
6314 
6315 	// check params
6316 	if (size == 0 || (addr_t)address + size < (addr_t)address)
6317 		return B_BAD_VALUE;
6318 
6319 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6320 		return B_BAD_ADDRESS;
6321 
6322 	// write lock the address space
6323 	AddressSpaceWriteLocker locker;
6324 	status_t status = locker.SetTo(team_get_current_team_id());
6325 	if (status != B_OK)
6326 		return status;
6327 
6328 	// unmap
6329 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6330 }
6331 
6332 
6333 status_t
6334 _user_set_memory_protection(void* _address, size_t size, int protection)
6335 {
6336 	// check address range
6337 	addr_t address = (addr_t)_address;
6338 	size = PAGE_ALIGN(size);
6339 
6340 	if ((address % B_PAGE_SIZE) != 0)
6341 		return B_BAD_VALUE;
6342 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6343 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6344 		// weird error code required by POSIX
6345 		return ENOMEM;
6346 	}
6347 
6348 	// extend and check protection
6349 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
6350 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
6351 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
6352 
6353 	if (!arch_vm_supports_protection(actualProtection))
6354 		return B_NOT_SUPPORTED;
6355 
6356 	// We need to write lock the address space, since we're going to play with
6357 	// the areas.
6358 	AddressSpaceWriteLocker locker;
6359 	status_t status = locker.SetTo(team_get_current_team_id());
6360 	if (status != B_OK)
6361 		return status;
6362 
6363 	// First round: Check whether the whole range is covered by areas and
6364 	// whether we are allowed to modify them.
6365 	addr_t currentAddress = address;
6366 	size_t sizeLeft = size;
6367 	while (sizeLeft > 0) {
6368 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6369 		if (area == NULL)
6370 			return B_NO_MEMORY;
6371 
6372 		if ((area->protection & B_KERNEL_AREA) != 0)
6373 			return B_NOT_ALLOWED;
6374 
6375 		// TODO: For (shared) mapped files we should check whether the new
6376 		// protections are compatible with the file permissions. We don't have
6377 		// a way to do that yet, though.
6378 
6379 		addr_t offset = currentAddress - area->base;
6380 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6381 
6382 		currentAddress += rangeSize;
6383 		sizeLeft -= rangeSize;
6384 	}
6385 
6386 	// Second round: If the protections differ from those of the area, create a
6387 	// page protection array and re-map mapped pages.
6388 	vm_translation_map* map = &locker.AddressSpace()->translation_map;
6389 	currentAddress = address;
6390 	sizeLeft = size;
6391 	while (sizeLeft > 0) {
6392 		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
6393 		if (area == NULL)
6394 			return B_NO_MEMORY;
6395 
6396 		addr_t offset = currentAddress - area->base;
6397 		size_t rangeSize = min_c(area->size - offset, sizeLeft);
6398 
6399 		currentAddress += rangeSize;
6400 		sizeLeft -= rangeSize;
6401 
6402 		if (area->page_protections == NULL) {
6403 			if (area->protection == actualProtection)
6404 				continue;
6405 
6406 			// In the page protections we store only the three user protections,
6407 			// so we use 4 bits per page.
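			// Each byte thus covers two consecutive pages, one protection
			// nibble per page (cf. set_area_page_protection()).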
6408 			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
6409 			area->page_protections = (uint8*)malloc(bytes);
6410 			if (area->page_protections == NULL)
6411 				return B_NO_MEMORY;
6412 
6413 			// init the page protections for all pages to that of the area
6414 			uint32 areaProtection = area->protection
6415 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6416 			memset(area->page_protections,
6417 				areaProtection | (areaProtection << 4), bytes);
6418 		}
6419 
6420 		for (addr_t pageAddress = area->base + offset;
6421 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6422 			map->ops->lock(map);
6423 
6424 			set_area_page_protection(area, pageAddress, protection);
6425 
6426 			addr_t physicalAddress;
6427 			uint32 flags;
6428 
6429 			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
6430 				&flags);
6431 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6432 				map->ops->unlock(map);
6433 				continue;
6434 			}
6435 
6436 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6437 			if (page == NULL) {
6438 				panic("area %p looking up page failed for pa 0x%lx\n", area,
6439 					physicalAddress);
6440 				map->ops->unlock(map);
6441 				return B_ERROR;
6442 			}
6443 
6444 			// If the page is not in the topmost cache and write access is
6445 			// requested, we have to unmap it. Otherwise we can re-map it with
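			// Unmapping forces the next write to take the fault path, so that
			// copy-on-write can first pull the page into this area's cache.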
6446 			// the new protection.
6447 			bool unmapPage = page->cache != area->cache
6448 				&& (protection & B_WRITE_AREA) != 0;
6449 
6450 			if (!unmapPage) {
6451 				map->ops->unmap(map, pageAddress,
6452 					pageAddress + B_PAGE_SIZE - 1);
6453 				map->ops->map(map, pageAddress, physicalAddress,
6454 					actualProtection);
6455 			}
6456 
6457 			map->ops->unlock(map);
6458 
6459 			if (unmapPage)
6460 				vm_unmap_page(area, pageAddress, true);
6461 		}
6462 	}
6463 
6464 	return B_OK;
6465 }
6466 
6467 
6468 status_t
6469 _user_sync_memory(void* _address, size_t size, int flags)
6470 {
6471 	addr_t address = (addr_t)_address;
6472 	size = PAGE_ALIGN(size);
6473 
6474 	// check params
6475 	if ((address % B_PAGE_SIZE) != 0)
6476 		return B_BAD_VALUE;
6477 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6478 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6479 		// weird error code required by POSIX
6480 		return ENOMEM;
6481 	}
6482 
6483 	bool writeSync = (flags & MS_SYNC) != 0;
6484 	bool writeAsync = (flags & MS_ASYNC) != 0;
6485 	if (writeSync && writeAsync)
6486 		return B_BAD_VALUE;
6487 
6488 	if (size == 0 || (!writeSync && !writeAsync))
6489 		return B_OK;
6490 
6491 	// iterate through the range and sync all concerned areas
6492 	while (size > 0) {
6493 		// read lock the address space
6494 		AddressSpaceReadLocker locker;
6495 		status_t error = locker.SetTo(team_get_current_team_id());
6496 		if (error != B_OK)
6497 			return error;
6498 
6499 		// get the first area
6500 		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
6501 		if (area == NULL)
6502 			return B_NO_MEMORY;
6503 
6504 		uint32 offset = address - area->base;
6505 		size_t rangeSize = min_c(area->size - offset, size);
6506 		offset += area->cache_offset;
6507 
6508 		// lock the cache
6509 		AreaCacheLocker cacheLocker(area);
6510 		if (!cacheLocker)
6511 			return B_BAD_VALUE;
6512 		vm_cache* cache = area->cache;
6513 
6514 		locker.Unlock();
6515 
6516 		uint32 firstPage = offset >> PAGE_SHIFT;
6517 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6518 
6519 		// write the pages
6520 		if (cache->type == CACHE_TYPE_VNODE) {
6521 			if (writeSync) {
6522 				// synchronous
6523 				error = vm_page_write_modified_page_range(cache, firstPage,
6524 					endPage);
6525 				if (error != B_OK)
6526 					return error;
6527 			} else {
6528 				// asynchronous
6529 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6530 				// TODO: This is probably not quite what is supposed to happen.
6531 				// Especially when a lot has to be written, it might take ages
6532 				// until it really hits the disk.
6533 			}
6534 		}
6535 
6536 		address += rangeSize;
6537 		size -= rangeSize;
6538 	}
6539 
6540 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6541 	// synchronize multiple mappings of the same file. In our VM they never get
6542 	// out of sync, though, so we don't have to do anything.
6543 
6544 	return B_OK;
6545 }
6546 
6547 
6548 status_t
6549 _user_memory_advice(void* address, size_t size, int advice)
6550 {
6551 	// TODO: Implement!
6552 	return B_OK;
6553 }
6554