xref: /haiku/src/system/kernel/vm/vm.cpp (revision 1978089f7cec856677e46204e992c7273d70b9af)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
117 
118 
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligable for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
235 
236 } // namespace
237 
238 
239 // The memory reserve an allocation of the certain priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 ObjectCache* gPageMappingsObjectCache;
248 
249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
250 
251 static off_t sAvailableMemory;
252 static off_t sNeededMemory;
253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
254 static uint32 sPageFaults;
255 
256 static VMPhysicalPageMapper* sPhysicalPageMapper;
257 
258 #if DEBUG_CACHE_LIST
259 
260 struct cache_info {
261 	VMCache*	cache;
262 	addr_t		page_count;
263 	addr_t		committed;
264 };
265 
266 static const int kCacheInfoTableCount = 100 * 1024;
267 static cache_info* sCacheInfoTable;
268 
269 #endif	// DEBUG_CACHE_LIST
270 
271 
272 // function declarations
273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
274 	bool addressSpaceCleanup);
275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
276 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
277 static status_t map_backing_store(VMAddressSpace* addressSpace,
278 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
279 	int protection, int protectionMax, int mapping, uint32 flags,
280 	const virtual_address_restrictions* addressRestrictions, bool kernel,
281 	VMArea** _area, void** _virtualAddress);
282 static void fix_protection(uint32* protection);
283 
284 
285 //	#pragma mark -
286 
287 
288 #if VM_PAGE_FAULT_TRACING
289 
290 namespace VMPageFaultTracing {
291 
292 class PageFaultStart : public AbstractTraceEntry {
293 public:
294 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
295 		:
296 		fAddress(address),
297 		fPC(pc),
298 		fWrite(write),
299 		fUser(user)
300 	{
301 		Initialized();
302 	}
303 
304 	virtual void AddDump(TraceOutput& out)
305 	{
306 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
307 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
308 	}
309 
310 private:
311 	addr_t	fAddress;
312 	addr_t	fPC;
313 	bool	fWrite;
314 	bool	fUser;
315 };
316 
317 
// Page fault error codes understood by PageFaultError::AddDump(). Any other
// value is printed there via strerror().
enum {
	PAGE_FAULT_ERROR_NO_AREA				= 0,
	PAGE_FAULT_ERROR_KERNEL_ONLY			= 1,
	PAGE_FAULT_ERROR_WRITE_PROTECTED		= 2,
	PAGE_FAULT_ERROR_READ_PROTECTED			= 3,
	PAGE_FAULT_ERROR_EXECUTE_PROTECTED		= 4,
	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY	= 5,
	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE		= 6
};
328 
329 
330 class PageFaultError : public AbstractTraceEntry {
331 public:
332 	PageFaultError(area_id area, status_t error)
333 		:
334 		fArea(area),
335 		fError(error)
336 	{
337 		Initialized();
338 	}
339 
340 	virtual void AddDump(TraceOutput& out)
341 	{
342 		switch (fError) {
343 			case PAGE_FAULT_ERROR_NO_AREA:
344 				out.Print("page fault error: no area");
345 				break;
346 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
347 				out.Print("page fault error: area: %ld, kernel only", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
350 				out.Print("page fault error: area: %ld, write protected",
351 					fArea);
352 				break;
353 			case PAGE_FAULT_ERROR_READ_PROTECTED:
354 				out.Print("page fault error: area: %ld, read protected", fArea);
355 				break;
356 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
357 				out.Print("page fault error: area: %ld, execute protected",
358 					fArea);
359 				break;
360 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
361 				out.Print("page fault error: kernel touching bad user memory");
362 				break;
363 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
364 				out.Print("page fault error: no address space");
365 				break;
366 			default:
367 				out.Print("page fault error: area: %ld, error: %s", fArea,
368 					strerror(fError));
369 				break;
370 		}
371 	}
372 
373 private:
374 	area_id		fArea;
375 	status_t	fError;
376 };
377 
378 
379 class PageFaultDone : public AbstractTraceEntry {
380 public:
381 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
382 			vm_page* page)
383 		:
384 		fArea(area),
385 		fTopCache(topCache),
386 		fCache(cache),
387 		fPage(page)
388 	{
389 		Initialized();
390 	}
391 
392 	virtual void AddDump(TraceOutput& out)
393 	{
394 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
395 			"page: %p", fArea, fTopCache, fCache, fPage);
396 	}
397 
398 private:
399 	area_id		fArea;
400 	VMCache*	fTopCache;
401 	VMCache*	fCache;
402 	vm_page*	fPage;
403 };
404 
405 }	// namespace VMPageFaultTracing
406 
407 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
408 #else
409 #	define TPF(x) ;
410 #endif	// VM_PAGE_FAULT_TRACING
411 
412 
413 //	#pragma mark -
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 increment_page_wired_count(vm_page* page)
420 {
421 	if (!page->IsMapped())
422 		atomic_add(&gMappedPagesCount, 1);
423 	page->IncrementWiredCount();
424 }
425 
426 
427 /*!	The page's cache must be locked.
428 */
429 static inline void
430 decrement_page_wired_count(vm_page* page)
431 {
432 	page->DecrementWiredCount();
433 	if (!page->IsMapped())
434 		atomic_add(&gMappedPagesCount, -1);
435 }
436 
437 
438 static inline addr_t
439 virtual_page_address(VMArea* area, vm_page* page)
440 {
441 	return area->Base()
442 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
443 }
444 
445 
446 static inline bool
447 is_page_in_area(VMArea* area, vm_page* page)
448 {
449 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
450 	return pageCacheOffsetBytes >= area->cache_offset
451 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
452 }
453 
454 
455 //! You need to have the address space locked when calling this function
456 static VMArea*
457 lookup_area(VMAddressSpace* addressSpace, area_id id)
458 {
459 	VMAreas::ReadLock();
460 
461 	VMArea* area = VMAreas::LookupLocked(id);
462 	if (area != NULL && area->address_space != addressSpace)
463 		area = NULL;
464 
465 	VMAreas::ReadUnlock();
466 
467 	return area;
468 }
469 
470 
471 static inline size_t
472 area_page_protections_size(size_t areaSize)
473 {
474 	// In the page protections we store only the three user protections,
475 	// so we use 4 bits per page.
476 	return (areaSize / B_PAGE_SIZE + 1) / 2;
477 }
478 
479 
480 static status_t
481 allocate_area_page_protections(VMArea* area)
482 {
483 	size_t bytes = area_page_protections_size(area->Size());
484 	area->page_protections = (uint8*)malloc_etc(bytes,
485 		area->address_space == VMAddressSpace::Kernel()
486 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
487 	if (area->page_protections == NULL)
488 		return B_NO_MEMORY;
489 
490 	// init the page protections for all pages to that of the area
491 	uint32 areaProtection = area->protection
492 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
493 	memset(area->page_protections, areaProtection | (areaProtection << 4),
494 		bytes);
495 	return B_OK;
496 }
497 
498 
499 static inline void
500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
501 {
502 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
503 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
504 	uint8& entry = area->page_protections[pageIndex / 2];
505 	if (pageIndex % 2 == 0)
506 		entry = (entry & 0xf0) | protection;
507 	else
508 		entry = (entry & 0x0f) | (protection << 4);
509 }
510 
511 
512 static inline uint32
513 get_area_page_protection(VMArea* area, addr_t pageAddress)
514 {
515 	if (area->page_protections == NULL)
516 		return area->protection;
517 
518 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
519 	uint32 protection = area->page_protections[pageIndex / 2];
520 	if (pageIndex % 2 == 0)
521 		protection &= 0x0f;
522 	else
523 		protection >>= 4;
524 
525 	uint32 kernelProtection = 0;
526 	if ((protection & B_READ_AREA) != 0)
527 		kernelProtection |= B_KERNEL_READ_AREA;
528 	if ((protection & B_WRITE_AREA) != 0)
529 		kernelProtection |= B_KERNEL_WRITE_AREA;
530 
531 	// If this is a kernel area we return only the kernel flags.
532 	if (area->address_space == VMAddressSpace::Kernel())
533 		return kernelProtection;
534 
535 	return protection | kernelProtection;
536 }
537 
538 
539 static inline uint8*
540 realloc_page_protections(uint8* pageProtections, size_t areaSize,
541 	uint32 allocationFlags)
542 {
543 	size_t bytes = area_page_protections_size(areaSize);
544 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
545 }
546 
547 
548 /*!	The caller must have reserved enough pages the translation map
549 	implementation might need to map this page.
550 	The page's cache must be locked.
551 */
552 static status_t
553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
554 	vm_page_reservation* reservation)
555 {
556 	VMTranslationMap* map = area->address_space->TranslationMap();
557 
558 	bool wasMapped = page->IsMapped();
559 
560 	if (area->wiring == B_NO_LOCK) {
561 		DEBUG_PAGE_ACCESS_CHECK(page);
562 
563 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
564 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
565 			gPageMappingsObjectCache,
566 			CACHE_DONT_WAIT_FOR_MEMORY
567 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
568 		if (mapping == NULL)
569 			return B_NO_MEMORY;
570 
571 		mapping->page = page;
572 		mapping->area = area;
573 
574 		map->Lock();
575 
576 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
577 			area->MemoryType(), reservation);
578 
579 		// insert mapping into lists
580 		if (!page->IsMapped())
581 			atomic_add(&gMappedPagesCount, 1);
582 
583 		page->mappings.Add(mapping);
584 		area->mappings.Add(mapping);
585 
586 		map->Unlock();
587 	} else {
588 		DEBUG_PAGE_ACCESS_CHECK(page);
589 
590 		map->Lock();
591 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
592 			area->MemoryType(), reservation);
593 		map->Unlock();
594 
595 		increment_page_wired_count(page);
596 	}
597 
598 	if (!wasMapped) {
599 		// The page is mapped now, so we must not remain in the cached queue.
600 		// It also makes sense to move it from the inactive to the active, since
601 		// otherwise the page daemon wouldn't come to keep track of it (in idle
602 		// mode) -- if the page isn't touched, it will be deactivated after a
603 		// full iteration through the queue at the latest.
604 		if (page->State() == PAGE_STATE_CACHED
605 				|| page->State() == PAGE_STATE_INACTIVE) {
606 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
607 		}
608 	}
609 
610 	return B_OK;
611 }
612 
613 
614 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
615 	page's cache.
616 */
617 static inline bool
618 unmap_page(VMArea* area, addr_t virtualAddress)
619 {
620 	return area->address_space->TranslationMap()->UnmapPage(area,
621 		virtualAddress, true);
622 }
623 
624 
625 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
626 	mapped pages' caches.
627 */
628 static inline void
629 unmap_pages(VMArea* area, addr_t base, size_t size)
630 {
631 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
632 }
633 
634 
635 static inline bool
636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
637 {
638 	if (address < area->Base()) {
639 		offset = area->Base() - address;
640 		if (offset >= size)
641 			return false;
642 
643 		address = area->Base();
644 		size -= offset;
645 		offset = 0;
646 		if (size > area->Size())
647 			size = area->Size();
648 
649 		return true;
650 	}
651 
652 	offset = address - area->Base();
653 	if (offset >= area->Size())
654 		return false;
655 
656 	if (size >= area->Size() - offset)
657 		size = area->Size() - offset;
658 
659 	return true;
660 }
661 
662 
663 /*!	Cuts a piece out of an area. If the given cut range covers the complete
664 	area, it is deleted. If it covers the beginning or the end, the area is
665 	resized accordingly. If the range covers some part in the middle of the
666 	area, it is split in two; in this case the second area is returned via
667 	\a _secondArea (the variable is left untouched in the other cases).
668 	The address space must be write locked.
669 	The caller must ensure that no part of the given range is wired.
670 */
671 static status_t
672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
673 	addr_t size, VMArea** _secondArea, bool kernel)
674 {
675 	addr_t offset;
676 	if (!intersect_area(area, address, size, offset))
677 		return B_OK;
678 
679 	// Is the area fully covered?
680 	if (address == area->Base() && size == area->Size()) {
681 		delete_area(addressSpace, area, false);
682 		return B_OK;
683 	}
684 
685 	int priority;
686 	uint32 allocationFlags;
687 	if (addressSpace == VMAddressSpace::Kernel()) {
688 		priority = VM_PRIORITY_SYSTEM;
689 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
690 			| HEAP_DONT_LOCK_KERNEL_SPACE;
691 	} else {
692 		priority = VM_PRIORITY_USER;
693 		allocationFlags = 0;
694 	}
695 
696 	VMCache* cache = vm_area_get_locked_cache(area);
697 	VMCacheChainLocker cacheChainLocker(cache);
698 	cacheChainLocker.LockAllSourceCaches();
699 
700 	// If no one else uses the area's cache and it's an anonymous cache, we can
701 	// resize or split it, too.
702 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
703 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
704 
705 	const addr_t oldSize = area->Size();
706 
707 	// Cut the end only?
708 	if (offset > 0 && size == area->Size() - offset) {
709 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
710 			allocationFlags);
711 		if (error != B_OK)
712 			return error;
713 
714 		if (area->page_protections != NULL) {
715 			uint8* newProtections = realloc_page_protections(
716 				area->page_protections, area->Size(), allocationFlags);
717 
718 			if (newProtections == NULL) {
719 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 				return B_NO_MEMORY;
721 			}
722 
723 			area->page_protections = newProtections;
724 		}
725 
726 		// unmap pages
727 		unmap_pages(area, address, size);
728 
729 		if (onlyCacheUser) {
730 			// Since VMCache::Resize() can temporarily drop the lock, we must
731 			// unlock all lower caches to prevent locking order inversion.
732 			cacheChainLocker.Unlock(cache);
733 			cache->Resize(cache->virtual_base + offset, priority);
734 			cache->ReleaseRefAndUnlock();
735 		}
736 
737 		return B_OK;
738 	}
739 
740 	// Cut the beginning only?
741 	if (area->Base() == address) {
742 		uint8* newProtections = NULL;
743 		if (area->page_protections != NULL) {
744 			// Allocate all memory before shifting as the shift might lose some
745 			// bits.
746 			newProtections = realloc_page_protections(NULL, area->Size(),
747 				allocationFlags);
748 
749 			if (newProtections == NULL)
750 				return B_NO_MEMORY;
751 		}
752 
753 		// resize the area
754 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
755 			allocationFlags);
756 		if (error != B_OK) {
757 			if (newProtections != NULL)
758 				free_etc(newProtections, allocationFlags);
759 			return error;
760 		}
761 
762 		if (area->page_protections != NULL) {
763 			size_t oldBytes = area_page_protections_size(oldSize);
764 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
765 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
766 
767 			size_t bytes = area_page_protections_size(area->Size());
768 			memcpy(newProtections, area->page_protections, bytes);
769 			free_etc(area->page_protections, allocationFlags);
770 			area->page_protections = newProtections;
771 		}
772 
773 		// unmap pages
774 		unmap_pages(area, address, size);
775 
776 		if (onlyCacheUser) {
777 			// Since VMCache::Rebase() can temporarily drop the lock, we must
778 			// unlock all lower caches to prevent locking order inversion.
779 			cacheChainLocker.Unlock(cache);
780 			cache->Rebase(cache->virtual_base + size, priority);
781 			cache->ReleaseRefAndUnlock();
782 		}
783 		area->cache_offset += size;
784 
785 		return B_OK;
786 	}
787 
788 	// The tough part -- cut a piece out of the middle of the area.
789 	// We do that by shrinking the area to the begin section and creating a
790 	// new area for the end section.
791 	addr_t firstNewSize = offset;
792 	addr_t secondBase = address + size;
793 	addr_t secondSize = area->Size() - offset - size;
794 
795 	// unmap pages
796 	unmap_pages(area, address, area->Size() - firstNewSize);
797 
798 	// resize the area
799 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
800 		allocationFlags);
801 	if (error != B_OK)
802 		return error;
803 
804 	uint8* areaNewProtections = NULL;
805 	uint8* secondAreaNewProtections = NULL;
806 
807 	// Try to allocate the new memory before making some hard to reverse
808 	// changes.
809 	if (area->page_protections != NULL) {
810 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
811 			allocationFlags);
812 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
813 			allocationFlags);
814 
815 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
816 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
817 			free_etc(areaNewProtections, allocationFlags);
818 			free_etc(secondAreaNewProtections, allocationFlags);
819 			return B_NO_MEMORY;
820 		}
821 	}
822 
823 	virtual_address_restrictions addressRestrictions = {};
824 	addressRestrictions.address = (void*)secondBase;
825 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
826 	VMArea* secondArea;
827 
828 	if (onlyCacheUser) {
829 		// Create a new cache for the second area.
830 		VMCache* secondCache;
831 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
832 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
833 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
834 		if (error != B_OK) {
835 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
836 			free_etc(areaNewProtections, allocationFlags);
837 			free_etc(secondAreaNewProtections, allocationFlags);
838 			return error;
839 		}
840 
841 		secondCache->Lock();
842 		secondCache->temporary = cache->temporary;
843 		secondCache->virtual_base = area->cache_offset;
844 		secondCache->virtual_end = area->cache_offset + secondSize;
845 
846 		// Transfer the concerned pages from the first cache.
847 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
848 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
849 			area->cache_offset);
850 
851 		if (error == B_OK) {
852 			// Since VMCache::Resize() can temporarily drop the lock, we must
853 			// unlock all lower caches to prevent locking order inversion.
854 			cacheChainLocker.Unlock(cache);
855 			cache->Resize(cache->virtual_base + firstNewSize, priority);
856 			// Don't unlock the cache yet because we might have to resize it
857 			// back.
858 
859 			// Map the second area.
860 			error = map_backing_store(addressSpace, secondCache,
861 				area->cache_offset, area->name, secondSize, area->wiring,
862 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
863 				&addressRestrictions, kernel, &secondArea, NULL);
864 		}
865 
866 		if (error != B_OK) {
867 			// Restore the original cache.
868 			cache->Resize(cache->virtual_base + oldSize, priority);
869 
870 			// Move the pages back.
871 			status_t readoptStatus = cache->Adopt(secondCache,
872 				area->cache_offset, secondSize, adoptOffset);
873 			if (readoptStatus != B_OK) {
874 				// Some (swap) pages have not been moved back and will be lost
875 				// once the second cache is deleted.
876 				panic("failed to restore cache range: %s",
877 					strerror(readoptStatus));
878 
879 				// TODO: Handle out of memory cases by freeing memory and
880 				// retrying.
881 			}
882 
883 			cache->ReleaseRefAndUnlock();
884 			secondCache->ReleaseRefAndUnlock();
885 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
886 			free_etc(areaNewProtections, allocationFlags);
887 			free_etc(secondAreaNewProtections, allocationFlags);
888 			return error;
889 		}
890 
891 		// Now we can unlock it.
892 		cache->ReleaseRefAndUnlock();
893 		secondCache->Unlock();
894 	} else {
895 		error = map_backing_store(addressSpace, cache, area->cache_offset
896 			+ (secondBase - area->Base()),
897 			area->name, secondSize, area->wiring, area->protection,
898 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
899 			&addressRestrictions, kernel, &secondArea, NULL);
900 		if (error != B_OK) {
901 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
902 			free_etc(areaNewProtections, allocationFlags);
903 			free_etc(secondAreaNewProtections, allocationFlags);
904 			return error;
905 		}
906 		// We need a cache reference for the new area.
907 		cache->AcquireRefLocked();
908 	}
909 
910 	if (area->page_protections != NULL) {
911 		// Copy the protection bits of the first area.
912 		size_t areaBytes = area_page_protections_size(area->Size());
913 		memcpy(areaNewProtections, area->page_protections, areaBytes);
914 		uint8* areaOldProtections = area->page_protections;
915 		area->page_protections = areaNewProtections;
916 
917 		// Shift the protection bits of the second area to the start of
918 		// the old array.
919 		size_t oldBytes = area_page_protections_size(oldSize);
920 		addr_t secondAreaOffset = secondBase - area->Base();
921 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
922 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
923 
924 		// Copy the protection bits of the second area.
925 		size_t secondAreaBytes = area_page_protections_size(secondSize);
926 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
927 		secondArea->page_protections = secondAreaNewProtections;
928 
929 		// We don't need this anymore.
930 		free_etc(areaOldProtections, allocationFlags);
931 
932 		// Set the correct page protections for the second area.
933 		VMTranslationMap* map = addressSpace->TranslationMap();
934 		map->Lock();
935 		for (VMCachePagesTree::Iterator it
936 				= secondArea->cache->pages.GetIterator();
937 				vm_page* page = it.Next();) {
938 			if (is_page_in_area(secondArea, page)) {
939 				addr_t address = virtual_page_address(secondArea, page);
940 				uint32 pageProtection
941 					= get_area_page_protection(secondArea, address);
942 				map->ProtectPage(secondArea, address, pageProtection);
943 			}
944 		}
945 		map->Unlock();
946 	}
947 
948 	if (_secondArea != NULL)
949 		*_secondArea = secondArea;
950 
951 	return B_OK;
952 }
953 
954 
955 /*!	Deletes or cuts all areas in the given address range.
956 	The address space must be write-locked.
957 	The caller must ensure that no part of the given range is wired.
958 */
959 static status_t
960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
961 	bool kernel)
962 {
963 	size = PAGE_ALIGN(size);
964 
965 	// Check, whether the caller is allowed to modify the concerned areas.
966 	if (!kernel) {
967 		for (VMAddressSpace::AreaRangeIterator it
968 				= addressSpace->GetAreaRangeIterator(address, size);
969 			VMArea* area = it.Next();) {
970 
971 			if ((area->protection & B_KERNEL_AREA) != 0) {
972 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
973 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
974 					team_get_current_team_id(), area->id, area->name);
975 				return B_NOT_ALLOWED;
976 			}
977 		}
978 	}
979 
980 	for (VMAddressSpace::AreaRangeIterator it
981 			= addressSpace->GetAreaRangeIterator(address, size);
982 		VMArea* area = it.Next();) {
983 
984 		status_t error = cut_area(addressSpace, area, address, size, NULL,
985 			kernel);
986 		if (error != B_OK)
987 			return error;
988 			// Failing after already messing with areas is ugly, but we
989 			// can't do anything about it.
990 	}
991 
992 	return B_OK;
993 }
994 
995 
996 static status_t
997 discard_area_range(VMArea* area, addr_t address, addr_t size)
998 {
999 	addr_t offset;
1000 	if (!intersect_area(area, address, size, offset))
1001 		return B_OK;
1002 
1003 	// If someone else uses the area's cache or it's not an anonymous cache, we
1004 	// can't discard.
1005 	VMCache* cache = vm_area_get_locked_cache(area);
1006 	if (cache->areas != area || area->cache_next != NULL
1007 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1008 		return B_OK;
1009 	}
1010 
1011 	VMCacheChainLocker cacheChainLocker(cache);
1012 	cacheChainLocker.LockAllSourceCaches();
1013 
1014 	unmap_pages(area, address, size);
1015 
1016 	// Since VMCache::Discard() can temporarily drop the lock, we must
1017 	// unlock all lower caches to prevent locking order inversion.
1018 	cacheChainLocker.Unlock(cache);
1019 	cache->Discard(cache->virtual_base + offset, size);
1020 	cache->ReleaseRefAndUnlock();
1021 
1022 	return B_OK;
1023 }
1024 
1025 
1026 static status_t
1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1028 	bool kernel)
1029 {
1030 	for (VMAddressSpace::AreaRangeIterator it
1031 		= addressSpace->GetAreaRangeIterator(address, size);
1032 			VMArea* area = it.Next();) {
1033 		status_t error = discard_area_range(area, address, size);
1034 		if (error != B_OK)
1035 			return error;
1036 	}
1037 
1038 	return B_OK;
1039 }
1040 
1041 
1042 /*! You need to hold the lock of the cache and the write lock of the address
1043 	space when calling this function.
1044 	Note, that in case of error your cache will be temporarily unlocked.
1045 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1046 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1047 	that no part of the specified address range (base \c *_virtualAddress, size
1048 	\a size) is wired. The cache will also be temporarily unlocked.
1049 */
1050 static status_t
1051 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1052 	const char* areaName, addr_t size, int wiring, int protection,
1053 	int protectionMax, int mapping,
1054 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1055 	bool kernel, VMArea** _area, void** _virtualAddress)
1056 {
1057 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1058 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1059 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1060 		addressSpace, cache, addressRestrictions->address, offset, size,
1061 		addressRestrictions->address_specification, wiring, protection,
1062 		protectionMax, _area, areaName));
1063 	cache->AssertLocked();
1064 
1065 	if (size == 0) {
1066 #if KDEBUG
1067 		panic("map_backing_store(): called with size=0 for area '%s'!",
1068 			areaName);
1069 #endif
1070 		return B_BAD_VALUE;
1071 	}
1072 	if (offset < 0)
1073 		return B_BAD_VALUE;
1074 
1075 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1076 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1077 	int priority;
1078 	if (addressSpace != VMAddressSpace::Kernel()) {
1079 		priority = VM_PRIORITY_USER;
1080 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1081 		priority = VM_PRIORITY_VIP;
1082 		allocationFlags |= HEAP_PRIORITY_VIP;
1083 	} else
1084 		priority = VM_PRIORITY_SYSTEM;
1085 
1086 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1087 		allocationFlags);
1088 	if (mapping != REGION_PRIVATE_MAP)
1089 		area->protection_max = protectionMax & B_USER_PROTECTION;
1090 	if (area == NULL)
1091 		return B_NO_MEMORY;
1092 
1093 	status_t status;
1094 
1095 	// if this is a private map, we need to create a new cache
1096 	// to handle the private copies of pages as they are written to
1097 	VMCache* sourceCache = cache;
1098 	if (mapping == REGION_PRIVATE_MAP) {
1099 		VMCache* newCache;
1100 
1101 		// create an anonymous cache
1102 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1103 			(protection & B_STACK_AREA) != 0
1104 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1105 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1106 		if (status != B_OK)
1107 			goto err1;
1108 
1109 		newCache->Lock();
1110 		newCache->temporary = 1;
1111 		newCache->virtual_base = offset;
1112 		newCache->virtual_end = offset + size;
1113 
1114 		cache->AddConsumer(newCache);
1115 
1116 		cache = newCache;
1117 	}
1118 
1119 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1120 		status = cache->SetMinimalCommitment(size, priority);
1121 		if (status != B_OK)
1122 			goto err2;
1123 	}
1124 
1125 	// check to see if this address space has entered DELETE state
1126 	if (addressSpace->IsBeingDeleted()) {
1127 		// okay, someone is trying to delete this address space now, so we can't
1128 		// insert the area, so back out
1129 		status = B_BAD_TEAM_ID;
1130 		goto err2;
1131 	}
1132 
1133 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1134 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1135 		// temporarily unlock the current cache since it might be mapped to
1136 		// some existing area, and unmap_address_range also needs to lock that
1137 		// cache to delete the area.
1138 		cache->Unlock();
1139 		status = unmap_address_range(addressSpace,
1140 			(addr_t)addressRestrictions->address, size, kernel);
1141 		cache->Lock();
1142 		if (status != B_OK)
1143 			goto err2;
1144 	}
1145 
1146 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1147 		allocationFlags, _virtualAddress);
1148 	if (status == B_NO_MEMORY
1149 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1150 		// Due to how many locks are held, we cannot wait here for space to be
1151 		// freed up, but we can at least notify the low_resource handler.
1152 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1153 	}
1154 	if (status != B_OK)
1155 		goto err2;
1156 
1157 	// attach the cache to the area
1158 	area->cache = cache;
1159 	area->cache_offset = offset;
1160 
1161 	// point the cache back to the area
1162 	cache->InsertAreaLocked(area);
1163 	if (mapping == REGION_PRIVATE_MAP)
1164 		cache->Unlock();
1165 
1166 	// insert the area in the global areas map
1167 	VMAreas::Insert(area);
1168 
1169 	// grab a ref to the address space (the area holds this)
1170 	addressSpace->Get();
1171 
1172 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1173 //		cache, sourceCache, areaName, area);
1174 
1175 	*_area = area;
1176 	return B_OK;
1177 
1178 err2:
1179 	if (mapping == REGION_PRIVATE_MAP) {
1180 		// We created this cache, so we must delete it again. Note, that we
1181 		// need to temporarily unlock the source cache or we'll otherwise
1182 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1183 		sourceCache->Unlock();
1184 		cache->ReleaseRefAndUnlock();
1185 		sourceCache->Lock();
1186 	}
1187 err1:
1188 	addressSpace->DeleteArea(area, allocationFlags);
1189 	return status;
1190 }
1191 
1192 
1193 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1194 	  locker1, locker2).
1195 */
1196 template<typename LockerType1, typename LockerType2>
1197 static inline bool
1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1199 {
1200 	area->cache->AssertLocked();
1201 
1202 	VMAreaUnwiredWaiter waiter;
1203 	if (!area->AddWaiterIfWired(&waiter))
1204 		return false;
1205 
1206 	// unlock everything and wait
1207 	if (locker1 != NULL)
1208 		locker1->Unlock();
1209 	if (locker2 != NULL)
1210 		locker2->Unlock();
1211 
1212 	waiter.waitEntry.Wait();
1213 
1214 	return true;
1215 }
1216 
1217 
1218 /*!	Checks whether the given area has any wired ranges intersecting with the
1219 	specified range and waits, if so.
1220 
1221 	When it has to wait, the function calls \c Unlock() on both \a locker1
1222 	and \a locker2, if given.
1223 	The area's top cache must be locked and must be unlocked as a side effect
1224 	of calling \c Unlock() on either \a locker1 or \a locker2.
1225 
1226 	If the function does not have to wait it does not modify or unlock any
1227 	object.
1228 
1229 	\param area The area to be checked.
1230 	\param base The base address of the range to check.
1231 	\param size The size of the address range to check.
1232 	\param locker1 An object to be unlocked when before starting to wait (may
1233 		be \c NULL).
1234 	\param locker2 An object to be unlocked when before starting to wait (may
1235 		be \c NULL).
1236 	\return \c true, if the function had to wait, \c false otherwise.
1237 */
1238 template<typename LockerType1, typename LockerType2>
1239 static inline bool
1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1241 	LockerType1* locker1, LockerType2* locker2)
1242 {
1243 	area->cache->AssertLocked();
1244 
1245 	VMAreaUnwiredWaiter waiter;
1246 	if (!area->AddWaiterIfWired(&waiter, base, size))
1247 		return false;
1248 
1249 	// unlock everything and wait
1250 	if (locker1 != NULL)
1251 		locker1->Unlock();
1252 	if (locker2 != NULL)
1253 		locker2->Unlock();
1254 
1255 	waiter.waitEntry.Wait();
1256 
1257 	return true;
1258 }
1259 
1260 
1261 /*!	Checks whether the given address space has any wired ranges intersecting
1262 	with the specified range and waits, if so.
1263 
1264 	Similar to wait_if_area_range_is_wired(), with the following differences:
1265 	- All areas intersecting with the range are checked (respectively all until
1266 	  one is found that contains a wired range intersecting with the given
1267 	  range).
1268 	- The given address space must at least be read-locked and must be unlocked
1269 	  when \c Unlock() is called on \a locker.
1270 	- None of the areas' caches are allowed to be locked.
1271 */
1272 template<typename LockerType>
1273 static inline bool
1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1275 	size_t size, LockerType* locker)
1276 {
1277 	for (VMAddressSpace::AreaRangeIterator it
1278 		= addressSpace->GetAreaRangeIterator(base, size);
1279 			VMArea* area = it.Next();) {
1280 
1281 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1282 
1283 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1284 			return true;
1285 	}
1286 
1287 	return false;
1288 }
1289 
1290 
1291 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1292 	It must be called in a situation where the kernel address space may be
1293 	locked.
1294 */
1295 status_t
1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1297 {
1298 	AddressSpaceReadLocker locker;
1299 	VMArea* area;
1300 	status_t status = locker.SetFromArea(id, area);
1301 	if (status != B_OK)
1302 		return status;
1303 
1304 	if (area->page_protections == NULL) {
1305 		status = allocate_area_page_protections(area);
1306 		if (status != B_OK)
1307 			return status;
1308 	}
1309 
1310 	*cookie = (void*)area;
1311 	return B_OK;
1312 }
1313 
1314 
1315 /*!	This is a debug helper function that can only be used with very specific
1316 	use cases.
1317 	Sets protection for the given address range to the protection specified.
1318 	If \a protection is 0 then the involved pages will be marked non-present
1319 	in the translation map to cause a fault on access. The pages aren't
1320 	actually unmapped however so that they can be marked present again with
1321 	additional calls to this function. For this to work the area must be
1322 	fully locked in memory so that the pages aren't otherwise touched.
1323 	This function does not lock the kernel address space and needs to be
1324 	supplied with a \a cookie retrieved from a successful call to
1325 	vm_prepare_kernel_area_debug_protection().
1326 */
1327 status_t
1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1329 	uint32 protection)
1330 {
1331 	// check address range
1332 	addr_t address = (addr_t)_address;
1333 	size = PAGE_ALIGN(size);
1334 
1335 	if ((address % B_PAGE_SIZE) != 0
1336 		|| (addr_t)address + size < (addr_t)address
1337 		|| !IS_KERNEL_ADDRESS(address)
1338 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1339 		return B_BAD_VALUE;
1340 	}
1341 
1342 	// Translate the kernel protection to user protection as we only store that.
1343 	if ((protection & B_KERNEL_READ_AREA) != 0)
1344 		protection |= B_READ_AREA;
1345 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1346 		protection |= B_WRITE_AREA;
1347 
1348 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1349 	VMTranslationMap* map = addressSpace->TranslationMap();
1350 	VMArea* area = (VMArea*)cookie;
1351 
1352 	addr_t offset = address - area->Base();
1353 	if (area->Size() - offset < size) {
1354 		panic("protect range not fully within supplied area");
1355 		return B_BAD_VALUE;
1356 	}
1357 
1358 	if (area->page_protections == NULL) {
1359 		panic("area has no page protections");
1360 		return B_BAD_VALUE;
1361 	}
1362 
1363 	// Invalidate the mapping entries so any access to them will fault or
1364 	// restore the mapping entries unchanged so that lookup will success again.
1365 	map->Lock();
1366 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1367 	map->Unlock();
1368 
1369 	// And set the proper page protections so that the fault case will actually
1370 	// fail and not simply try to map a new page.
1371 	for (addr_t pageAddress = address; pageAddress < address + size;
1372 			pageAddress += B_PAGE_SIZE) {
1373 		set_area_page_protection(area, pageAddress, protection);
1374 	}
1375 
1376 	return B_OK;
1377 }
1378 
1379 
1380 status_t
1381 vm_block_address_range(const char* name, void* address, addr_t size)
1382 {
1383 	if (!arch_vm_supports_protection(0))
1384 		return B_NOT_SUPPORTED;
1385 
1386 	AddressSpaceWriteLocker locker;
1387 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1388 	if (status != B_OK)
1389 		return status;
1390 
1391 	VMAddressSpace* addressSpace = locker.AddressSpace();
1392 
1393 	// create an anonymous cache
1394 	VMCache* cache;
1395 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1396 		VM_PRIORITY_SYSTEM);
1397 	if (status != B_OK)
1398 		return status;
1399 
1400 	cache->temporary = 1;
1401 	cache->virtual_end = size;
1402 	cache->Lock();
1403 
1404 	VMArea* area;
1405 	virtual_address_restrictions addressRestrictions = {};
1406 	addressRestrictions.address = address;
1407 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1408 	status = map_backing_store(addressSpace, cache, 0, name, size,
1409 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1410 		true, &area, NULL);
1411 	if (status != B_OK) {
1412 		cache->ReleaseRefAndUnlock();
1413 		return status;
1414 	}
1415 
1416 	cache->Unlock();
1417 	area->cache_type = CACHE_TYPE_RAM;
1418 	return area->id;
1419 }
1420 
1421 
1422 status_t
1423 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1424 {
1425 	AddressSpaceWriteLocker locker(team);
1426 	if (!locker.IsLocked())
1427 		return B_BAD_TEAM_ID;
1428 
1429 	VMAddressSpace* addressSpace = locker.AddressSpace();
1430 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1431 		addressSpace == VMAddressSpace::Kernel()
1432 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1433 }
1434 
1435 
1436 status_t
1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1438 	addr_t size, uint32 flags)
1439 {
1440 	if (size == 0)
1441 		return B_BAD_VALUE;
1442 
1443 	AddressSpaceWriteLocker locker(team);
1444 	if (!locker.IsLocked())
1445 		return B_BAD_TEAM_ID;
1446 
1447 	virtual_address_restrictions addressRestrictions = {};
1448 	addressRestrictions.address = *_address;
1449 	addressRestrictions.address_specification = addressSpec;
1450 	VMAddressSpace* addressSpace = locker.AddressSpace();
1451 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1452 		addressSpace == VMAddressSpace::Kernel()
1453 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1454 		_address);
1455 }
1456 
1457 
1458 area_id
1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1460 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1461 	const virtual_address_restrictions* virtualAddressRestrictions,
1462 	const physical_address_restrictions* physicalAddressRestrictions,
1463 	bool kernel, void** _address)
1464 {
1465 	VMArea* area;
1466 	VMCache* cache;
1467 	vm_page* page = NULL;
1468 	bool isStack = (protection & B_STACK_AREA) != 0;
1469 	page_num_t guardPages;
1470 	bool canOvercommit = false;
1471 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1472 		? VM_PAGE_ALLOC_CLEAR : 0;
1473 
1474 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1475 		team, name, size));
1476 
1477 	size = PAGE_ALIGN(size);
1478 	guardSize = PAGE_ALIGN(guardSize);
1479 	guardPages = guardSize / B_PAGE_SIZE;
1480 
1481 	if (size == 0 || size < guardSize)
1482 		return B_BAD_VALUE;
1483 	if (!arch_vm_supports_protection(protection))
1484 		return B_NOT_SUPPORTED;
1485 
1486 	if (team == B_CURRENT_TEAM)
1487 		team = VMAddressSpace::CurrentID();
1488 	if (team < 0)
1489 		return B_BAD_TEAM_ID;
1490 
1491 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1492 		canOvercommit = true;
1493 
1494 #ifdef DEBUG_KERNEL_STACKS
1495 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1496 		isStack = true;
1497 #endif
1498 
1499 	// check parameters
1500 	switch (virtualAddressRestrictions->address_specification) {
1501 		case B_ANY_ADDRESS:
1502 		case B_EXACT_ADDRESS:
1503 		case B_BASE_ADDRESS:
1504 		case B_ANY_KERNEL_ADDRESS:
1505 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1506 		case B_RANDOMIZED_ANY_ADDRESS:
1507 		case B_RANDOMIZED_BASE_ADDRESS:
1508 			break;
1509 
1510 		default:
1511 			return B_BAD_VALUE;
1512 	}
1513 
1514 	// If low or high physical address restrictions are given, we force
1515 	// B_CONTIGUOUS wiring, since only then we'll use
1516 	// vm_page_allocate_page_run() which deals with those restrictions.
1517 	if (physicalAddressRestrictions->low_address != 0
1518 		|| physicalAddressRestrictions->high_address != 0) {
1519 		wiring = B_CONTIGUOUS;
1520 	}
1521 
1522 	physical_address_restrictions stackPhysicalRestrictions;
1523 	bool doReserveMemory = false;
1524 	switch (wiring) {
1525 		case B_NO_LOCK:
1526 			break;
1527 		case B_FULL_LOCK:
1528 		case B_LAZY_LOCK:
1529 		case B_CONTIGUOUS:
1530 			doReserveMemory = true;
1531 			break;
1532 		case B_ALREADY_WIRED:
1533 			break;
1534 		case B_LOMEM:
1535 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1536 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1537 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1538 			wiring = B_CONTIGUOUS;
1539 			doReserveMemory = true;
1540 			break;
1541 		case B_32_BIT_FULL_LOCK:
1542 			if (B_HAIKU_PHYSICAL_BITS <= 32
1543 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1544 				wiring = B_FULL_LOCK;
1545 				doReserveMemory = true;
1546 				break;
1547 			}
1548 			// TODO: We don't really support this mode efficiently. Just fall
1549 			// through for now ...
1550 		case B_32_BIT_CONTIGUOUS:
1551 			#if B_HAIKU_PHYSICAL_BITS > 32
1552 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1553 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1554 					stackPhysicalRestrictions.high_address
1555 						= (phys_addr_t)1 << 32;
1556 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1557 				}
1558 			#endif
1559 			wiring = B_CONTIGUOUS;
1560 			doReserveMemory = true;
1561 			break;
1562 		default:
1563 			return B_BAD_VALUE;
1564 	}
1565 
1566 	// Optimization: For a single-page contiguous allocation without low/high
1567 	// memory restriction B_FULL_LOCK wiring suffices.
1568 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1569 		&& physicalAddressRestrictions->low_address == 0
1570 		&& physicalAddressRestrictions->high_address == 0) {
1571 		wiring = B_FULL_LOCK;
1572 	}
1573 
1574 	// For full lock or contiguous areas we're also going to map the pages and
1575 	// thus need to reserve pages for the mapping backend upfront.
1576 	addr_t reservedMapPages = 0;
1577 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1578 		AddressSpaceWriteLocker locker;
1579 		status_t status = locker.SetTo(team);
1580 		if (status != B_OK)
1581 			return status;
1582 
1583 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1584 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1585 	}
1586 
1587 	int priority;
1588 	if (team != VMAddressSpace::KernelID())
1589 		priority = VM_PRIORITY_USER;
1590 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1591 		priority = VM_PRIORITY_VIP;
1592 	else
1593 		priority = VM_PRIORITY_SYSTEM;
1594 
1595 	// Reserve memory before acquiring the address space lock. This reduces the
1596 	// chances of failure, since while holding the write lock to the address
1597 	// space (if it is the kernel address space that is), the low memory handler
1598 	// won't be able to free anything for us.
1599 	addr_t reservedMemory = 0;
1600 	if (doReserveMemory) {
1601 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1602 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1603 			return B_NO_MEMORY;
1604 		reservedMemory = size;
1605 		// TODO: We don't reserve the memory for the pages for the page
1606 		// directories/tables. We actually need to do since we currently don't
1607 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1608 		// there are actually less physical pages than there should be, which
1609 		// can get the VM into trouble in low memory situations.
1610 	}
1611 
1612 	AddressSpaceWriteLocker locker;
1613 	VMAddressSpace* addressSpace;
1614 	status_t status;
1615 
1616 	// For full lock areas reserve the pages before locking the address
1617 	// space. E.g. block caches can't release their memory while we hold the
1618 	// address space lock.
1619 	page_num_t reservedPages = reservedMapPages;
1620 	if (wiring == B_FULL_LOCK)
1621 		reservedPages += size / B_PAGE_SIZE;
1622 
1623 	vm_page_reservation reservation;
1624 	if (reservedPages > 0) {
1625 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1626 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1627 					priority)) {
1628 				reservedPages = 0;
1629 				status = B_WOULD_BLOCK;
1630 				goto err0;
1631 			}
1632 		} else
1633 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1634 	}
1635 
1636 	if (wiring == B_CONTIGUOUS) {
1637 		// we try to allocate the page run here upfront as this may easily
1638 		// fail for obvious reasons
1639 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1640 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1641 		if (page == NULL) {
1642 			status = B_NO_MEMORY;
1643 			goto err0;
1644 		}
1645 	}
1646 
1647 	// Lock the address space and, if B_EXACT_ADDRESS and
1648 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1649 	// is not wired.
1650 	do {
1651 		status = locker.SetTo(team);
1652 		if (status != B_OK)
1653 			goto err1;
1654 
1655 		addressSpace = locker.AddressSpace();
1656 	} while (virtualAddressRestrictions->address_specification
1657 			== B_EXACT_ADDRESS
1658 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1659 		&& wait_if_address_range_is_wired(addressSpace,
1660 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1661 
1662 	// create an anonymous cache
1663 	// if it's a stack, make sure that two pages are available at least
1664 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1665 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1666 		wiring == B_NO_LOCK, priority);
1667 	if (status != B_OK)
1668 		goto err1;
1669 
1670 	cache->temporary = 1;
1671 	cache->virtual_end = size;
1672 	cache->committed_size = reservedMemory;
1673 		// TODO: This should be done via a method.
1674 	reservedMemory = 0;
1675 
1676 	cache->Lock();
1677 
1678 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1679 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1680 		virtualAddressRestrictions, kernel, &area, _address);
1681 
1682 	if (status != B_OK) {
1683 		cache->ReleaseRefAndUnlock();
1684 		goto err1;
1685 	}
1686 
1687 	locker.DegradeToReadLock();
1688 
1689 	switch (wiring) {
1690 		case B_NO_LOCK:
1691 		case B_LAZY_LOCK:
1692 			// do nothing - the pages are mapped in as needed
1693 			break;
1694 
1695 		case B_FULL_LOCK:
1696 		{
1697 			// Allocate and map all pages for this area
1698 
1699 			off_t offset = 0;
1700 			for (addr_t address = area->Base();
1701 					address < area->Base() + (area->Size() - 1);
1702 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1703 #ifdef DEBUG_KERNEL_STACKS
1704 #	ifdef STACK_GROWS_DOWNWARDS
1705 				if (isStack && address < area->Base()
1706 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1707 #	else
1708 				if (isStack && address >= area->Base() + area->Size()
1709 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1710 #	endif
1711 					continue;
1712 #endif
1713 				vm_page* page = vm_page_allocate_page(&reservation,
1714 					PAGE_STATE_WIRED | pageAllocFlags);
1715 				cache->InsertPage(page, offset);
1716 				map_page(area, page, address, protection, &reservation);
1717 
1718 				DEBUG_PAGE_ACCESS_END(page);
1719 			}
1720 
1721 			break;
1722 		}
1723 
1724 		case B_ALREADY_WIRED:
1725 		{
1726 			// The pages should already be mapped. This is only really useful
1727 			// during boot time. Find the appropriate vm_page objects and stick
1728 			// them in the cache object.
1729 			VMTranslationMap* map = addressSpace->TranslationMap();
1730 			off_t offset = 0;
1731 
1732 			if (!gKernelStartup)
1733 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1734 
1735 			map->Lock();
1736 
1737 			for (addr_t virtualAddress = area->Base();
1738 					virtualAddress < area->Base() + (area->Size() - 1);
1739 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1740 				phys_addr_t physicalAddress;
1741 				uint32 flags;
1742 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1743 				if (status < B_OK) {
1744 					panic("looking up mapping failed for va 0x%lx\n",
1745 						virtualAddress);
1746 				}
1747 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1748 				if (page == NULL) {
1749 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1750 						"\n", physicalAddress);
1751 				}
1752 
1753 				DEBUG_PAGE_ACCESS_START(page);
1754 
1755 				cache->InsertPage(page, offset);
1756 				increment_page_wired_count(page);
1757 				vm_page_set_state(page, PAGE_STATE_WIRED);
1758 				page->busy = false;
1759 
1760 				DEBUG_PAGE_ACCESS_END(page);
1761 			}
1762 
1763 			map->Unlock();
1764 			break;
1765 		}
1766 
1767 		case B_CONTIGUOUS:
1768 		{
1769 			// We have already allocated our continuous pages run, so we can now
1770 			// just map them in the address space
1771 			VMTranslationMap* map = addressSpace->TranslationMap();
1772 			phys_addr_t physicalAddress
1773 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1774 			addr_t virtualAddress = area->Base();
1775 			off_t offset = 0;
1776 
1777 			map->Lock();
1778 
1779 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1780 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1781 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1782 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1783 				if (page == NULL)
1784 					panic("couldn't lookup physical page just allocated\n");
1785 
1786 				status = map->Map(virtualAddress, physicalAddress, protection,
1787 					area->MemoryType(), &reservation);
1788 				if (status < B_OK)
1789 					panic("couldn't map physical page in page run\n");
1790 
1791 				cache->InsertPage(page, offset);
1792 				increment_page_wired_count(page);
1793 
1794 				DEBUG_PAGE_ACCESS_END(page);
1795 			}
1796 
1797 			map->Unlock();
1798 			break;
1799 		}
1800 
1801 		default:
1802 			break;
1803 	}
1804 
1805 	cache->Unlock();
1806 
1807 	if (reservedPages > 0)
1808 		vm_page_unreserve_pages(&reservation);
1809 
1810 	TRACE(("vm_create_anonymous_area: done\n"));
1811 
1812 	area->cache_type = CACHE_TYPE_RAM;
1813 	return area->id;
1814 
1815 err1:
1816 	if (wiring == B_CONTIGUOUS) {
1817 		// we had reserved the area space upfront...
1818 		phys_addr_t pageNumber = page->physical_page_number;
1819 		int32 i;
1820 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1821 			page = vm_lookup_page(pageNumber);
1822 			if (page == NULL)
1823 				panic("couldn't lookup physical page just allocated\n");
1824 
1825 			vm_page_set_state(page, PAGE_STATE_FREE);
1826 		}
1827 	}
1828 
1829 err0:
1830 	if (reservedPages > 0)
1831 		vm_page_unreserve_pages(&reservation);
1832 	if (reservedMemory > 0)
1833 		vm_unreserve_memory(reservedMemory);
1834 
1835 	return status;
1836 }
1837 
1838 
1839 area_id
1840 vm_map_physical_memory(team_id team, const char* name, void** _address,
1841 	uint32 addressSpec, addr_t size, uint32 protection,
1842 	phys_addr_t physicalAddress, bool alreadyWired)
1843 {
1844 	VMArea* area;
1845 	VMCache* cache;
1846 	addr_t mapOffset;
1847 
1848 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1849 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1850 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1851 		addressSpec, size, protection, physicalAddress));
1852 
1853 	if (!arch_vm_supports_protection(protection))
1854 		return B_NOT_SUPPORTED;
1855 
1856 	AddressSpaceWriteLocker locker(team);
1857 	if (!locker.IsLocked())
1858 		return B_BAD_TEAM_ID;
1859 
1860 	// if the physical address is somewhat inside a page,
1861 	// move the actual area down to align on a page boundary
1862 	mapOffset = physicalAddress % B_PAGE_SIZE;
1863 	size += mapOffset;
1864 	physicalAddress -= mapOffset;
1865 
1866 	size = PAGE_ALIGN(size);
1867 
1868 	// create a device cache
1869 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1870 	if (status != B_OK)
1871 		return status;
1872 
1873 	cache->virtual_end = size;
1874 
1875 	cache->Lock();
1876 
1877 	virtual_address_restrictions addressRestrictions = {};
1878 	addressRestrictions.address = *_address;
1879 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1880 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1881 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1882 		true, &area, _address);
1883 
1884 	if (status < B_OK)
1885 		cache->ReleaseRefLocked();
1886 
1887 	cache->Unlock();
1888 
1889 	if (status == B_OK) {
1890 		// set requested memory type -- use uncached, if not given
1891 		uint32 memoryType = addressSpec & B_MTR_MASK;
1892 		if (memoryType == 0)
1893 			memoryType = B_MTR_UC;
1894 
1895 		area->SetMemoryType(memoryType);
1896 
1897 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1898 		if (status != B_OK)
1899 			delete_area(locker.AddressSpace(), area, false);
1900 	}
1901 
1902 	if (status != B_OK)
1903 		return status;
1904 
1905 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1906 
1907 	if (alreadyWired) {
1908 		// The area is already mapped, but possibly not with the right
1909 		// memory type.
1910 		map->Lock();
1911 		map->ProtectArea(area, area->protection);
1912 		map->Unlock();
1913 	} else {
1914 		// Map the area completely.
1915 
1916 		// reserve pages needed for the mapping
1917 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1918 			area->Base() + (size - 1));
1919 		vm_page_reservation reservation;
1920 		vm_page_reserve_pages(&reservation, reservePages,
1921 			team == VMAddressSpace::KernelID()
1922 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1923 
1924 		map->Lock();
1925 
1926 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1927 			map->Map(area->Base() + offset, physicalAddress + offset,
1928 				protection, area->MemoryType(), &reservation);
1929 		}
1930 
1931 		map->Unlock();
1932 
1933 		vm_page_unreserve_pages(&reservation);
1934 	}
1935 
1936 	// modify the pointer returned to be offset back into the new area
1937 	// the same way the physical address in was offset
1938 	*_address = (void*)((addr_t)*_address + mapOffset);
1939 
1940 	area->cache_type = CACHE_TYPE_DEVICE;
1941 	return area->id;
1942 }
1943 
1944 
1945 /*!	Don't use!
1946 	TODO: This function was introduced to map physical page vecs to
1947 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1948 	use a device cache and does not track vm_page::wired_count!
1949 */
1950 area_id
1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1952 	uint32 addressSpec, addr_t* _size, uint32 protection,
1953 	struct generic_io_vec* vecs, uint32 vecCount)
1954 {
1955 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1956 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1957 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1958 		addressSpec, _size, protection, vecs, vecCount));
1959 
1960 	if (!arch_vm_supports_protection(protection)
1961 		|| (addressSpec & B_MTR_MASK) != 0) {
1962 		return B_NOT_SUPPORTED;
1963 	}
1964 
1965 	AddressSpaceWriteLocker locker(team);
1966 	if (!locker.IsLocked())
1967 		return B_BAD_TEAM_ID;
1968 
1969 	if (vecCount == 0)
1970 		return B_BAD_VALUE;
1971 
1972 	addr_t size = 0;
1973 	for (uint32 i = 0; i < vecCount; i++) {
1974 		if (vecs[i].base % B_PAGE_SIZE != 0
1975 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1976 			return B_BAD_VALUE;
1977 		}
1978 
1979 		size += vecs[i].length;
1980 	}
1981 
1982 	// create a device cache
1983 	VMCache* cache;
1984 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1985 	if (result != B_OK)
1986 		return result;
1987 
1988 	cache->virtual_end = size;
1989 
1990 	cache->Lock();
1991 
1992 	VMArea* area;
1993 	virtual_address_restrictions addressRestrictions = {};
1994 	addressRestrictions.address = *_address;
1995 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1996 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1997 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1998 		&addressRestrictions, true, &area, _address);
1999 
2000 	if (result != B_OK)
2001 		cache->ReleaseRefLocked();
2002 
2003 	cache->Unlock();
2004 
2005 	if (result != B_OK)
2006 		return result;
2007 
2008 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2009 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2010 		area->Base() + (size - 1));
2011 
2012 	vm_page_reservation reservation;
2013 	vm_page_reserve_pages(&reservation, reservePages,
2014 			team == VMAddressSpace::KernelID()
2015 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2016 	map->Lock();
2017 
2018 	uint32 vecIndex = 0;
2019 	size_t vecOffset = 0;
2020 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2021 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
2022 			vecOffset = 0;
2023 			vecIndex++;
2024 		}
2025 
2026 		if (vecIndex >= vecCount)
2027 			break;
2028 
2029 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2030 			protection, area->MemoryType(), &reservation);
2031 
2032 		vecOffset += B_PAGE_SIZE;
2033 	}
2034 
2035 	map->Unlock();
2036 	vm_page_unreserve_pages(&reservation);
2037 
2038 	if (_size != NULL)
2039 		*_size = size;
2040 
2041 	area->cache_type = CACHE_TYPE_DEVICE;
2042 	return area->id;
2043 }
2044 
2045 
2046 area_id
2047 vm_create_null_area(team_id team, const char* name, void** address,
2048 	uint32 addressSpec, addr_t size, uint32 flags)
2049 {
2050 	size = PAGE_ALIGN(size);
2051 
2052 	// Lock the address space and, if B_EXACT_ADDRESS and
2053 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2054 	// is not wired.
2055 	AddressSpaceWriteLocker locker;
2056 	do {
2057 		if (locker.SetTo(team) != B_OK)
2058 			return B_BAD_TEAM_ID;
2059 	} while (addressSpec == B_EXACT_ADDRESS
2060 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2061 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2062 			(addr_t)*address, size, &locker));
2063 
2064 	// create a null cache
2065 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2066 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2067 	VMCache* cache;
2068 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2069 	if (status != B_OK)
2070 		return status;
2071 
2072 	cache->temporary = 1;
2073 	cache->virtual_end = size;
2074 
2075 	cache->Lock();
2076 
2077 	VMArea* area;
2078 	virtual_address_restrictions addressRestrictions = {};
2079 	addressRestrictions.address = *address;
2080 	addressRestrictions.address_specification = addressSpec;
2081 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2082 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2083 		REGION_NO_PRIVATE_MAP, flags,
2084 		&addressRestrictions, true, &area, address);
2085 
2086 	if (status < B_OK) {
2087 		cache->ReleaseRefAndUnlock();
2088 		return status;
2089 	}
2090 
2091 	cache->Unlock();
2092 
2093 	area->cache_type = CACHE_TYPE_NULL;
2094 	return area->id;
2095 }
2096 
2097 
2098 /*!	Creates the vnode cache for the specified \a vnode.
2099 	The vnode has to be marked busy when calling this function.
2100 */
2101 status_t
2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2103 {
2104 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2105 }
2106 
2107 
2108 /*!	\a cache must be locked. The area's address space must be read-locked.
2109 */
2110 static void
2111 pre_map_area_pages(VMArea* area, VMCache* cache,
2112 	vm_page_reservation* reservation)
2113 {
2114 	addr_t baseAddress = area->Base();
2115 	addr_t cacheOffset = area->cache_offset;
2116 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2117 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2118 
2119 	for (VMCachePagesTree::Iterator it
2120 				= cache->pages.GetIterator(firstPage, true, true);
2121 			vm_page* page = it.Next();) {
2122 		if (page->cache_offset >= endPage)
2123 			break;
2124 
2125 		// skip busy and inactive pages
2126 		if (page->busy || page->usage_count == 0)
2127 			continue;
2128 
2129 		DEBUG_PAGE_ACCESS_START(page);
2130 		map_page(area, page,
2131 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2132 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2133 		DEBUG_PAGE_ACCESS_END(page);
2134 	}
2135 }
2136 
2137 
2138 /*!	Will map the file specified by \a fd to an area in memory.
2139 	The file will be mirrored beginning at the specified \a offset. The
2140 	\a offset and \a size arguments have to be page aligned.
2141 */
2142 static area_id
2143 _vm_map_file(team_id team, const char* name, void** _address,
2144 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2145 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2146 {
2147 	// TODO: for binary files, we want to make sure that they get the
2148 	//	copy of a file at a given time, ie. later changes should not
2149 	//	make it into the mapped copy -- this will need quite some changes
2150 	//	to be done in a nice way
2151 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2152 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2153 
2154 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2155 	size = PAGE_ALIGN(size);
2156 
2157 	if (mapping == REGION_NO_PRIVATE_MAP)
2158 		protection |= B_SHARED_AREA;
2159 	if (addressSpec != B_EXACT_ADDRESS)
2160 		unmapAddressRange = false;
2161 
2162 	uint32 mappingFlags = 0;
2163 	if (unmapAddressRange)
2164 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2165 
2166 	if (fd < 0) {
2167 		virtual_address_restrictions virtualRestrictions = {};
2168 		virtualRestrictions.address = *_address;
2169 		virtualRestrictions.address_specification = addressSpec;
2170 		physical_address_restrictions physicalRestrictions = {};
2171 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2172 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2173 			_address);
2174 	}
2175 
2176 	// get the open flags of the FD
2177 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2178 	if (descriptor == NULL)
2179 		return EBADF;
2180 	int32 openMode = descriptor->open_mode;
2181 	put_fd(descriptor);
2182 
2183 	// The FD must open for reading at any rate. For shared mapping with write
2184 	// access, additionally the FD must be open for writing.
2185 	if ((openMode & O_ACCMODE) == O_WRONLY
2186 		|| (mapping == REGION_NO_PRIVATE_MAP
2187 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2188 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2189 		return EACCES;
2190 	}
2191 
2192 	uint32 protectionMax = 0;
2193 	if (mapping == REGION_NO_PRIVATE_MAP) {
2194 		if ((openMode & O_ACCMODE) == O_RDWR)
2195 			protectionMax = protection | B_USER_PROTECTION;
2196 		else
2197 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2198 	} else if (mapping == REGION_PRIVATE_MAP) {
2199 		// For privately mapped read-only regions, skip committing memory.
2200 		// (If protections are changed later on, memory will be committed then.)
2201 		if ((protection & B_WRITE_AREA) == 0)
2202 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2203 	}
2204 
2205 	// get the vnode for the object, this also grabs a ref to it
2206 	struct vnode* vnode = NULL;
2207 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2208 	if (status < B_OK)
2209 		return status;
2210 	VnodePutter vnodePutter(vnode);
2211 
2212 	// If we're going to pre-map pages, we need to reserve the pages needed by
2213 	// the mapping backend upfront.
2214 	page_num_t reservedPreMapPages = 0;
2215 	vm_page_reservation reservation;
2216 	if ((protection & B_READ_AREA) != 0) {
2217 		AddressSpaceWriteLocker locker;
2218 		status = locker.SetTo(team);
2219 		if (status != B_OK)
2220 			return status;
2221 
2222 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2223 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2224 
2225 		locker.Unlock();
2226 
2227 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2228 			team == VMAddressSpace::KernelID()
2229 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2230 	}
2231 
2232 	struct PageUnreserver {
2233 		PageUnreserver(vm_page_reservation* reservation)
2234 			:
2235 			fReservation(reservation)
2236 		{
2237 		}
2238 
2239 		~PageUnreserver()
2240 		{
2241 			if (fReservation != NULL)
2242 				vm_page_unreserve_pages(fReservation);
2243 		}
2244 
2245 		vm_page_reservation* fReservation;
2246 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2247 
2248 	// Lock the address space and, if the specified address range shall be
2249 	// unmapped, ensure it is not wired.
2250 	AddressSpaceWriteLocker locker;
2251 	do {
2252 		if (locker.SetTo(team) != B_OK)
2253 			return B_BAD_TEAM_ID;
2254 	} while (unmapAddressRange
2255 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2256 			(addr_t)*_address, size, &locker));
2257 
2258 	// TODO: this only works for file systems that use the file cache
2259 	VMCache* cache;
2260 	status = vfs_get_vnode_cache(vnode, &cache, false);
2261 	if (status < B_OK)
2262 		return status;
2263 
2264 	cache->Lock();
2265 
2266 	VMArea* area;
2267 	virtual_address_restrictions addressRestrictions = {};
2268 	addressRestrictions.address = *_address;
2269 	addressRestrictions.address_specification = addressSpec;
2270 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2271 		0, protection, protectionMax, mapping, mappingFlags,
2272 		&addressRestrictions, kernel, &area, _address);
2273 
2274 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2275 		// map_backing_store() cannot know we no longer need the ref
2276 		cache->ReleaseRefLocked();
2277 	}
2278 
2279 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2280 		pre_map_area_pages(area, cache, &reservation);
2281 
2282 	cache->Unlock();
2283 
2284 	if (status == B_OK) {
2285 		// TODO: this probably deserves a smarter solution, ie. don't always
2286 		// prefetch stuff, and also, probably don't trigger it at this place.
2287 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2288 			// prefetches at max 10 MB starting from "offset"
2289 	}
2290 
2291 	if (status != B_OK)
2292 		return status;
2293 
2294 	area->cache_type = CACHE_TYPE_VNODE;
2295 	return area->id;
2296 }
2297 
2298 
2299 area_id
2300 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2301 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2302 	int fd, off_t offset)
2303 {
2304 	if (!arch_vm_supports_protection(protection))
2305 		return B_NOT_SUPPORTED;
2306 
2307 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2308 		mapping, unmapAddressRange, fd, offset, true);
2309 }
2310 
2311 
2312 VMCache*
2313 vm_area_get_locked_cache(VMArea* area)
2314 {
2315 	rw_lock_read_lock(&sAreaCacheLock);
2316 
2317 	while (true) {
2318 		VMCache* cache = area->cache;
2319 
2320 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2321 			// cache has been deleted
2322 			rw_lock_read_lock(&sAreaCacheLock);
2323 			continue;
2324 		}
2325 
2326 		rw_lock_read_lock(&sAreaCacheLock);
2327 
2328 		if (cache == area->cache) {
2329 			cache->AcquireRefLocked();
2330 			rw_lock_read_unlock(&sAreaCacheLock);
2331 			return cache;
2332 		}
2333 
2334 		// the cache changed in the meantime
2335 		cache->Unlock();
2336 	}
2337 }
2338 
2339 
2340 void
2341 vm_area_put_locked_cache(VMCache* cache)
2342 {
2343 	cache->ReleaseRefAndUnlock();
2344 }
2345 
2346 
2347 area_id
2348 vm_clone_area(team_id team, const char* name, void** address,
2349 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2350 	bool kernel)
2351 {
2352 	VMArea* newArea = NULL;
2353 	VMArea* sourceArea;
2354 
2355 	// Check whether the source area exists and is cloneable. If so, mark it
2356 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2357 	{
2358 		AddressSpaceWriteLocker locker;
2359 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2360 		if (status != B_OK)
2361 			return status;
2362 
2363 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2364 			return B_NOT_ALLOWED;
2365 
2366 		sourceArea->protection |= B_SHARED_AREA;
2367 		protection |= B_SHARED_AREA;
2368 	}
2369 
2370 	// Now lock both address spaces and actually do the cloning.
2371 
2372 	MultiAddressSpaceLocker locker;
2373 	VMAddressSpace* sourceAddressSpace;
2374 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2375 	if (status != B_OK)
2376 		return status;
2377 
2378 	VMAddressSpace* targetAddressSpace;
2379 	status = locker.AddTeam(team, true, &targetAddressSpace);
2380 	if (status != B_OK)
2381 		return status;
2382 
2383 	status = locker.Lock();
2384 	if (status != B_OK)
2385 		return status;
2386 
2387 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2388 	if (sourceArea == NULL)
2389 		return B_BAD_VALUE;
2390 
2391 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2392 		return B_NOT_ALLOWED;
2393 
2394 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2395 
2396 	if (!kernel && sourceAddressSpace != targetAddressSpace
2397 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2398 #if KDEBUG
2399 		Team* team = thread_get_current_thread()->team;
2400 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2401 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2402 #endif
2403 		status = B_NOT_ALLOWED;
2404 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2405 		status = B_NOT_ALLOWED;
2406 	} else {
2407 		virtual_address_restrictions addressRestrictions = {};
2408 		addressRestrictions.address = *address;
2409 		addressRestrictions.address_specification = addressSpec;
2410 		status = map_backing_store(targetAddressSpace, cache,
2411 			sourceArea->cache_offset, name, sourceArea->Size(),
2412 			sourceArea->wiring, protection, sourceArea->protection_max,
2413 			mapping, 0, &addressRestrictions,
2414 			kernel, &newArea, address);
2415 	}
2416 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2417 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2418 		// to create a new cache, and has therefore already acquired a reference
2419 		// to the source cache - but otherwise it has no idea that we need
2420 		// one.
2421 		cache->AcquireRefLocked();
2422 	}
2423 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2424 		// we need to map in everything at this point
2425 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2426 			// we don't have actual pages to map but a physical area
2427 			VMTranslationMap* map
2428 				= sourceArea->address_space->TranslationMap();
2429 			map->Lock();
2430 
2431 			phys_addr_t physicalAddress;
2432 			uint32 oldProtection;
2433 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2434 
2435 			map->Unlock();
2436 
2437 			map = targetAddressSpace->TranslationMap();
2438 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2439 				newArea->Base() + (newArea->Size() - 1));
2440 
2441 			vm_page_reservation reservation;
2442 			vm_page_reserve_pages(&reservation, reservePages,
2443 				targetAddressSpace == VMAddressSpace::Kernel()
2444 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2445 			map->Lock();
2446 
2447 			for (addr_t offset = 0; offset < newArea->Size();
2448 					offset += B_PAGE_SIZE) {
2449 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2450 					protection, newArea->MemoryType(), &reservation);
2451 			}
2452 
2453 			map->Unlock();
2454 			vm_page_unreserve_pages(&reservation);
2455 		} else {
2456 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2457 			size_t reservePages = map->MaxPagesNeededToMap(
2458 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2459 			vm_page_reservation reservation;
2460 			vm_page_reserve_pages(&reservation, reservePages,
2461 				targetAddressSpace == VMAddressSpace::Kernel()
2462 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2463 
2464 			// map in all pages from source
2465 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2466 					vm_page* page  = it.Next();) {
2467 				if (!page->busy) {
2468 					DEBUG_PAGE_ACCESS_START(page);
2469 					map_page(newArea, page,
2470 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2471 							- newArea->cache_offset),
2472 						protection, &reservation);
2473 					DEBUG_PAGE_ACCESS_END(page);
2474 				}
2475 			}
2476 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2477 			// ensuring that!
2478 
2479 			vm_page_unreserve_pages(&reservation);
2480 		}
2481 	}
2482 	if (status == B_OK)
2483 		newArea->cache_type = sourceArea->cache_type;
2484 
2485 	vm_area_put_locked_cache(cache);
2486 
2487 	if (status < B_OK)
2488 		return status;
2489 
2490 	return newArea->id;
2491 }
2492 
2493 
2494 /*!	Deletes the specified area of the given address space.
2495 
2496 	The address space must be write-locked.
2497 	The caller must ensure that the area does not have any wired ranges.
2498 
2499 	\param addressSpace The address space containing the area.
2500 	\param area The area to be deleted.
2501 	\param deletingAddressSpace \c true, if the address space is in the process
2502 		of being deleted.
2503 */
2504 static void
2505 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2506 	bool deletingAddressSpace)
2507 {
2508 	ASSERT(!area->IsWired());
2509 
2510 	VMAreas::Remove(area);
2511 
2512 	// At this point the area is removed from the global hash table, but
2513 	// still exists in the area list.
2514 
2515 	// Unmap the virtual address space the area occupied.
2516 	{
2517 		// We need to lock the complete cache chain.
2518 		VMCache* topCache = vm_area_get_locked_cache(area);
2519 		VMCacheChainLocker cacheChainLocker(topCache);
2520 		cacheChainLocker.LockAllSourceCaches();
2521 
2522 		// If the area's top cache is a temporary cache and the area is the only
2523 		// one referencing it (besides us currently holding a second reference),
2524 		// the unmapping code doesn't need to care about preserving the accessed
2525 		// and dirty flags of the top cache page mappings.
2526 		bool ignoreTopCachePageFlags
2527 			= topCache->temporary && topCache->RefCount() == 2;
2528 
2529 		area->address_space->TranslationMap()->UnmapArea(area,
2530 			deletingAddressSpace, ignoreTopCachePageFlags);
2531 	}
2532 
2533 	if (!area->cache->temporary)
2534 		area->cache->WriteModified();
2535 
2536 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2537 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2538 
2539 	arch_vm_unset_memory_type(area);
2540 	addressSpace->RemoveArea(area, allocationFlags);
2541 	addressSpace->Put();
2542 
2543 	area->cache->RemoveArea(area);
2544 	area->cache->ReleaseRef();
2545 
2546 	addressSpace->DeleteArea(area, allocationFlags);
2547 }
2548 
2549 
2550 status_t
2551 vm_delete_area(team_id team, area_id id, bool kernel)
2552 {
2553 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2554 		team, id));
2555 
2556 	// lock the address space and make sure the area isn't wired
2557 	AddressSpaceWriteLocker locker;
2558 	VMArea* area;
2559 	AreaCacheLocker cacheLocker;
2560 
2561 	do {
2562 		status_t status = locker.SetFromArea(team, id, area);
2563 		if (status != B_OK)
2564 			return status;
2565 
2566 		cacheLocker.SetTo(area);
2567 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2568 
2569 	cacheLocker.Unlock();
2570 
2571 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2572 		return B_NOT_ALLOWED;
2573 
2574 	delete_area(locker.AddressSpace(), area, false);
2575 	return B_OK;
2576 }
2577 
2578 
2579 /*!	Creates a new cache on top of given cache, moves all areas from
2580 	the old cache to the new one, and changes the protection of all affected
2581 	areas' pages to read-only. If requested, wired pages are moved up to the
2582 	new cache and copies are added to the old cache in their place.
2583 	Preconditions:
2584 	- The given cache must be locked.
2585 	- All of the cache's areas' address spaces must be read locked.
2586 	- Either the cache must not have any wired ranges or a page reservation for
2587 	  all wired pages must be provided, so they can be copied.
2588 
2589 	\param lowerCache The cache on top of which a new cache shall be created.
2590 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2591 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2592 		has wired page. The wired pages are copied in this case.
2593 */
2594 static status_t
2595 vm_copy_on_write_area(VMCache* lowerCache,
2596 	vm_page_reservation* wiredPagesReservation)
2597 {
2598 	VMCache* upperCache;
2599 
2600 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2601 
2602 	// We need to separate the cache from its areas. The cache goes one level
2603 	// deeper and we create a new cache inbetween.
2604 
2605 	// create an anonymous cache
2606 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2607 		lowerCache->GuardSize() / B_PAGE_SIZE,
2608 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2609 		VM_PRIORITY_USER);
2610 	if (status != B_OK)
2611 		return status;
2612 
2613 	upperCache->Lock();
2614 
2615 	upperCache->temporary = 1;
2616 	upperCache->virtual_base = lowerCache->virtual_base;
2617 	upperCache->virtual_end = lowerCache->virtual_end;
2618 
2619 	// transfer the lower cache areas to the upper cache
2620 	rw_lock_write_lock(&sAreaCacheLock);
2621 	upperCache->TransferAreas(lowerCache);
2622 	rw_lock_write_unlock(&sAreaCacheLock);
2623 
2624 	lowerCache->AddConsumer(upperCache);
2625 
2626 	// We now need to remap all pages from all of the cache's areas read-only,
2627 	// so that a copy will be created on next write access. If there are wired
2628 	// pages, we keep their protection, move them to the upper cache and create
2629 	// copies for the lower cache.
2630 	if (wiredPagesReservation != NULL) {
2631 		// We need to handle wired pages -- iterate through the cache's pages.
2632 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2633 				vm_page* page = it.Next();) {
2634 			if (page->WiredCount() > 0) {
2635 				// allocate a new page and copy the wired one
2636 				vm_page* copiedPage = vm_page_allocate_page(
2637 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2638 
2639 				vm_memcpy_physical_page(
2640 					copiedPage->physical_page_number * B_PAGE_SIZE,
2641 					page->physical_page_number * B_PAGE_SIZE);
2642 
2643 				// move the wired page to the upper cache (note: removing is OK
2644 				// with the SplayTree iterator) and insert the copy
2645 				upperCache->MovePage(page);
2646 				lowerCache->InsertPage(copiedPage,
2647 					page->cache_offset * B_PAGE_SIZE);
2648 
2649 				DEBUG_PAGE_ACCESS_END(copiedPage);
2650 			} else {
2651 				// Change the protection of this page in all areas.
2652 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2653 						tempArea = tempArea->cache_next) {
2654 					if (!is_page_in_area(tempArea, page))
2655 						continue;
2656 
2657 					// The area must be readable in the same way it was
2658 					// previously writable.
2659 					addr_t address = virtual_page_address(tempArea, page);
2660 					uint32 protection = 0;
2661 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2662 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2663 						protection |= B_KERNEL_READ_AREA;
2664 					if ((pageProtection & B_READ_AREA) != 0)
2665 						protection |= B_READ_AREA;
2666 
2667 					VMTranslationMap* map
2668 						= tempArea->address_space->TranslationMap();
2669 					map->Lock();
2670 					map->ProtectPage(tempArea, address, protection);
2671 					map->Unlock();
2672 				}
2673 			}
2674 		}
2675 	} else {
2676 		ASSERT(lowerCache->WiredPagesCount() == 0);
2677 
2678 		// just change the protection of all areas
2679 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2680 				tempArea = tempArea->cache_next) {
2681 			if (tempArea->page_protections != NULL) {
2682 				// Change the protection of all pages in this area.
2683 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2684 				map->Lock();
2685 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2686 					vm_page* page = it.Next();) {
2687 					if (!is_page_in_area(tempArea, page))
2688 						continue;
2689 
2690 					// The area must be readable in the same way it was
2691 					// previously writable.
2692 					addr_t address = virtual_page_address(tempArea, page);
2693 					uint32 protection = 0;
2694 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2695 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2696 						protection |= B_KERNEL_READ_AREA;
2697 					if ((pageProtection & B_READ_AREA) != 0)
2698 						protection |= B_READ_AREA;
2699 
2700 					map->ProtectPage(tempArea, address, protection);
2701 				}
2702 				map->Unlock();
2703 				continue;
2704 			}
2705 			// The area must be readable in the same way it was previously
2706 			// writable.
2707 			uint32 protection = 0;
2708 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2709 				protection |= B_KERNEL_READ_AREA;
2710 			if ((tempArea->protection & B_READ_AREA) != 0)
2711 				protection |= B_READ_AREA;
2712 
2713 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2714 			map->Lock();
2715 			map->ProtectArea(tempArea, protection);
2716 			map->Unlock();
2717 		}
2718 	}
2719 
2720 	vm_area_put_locked_cache(upperCache);
2721 
2722 	return B_OK;
2723 }
2724 
2725 
2726 area_id
2727 vm_copy_area(team_id team, const char* name, void** _address,
2728 	uint32 addressSpec, area_id sourceID)
2729 {
2730 	// Do the locking: target address space, all address spaces associated with
2731 	// the source cache, and the cache itself.
2732 	MultiAddressSpaceLocker locker;
2733 	VMAddressSpace* targetAddressSpace;
2734 	VMCache* cache;
2735 	VMArea* source;
2736 	AreaCacheLocker cacheLocker;
2737 	status_t status;
2738 	bool sharedArea;
2739 
2740 	page_num_t wiredPages = 0;
2741 	vm_page_reservation wiredPagesReservation;
2742 
2743 	bool restart;
2744 	do {
2745 		restart = false;
2746 
2747 		locker.Unset();
2748 		status = locker.AddTeam(team, true, &targetAddressSpace);
2749 		if (status == B_OK) {
2750 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2751 				&cache);
2752 		}
2753 		if (status != B_OK)
2754 			return status;
2755 
2756 		cacheLocker.SetTo(cache, true);	// already locked
2757 
2758 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2759 
2760 		page_num_t oldWiredPages = wiredPages;
2761 		wiredPages = 0;
2762 
2763 		// If the source area isn't shared, count the number of wired pages in
2764 		// the cache and reserve as many pages.
2765 		if (!sharedArea) {
2766 			wiredPages = cache->WiredPagesCount();
2767 
2768 			if (wiredPages > oldWiredPages) {
2769 				cacheLocker.Unlock();
2770 				locker.Unlock();
2771 
2772 				if (oldWiredPages > 0)
2773 					vm_page_unreserve_pages(&wiredPagesReservation);
2774 
2775 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2776 					VM_PRIORITY_USER);
2777 
2778 				restart = true;
2779 			}
2780 		} else if (oldWiredPages > 0)
2781 			vm_page_unreserve_pages(&wiredPagesReservation);
2782 	} while (restart);
2783 
2784 	// unreserve pages later
2785 	struct PagesUnreserver {
2786 		PagesUnreserver(vm_page_reservation* reservation)
2787 			:
2788 			fReservation(reservation)
2789 		{
2790 		}
2791 
2792 		~PagesUnreserver()
2793 		{
2794 			if (fReservation != NULL)
2795 				vm_page_unreserve_pages(fReservation);
2796 		}
2797 
2798 	private:
2799 		vm_page_reservation*	fReservation;
2800 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2801 
2802 	bool writableCopy
2803 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2804 	uint8* targetPageProtections = NULL;
2805 
2806 	if (source->page_protections != NULL) {
2807 		size_t bytes = area_page_protections_size(source->Size());
2808 		targetPageProtections = (uint8*)malloc_etc(bytes,
2809 			(source->address_space == VMAddressSpace::Kernel()
2810 					|| targetAddressSpace == VMAddressSpace::Kernel())
2811 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2812 		if (targetPageProtections == NULL)
2813 			return B_NO_MEMORY;
2814 
2815 		memcpy(targetPageProtections, source->page_protections, bytes);
2816 
2817 		if (!writableCopy) {
2818 			for (size_t i = 0; i < bytes; i++) {
2819 				if ((targetPageProtections[i]
2820 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2821 					writableCopy = true;
2822 					break;
2823 				}
2824 			}
2825 		}
2826 	}
2827 
2828 	if (addressSpec == B_CLONE_ADDRESS) {
2829 		addressSpec = B_EXACT_ADDRESS;
2830 		*_address = (void*)source->Base();
2831 	}
2832 
2833 	// First, create a cache on top of the source area, respectively use the
2834 	// existing one, if this is a shared area.
2835 
2836 	VMArea* target;
2837 	virtual_address_restrictions addressRestrictions = {};
2838 	addressRestrictions.address = *_address;
2839 	addressRestrictions.address_specification = addressSpec;
2840 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2841 		name, source->Size(), source->wiring, source->protection,
2842 		source->protection_max,
2843 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2844 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2845 		&addressRestrictions, true, &target, _address);
2846 	if (status < B_OK) {
2847 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2848 		return status;
2849 	}
2850 
2851 	if (targetPageProtections != NULL)
2852 		target->page_protections = targetPageProtections;
2853 
2854 	if (sharedArea) {
2855 		// The new area uses the old area's cache, but map_backing_store()
2856 		// hasn't acquired a ref. So we have to do that now.
2857 		cache->AcquireRefLocked();
2858 	}
2859 
2860 	// If the source area is writable, we need to move it one layer up as well
2861 
2862 	if (!sharedArea) {
2863 		if (writableCopy) {
2864 			// TODO: do something more useful if this fails!
2865 			if (vm_copy_on_write_area(cache,
2866 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2867 				panic("vm_copy_on_write_area() failed!\n");
2868 			}
2869 		}
2870 	}
2871 
2872 	// we return the ID of the newly created area
2873 	return target->id;
2874 }
2875 
2876 
2877 status_t
2878 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2879 	bool kernel)
2880 {
2881 	fix_protection(&newProtection);
2882 
2883 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2884 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2885 
2886 	if (!arch_vm_supports_protection(newProtection))
2887 		return B_NOT_SUPPORTED;
2888 
2889 	bool becomesWritable
2890 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2891 
2892 	// lock address spaces and cache
2893 	MultiAddressSpaceLocker locker;
2894 	VMCache* cache;
2895 	VMArea* area;
2896 	status_t status;
2897 	AreaCacheLocker cacheLocker;
2898 	bool isWritable;
2899 
2900 	bool restart;
2901 	do {
2902 		restart = false;
2903 
2904 		locker.Unset();
2905 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2906 		if (status != B_OK)
2907 			return status;
2908 
2909 		cacheLocker.SetTo(cache, true);	// already locked
2910 
2911 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2912 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2913 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2914 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2915 				" (%s)\n", team, newProtection, areaID, area->name);
2916 			return B_NOT_ALLOWED;
2917 		}
2918 		if (!kernel && area->protection_max != 0
2919 			&& (newProtection & area->protection_max)
2920 				!= (newProtection & B_USER_PROTECTION)) {
2921 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2922 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2923 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2924 				area->protection_max, areaID, area->name);
2925 			return B_NOT_ALLOWED;
2926 		}
2927 
2928 		if (team != VMAddressSpace::KernelID()
2929 			&& area->address_space->ID() != team) {
2930 			// unless you're the kernel, you are only allowed to set
2931 			// the protection of your own areas
2932 			return B_NOT_ALLOWED;
2933 		}
2934 
2935 		if (area->protection == newProtection)
2936 			return B_OK;
2937 
2938 		isWritable
2939 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2940 
2941 		// Make sure the area (respectively, if we're going to call
2942 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2943 		// wired ranges.
2944 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2945 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2946 					otherArea = otherArea->cache_next) {
2947 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2948 					restart = true;
2949 					break;
2950 				}
2951 			}
2952 		} else {
2953 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2954 				restart = true;
2955 		}
2956 	} while (restart);
2957 
2958 	bool changePageProtection = true;
2959 	bool changeTopCachePagesOnly = false;
2960 
2961 	if (isWritable && !becomesWritable) {
2962 		// writable -> !writable
2963 
2964 		if (cache->source != NULL && cache->temporary) {
2965 			if (cache->CountWritableAreas(area) == 0) {
2966 				// Since this cache now lives from the pages in its source cache,
2967 				// we can change the cache's commitment to take only those pages
2968 				// into account that really are in this cache.
2969 
2970 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2971 					team == VMAddressSpace::KernelID()
2972 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2973 
2974 				// TODO: we may be able to join with our source cache, if
2975 				// count == 0
2976 			}
2977 		}
2978 
2979 		// If only the writability changes, we can just remap the pages of the
2980 		// top cache, since the pages of lower caches are mapped read-only
2981 		// anyway. That's advantageous only, if the number of pages in the cache
2982 		// is significantly smaller than the number of pages in the area,
2983 		// though.
2984 		if (newProtection
2985 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2986 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2987 			changeTopCachePagesOnly = true;
2988 		}
2989 	} else if (!isWritable && becomesWritable) {
2990 		// !writable -> writable
2991 
2992 		if (!cache->consumers.IsEmpty()) {
2993 			// There are consumers -- we have to insert a new cache. Fortunately
2994 			// vm_copy_on_write_area() does everything that's needed.
2995 			changePageProtection = false;
2996 			status = vm_copy_on_write_area(cache, NULL);
2997 		} else {
2998 			// No consumers, so we don't need to insert a new one.
2999 			if (cache->source != NULL && cache->temporary) {
3000 				// the cache's commitment must contain all possible pages
3001 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3002 					team == VMAddressSpace::KernelID()
3003 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3004 			}
3005 
3006 			if (status == B_OK && cache->source != NULL) {
3007 				// There's a source cache, hence we can't just change all pages'
3008 				// protection or we might allow writing into pages belonging to
3009 				// a lower cache.
3010 				changeTopCachePagesOnly = true;
3011 			}
3012 		}
3013 	} else {
3014 		// we don't have anything special to do in all other cases
3015 	}
3016 
3017 	if (status == B_OK) {
3018 		// remap existing pages in this cache
3019 		if (changePageProtection) {
3020 			VMTranslationMap* map = area->address_space->TranslationMap();
3021 			map->Lock();
3022 
3023 			if (changeTopCachePagesOnly) {
3024 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3025 				page_num_t lastPageOffset
3026 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3027 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3028 						vm_page* page = it.Next();) {
3029 					if (page->cache_offset >= firstPageOffset
3030 						&& page->cache_offset <= lastPageOffset) {
3031 						addr_t address = virtual_page_address(area, page);
3032 						map->ProtectPage(area, address, newProtection);
3033 					}
3034 				}
3035 			} else
3036 				map->ProtectArea(area, newProtection);
3037 
3038 			map->Unlock();
3039 		}
3040 
3041 		area->protection = newProtection;
3042 	}
3043 
3044 	return status;
3045 }
3046 
3047 
3048 status_t
3049 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3050 {
3051 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3052 	if (addressSpace == NULL)
3053 		return B_BAD_TEAM_ID;
3054 
3055 	VMTranslationMap* map = addressSpace->TranslationMap();
3056 
3057 	map->Lock();
3058 	uint32 dummyFlags;
3059 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3060 	map->Unlock();
3061 
3062 	addressSpace->Put();
3063 	return status;
3064 }
3065 
3066 
3067 /*!	The page's cache must be locked.
3068 */
3069 bool
3070 vm_test_map_modification(vm_page* page)
3071 {
3072 	if (page->modified)
3073 		return true;
3074 
3075 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3076 	vm_page_mapping* mapping;
3077 	while ((mapping = iterator.Next()) != NULL) {
3078 		VMArea* area = mapping->area;
3079 		VMTranslationMap* map = area->address_space->TranslationMap();
3080 
3081 		phys_addr_t physicalAddress;
3082 		uint32 flags;
3083 		map->Lock();
3084 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3085 		map->Unlock();
3086 
3087 		if ((flags & PAGE_MODIFIED) != 0)
3088 			return true;
3089 	}
3090 
3091 	return false;
3092 }
3093 
3094 
3095 /*!	The page's cache must be locked.
3096 */
3097 void
3098 vm_clear_map_flags(vm_page* page, uint32 flags)
3099 {
3100 	if ((flags & PAGE_ACCESSED) != 0)
3101 		page->accessed = false;
3102 	if ((flags & PAGE_MODIFIED) != 0)
3103 		page->modified = false;
3104 
3105 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3106 	vm_page_mapping* mapping;
3107 	while ((mapping = iterator.Next()) != NULL) {
3108 		VMArea* area = mapping->area;
3109 		VMTranslationMap* map = area->address_space->TranslationMap();
3110 
3111 		map->Lock();
3112 		map->ClearFlags(virtual_page_address(area, page), flags);
3113 		map->Unlock();
3114 	}
3115 }
3116 
3117 
3118 /*!	Removes all mappings from a page.
3119 	After you've called this function, the page is unmapped from memory and
3120 	the page's \c accessed and \c modified flags have been updated according
3121 	to the state of the mappings.
3122 	The page's cache must be locked.
3123 */
3124 void
3125 vm_remove_all_page_mappings(vm_page* page)
3126 {
3127 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3128 		VMArea* area = mapping->area;
3129 		VMTranslationMap* map = area->address_space->TranslationMap();
3130 		addr_t address = virtual_page_address(area, page);
3131 		map->UnmapPage(area, address, false);
3132 	}
3133 }
3134 
3135 
3136 int32
3137 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3138 {
3139 	int32 count = 0;
3140 
3141 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3142 	vm_page_mapping* mapping;
3143 	while ((mapping = iterator.Next()) != NULL) {
3144 		VMArea* area = mapping->area;
3145 		VMTranslationMap* map = area->address_space->TranslationMap();
3146 
3147 		bool modified;
3148 		if (map->ClearAccessedAndModified(area,
3149 				virtual_page_address(area, page), false, modified)) {
3150 			count++;
3151 		}
3152 
3153 		page->modified |= modified;
3154 	}
3155 
3156 
3157 	if (page->accessed) {
3158 		count++;
3159 		page->accessed = false;
3160 	}
3161 
3162 	return count;
3163 }
3164 
3165 
3166 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3167 	mappings.
3168 	The function iterates through the page mappings and removes them until
3169 	encountering one that has been accessed. From then on it will continue to
3170 	iterate, but only clear the accessed flag of the mapping. The page's
3171 	\c modified bit will be updated accordingly, the \c accessed bit will be
3172 	cleared.
3173 	\return The number of mapping accessed bits encountered, including the
3174 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3175 		of the page have been removed.
3176 */
3177 int32
3178 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3179 {
3180 	ASSERT(page->WiredCount() == 0);
3181 
3182 	if (page->accessed)
3183 		return vm_clear_page_mapping_accessed_flags(page);
3184 
3185 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3186 		VMArea* area = mapping->area;
3187 		VMTranslationMap* map = area->address_space->TranslationMap();
3188 		addr_t address = virtual_page_address(area, page);
3189 		bool modified = false;
3190 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3191 			page->accessed = true;
3192 			page->modified |= modified;
3193 			return vm_clear_page_mapping_accessed_flags(page);
3194 		}
3195 		page->modified |= modified;
3196 	}
3197 
3198 	return 0;
3199 }
3200 
3201 
3202 static int
3203 display_mem(int argc, char** argv)
3204 {
3205 	bool physical = false;
3206 	addr_t copyAddress;
3207 	int32 displayWidth;
3208 	int32 itemSize;
3209 	int32 num = -1;
3210 	addr_t address;
3211 	int i = 1, j;
3212 
3213 	if (argc > 1 && argv[1][0] == '-') {
3214 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3215 			physical = true;
3216 			i++;
3217 		} else
3218 			i = 99;
3219 	}
3220 
3221 	if (argc < i + 1 || argc > i + 2) {
3222 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3223 			"\tdl - 8 bytes\n"
3224 			"\tdw - 4 bytes\n"
3225 			"\tds - 2 bytes\n"
3226 			"\tdb - 1 byte\n"
3227 			"\tstring - a whole string\n"
3228 			"  -p or --physical only allows memory from a single page to be "
3229 			"displayed.\n");
3230 		return 0;
3231 	}
3232 
3233 	address = parse_expression(argv[i]);
3234 
3235 	if (argc > i + 1)
3236 		num = parse_expression(argv[i + 1]);
3237 
3238 	// build the format string
3239 	if (strcmp(argv[0], "db") == 0) {
3240 		itemSize = 1;
3241 		displayWidth = 16;
3242 	} else if (strcmp(argv[0], "ds") == 0) {
3243 		itemSize = 2;
3244 		displayWidth = 8;
3245 	} else if (strcmp(argv[0], "dw") == 0) {
3246 		itemSize = 4;
3247 		displayWidth = 4;
3248 	} else if (strcmp(argv[0], "dl") == 0) {
3249 		itemSize = 8;
3250 		displayWidth = 2;
3251 	} else if (strcmp(argv[0], "string") == 0) {
3252 		itemSize = 1;
3253 		displayWidth = -1;
3254 	} else {
3255 		kprintf("display_mem called in an invalid way!\n");
3256 		return 0;
3257 	}
3258 
3259 	if (num <= 0)
3260 		num = displayWidth;
3261 
3262 	void* physicalPageHandle = NULL;
3263 
3264 	if (physical) {
3265 		int32 offset = address & (B_PAGE_SIZE - 1);
3266 		if (num * itemSize + offset > B_PAGE_SIZE) {
3267 			num = (B_PAGE_SIZE - offset) / itemSize;
3268 			kprintf("NOTE: number of bytes has been cut to page size\n");
3269 		}
3270 
3271 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3272 
3273 		if (vm_get_physical_page_debug(address, &copyAddress,
3274 				&physicalPageHandle) != B_OK) {
3275 			kprintf("getting the hardware page failed.");
3276 			return 0;
3277 		}
3278 
3279 		address += offset;
3280 		copyAddress += offset;
3281 	} else
3282 		copyAddress = address;
3283 
3284 	if (!strcmp(argv[0], "string")) {
3285 		kprintf("%p \"", (char*)copyAddress);
3286 
3287 		// string mode
3288 		for (i = 0; true; i++) {
3289 			char c;
3290 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3291 					!= B_OK
3292 				|| c == '\0') {
3293 				break;
3294 			}
3295 
3296 			if (c == '\n')
3297 				kprintf("\\n");
3298 			else if (c == '\t')
3299 				kprintf("\\t");
3300 			else {
3301 				if (!isprint(c))
3302 					c = '.';
3303 
3304 				kprintf("%c", c);
3305 			}
3306 		}
3307 
3308 		kprintf("\"\n");
3309 	} else {
3310 		// number mode
3311 		for (i = 0; i < num; i++) {
3312 			uint64 value;
3313 
3314 			if ((i % displayWidth) == 0) {
3315 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3316 				if (i != 0)
3317 					kprintf("\n");
3318 
3319 				kprintf("[0x%lx]  ", address + i * itemSize);
3320 
3321 				for (j = 0; j < displayed; j++) {
3322 					char c;
3323 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3324 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3325 						displayed = j;
3326 						break;
3327 					}
3328 					if (!isprint(c))
3329 						c = '.';
3330 
3331 					kprintf("%c", c);
3332 				}
3333 				if (num > displayWidth) {
3334 					// make sure the spacing in the last line is correct
3335 					for (j = displayed; j < displayWidth * itemSize; j++)
3336 						kprintf(" ");
3337 				}
3338 				kprintf("  ");
3339 			}
3340 
3341 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3342 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3343 				kprintf("read fault");
3344 				break;
3345 			}
3346 
3347 			switch (itemSize) {
3348 				case 1:
3349 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3350 					break;
3351 				case 2:
3352 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3353 					break;
3354 				case 4:
3355 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3356 					break;
3357 				case 8:
3358 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3359 					break;
3360 			}
3361 		}
3362 
3363 		kprintf("\n");
3364 	}
3365 
3366 	if (physical) {
3367 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3368 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3369 	}
3370 	return 0;
3371 }
3372 
3373 
3374 static void
3375 dump_cache_tree_recursively(VMCache* cache, int level,
3376 	VMCache* highlightCache)
3377 {
3378 	// print this cache
3379 	for (int i = 0; i < level; i++)
3380 		kprintf("  ");
3381 	if (cache == highlightCache)
3382 		kprintf("%p <--\n", cache);
3383 	else
3384 		kprintf("%p\n", cache);
3385 
3386 	// recursively print its consumers
3387 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3388 			VMCache* consumer = it.Next();) {
3389 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3390 	}
3391 }
3392 
3393 
3394 static int
3395 dump_cache_tree(int argc, char** argv)
3396 {
3397 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3398 		kprintf("usage: %s <address>\n", argv[0]);
3399 		return 0;
3400 	}
3401 
3402 	addr_t address = parse_expression(argv[1]);
3403 	if (address == 0)
3404 		return 0;
3405 
3406 	VMCache* cache = (VMCache*)address;
3407 	VMCache* root = cache;
3408 
3409 	// find the root cache (the transitive source)
3410 	while (root->source != NULL)
3411 		root = root->source;
3412 
3413 	dump_cache_tree_recursively(root, 0, cache);
3414 
3415 	return 0;
3416 }
3417 
3418 
3419 const char*
3420 vm_cache_type_to_string(int32 type)
3421 {
3422 	switch (type) {
3423 		case CACHE_TYPE_RAM:
3424 			return "RAM";
3425 		case CACHE_TYPE_DEVICE:
3426 			return "device";
3427 		case CACHE_TYPE_VNODE:
3428 			return "vnode";
3429 		case CACHE_TYPE_NULL:
3430 			return "null";
3431 
3432 		default:
3433 			return "unknown";
3434 	}
3435 }
3436 
3437 
3438 #if DEBUG_CACHE_LIST
3439 
3440 static void
3441 update_cache_info_recursively(VMCache* cache, cache_info& info)
3442 {
3443 	info.page_count += cache->page_count;
3444 	if (cache->type == CACHE_TYPE_RAM)
3445 		info.committed += cache->committed_size;
3446 
3447 	// recurse
3448 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3449 			VMCache* consumer = it.Next();) {
3450 		update_cache_info_recursively(consumer, info);
3451 	}
3452 }
3453 
3454 
3455 static int
3456 cache_info_compare_page_count(const void* _a, const void* _b)
3457 {
3458 	const cache_info* a = (const cache_info*)_a;
3459 	const cache_info* b = (const cache_info*)_b;
3460 	if (a->page_count == b->page_count)
3461 		return 0;
3462 	return a->page_count < b->page_count ? 1 : -1;
3463 }
3464 
3465 
3466 static int
3467 cache_info_compare_committed(const void* _a, const void* _b)
3468 {
3469 	const cache_info* a = (const cache_info*)_a;
3470 	const cache_info* b = (const cache_info*)_b;
3471 	if (a->committed == b->committed)
3472 		return 0;
3473 	return a->committed < b->committed ? 1 : -1;
3474 }
3475 
3476 
3477 static void
3478 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3479 {
3480 	for (int i = 0; i < level; i++)
3481 		kprintf("  ");
3482 
3483 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3484 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3485 		cache->virtual_base, cache->virtual_end, cache->page_count);
3486 
3487 	if (level == 0)
3488 		kprintf("/%lu", info.page_count);
3489 
3490 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3491 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3492 
3493 		if (level == 0)
3494 			kprintf("/%lu", info.committed);
3495 	}
3496 
3497 	// areas
3498 	if (cache->areas != NULL) {
3499 		VMArea* area = cache->areas;
3500 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3501 			area->name, area->address_space->ID());
3502 
3503 		while (area->cache_next != NULL) {
3504 			area = area->cache_next;
3505 			kprintf(", %" B_PRId32, area->id);
3506 		}
3507 	}
3508 
3509 	kputs("\n");
3510 
3511 	// recurse
3512 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3513 			VMCache* consumer = it.Next();) {
3514 		dump_caches_recursively(consumer, info, level + 1);
3515 	}
3516 }
3517 
3518 
3519 static int
3520 dump_caches(int argc, char** argv)
3521 {
3522 	if (sCacheInfoTable == NULL) {
3523 		kprintf("No cache info table!\n");
3524 		return 0;
3525 	}
3526 
3527 	bool sortByPageCount = true;
3528 
3529 	for (int32 i = 1; i < argc; i++) {
3530 		if (strcmp(argv[i], "-c") == 0) {
3531 			sortByPageCount = false;
3532 		} else {
3533 			print_debugger_command_usage(argv[0]);
3534 			return 0;
3535 		}
3536 	}
3537 
3538 	uint32 totalCount = 0;
3539 	uint32 rootCount = 0;
3540 	off_t totalCommitted = 0;
3541 	page_num_t totalPages = 0;
3542 
3543 	VMCache* cache = gDebugCacheList;
3544 	while (cache) {
3545 		totalCount++;
3546 		if (cache->source == NULL) {
3547 			cache_info stackInfo;
3548 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3549 				? sCacheInfoTable[rootCount] : stackInfo;
3550 			rootCount++;
3551 			info.cache = cache;
3552 			info.page_count = 0;
3553 			info.committed = 0;
3554 			update_cache_info_recursively(cache, info);
3555 			totalCommitted += info.committed;
3556 			totalPages += info.page_count;
3557 		}
3558 
3559 		cache = cache->debug_next;
3560 	}
3561 
3562 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3563 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3564 			sortByPageCount
3565 				? &cache_info_compare_page_count
3566 				: &cache_info_compare_committed);
3567 	}
3568 
3569 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3570 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3571 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3572 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3573 			"page count" : "committed size");
3574 
3575 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3576 		for (uint32 i = 0; i < rootCount; i++) {
3577 			cache_info& info = sCacheInfoTable[i];
3578 			dump_caches_recursively(info.cache, info, 0);
3579 		}
3580 	} else
3581 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3582 
3583 	return 0;
3584 }
3585 
3586 #endif	// DEBUG_CACHE_LIST
3587 
3588 
3589 static int
3590 dump_cache(int argc, char** argv)
3591 {
3592 	VMCache* cache;
3593 	bool showPages = false;
3594 	int i = 1;
3595 
3596 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3597 		kprintf("usage: %s [-ps] <address>\n"
3598 			"  if -p is specified, all pages are shown, if -s is used\n"
3599 			"  only the cache info is shown respectively.\n", argv[0]);
3600 		return 0;
3601 	}
3602 	while (argv[i][0] == '-') {
3603 		char* arg = argv[i] + 1;
3604 		while (arg[0]) {
3605 			if (arg[0] == 'p')
3606 				showPages = true;
3607 			arg++;
3608 		}
3609 		i++;
3610 	}
3611 	if (argv[i] == NULL) {
3612 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3613 		return 0;
3614 	}
3615 
3616 	addr_t address = parse_expression(argv[i]);
3617 	if (address == 0)
3618 		return 0;
3619 
3620 	cache = (VMCache*)address;
3621 
3622 	cache->Dump(showPages);
3623 
3624 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3625 
3626 	return 0;
3627 }
3628 
3629 
3630 static void
3631 dump_area_struct(VMArea* area, bool mappings)
3632 {
3633 	kprintf("AREA: %p\n", area);
3634 	kprintf("name:\t\t'%s'\n", area->name);
3635 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3636 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3637 	kprintf("base:\t\t0x%lx\n", area->Base());
3638 	kprintf("size:\t\t0x%lx\n", area->Size());
3639 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3640 	kprintf("page_protection:%p\n", area->page_protections);
3641 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3642 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3643 	kprintf("cache:\t\t%p\n", area->cache);
3644 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3645 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3646 	kprintf("cache_next:\t%p\n", area->cache_next);
3647 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3648 
3649 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3650 	if (mappings) {
3651 		kprintf("page mappings:\n");
3652 		while (iterator.HasNext()) {
3653 			vm_page_mapping* mapping = iterator.Next();
3654 			kprintf("  %p", mapping->page);
3655 		}
3656 		kprintf("\n");
3657 	} else {
3658 		uint32 count = 0;
3659 		while (iterator.Next() != NULL) {
3660 			count++;
3661 		}
3662 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3663 	}
3664 }
3665 
3666 
3667 static int
3668 dump_area(int argc, char** argv)
3669 {
3670 	bool mappings = false;
3671 	bool found = false;
3672 	int32 index = 1;
3673 	VMArea* area;
3674 	addr_t num;
3675 
3676 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3677 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3678 			"All areas matching either id/address/name are listed. You can\n"
3679 			"force to check only a specific item by prefixing the specifier\n"
3680 			"with the id/contains/address/name keywords.\n"
3681 			"-m shows the area's mappings as well.\n");
3682 		return 0;
3683 	}
3684 
3685 	if (!strcmp(argv[1], "-m")) {
3686 		mappings = true;
3687 		index++;
3688 	}
3689 
3690 	int32 mode = 0xf;
3691 	if (!strcmp(argv[index], "id"))
3692 		mode = 1;
3693 	else if (!strcmp(argv[index], "contains"))
3694 		mode = 2;
3695 	else if (!strcmp(argv[index], "name"))
3696 		mode = 4;
3697 	else if (!strcmp(argv[index], "address"))
3698 		mode = 0;
3699 	if (mode != 0xf)
3700 		index++;
3701 
3702 	if (index >= argc) {
3703 		kprintf("No area specifier given.\n");
3704 		return 0;
3705 	}
3706 
3707 	num = parse_expression(argv[index]);
3708 
3709 	if (mode == 0) {
3710 		dump_area_struct((struct VMArea*)num, mappings);
3711 	} else {
3712 		// walk through the area list, looking for the arguments as a name
3713 
3714 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3715 		while ((area = it.Next()) != NULL) {
3716 			if (((mode & 4) != 0
3717 					&& !strcmp(argv[index], area->name))
3718 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3719 					|| (((mode & 2) != 0 && area->Base() <= num
3720 						&& area->Base() + area->Size() > num))))) {
3721 				dump_area_struct(area, mappings);
3722 				found = true;
3723 			}
3724 		}
3725 
3726 		if (!found)
3727 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3728 	}
3729 
3730 	return 0;
3731 }
3732 
3733 
3734 static int
3735 dump_area_list(int argc, char** argv)
3736 {
3737 	VMArea* area;
3738 	const char* name = NULL;
3739 	int32 id = 0;
3740 
3741 	if (argc > 1) {
3742 		id = parse_expression(argv[1]);
3743 		if (id == 0)
3744 			name = argv[1];
3745 	}
3746 
3747 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3748 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3749 		B_PRINTF_POINTER_WIDTH, "size");
3750 
3751 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3752 	while ((area = it.Next()) != NULL) {
3753 		if ((id != 0 && area->address_space->ID() != id)
3754 			|| (name != NULL && strstr(area->name, name) == NULL))
3755 			continue;
3756 
3757 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3758 			area->id, (void*)area->Base(), (void*)area->Size(),
3759 			area->protection, area->wiring, area->name);
3760 	}
3761 	return 0;
3762 }
3763 
3764 
3765 static int
3766 dump_available_memory(int argc, char** argv)
3767 {
3768 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3769 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3770 	return 0;
3771 }
3772 
3773 
3774 static int
3775 dump_mapping_info(int argc, char** argv)
3776 {
3777 	bool reverseLookup = false;
3778 	bool pageLookup = false;
3779 
3780 	int argi = 1;
3781 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3782 		const char* arg = argv[argi];
3783 		if (strcmp(arg, "-r") == 0) {
3784 			reverseLookup = true;
3785 		} else if (strcmp(arg, "-p") == 0) {
3786 			reverseLookup = true;
3787 			pageLookup = true;
3788 		} else {
3789 			print_debugger_command_usage(argv[0]);
3790 			return 0;
3791 		}
3792 	}
3793 
3794 	// We need at least one argument, the address. Optionally a thread ID can be
3795 	// specified.
3796 	if (argi >= argc || argi + 2 < argc) {
3797 		print_debugger_command_usage(argv[0]);
3798 		return 0;
3799 	}
3800 
3801 	uint64 addressValue;
3802 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3803 		return 0;
3804 
3805 	Team* team = NULL;
3806 	if (argi < argc) {
3807 		uint64 threadID;
3808 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3809 			return 0;
3810 
3811 		Thread* thread = Thread::GetDebug(threadID);
3812 		if (thread == NULL) {
3813 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3814 			return 0;
3815 		}
3816 
3817 		team = thread->team;
3818 	}
3819 
3820 	if (reverseLookup) {
3821 		phys_addr_t physicalAddress;
3822 		if (pageLookup) {
3823 			vm_page* page = (vm_page*)(addr_t)addressValue;
3824 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3825 		} else {
3826 			physicalAddress = (phys_addr_t)addressValue;
3827 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3828 		}
3829 
3830 		kprintf("    Team     Virtual Address      Area\n");
3831 		kprintf("--------------------------------------\n");
3832 
3833 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3834 			Callback()
3835 				:
3836 				fAddressSpace(NULL)
3837 			{
3838 			}
3839 
3840 			void SetAddressSpace(VMAddressSpace* addressSpace)
3841 			{
3842 				fAddressSpace = addressSpace;
3843 			}
3844 
3845 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3846 			{
3847 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3848 					virtualAddress);
3849 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3850 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3851 				else
3852 					kprintf("\n");
3853 				return false;
3854 			}
3855 
3856 		private:
3857 			VMAddressSpace*	fAddressSpace;
3858 		} callback;
3859 
3860 		if (team != NULL) {
3861 			// team specified -- get its address space
3862 			VMAddressSpace* addressSpace = team->address_space;
3863 			if (addressSpace == NULL) {
3864 				kprintf("Failed to get address space!\n");
3865 				return 0;
3866 			}
3867 
3868 			callback.SetAddressSpace(addressSpace);
3869 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3870 				physicalAddress, callback);
3871 		} else {
3872 			// no team specified -- iterate through all address spaces
3873 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3874 				addressSpace != NULL;
3875 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3876 				callback.SetAddressSpace(addressSpace);
3877 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3878 					physicalAddress, callback);
3879 			}
3880 		}
3881 	} else {
3882 		// get the address space
3883 		addr_t virtualAddress = (addr_t)addressValue;
3884 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3885 		VMAddressSpace* addressSpace;
3886 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3887 			addressSpace = VMAddressSpace::Kernel();
3888 		} else if (team != NULL) {
3889 			addressSpace = team->address_space;
3890 		} else {
3891 			Thread* thread = debug_get_debugged_thread();
3892 			if (thread == NULL || thread->team == NULL) {
3893 				kprintf("Failed to get team!\n");
3894 				return 0;
3895 			}
3896 
3897 			addressSpace = thread->team->address_space;
3898 		}
3899 
3900 		if (addressSpace == NULL) {
3901 			kprintf("Failed to get address space!\n");
3902 			return 0;
3903 		}
3904 
3905 		// let the translation map implementation do the job
3906 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3907 	}
3908 
3909 	return 0;
3910 }
3911 
3912 
3913 /*!	Deletes all areas and reserved regions in the given address space.
3914 
3915 	The caller must ensure that none of the areas has any wired ranges.
3916 
3917 	\param addressSpace The address space.
3918 	\param deletingAddressSpace \c true, if the address space is in the process
3919 		of being deleted.
3920 */
3921 void
3922 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3923 {
3924 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3925 		addressSpace->ID()));
3926 
3927 	addressSpace->WriteLock();
3928 
3929 	// remove all reserved areas in this address space
3930 	addressSpace->UnreserveAllAddressRanges(0);
3931 
3932 	// delete all the areas in this address space
3933 	while (VMArea* area = addressSpace->FirstArea()) {
3934 		ASSERT(!area->IsWired());
3935 		delete_area(addressSpace, area, deletingAddressSpace);
3936 	}
3937 
3938 	addressSpace->WriteUnlock();
3939 }
3940 
3941 
3942 static area_id
3943 vm_area_for(addr_t address, bool kernel)
3944 {
3945 	team_id team;
3946 	if (IS_USER_ADDRESS(address)) {
3947 		// we try the user team address space, if any
3948 		team = VMAddressSpace::CurrentID();
3949 		if (team < 0)
3950 			return team;
3951 	} else
3952 		team = VMAddressSpace::KernelID();
3953 
3954 	AddressSpaceReadLocker locker(team);
3955 	if (!locker.IsLocked())
3956 		return B_BAD_TEAM_ID;
3957 
3958 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3959 	if (area != NULL) {
3960 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3961 				&& (area->protection & B_KERNEL_AREA) != 0)
3962 			return B_ERROR;
3963 
3964 		return area->id;
3965 	}
3966 
3967 	return B_ERROR;
3968 }
3969 
3970 
3971 /*!	Frees physical pages that were used during the boot process.
3972 	\a end is inclusive.
3973 */
3974 static void
3975 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3976 {
3977 	// free all physical pages in the specified range
3978 
3979 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3980 		phys_addr_t physicalAddress;
3981 		uint32 flags;
3982 
3983 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3984 			&& (flags & PAGE_PRESENT) != 0) {
3985 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3986 			if (page != NULL && page->State() != PAGE_STATE_FREE
3987 					&& page->State() != PAGE_STATE_CLEAR
3988 					&& page->State() != PAGE_STATE_UNUSED) {
3989 				DEBUG_PAGE_ACCESS_START(page);
3990 				vm_page_set_state(page, PAGE_STATE_FREE);
3991 			}
3992 		}
3993 	}
3994 
3995 	// unmap the memory
3996 	map->Unmap(start, end);
3997 }
3998 
3999 
4000 void
4001 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4002 {
4003 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
4004 	addr_t end = start + (size - 1);
4005 	addr_t lastEnd = start;
4006 
4007 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4008 		(void*)start, (void*)end));
4009 
4010 	// The areas are sorted in virtual address space order, so
4011 	// we just have to find the holes between them that fall
4012 	// into the area we should dispose
4013 
4014 	map->Lock();
4015 
4016 	for (VMAddressSpace::AreaIterator it
4017 				= VMAddressSpace::Kernel()->GetAreaIterator();
4018 			VMArea* area = it.Next();) {
4019 		addr_t areaStart = area->Base();
4020 		addr_t areaEnd = areaStart + (area->Size() - 1);
4021 
4022 		if (areaEnd < start)
4023 			continue;
4024 
4025 		if (areaStart > end) {
4026 			// we are done, the area is already beyond of what we have to free
4027 			break;
4028 		}
4029 
4030 		if (areaStart > lastEnd) {
4031 			// this is something we can free
4032 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4033 				(void*)areaStart));
4034 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4035 		}
4036 
4037 		if (areaEnd >= end) {
4038 			lastEnd = areaEnd;
4039 				// no +1 to prevent potential overflow
4040 			break;
4041 		}
4042 
4043 		lastEnd = areaEnd + 1;
4044 	}
4045 
4046 	if (lastEnd < end) {
4047 		// we can also get rid of some space at the end of the area
4048 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4049 			(void*)end));
4050 		unmap_and_free_physical_pages(map, lastEnd, end);
4051 	}
4052 
4053 	map->Unlock();
4054 }
4055 
4056 
4057 static void
4058 create_preloaded_image_areas(struct preloaded_image* _image)
4059 {
4060 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4061 	char name[B_OS_NAME_LENGTH];
4062 	void* address;
4063 	int32 length;
4064 
4065 	// use file name to create a good area name
4066 	char* fileName = strrchr(image->name, '/');
4067 	if (fileName == NULL)
4068 		fileName = image->name;
4069 	else
4070 		fileName++;
4071 
4072 	length = strlen(fileName);
4073 	// make sure there is enough space for the suffix
4074 	if (length > 25)
4075 		length = 25;
4076 
4077 	memcpy(name, fileName, length);
4078 	strcpy(name + length, "_text");
4079 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4080 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4081 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4082 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4083 		// this will later be remapped read-only/executable by the
4084 		// ELF initialization code
4085 
4086 	strcpy(name + length, "_data");
4087 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4088 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4089 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4090 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4091 }
4092 
4093 
4094 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
4095 	Any boot loader resources contained in that arguments must not be accessed
4096 	anymore past this point.
4097 */
4098 void
4099 vm_free_kernel_args(kernel_args* args)
4100 {
4101 	uint32 i;
4102 
4103 	TRACE(("vm_free_kernel_args()\n"));
4104 
4105 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4106 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4107 		if (area >= B_OK)
4108 			delete_area(area);
4109 	}
4110 }
4111 
4112 
4113 static void
4114 allocate_kernel_args(kernel_args* args)
4115 {
4116 	TRACE(("allocate_kernel_args()\n"));
4117 
4118 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4119 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4120 
4121 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4122 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4123 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4124 	}
4125 }
4126 
4127 
4128 static void
4129 unreserve_boot_loader_ranges(kernel_args* args)
4130 {
4131 	TRACE(("unreserve_boot_loader_ranges()\n"));
4132 
4133 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4134 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4135 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4136 			args->virtual_allocated_range[i].size);
4137 	}
4138 }
4139 
4140 
4141 static void
4142 reserve_boot_loader_ranges(kernel_args* args)
4143 {
4144 	TRACE(("reserve_boot_loader_ranges()\n"));
4145 
4146 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4147 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4148 
4149 		// If the address is no kernel address, we just skip it. The
4150 		// architecture specific code has to deal with it.
4151 		if (!IS_KERNEL_ADDRESS(address)) {
4152 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4153 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4154 			continue;
4155 		}
4156 
4157 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4158 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4159 		if (status < B_OK)
4160 			panic("could not reserve boot loader ranges\n");
4161 	}
4162 }
4163 
4164 
4165 static addr_t
4166 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4167 {
4168 	size = PAGE_ALIGN(size);
4169 
4170 	// find a slot in the virtual allocation addr range
4171 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4172 		// check to see if the space between this one and the last is big enough
4173 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4174 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4175 			+ args->virtual_allocated_range[i - 1].size;
4176 
4177 		addr_t base = alignment > 0
4178 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4179 
4180 		if (base >= KERNEL_BASE && base < rangeStart
4181 				&& rangeStart - base >= size) {
4182 			args->virtual_allocated_range[i - 1].size
4183 				+= base + size - previousRangeEnd;
4184 			return base;
4185 		}
4186 	}
4187 
4188 	// we hadn't found one between allocation ranges. this is ok.
4189 	// see if there's a gap after the last one
4190 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4191 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4192 		+ args->virtual_allocated_range[lastEntryIndex].size;
4193 	addr_t base = alignment > 0
4194 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4195 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4196 		args->virtual_allocated_range[lastEntryIndex].size
4197 			+= base + size - lastRangeEnd;
4198 		return base;
4199 	}
4200 
4201 	// see if there's a gap before the first one
4202 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4203 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4204 		base = rangeStart - size;
4205 		if (alignment > 0)
4206 			base = ROUNDDOWN(base, alignment);
4207 
4208 		if (base >= KERNEL_BASE) {
4209 			args->virtual_allocated_range[0].start = base;
4210 			args->virtual_allocated_range[0].size += rangeStart - base;
4211 			return base;
4212 		}
4213 	}
4214 
4215 	return 0;
4216 }
4217 
4218 
4219 static bool
4220 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4221 {
4222 	// TODO: horrible brute-force method of determining if the page can be
4223 	// allocated
4224 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4225 		if (address >= args->physical_memory_range[i].start
4226 			&& address < args->physical_memory_range[i].start
4227 				+ args->physical_memory_range[i].size)
4228 			return true;
4229 	}
4230 	return false;
4231 }
4232 
4233 
4234 page_num_t
4235 vm_allocate_early_physical_page(kernel_args* args)
4236 {
4237 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4238 		phys_addr_t nextPage;
4239 
4240 		nextPage = args->physical_allocated_range[i].start
4241 			+ args->physical_allocated_range[i].size;
4242 		// see if the page after the next allocated paddr run can be allocated
4243 		if (i + 1 < args->num_physical_allocated_ranges
4244 			&& args->physical_allocated_range[i + 1].size != 0) {
4245 			// see if the next page will collide with the next allocated range
4246 			if (nextPage >= args->physical_allocated_range[i+1].start)
4247 				continue;
4248 		}
4249 		// see if the next physical page fits in the memory block
4250 		if (is_page_in_physical_memory_range(args, nextPage)) {
4251 			// we got one!
4252 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4253 			return nextPage / B_PAGE_SIZE;
4254 		}
4255 	}
4256 
4257 	// Expanding upwards didn't work, try going downwards.
4258 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4259 		phys_addr_t nextPage;
4260 
4261 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4262 		// see if the page after the prev allocated paddr run can be allocated
4263 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4264 			// see if the next page will collide with the next allocated range
4265 			if (nextPage < args->physical_allocated_range[i-1].start
4266 				+ args->physical_allocated_range[i-1].size)
4267 				continue;
4268 		}
4269 		// see if the next physical page fits in the memory block
4270 		if (is_page_in_physical_memory_range(args, nextPage)) {
4271 			// we got one!
4272 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4273 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4274 			return nextPage / B_PAGE_SIZE;
4275 		}
4276 	}
4277 
4278 	return 0;
4279 		// could not allocate a block
4280 }
4281 
4282 
4283 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4284 	allocate some pages before the VM is completely up.
4285 */
4286 addr_t
4287 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4288 	uint32 attributes, addr_t alignment)
4289 {
4290 	if (physicalSize > virtualSize)
4291 		physicalSize = virtualSize;
4292 
4293 	// find the vaddr to allocate at
4294 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4295 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4296 	if (virtualBase == 0) {
4297 		panic("vm_allocate_early: could not allocate virtual address\n");
4298 		return 0;
4299 	}
4300 
4301 	// map the pages
4302 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4303 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4304 		if (physicalAddress == 0)
4305 			panic("error allocating early page!\n");
4306 
4307 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4308 
4309 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4310 			physicalAddress * B_PAGE_SIZE, attributes,
4311 			&vm_allocate_early_physical_page);
4312 	}
4313 
4314 	return virtualBase;
4315 }
4316 
4317 
4318 /*!	The main entrance point to initialize the VM. */
4319 status_t
4320 vm_init(kernel_args* args)
4321 {
4322 	struct preloaded_image* image;
4323 	void* address;
4324 	status_t err = 0;
4325 	uint32 i;
4326 
4327 	TRACE(("vm_init: entry\n"));
4328 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4329 	err = arch_vm_init(args);
4330 
4331 	// initialize some globals
4332 	vm_page_init_num_pages(args);
4333 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4334 
4335 	slab_init(args);
4336 
4337 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4338 	off_t heapSize = INITIAL_HEAP_SIZE;
4339 	// try to accomodate low memory systems
4340 	while (heapSize > sAvailableMemory / 8)
4341 		heapSize /= 2;
4342 	if (heapSize < 1024 * 1024)
4343 		panic("vm_init: go buy some RAM please.");
4344 
4345 	// map in the new heap and initialize it
4346 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4347 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4348 	TRACE(("heap at 0x%lx\n", heapBase));
4349 	heap_init(heapBase, heapSize);
4350 #endif
4351 
4352 	// initialize the free page list and physical page mapper
4353 	vm_page_init(args);
4354 
4355 	// initialize the cache allocators
4356 	vm_cache_init(args);
4357 
4358 	{
4359 		status_t error = VMAreas::Init();
4360 		if (error != B_OK)
4361 			panic("vm_init: error initializing areas map\n");
4362 	}
4363 
4364 	VMAddressSpace::Init();
4365 	reserve_boot_loader_ranges(args);
4366 
4367 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4368 	heap_init_post_area();
4369 #endif
4370 
4371 	// Do any further initialization that the architecture dependant layers may
4372 	// need now
4373 	arch_vm_translation_map_init_post_area(args);
4374 	arch_vm_init_post_area(args);
4375 	vm_page_init_post_area(args);
4376 	slab_init_post_area();
4377 
4378 	// allocate areas to represent stuff that already exists
4379 
4380 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4381 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4382 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4383 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4384 #endif
4385 
4386 	allocate_kernel_args(args);
4387 
4388 	create_preloaded_image_areas(args->kernel_image);
4389 
4390 	// allocate areas for preloaded images
4391 	for (image = args->preloaded_images; image != NULL; image = image->next)
4392 		create_preloaded_image_areas(image);
4393 
4394 	// allocate kernel stacks
4395 	for (i = 0; i < args->num_cpus; i++) {
4396 		char name[64];
4397 
4398 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4399 		address = (void*)args->cpu_kstack[i].start;
4400 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4401 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4402 	}
4403 
4404 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4405 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4406 
4407 #if PARANOID_KERNEL_MALLOC
4408 	vm_block_address_range("uninitialized heap memory",
4409 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4410 #endif
4411 #if PARANOID_KERNEL_FREE
4412 	vm_block_address_range("freed heap memory",
4413 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4414 #endif
4415 
4416 	// create the object cache for the page mappings
4417 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4418 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4419 		NULL, NULL);
4420 	if (gPageMappingsObjectCache == NULL)
4421 		panic("failed to create page mappings object cache");
4422 
4423 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4424 
4425 #if DEBUG_CACHE_LIST
4426 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4427 		virtual_address_restrictions virtualRestrictions = {};
4428 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4429 		physical_address_restrictions physicalRestrictions = {};
4430 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4431 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4432 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4433 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4434 			&physicalRestrictions, (void**)&sCacheInfoTable);
4435 	}
4436 #endif	// DEBUG_CACHE_LIST
4437 
4438 	// add some debugger commands
4439 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4440 	add_debugger_command("area", &dump_area,
4441 		"Dump info about a particular area");
4442 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4443 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4444 #if DEBUG_CACHE_LIST
4445 	if (sCacheInfoTable != NULL) {
4446 		add_debugger_command_etc("caches", &dump_caches,
4447 			"List all VMCache trees",
4448 			"[ \"-c\" ]\n"
4449 			"All cache trees are listed sorted in decreasing order by number "
4450 				"of\n"
4451 			"used pages or, if \"-c\" is specified, by size of committed "
4452 				"memory.\n",
4453 			0);
4454 	}
4455 #endif
4456 	add_debugger_command("avail", &dump_available_memory,
4457 		"Dump available memory");
4458 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4459 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4460 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4461 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4462 	add_debugger_command("string", &display_mem, "dump strings");
4463 
4464 	add_debugger_command_etc("mapping", &dump_mapping_info,
4465 		"Print address mapping information",
4466 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4467 		"Prints low-level page mapping information for a given address. If\n"
4468 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4469 		"address that is looked up in the translation map of the current\n"
4470 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4471 		"\"-r\" is specified, <address> is a physical address that is\n"
4472 		"searched in the translation map of all teams, respectively the team\n"
4473 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4474 		"<address> is the address of a vm_page structure. The behavior is\n"
4475 		"equivalent to specifying \"-r\" with the physical address of that\n"
4476 		"page.\n",
4477 		0);
4478 
4479 	TRACE(("vm_init: exit\n"));
4480 
4481 	vm_cache_init_post_heap();
4482 
4483 	return err;
4484 }
4485 
4486 
4487 status_t
4488 vm_init_post_sem(kernel_args* args)
4489 {
4490 	// This frees all unused boot loader resources and makes its space available
4491 	// again
4492 	arch_vm_init_end(args);
4493 	unreserve_boot_loader_ranges(args);
4494 
4495 	// fill in all of the semaphores that were not allocated before
4496 	// since we're still single threaded and only the kernel address space
4497 	// exists, it isn't that hard to find all of the ones we need to create
4498 
4499 	arch_vm_translation_map_init_post_sem(args);
4500 
4501 	slab_init_post_sem();
4502 
4503 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4504 	heap_init_post_sem();
4505 #endif
4506 
4507 	return B_OK;
4508 }
4509 
4510 
4511 status_t
4512 vm_init_post_thread(kernel_args* args)
4513 {
4514 	vm_page_init_post_thread(args);
4515 	slab_init_post_thread();
4516 	return heap_init_post_thread();
4517 }
4518 
4519 
4520 status_t
4521 vm_init_post_modules(kernel_args* args)
4522 {
4523 	return arch_vm_init_post_modules(args);
4524 }
4525 
4526 
4527 void
4528 permit_page_faults(void)
4529 {
4530 	Thread* thread = thread_get_current_thread();
4531 	if (thread != NULL)
4532 		atomic_add(&thread->page_faults_allowed, 1);
4533 }
4534 
4535 
4536 void
4537 forbid_page_faults(void)
4538 {
4539 	Thread* thread = thread_get_current_thread();
4540 	if (thread != NULL)
4541 		atomic_add(&thread->page_faults_allowed, -1);
4542 }
4543 
4544 
4545 status_t
4546 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4547 	bool isUser, addr_t* newIP)
4548 {
4549 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4550 		faultAddress));
4551 
4552 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4553 
4554 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4555 	VMAddressSpace* addressSpace = NULL;
4556 
4557 	status_t status = B_OK;
4558 	*newIP = 0;
4559 	atomic_add((int32*)&sPageFaults, 1);
4560 
4561 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4562 		addressSpace = VMAddressSpace::GetKernel();
4563 	} else if (IS_USER_ADDRESS(pageAddress)) {
4564 		addressSpace = VMAddressSpace::GetCurrent();
4565 		if (addressSpace == NULL) {
4566 			if (!isUser) {
4567 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4568 					"memory!\n");
4569 				status = B_BAD_ADDRESS;
4570 				TPF(PageFaultError(-1,
4571 					VMPageFaultTracing
4572 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4573 			} else {
4574 				// XXX weird state.
4575 				panic("vm_page_fault: non kernel thread accessing user memory "
4576 					"that doesn't exist!\n");
4577 				status = B_BAD_ADDRESS;
4578 			}
4579 		}
4580 	} else {
4581 		// the hit was probably in the 64k DMZ between kernel and user space
4582 		// this keeps a user space thread from passing a buffer that crosses
4583 		// into kernel space
4584 		status = B_BAD_ADDRESS;
4585 		TPF(PageFaultError(-1,
4586 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4587 	}
4588 
4589 	if (status == B_OK) {
4590 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4591 			isUser, NULL);
4592 	}
4593 
4594 	if (status < B_OK) {
4595 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4596 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4597 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4598 			thread_get_current_thread_id());
4599 		if (!isUser) {
4600 			Thread* thread = thread_get_current_thread();
4601 			if (thread != NULL && thread->fault_handler != 0) {
4602 				// this will cause the arch dependant page fault handler to
4603 				// modify the IP on the interrupt frame or whatever to return
4604 				// to this address
4605 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4606 			} else {
4607 				// unhandled page fault in the kernel
4608 				panic("vm_page_fault: unhandled page fault in kernel space at "
4609 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4610 			}
4611 		} else {
4612 			Thread* thread = thread_get_current_thread();
4613 
4614 #ifdef TRACE_FAULTS
4615 			VMArea* area = NULL;
4616 			if (addressSpace != NULL) {
4617 				addressSpace->ReadLock();
4618 				area = addressSpace->LookupArea(faultAddress);
4619 			}
4620 
4621 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4622 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4623 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4624 				thread->team->Name(), thread->team->id,
4625 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4626 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4627 					area->Base() : 0x0));
4628 
4629 			if (addressSpace != NULL)
4630 				addressSpace->ReadUnlock();
4631 #endif
4632 
4633 			// If the thread has a signal handler for SIGSEGV, we simply
4634 			// send it the signal. Otherwise we notify the user debugger
4635 			// first.
4636 			struct sigaction action;
4637 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4638 					&& action.sa_handler != SIG_DFL
4639 					&& action.sa_handler != SIG_IGN)
4640 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4641 					SIGSEGV)) {
4642 				Signal signal(SIGSEGV,
4643 					status == B_PERMISSION_DENIED
4644 						? SEGV_ACCERR : SEGV_MAPERR,
4645 					EFAULT, thread->team->id);
4646 				signal.SetAddress((void*)address);
4647 				send_signal_to_thread(thread, signal, 0);
4648 			}
4649 		}
4650 	}
4651 
4652 	if (addressSpace != NULL)
4653 		addressSpace->Put();
4654 
4655 	return B_HANDLED_INTERRUPT;
4656 }
4657 
4658 
4659 struct PageFaultContext {
4660 	AddressSpaceReadLocker	addressSpaceLocker;
4661 	VMCacheChainLocker		cacheChainLocker;
4662 
4663 	VMTranslationMap*		map;
4664 	VMCache*				topCache;
4665 	off_t					cacheOffset;
4666 	vm_page_reservation		reservation;
4667 	bool					isWrite;
4668 
4669 	// return values
4670 	vm_page*				page;
4671 	bool					restart;
4672 	bool					pageAllocated;
4673 
4674 
4675 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4676 		:
4677 		addressSpaceLocker(addressSpace, true),
4678 		map(addressSpace->TranslationMap()),
4679 		isWrite(isWrite)
4680 	{
4681 	}
4682 
4683 	~PageFaultContext()
4684 	{
4685 		UnlockAll();
4686 		vm_page_unreserve_pages(&reservation);
4687 	}
4688 
4689 	void Prepare(VMCache* topCache, off_t cacheOffset)
4690 	{
4691 		this->topCache = topCache;
4692 		this->cacheOffset = cacheOffset;
4693 		page = NULL;
4694 		restart = false;
4695 		pageAllocated = false;
4696 
4697 		cacheChainLocker.SetTo(topCache);
4698 	}
4699 
4700 	void UnlockAll(VMCache* exceptCache = NULL)
4701 	{
4702 		topCache = NULL;
4703 		addressSpaceLocker.Unlock();
4704 		cacheChainLocker.Unlock(exceptCache);
4705 	}
4706 };
4707 
4708 
4709 /*!	Gets the page that should be mapped into the area.
4710 	Returns an error code other than \c B_OK, if the page couldn't be found or
4711 	paged in. The locking state of the address space and the caches is undefined
4712 	in that case.
4713 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4714 	had to unlock the address space and all caches and is supposed to be called
4715 	again.
4716 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4717 	found. It is returned in \c context.page. The address space will still be
4718 	locked as well as all caches starting from the top cache to at least the
4719 	cache the page lives in.
4720 */
4721 static status_t
4722 fault_get_page(PageFaultContext& context)
4723 {
4724 	VMCache* cache = context.topCache;
4725 	VMCache* lastCache = NULL;
4726 	vm_page* page = NULL;
4727 
4728 	while (cache != NULL) {
4729 		// We already hold the lock of the cache at this point.
4730 
4731 		lastCache = cache;
4732 
4733 		page = cache->LookupPage(context.cacheOffset);
4734 		if (page != NULL && page->busy) {
4735 			// page must be busy -- wait for it to become unbusy
4736 			context.UnlockAll(cache);
4737 			cache->ReleaseRefLocked();
4738 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4739 
4740 			// restart the whole process
4741 			context.restart = true;
4742 			return B_OK;
4743 		}
4744 
4745 		if (page != NULL)
4746 			break;
4747 
4748 		// The current cache does not contain the page we're looking for.
4749 
4750 		// see if the backing store has it
4751 		if (cache->HasPage(context.cacheOffset)) {
4752 			// insert a fresh page and mark it busy -- we're going to read it in
4753 			page = vm_page_allocate_page(&context.reservation,
4754 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4755 			cache->InsertPage(page, context.cacheOffset);
4756 
4757 			// We need to unlock all caches and the address space while reading
4758 			// the page in. Keep a reference to the cache around.
4759 			cache->AcquireRefLocked();
4760 			context.UnlockAll();
4761 
4762 			// read the page in
4763 			generic_io_vec vec;
4764 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4765 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4766 
4767 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4768 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4769 
4770 			cache->Lock();
4771 
4772 			if (status < B_OK) {
4773 				// on error remove and free the page
4774 				dprintf("reading page from cache %p returned: %s!\n",
4775 					cache, strerror(status));
4776 
4777 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4778 				cache->RemovePage(page);
4779 				vm_page_set_state(page, PAGE_STATE_FREE);
4780 
4781 				cache->ReleaseRefAndUnlock();
4782 				return status;
4783 			}
4784 
4785 			// mark the page unbusy again
4786 			cache->MarkPageUnbusy(page);
4787 
4788 			DEBUG_PAGE_ACCESS_END(page);
4789 
4790 			// Since we needed to unlock everything temporarily, the area
4791 			// situation might have changed. So we need to restart the whole
4792 			// process.
4793 			cache->ReleaseRefAndUnlock();
4794 			context.restart = true;
4795 			return B_OK;
4796 		}
4797 
4798 		cache = context.cacheChainLocker.LockSourceCache();
4799 	}
4800 
4801 	if (page == NULL) {
4802 		// There was no adequate page, determine the cache for a clean one.
4803 		// Read-only pages come in the deepest cache, only the top most cache
4804 		// may have direct write access.
4805 		cache = context.isWrite ? context.topCache : lastCache;
4806 
4807 		// allocate a clean page
4808 		page = vm_page_allocate_page(&context.reservation,
4809 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4810 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4811 			page->physical_page_number));
4812 
4813 		// insert the new page into our cache
4814 		cache->InsertPage(page, context.cacheOffset);
4815 		context.pageAllocated = true;
4816 	} else if (page->Cache() != context.topCache && context.isWrite) {
4817 		// We have a page that has the data we want, but in the wrong cache
4818 		// object so we need to copy it and stick it into the top cache.
4819 		vm_page* sourcePage = page;
4820 
4821 		// TODO: If memory is low, it might be a good idea to steal the page
4822 		// from our source cache -- if possible, that is.
4823 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4824 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4825 
4826 		// To not needlessly kill concurrency we unlock all caches but the top
4827 		// one while copying the page. Lacking another mechanism to ensure that
4828 		// the source page doesn't disappear, we mark it busy.
4829 		sourcePage->busy = true;
4830 		context.cacheChainLocker.UnlockKeepRefs(true);
4831 
4832 		// copy the page
4833 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4834 			sourcePage->physical_page_number * B_PAGE_SIZE);
4835 
4836 		context.cacheChainLocker.RelockCaches(true);
4837 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4838 
4839 		// insert the new page into our cache
4840 		context.topCache->InsertPage(page, context.cacheOffset);
4841 		context.pageAllocated = true;
4842 	} else
4843 		DEBUG_PAGE_ACCESS_START(page);
4844 
4845 	context.page = page;
4846 	return B_OK;
4847 }
4848 
4849 
4850 /*!	Makes sure the address in the given address space is mapped.
4851 
	Pages are reserved up front (with the address space unlocked), then the
	function loops: it looks up the area, checks the access against the page
	protection, lets the top cache's fault handler or fault_get_page() provide
	the page, and finally maps it (or adjusts the protection of an existing
	mapping of the same page).

4852 	\param addressSpace The address space.
4853 	\param originalAddress The address. Doesn't need to be page aligned.
4854 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the access is an instruction fetch; the page
		protection must then allow execution.
4855 	\param isUser If \c true the access is requested by a userland team.
4856 	\param wirePage On success, if non \c NULL, the wired count of the page
4857 		mapped at the given address is incremented and the page is returned
4858 		via this parameter.
4859 	\return \c B_OK on success, another error code otherwise.
	\return \c B_BAD_ADDRESS if no area covers the address,
		\c B_PERMISSION_DENIED if the area's protection forbids the access,
		\c B_NO_MEMORY if no page mapping object could be reserved, or the
		status returned by the cache's fault handler or fault_get_page().
4860 */
4861 static status_t
4862 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4863 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4864 {
4865 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4866 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4867 		originalAddress, isWrite, isUser));
4868 
4869 	PageFaultContext context(addressSpace, isWrite);
4870 
4871 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4872 	status_t status = B_OK;
4873 
4874 	addressSpace->IncrementFaultCount();
4875 
4876 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4877 	// the pages upfront makes sure we don't have any cache locked, so that the
4878 	// page daemon/thief can do their job without problems.
4879 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4880 		originalAddress);
4881 	context.addressSpaceLocker.Unlock();
4882 	vm_page_reserve_pages(&context.reservation, reservePages,
4883 		addressSpace == VMAddressSpace::Kernel()
4884 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4885 
	// Retry loop: the address space lock is (re)acquired at the top of every
	// iteration, and each "continue" below restarts with a fresh area lookup.
4886 	while (true) {
4887 		context.addressSpaceLocker.Lock();
4888 
4889 		// get the area the fault was in
4890 		VMArea* area = addressSpace->LookupArea(address);
4891 		if (area == NULL) {
4892 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4893 				"space\n", originalAddress);
4894 			TPF(PageFaultError(-1,
4895 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4896 			status = B_BAD_ADDRESS;
4897 			break;
4898 		}
4899 
4900 		// check permissions
4901 		uint32 protection = get_area_page_protection(area, address);
4902 		if (isUser && (protection & B_USER_PROTECTION) == 0
4903 				&& (area->protection & B_KERNEL_AREA) != 0) {
4904 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4905 				area->id, (void*)originalAddress);
4906 			TPF(PageFaultError(area->id,
4907 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4908 			status = B_PERMISSION_DENIED;
4909 			break;
4910 		}
		// For userland accesses only the user protection bits count; for
		// kernel accesses the kernel bits are accepted as well.
4911 		if (isWrite && (protection
4912 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4913 			dprintf("write access attempted on write-protected area 0x%"
4914 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4915 			TPF(PageFaultError(area->id,
4916 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4917 			status = B_PERMISSION_DENIED;
4918 			break;
4919 		} else if (isExecute && (protection
4920 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4921 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4922 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4923 			TPF(PageFaultError(area->id,
4924 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4925 			status = B_PERMISSION_DENIED;
4926 			break;
4927 		} else if (!isWrite && !isExecute && (protection
4928 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4929 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4930 				" at %p\n", area->id, (void*)originalAddress);
4931 			TPF(PageFaultError(area->id,
4932 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4933 			status = B_PERMISSION_DENIED;
4934 			break;
4935 		}
4936 
4937 		// We have the area, it was a valid access, so let's try to resolve the
4938 		// page fault now.
4939 		// At first, the top most cache from the area is investigated.
4940 
4941 		context.Prepare(vm_area_get_locked_cache(area),
4942 			address - area->Base() + area->cache_offset);
4943 
4944 		// See if this cache has a fault handler -- this will do all the work
4945 		// for us.
4946 		{
4947 			// Note, since the page fault is resolved with interrupts enabled,
4948 			// the fault handler could be called more than once for the same
4949 			// reason -- the store must take this into account.
4950 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
			// Any status other than B_BAD_HANDLER (success or failure) ends
			// the fault handling here.
4951 			if (status != B_BAD_HANDLER)
4952 				break;
4953 		}
4954 
4955 		// The top most cache has no fault handler, so let's see if the cache or
4956 		// its sources already have the page we're searching for (we're going
4957 		// from top to bottom).
4958 		status = fault_get_page(context);
4959 		if (status != B_OK) {
4960 			TPF(PageFaultError(area->id, status));
4961 			break;
4962 		}
4963 
4964 		if (context.restart)
4965 			continue;
4966 
4967 		// All went fine, all there is left to do is to map the page into the
4968 		// address space.
4969 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4970 			context.page));
4971 
4972 		// If the page doesn't reside in the area's cache, we need to make sure
4973 		// it's mapped in read-only, so that we cannot overwrite someone else's
4974 		// data (copy-on-write)
4975 		uint32 newProtection = protection;
4976 		if (context.page->Cache() != context.topCache && !isWrite)
4977 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4978 
4979 		bool unmapPage = false;
4980 		bool mapPage = true;
4981 
4982 		// check whether there's already a page mapped at the address
4983 		context.map->Lock();
4984 
4985 		phys_addr_t physicalAddress;
4986 		uint32 flags;
4987 		vm_page* mappedPage = NULL;
4988 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4989 			&& (flags & PAGE_PRESENT) != 0
4990 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4991 				!= NULL) {
4992 			// Yep there's already a page. If it's ours, we can simply adjust
4993 			// its protection. Otherwise we have to unmap it.
4994 			if (mappedPage == context.page) {
4995 				context.map->ProtectPage(area, address, newProtection);
4996 					// Note: We assume that ProtectPage() is atomic (i.e.
4997 					// the page isn't temporarily unmapped), otherwise we'd have
4998 					// to make sure it isn't wired.
4999 				mapPage = false;
5000 			} else
5001 				unmapPage = true;
5002 		}
5003 
5004 		context.map->Unlock();
5005 
5006 		if (unmapPage) {
5007 			// If the page is wired, we can't unmap it. Wait until it is unwired
5008 			// again and restart. Note that the page cannot be wired for
5009 			// writing, since it isn't in the topmost cache. So we can safely
5010 			// ignore ranges wired for writing (our own and other concurrent
5011 			// wiring attempts in progress) and in fact have to do that to avoid
5012 			// a deadlock.
5013 			VMAreaUnwiredWaiter waiter;
5014 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5015 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5016 				// unlock everything and wait
5017 				if (context.pageAllocated) {
5018 					// ... but since we allocated a page and inserted it into
5019 					// the top cache, remove and free it first. Otherwise we'd
5020 					// have a page from a lower cache mapped while an upper
5021 					// cache has a page that would shadow it.
5022 					context.topCache->RemovePage(context.page);
5023 					vm_page_free_etc(context.topCache, context.page,
5024 						&context.reservation);
5025 				} else
5026 					DEBUG_PAGE_ACCESS_END(context.page);
5027 
5028 				context.UnlockAll();
5029 				waiter.waitEntry.Wait();
5030 				continue;
5031 			}
5032 
5033 			// Note: The mapped page is a page of a lower cache. We are
5034 			// guaranteed to have that cache locked, our new page is a copy of
5035 			// that page, and the page is not busy. The logic for that guarantee
5036 			// is as follows: Since the page is mapped, it must live in the top
5037 			// cache (ruled out above) or any of its lower caches, and there is
5038 			// (was before the new page was inserted) no other page in any
5039 			// cache between the top cache and the page's cache (otherwise that
5040 			// would be mapped instead). That in turn means that our algorithm
5041 			// must have found it and therefore it cannot be busy either.
5042 			DEBUG_PAGE_ACCESS_START(mappedPage);
5043 			unmap_page(area, address);
5044 			DEBUG_PAGE_ACCESS_END(mappedPage);
5045 		}
5046 
5047 		if (mapPage) {
5048 			if (map_page(area, context.page, address, newProtection,
5049 					&context.reservation) != B_OK) {
5050 				// Mapping can only fail, when the page mapping object couldn't
5051 				// be allocated. Save for the missing mapping everything is
5052 				// fine, though. If this was a regular page fault, we'll simply
5053 				// leave and probably fault again. To make sure we'll have more
5054 				// luck then, we ensure that the minimum object reserve is
5055 				// available.
5056 				DEBUG_PAGE_ACCESS_END(context.page);
5057 
5058 				context.UnlockAll();
5059 
5060 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
5061 						!= B_OK) {
5062 					// Apparently the situation is serious. Let's get ourselves
5063 					// killed.
5064 					status = B_NO_MEMORY;
5065 				} else if (wirePage != NULL) {
5066 					// The caller expects us to wire the page. Since
5067 					// object_cache_reserve() succeeded, we should now be able
5068 					// to allocate a mapping structure. Restart.
5069 					continue;
5070 				}
5071 
				// Leave the loop: either with B_NO_MEMORY, or (non-wiring
				// case) with status still B_OK but without a mapping.
5072 				break;
5073 			}
5074 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5075 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5076 
5077 		// also wire the page, if requested
5078 		if (wirePage != NULL && status == B_OK) {
5079 			increment_page_wired_count(context.page);
5080 			*wirePage = context.page;
5081 		}
5082 
5083 		DEBUG_PAGE_ACCESS_END(context.page);
5084 
5085 		break;
5086 	}
5087 
5088 	return status;
5089 }
5090 
5091 
/*!	Forwards to GetPage() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	\param paddr Physical address, passed through.
	\param _vaddr Passed through; presumably receives the virtual address the
		page was made accessible at -- confirm against the mapper.
	\param _handle Passed through; presumably an opaque token to hand back to
		vm_put_physical_page() -- confirm against the mapper.
*/
5092 status_t
5093 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5094 {
5095 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5096 }
5097 
/*!	Forwards to PutPage() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	\param vaddr Virtual address, passed through (presumably the one obtained
		from vm_get_physical_page() -- confirm against the mapper).
	\param handle Passed through (presumably the handle obtained from
		vm_get_physical_page()).
*/
5098 status_t
5099 vm_put_physical_page(addr_t vaddr, void* handle)
5100 {
5101 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5102 }
5103 
5104 
/*!	Forwards to GetPageCurrentCPU() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	The parameters are passed through as is; see vm_get_physical_page() for
	the parallel non-CPU-specific call. NOTE(review): the name suggests the
	result is only usable on the calling CPU -- confirm against the mapper.
*/
5105 status_t
5106 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5107 	void** _handle)
5108 {
5109 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5110 }
5111 
/*!	Forwards to PutPageCurrentCPU() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	Counterpart of vm_get_physical_page_current_cpu(); both parameters are
	passed through as is.
*/
5112 status_t
5113 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5114 {
5115 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5116 }
5117 
5118 
/*!	Forwards to GetPageDebug() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	The parameters are passed through as is. NOTE(review): presumably meant
	for use from the kernel debugger only -- confirm against the mapper.
*/
5119 status_t
5120 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5121 {
5122 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5123 }
5124 
/*!	Forwards to PutPageDebug() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	Counterpart of vm_get_physical_page_debug(); both parameters are passed
	through as is.
*/
5125 status_t
5126 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5127 {
5128 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5129 }
5130 
5131 
5132 void
5133 vm_get_info(system_info* info)
5134 {
5135 	swap_get_info(info);
5136 
5137 	MutexLocker locker(sAvailableMemoryLock);
5138 	info->needed_memory = sNeededMemory;
5139 	info->free_memory = sAvailableMemory;
5140 }
5141 
5142 
/*!	Returns the current value of the file-level counter sPageFaults.
	No lock is taken for the read.
*/
5143 uint32
5144 vm_num_page_faults(void)
5145 {
5146 	return sPageFaults;
5147 }
5148 
5149 
5150 off_t
5151 vm_available_memory(void)
5152 {
5153 	MutexLocker locker(sAvailableMemoryLock);
5154 	return sAvailableMemory;
5155 }
5156 
5157 
5158 off_t
5159 vm_available_not_needed_memory(void)
5160 {
5161 	MutexLocker locker(sAvailableMemoryLock);
5162 	return sAvailableMemory - sNeededMemory;
5163 }
5164 
5165 
5166 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5167 	debugger.

	Unlike vm_available_not_needed_memory() this variant does not acquire
	sAvailableMemoryLock before reading the two counters.
5168 */
5169 off_t
5170 vm_available_not_needed_memory_debug(void)
5171 {
5172 	return sAvailableMemory - sNeededMemory;
5173 }
5174 
5175 
/*!	Returns what FreeSpace() reports for the kernel address space
	(VMAddressSpace::Kernel()).
*/
5176 size_t
5177 vm_kernel_address_space_left(void)
5178 {
5179 	return VMAddressSpace::Kernel()->FreeSpace();
5180 }
5181 
5182 
5183 void
5184 vm_unreserve_memory(size_t amount)
5185 {
5186 	mutex_lock(&sAvailableMemoryLock);
5187 
5188 	sAvailableMemory += amount;
5189 
5190 	mutex_unlock(&sAvailableMemoryLock);
5191 }
5192 
5193 
5194 status_t
5195 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5196 {
5197 	size_t reserve = kMemoryReserveForPriority[priority];
5198 
5199 	MutexLocker locker(sAvailableMemoryLock);
5200 
5201 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5202 
5203 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5204 		sAvailableMemory -= amount;
5205 		return B_OK;
5206 	}
5207 
5208 	if (timeout <= 0)
5209 		return B_NO_MEMORY;
5210 
5211 	// turn timeout into an absolute timeout
5212 	timeout += system_time();
5213 
5214 	// loop until we've got the memory or the timeout occurs
5215 	do {
5216 		sNeededMemory += amount;
5217 
5218 		// call the low resource manager
5219 		locker.Unlock();
5220 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5221 			B_ABSOLUTE_TIMEOUT, timeout);
5222 		locker.Lock();
5223 
5224 		sNeededMemory -= amount;
5225 
5226 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5227 			sAvailableMemory -= amount;
5228 			return B_OK;
5229 		}
5230 	} while (timeout > system_time());
5231 
5232 	return B_NO_MEMORY;
5233 }
5234 
5235 
5236 status_t
5237 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5238 {
5239 	// NOTE: The caller is responsible for synchronizing calls to this function!
5240 
5241 	AddressSpaceReadLocker locker;
5242 	VMArea* area;
5243 	status_t status = locker.SetFromArea(id, area);
5244 	if (status != B_OK)
5245 		return status;
5246 
5247 	// nothing to do, if the type doesn't change
5248 	uint32 oldType = area->MemoryType();
5249 	if (type == oldType)
5250 		return B_OK;
5251 
5252 	// set the memory type of the area and the mapped pages
5253 	VMTranslationMap* map = area->address_space->TranslationMap();
5254 	map->Lock();
5255 	area->SetMemoryType(type);
5256 	map->ProtectArea(area, area->protection);
5257 	map->Unlock();
5258 
5259 	// set the physical memory type
5260 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5261 	if (error != B_OK) {
5262 		// reset the memory type of the area and the mapped pages
5263 		map->Lock();
5264 		area->SetMemoryType(oldType);
5265 		map->ProtectArea(area, area->protection);
5266 		map->Unlock();
5267 		return error;
5268 	}
5269 
5270 	return B_OK;
5271 
5272 }
5273 
5274 
5275 /*!	This function enforces some protection properties:
5276 	 - kernel areas must be W^X (after kernel startup)
5277 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5278 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5279 */
5280 static void
5281 fix_protection(uint32* protection)
5282 {
5283 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5284 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5285 			|| (*protection & B_WRITE_AREA) != 0)
5286 		&& !gKernelStartup)
5287 		panic("kernel areas cannot be both writable and executable!");
5288 
5289 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5290 		if ((*protection & B_WRITE_AREA) != 0)
5291 			*protection |= B_KERNEL_WRITE_AREA;
5292 		if ((*protection & B_READ_AREA) != 0)
5293 			*protection |= B_KERNEL_READ_AREA;
5294 	}
5295 }
5296 
5297 
5298 static void
5299 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5300 {
5301 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5302 	info->area = area->id;
5303 	info->address = (void*)area->Base();
5304 	info->size = area->Size();
5305 	info->protection = area->protection;
5306 	info->lock = area->wiring;
5307 	info->team = area->address_space->ID();
5308 	info->copy_count = 0;
5309 	info->in_count = 0;
5310 	info->out_count = 0;
5311 		// TODO: retrieve real values here!
5312 
5313 	VMCache* cache = vm_area_get_locked_cache(area);
5314 
5315 	// Note, this is a simplification; the cache could be larger than this area
5316 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5317 
5318 	vm_area_put_locked_cache(cache);
5319 }
5320 
5321 
/*!	Resizes the area with the given ID -- and with it every area attached to
	the same cache -- to \a newSize bytes.

	All affected address spaces and the cache are locked first. When growing,
	the cache is resized before the areas; when shrinking, the areas are
	resized (and the pages beyond the new size unmapped) before the cache. If
	anything fails after the areas have been touched, they are resized back to
	their old size.

	\param areaID The ID of the area to resize.
	\param newSize The new size in bytes; must be a multiple of
		\c B_PAGE_SIZE.
	\param kernel If \c false, areas in the kernel address space and areas
		with \c B_KERNEL_AREA protection are refused.
	\return \c B_OK on success (also if the size doesn't change),
		\c B_BAD_VALUE if \a newSize is not page aligned, \c B_NOT_ALLOWED if
		the caller may not resize the area or the cache is not of type
		\c CACHE_TYPE_RAM, \c B_ERROR if one of the areas cannot be grown,
		\c B_NO_MEMORY if the page protections array could not be
		reallocated, or the error returned by the locking, the cache resize,
		or the area resize.
*/
5322 static status_t
5323 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5324 {
5325 	// is newSize a multiple of B_PAGE_SIZE?
5326 	if (newSize & (B_PAGE_SIZE - 1))
5327 		return B_BAD_VALUE;
5328 
5329 	// lock all affected address spaces and the cache
5330 	VMArea* area;
5331 	VMCache* cache;
5332 
5333 	MultiAddressSpaceLocker locker;
5334 	AreaCacheLocker cacheLocker;
5335 
5336 	status_t status;
5337 	size_t oldSize;
5338 	bool anyKernelArea;
5339 	bool restart;
5340 
	// The locking and checking is repeated for as long as
	// wait_if_area_range_is_wired() reports that it had to wait.
5341 	do {
5342 		anyKernelArea = false;
5343 		restart = false;
5344 
5345 		locker.Unset();
5346 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5347 		if (status != B_OK)
5348 			return status;
5349 		cacheLocker.SetTo(cache, true);	// already locked
5350 
5351 		// enforce restrictions
5352 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5353 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5354 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5355 				"resize kernel area %" B_PRId32 " (%s)\n",
5356 				team_get_current_team_id(), areaID, area->name);
5357 			return B_NOT_ALLOWED;
5358 		}
5359 		// TODO: Enforce all restrictions (team, etc.)!
5360 
5361 		oldSize = area->Size();
5362 		if (newSize == oldSize)
5363 			return B_OK;
5364 
5365 		if (cache->type != CACHE_TYPE_RAM)
5366 			return B_NOT_ALLOWED;
5367 
5368 		if (oldSize < newSize) {
5369 			// We need to check if all areas of this cache can be resized.
5370 			for (VMArea* current = cache->areas; current != NULL;
5371 					current = current->cache_next) {
5372 				if (!current->address_space->CanResizeArea(current, newSize))
5373 					return B_ERROR;
5374 				anyKernelArea
5375 					|= current->address_space == VMAddressSpace::Kernel();
5376 			}
5377 		} else {
5378 			// We're shrinking the areas, so we must make sure the affected
5379 			// ranges are not wired.
5380 			for (VMArea* current = cache->areas; current != NULL;
5381 					current = current->cache_next) {
5382 				anyKernelArea
5383 					|= current->address_space == VMAddressSpace::Kernel();
5384 
5385 				if (wait_if_area_range_is_wired(current,
5386 						current->Base() + newSize, oldSize - newSize, &locker,
5387 						&cacheLocker)) {
5388 					restart = true;
5389 					break;
5390 				}
5391 			}
5392 		}
5393 	} while (restart);
5394 
5395 	// Okay, looks good so far, so let's do it
5396 
	// System priority and the non-blocking heap flags are only used for a
	// kernel request that involves at least one kernel area.
5397 	int priority = kernel && anyKernelArea
5398 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5399 	uint32 allocationFlags = kernel && anyKernelArea
5400 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5401 
5402 	if (oldSize < newSize) {
5403 		// Growing the cache can fail, so we do it first.
5404 		status = cache->Resize(cache->virtual_base + newSize, priority);
5405 		if (status != B_OK)
5406 			return status;
5407 	}
5408 
5409 	for (VMArea* current = cache->areas; current != NULL;
5410 			current = current->cache_next) {
5411 		status = current->address_space->ResizeArea(current, newSize,
5412 			allocationFlags);
5413 		if (status != B_OK)
5414 			break;
5415 
5416 		// We also need to unmap all pages beyond the new size, if the area has
5417 		// shrunk
5418 		if (newSize < oldSize) {
5419 			VMCacheChainLocker cacheChainLocker(cache);
5420 			cacheChainLocker.LockAllSourceCaches();
5421 
5422 			unmap_pages(current, current->Base() + newSize,
5423 				oldSize - newSize);
5424 
5425 			cacheChainLocker.Unlock(cache);
5426 		}
5427 	}
5428 
5429 	if (status == B_OK) {
5430 		// Shrink or grow individual page protections if in use.
5431 		if (area->page_protections != NULL) {
5432 			size_t bytes = area_page_protections_size(newSize);
5433 			uint8* newProtections
5434 				= (uint8*)realloc(area->page_protections, bytes);
5435 			if (newProtections == NULL)
5436 				status = B_NO_MEMORY;
5437 			else {
5438 				area->page_protections = newProtections;
5439 
5440 				if (oldSize < newSize) {
5441 					// init the additional page protections to that of the area
5442 					uint32 offset = area_page_protections_size(oldSize);
5443 					uint32 areaProtection = area->protection
5444 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
					// The same protection value is written into the low and
					// the high nibble of every newly added byte.
5445 					memset(area->page_protections + offset,
5446 						areaProtection | (areaProtection << 4), bytes - offset);
					// With an odd old page count only the high nibble of the
					// last old byte is replaced; its low nibble is preserved.
5447 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5448 						uint8& entry = area->page_protections[offset - 1];
5449 						entry = (entry & 0x0f) | (areaProtection << 4);
5450 					}
5451 				}
5452 			}
5453 		}
5454 	}
5455 
5456 	// shrinking the cache can't fail, so we do it now
5457 	if (status == B_OK && newSize < oldSize)
5458 		status = cache->Resize(cache->virtual_base + newSize, priority);
5459 
5460 	if (status != B_OK) {
5461 		// Something failed -- resize the areas back to their original size.
5462 		// This can fail, too, in which case we're seriously screwed.
5463 		for (VMArea* current = cache->areas; current != NULL;
5464 				current = current->cache_next) {
5465 			if (current->address_space->ResizeArea(current, oldSize,
5466 					allocationFlags) != B_OK) {
5467 				panic("vm_resize_area(): Failed and not being able to restore "
5468 					"original state.");
5469 			}
5470 		}
5471 
5472 		cache->Resize(cache->virtual_base + oldSize, priority);
5473 	}
5474 
5475 	// TODO: we must honour the lock restrictions of this area
5476 	return status;
5477 }
5478 
5479 
/*!	Forwards to MemsetPhysical() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	\param address Physical start address, passed through.
	\param value Fill value, passed through.
	\param length Number of bytes, passed through.
*/
5480 status_t
5481 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5482 {
5483 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5484 }
5485 
5486 
/*!	Forwards to MemcpyFromPhysical() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	\param to Destination buffer (virtual address), passed through.
	\param from Physical source address, passed through.
	\param length Number of bytes, passed through.
	\param user Passed through; presumably tells whether \a to is a userland
		address -- confirm against the mapper.
*/
5487 status_t
5488 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5489 {
5490 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5491 }
5492 
5493 
/*!	Forwards to MemcpyToPhysical() of the file-level physical page mapper
	(sPhysicalPageMapper) and returns its status unchanged.

	\param to Physical destination address, passed through.
	\param _from Source buffer (virtual address), passed through.
	\param length Number of bytes, passed through.
	\param user Passed through; presumably tells whether \a _from is a
		userland address -- confirm against the mapper.
*/
5494 status_t
5495 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5496 	bool user)
5497 {
5498 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5499 }
5500 
5501 
/*!	Forwards to MemcpyPhysicalPage() of the file-level physical page mapper
	(sPhysicalPageMapper).

	\param to Physical address of the destination page, passed through.
	\param from Physical address of the source page, passed through.
*/
5502 void
5503 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5504 {
5505 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5506 }
5507 
5508 
5509 /*!	Copies a range of memory directly from/to a page that might not be mapped
5510 	at the moment.
5511 
5512 	For \a unsafeMemory the current mapping (if any is ignored). The function
5513 	walks through the respective area's cache chain to find the physical page
5514 	and copies from/to it directly.
5515 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5516 	must not cross a page boundary.
5517 
5518 	\param teamID The team ID identifying the address space \a unsafeMemory is
5519 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5520 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5521 		is passed, the address space of the thread returned by
5522 		debug_get_debugged_thread() is used.
5523 	\param unsafeMemory The start of the unsafe memory range to be copied
5524 		from/to.
5525 	\param buffer A safely accessible kernel buffer to be copied from/to.
5526 	\param size The number of bytes to be copied.
5527 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5528 		\a unsafeMemory, the other way around otherwise.
5529 */
5530 status_t
5531 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5532 	size_t size, bool copyToUnsafe)
5533 {
5534 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5535 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5536 		return B_BAD_VALUE;
5537 	}
5538 
5539 	// get the address space for the debugged thread
5540 	VMAddressSpace* addressSpace;
5541 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5542 		addressSpace = VMAddressSpace::Kernel();
5543 	} else if (teamID == B_CURRENT_TEAM) {
5544 		Thread* thread = debug_get_debugged_thread();
5545 		if (thread == NULL || thread->team == NULL)
5546 			return B_BAD_ADDRESS;
5547 
5548 		addressSpace = thread->team->address_space;
5549 	} else
5550 		addressSpace = VMAddressSpace::DebugGet(teamID);
5551 
5552 	if (addressSpace == NULL)
5553 		return B_BAD_ADDRESS;
5554 
5555 	// get the area
5556 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5557 	if (area == NULL)
5558 		return B_BAD_ADDRESS;
5559 
5560 	// search the page
5561 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5562 		+ area->cache_offset;
5563 	VMCache* cache = area->cache;
5564 	vm_page* page = NULL;
5565 	while (cache != NULL) {
5566 		page = cache->DebugLookupPage(cacheOffset);
5567 		if (page != NULL)
5568 			break;
5569 
5570 		// Page not found in this cache -- if it is paged out, we must not try
5571 		// to get it from lower caches.
5572 		if (cache->DebugHasPage(cacheOffset))
5573 			break;
5574 
5575 		cache = cache->source;
5576 	}
5577 
5578 	if (page == NULL)
5579 		return B_UNSUPPORTED;
5580 
5581 	// copy from/to physical memory
5582 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5583 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5584 
5585 	if (copyToUnsafe) {
5586 		if (page->Cache() != area->cache)
5587 			return B_UNSUPPORTED;
5588 
5589 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5590 	}
5591 
5592 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5593 }
5594 
5595 
5596 /** Validate that a memory range is either fully in kernel space, or fully in
5597  *  userspace */
5598 static inline bool
5599 validate_memory_range(const void* addr, size_t size)
5600 {
5601 	addr_t address = (addr_t)addr;
5602 
5603 	// Check for overflows on all addresses.
5604 	if ((address + size) < address)
5605 		return false;
5606 
5607 	// Validate that the address range does not cross the kernel/user boundary.
5608 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5609 }
5610 
5611 
5612 //	#pragma mark - kernel public API
5613 
5614 
5615 status_t
5616 user_memcpy(void* to, const void* from, size_t size)
5617 {
5618 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5619 		return B_BAD_ADDRESS;
5620 
5621 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5622 		return B_BAD_ADDRESS;
5623 
5624 	return B_OK;
5625 }
5626 
5627 
5628 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5629 	the string in \a to, NULL-terminating the result.
5630 
5631 	\param to Pointer to the destination C-string.
5632 	\param from Pointer to the source C-string.
5633 	\param size Size in bytes of the string buffer pointed to by \a to.
5634 
5635 	\return strlen(\a from).
5636 */
5637 ssize_t
5638 user_strlcpy(char* to, const char* from, size_t size)
5639 {
5640 	if (to == NULL && size != 0)
5641 		return B_BAD_VALUE;
5642 	if (from == NULL)
5643 		return B_BAD_ADDRESS;
5644 
5645 	// Protect the source address from overflows.
5646 	size_t maxSize = size;
5647 	if ((addr_t)from + maxSize < (addr_t)from)
5648 		maxSize -= (addr_t)from + maxSize;
5649 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5650 		maxSize = USER_TOP - (addr_t)from;
5651 
5652 	if (!validate_memory_range(to, maxSize))
5653 		return B_BAD_ADDRESS;
5654 
5655 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5656 	if (result < 0)
5657 		return result;
5658 
5659 	// If we hit the address overflow boundary, fail.
5660 	if ((size_t)result >= maxSize && maxSize < size)
5661 		return B_BAD_ADDRESS;
5662 
5663 	return result;
5664 }
5665 
5666 
5667 status_t
5668 user_memset(void* s, char c, size_t count)
5669 {
5670 	if (!validate_memory_range(s, count))
5671 		return B_BAD_ADDRESS;
5672 
5673 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5674 		return B_BAD_ADDRESS;
5675 
5676 	return B_OK;
5677 }
5678 
5679 
5680 /*!	Wires a single page at the given address.
5681 
	The page-sized range is first registered as wired with the area. If the
	page is already mapped with the required protection, only its wired count
	is incremented; otherwise vm_soft_fault() is asked to map and wire it.

5682 	\param team The team whose address space the address belongs to. Supports
5683 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5684 		parameter is ignored.
5685 	\param address The virtual address to wire down. Does not need to
5686 		be page aligned.
5687 	\param writable If \c true the page shall be writable.
5688 	\param info On success the info is filled in, among other things
5689 		containing the physical address the given virtual one translates to.
5690 	\return \c B_OK, when the page could be wired, another error code otherwise.
	\return \c B_ERROR if no address space could be obtained,
		\c B_BAD_ADDRESS if no area covers the address, or the error returned
		by vm_soft_fault().
5691 */
5692 status_t
5693 vm_wire_page(team_id team, addr_t address, bool writable,
5694 	VMPageWiringInfo* info)
5695 {
5696 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5697 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5698 
5699 	// compute the page protection that is required
5700 	bool isUser = IS_USER_ADDRESS(address);
5701 	uint32 requiredProtection = PAGE_PRESENT
5702 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5703 	if (writable)
5704 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5705 
5706 	// get and read lock the address space
5707 	VMAddressSpace* addressSpace = NULL;
5708 	if (isUser) {
5709 		if (team == B_CURRENT_TEAM)
5710 			addressSpace = VMAddressSpace::GetCurrent();
5711 		else
5712 			addressSpace = VMAddressSpace::Get(team);
5713 	} else
5714 		addressSpace = VMAddressSpace::GetKernel();
5715 	if (addressSpace == NULL)
5716 		return B_ERROR;
5717 
5718 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5719 
5720 	VMTranslationMap* map = addressSpace->TranslationMap();
5721 	status_t error = B_OK;
5722 
5723 	// get the area
5724 	VMArea* area = addressSpace->LookupArea(pageAddress);
5725 	if (area == NULL) {
		// Failure: drop the address space reference obtained above.
5726 		addressSpace->Put();
5727 		return B_BAD_ADDRESS;
5728 	}
5729 
5730 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5731 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5732 
5733 	// mark the area range wired
5734 	area->Wire(&info->range);
5735 
5736 	// Lock the area's cache chain and the translation map. Needed to look
5737 	// up the page and play with its wired count.
5738 	cacheChainLocker.LockAllSourceCaches();
5739 	map->Lock();
5740 
5741 	phys_addr_t physicalAddress;
5742 	uint32 flags;
5743 	vm_page* page;
5744 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5745 		&& (flags & requiredProtection) == requiredProtection
5746 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5747 			!= NULL) {
5748 		// Already mapped with the correct permissions -- just increment
5749 		// the page's wired count.
5750 		increment_page_wired_count(page);
5751 
5752 		map->Unlock();
5753 		cacheChainLocker.Unlock();
5754 		addressSpaceLocker.Unlock();
5755 	} else {
5756 		// Let vm_soft_fault() map the page for us, if possible. We need
5757 		// to fully unlock to avoid deadlocks. Since we have already
5758 		// wired the area itself, nothing disturbing will happen with it
5759 		// in the meantime.
5760 		map->Unlock();
5761 		cacheChainLocker.Unlock();
5762 		addressSpaceLocker.Unlock();
5763 
		// Passing &page makes vm_soft_fault() increment the wired count of
		// the page it maps and return that page.
5764 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5765 			isUser, &page);
5766 
5767 		if (error != B_OK) {
5768 			// The page could not be mapped -- clean up.
5769 			VMCache* cache = vm_area_get_locked_cache(area);
5770 			area->Unwire(&info->range);
5771 			cache->ReleaseRefAndUnlock();
5772 			addressSpace->Put();
5773 			return error;
5774 		}
5775 	}
5776 
	// The physical address keeps the offset of the original (possibly
	// unaligned) address within the page.
	// NOTE(review): on success no addressSpace->Put() is issued here;
	// presumably the reference is kept until vm_unwire_page() -- confirm.
5777 	info->physicalAddress
5778 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5779 			+ address % B_PAGE_SIZE;
5780 	info->page = page;
5781 
5782 	return B_OK;
5783 }
5784 
5785 
5786 /*!	Unwires a single page previously wired via vm_wire_page().
5787 
	Decrements the wired count of \c info->page and removes \c info->range
	from the area recorded in it, with the area's cache (and, if the page
	lives in a lower cache, the whole cache chain) locked.

5788 	\param info The same object passed to vm_wire_page() before.
5789 */
5790 void
5791 vm_unwire_page(VMPageWiringInfo* info)
5792 {
5793 	// lock the address space
5794 	VMArea* area = info->range.area;
5795 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5796 		// takes over our reference
5797 
5798 	// lock the top cache
5799 	VMCache* cache = vm_area_get_locked_cache(area);
5800 	VMCacheChainLocker cacheChainLocker(cache);
5801 
5802 	if (info->page->Cache() != cache) {
5803 		// The page is not in the top cache, so we lock the whole cache chain
5804 		// before touching the page's wired count.
5805 		cacheChainLocker.LockAllSourceCaches();
5806 	}
5807 
5808 	decrement_page_wired_count(info->page);
5809 
5810 	// remove the wired range from the area
5811 	area->Unwire(&info->range);
5812 
5813 	cacheChainLocker.Unlock();
5814 }
5815 
5816 
5817 /*!	Wires down the given address range in the specified team's address space.
5818 
5819 	If successful the function
5820 	- acquires a reference to the specified team's address space,
5821 	- adds respective wired ranges to all areas that intersect with the given
5822 	  address range,
5823 	- makes sure all pages in the given address range are mapped with the
5824 	  requested access permissions and increments their wired count.
5825 
5826 	It fails, when \a team doesn't specify a valid address space, when any part
5827 	of the specified address range is not covered by areas, when the concerned
5828 	areas don't allow mapping with the requested permissions, or when mapping
5829 	failed for another reason.
5830 
5831 	When successful the call must be balanced by a unlock_memory_etc() call with
5832 	the exact same parameters.
5833 
5834 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5835 		supported.
5836 	\param address The start of the address range to be wired.
5837 	\param numBytes The size of the address range to be wired.
5838 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5839 		requests that the range must be wired writable ("read from device
5840 		into memory").
5841 	\return \c B_OK on success, another error code otherwise.
5842 */
5843 status_t
5844 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5845 {
5846 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5847 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5848 
5849 	// compute the page protection that is required
5850 	bool isUser = IS_USER_ADDRESS(address);
5851 	bool writable = (flags & B_READ_DEVICE) == 0;
5852 	uint32 requiredProtection = PAGE_PRESENT
5853 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5854 	if (writable)
5855 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5856 
5857 	uint32 mallocFlags = isUser
5858 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5859 
5860 	// get and read lock the address space
5861 	VMAddressSpace* addressSpace = NULL;
5862 	if (isUser) {
5863 		if (team == B_CURRENT_TEAM)
5864 			addressSpace = VMAddressSpace::GetCurrent();
5865 		else
5866 			addressSpace = VMAddressSpace::Get(team);
5867 	} else
5868 		addressSpace = VMAddressSpace::GetKernel();
5869 	if (addressSpace == NULL)
5870 		return B_ERROR;
5871 
5872 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5873 		// We get a new address space reference here. The one we got above will
5874 		// be freed by unlock_memory_etc().
5875 
5876 	VMTranslationMap* map = addressSpace->TranslationMap();
5877 	status_t error = B_OK;
5878 
5879 	// iterate through all concerned areas
5880 	addr_t nextAddress = lockBaseAddress;
5881 	while (nextAddress != lockEndAddress) {
5882 		// get the next area
5883 		VMArea* area = addressSpace->LookupArea(nextAddress);
5884 		if (area == NULL) {
5885 			error = B_BAD_ADDRESS;
5886 			break;
5887 		}
5888 
5889 		addr_t areaStart = nextAddress;
5890 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5891 
5892 		// allocate the wired range (do that before locking the cache to avoid
5893 		// deadlocks)
5894 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5895 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5896 		if (range == NULL) {
5897 			error = B_NO_MEMORY;
5898 			break;
5899 		}
5900 
5901 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5902 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5903 
5904 		// mark the area range wired
5905 		area->Wire(range);
5906 
5907 		// Depending on the area cache type and the wiring, we may not need to
5908 		// look at the individual pages.
5909 		if (area->cache_type == CACHE_TYPE_NULL
5910 			|| area->cache_type == CACHE_TYPE_DEVICE
5911 			|| area->wiring == B_FULL_LOCK
5912 			|| area->wiring == B_CONTIGUOUS) {
5913 			nextAddress = areaEnd;
5914 			continue;
5915 		}
5916 
5917 		// Lock the area's cache chain and the translation map. Needed to look
5918 		// up pages and play with their wired count.
5919 		cacheChainLocker.LockAllSourceCaches();
5920 		map->Lock();
5921 
5922 		// iterate through the pages and wire them
5923 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5924 			phys_addr_t physicalAddress;
5925 			uint32 flags;
5926 
5927 			vm_page* page;
5928 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5929 				&& (flags & requiredProtection) == requiredProtection
5930 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5931 					!= NULL) {
5932 				// Already mapped with the correct permissions -- just increment
5933 				// the page's wired count.
5934 				increment_page_wired_count(page);
5935 			} else {
5936 				// Let vm_soft_fault() map the page for us, if possible. We need
5937 				// to fully unlock to avoid deadlocks. Since we have already
5938 				// wired the area itself, nothing disturbing will happen with it
5939 				// in the meantime.
5940 				map->Unlock();
5941 				cacheChainLocker.Unlock();
5942 				addressSpaceLocker.Unlock();
5943 
5944 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5945 					false, isUser, &page);
5946 
5947 				addressSpaceLocker.Lock();
5948 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5949 				cacheChainLocker.LockAllSourceCaches();
5950 				map->Lock();
5951 			}
5952 
5953 			if (error != B_OK)
5954 				break;
5955 		}
5956 
5957 		map->Unlock();
5958 
5959 		if (error == B_OK) {
5960 			cacheChainLocker.Unlock();
5961 		} else {
5962 			// An error occurred, so abort right here. If the current address
5963 			// is the first in this area, unwire the area, since we won't get
5964 			// to it when reverting what we've done so far.
5965 			if (nextAddress == areaStart) {
5966 				area->Unwire(range);
5967 				cacheChainLocker.Unlock();
5968 				range->~VMAreaWiredRange();
5969 				free_etc(range, mallocFlags);
5970 			} else
5971 				cacheChainLocker.Unlock();
5972 
5973 			break;
5974 		}
5975 	}
5976 
5977 	if (error != B_OK) {
5978 		// An error occurred, so unwire all that we've already wired. Note that
5979 		// even if not a single page was wired, unlock_memory_etc() is called
5980 		// to put the address space reference.
5981 		addressSpaceLocker.Unlock();
5982 		unlock_memory_etc(team, (void*)lockBaseAddress,
5983 			nextAddress - lockBaseAddress, flags);
5984 	}
5985 
5986 	return error;
5987 }
5988 
5989 
5990 status_t
5991 lock_memory(void* address, size_t numBytes, uint32 flags)
5992 {
5993 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5994 }
5995 
5996 
5997 /*!	Unwires an address range previously wired with lock_memory_etc().
5998 
5999 	Note that a call to this function must balance a previous lock_memory_etc()
6000 	call with exactly the same parameters.
6001 */
6002 status_t
6003 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
6004 {
6005 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
6006 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6007 
6008 	// compute the page protection that is required
6009 	bool isUser = IS_USER_ADDRESS(address);
6010 	bool writable = (flags & B_READ_DEVICE) == 0;
6011 	uint32 requiredProtection = PAGE_PRESENT
6012 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6013 	if (writable)
6014 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6015 
6016 	uint32 mallocFlags = isUser
6017 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6018 
6019 	// get and read lock the address space
6020 	VMAddressSpace* addressSpace = NULL;
6021 	if (isUser) {
6022 		if (team == B_CURRENT_TEAM)
6023 			addressSpace = VMAddressSpace::GetCurrent();
6024 		else
6025 			addressSpace = VMAddressSpace::Get(team);
6026 	} else
6027 		addressSpace = VMAddressSpace::GetKernel();
6028 	if (addressSpace == NULL)
6029 		return B_ERROR;
6030 
6031 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6032 		// Take over the address space reference. We don't unlock until we're
6033 		// done.
6034 
6035 	VMTranslationMap* map = addressSpace->TranslationMap();
6036 	status_t error = B_OK;
6037 
6038 	// iterate through all concerned areas
6039 	addr_t nextAddress = lockBaseAddress;
6040 	while (nextAddress != lockEndAddress) {
6041 		// get the next area
6042 		VMArea* area = addressSpace->LookupArea(nextAddress);
6043 		if (area == NULL) {
6044 			error = B_BAD_ADDRESS;
6045 			break;
6046 		}
6047 
6048 		addr_t areaStart = nextAddress;
6049 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6050 
6051 		// Lock the area's top cache. This is a requirement for
6052 		// VMArea::Unwire().
6053 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6054 
6055 		// Depending on the area cache type and the wiring, we may not need to
6056 		// look at the individual pages.
6057 		if (area->cache_type == CACHE_TYPE_NULL
6058 			|| area->cache_type == CACHE_TYPE_DEVICE
6059 			|| area->wiring == B_FULL_LOCK
6060 			|| area->wiring == B_CONTIGUOUS) {
6061 			// unwire the range (to avoid deadlocks we delete the range after
6062 			// unlocking the cache)
6063 			nextAddress = areaEnd;
6064 			VMAreaWiredRange* range = area->Unwire(areaStart,
6065 				areaEnd - areaStart, writable);
6066 			cacheChainLocker.Unlock();
6067 			if (range != NULL) {
6068 				range->~VMAreaWiredRange();
6069 				free_etc(range, mallocFlags);
6070 			}
6071 			continue;
6072 		}
6073 
6074 		// Lock the area's cache chain and the translation map. Needed to look
6075 		// up pages and play with their wired count.
6076 		cacheChainLocker.LockAllSourceCaches();
6077 		map->Lock();
6078 
6079 		// iterate through the pages and unwire them
6080 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6081 			phys_addr_t physicalAddress;
6082 			uint32 flags;
6083 
6084 			vm_page* page;
6085 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6086 				&& (flags & PAGE_PRESENT) != 0
6087 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6088 					!= NULL) {
6089 				// Already mapped with the correct permissions -- just increment
6090 				// the page's wired count.
6091 				decrement_page_wired_count(page);
6092 			} else {
6093 				panic("unlock_memory_etc(): Failed to unwire page: address "
6094 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6095 					nextAddress);
6096 				error = B_BAD_VALUE;
6097 				break;
6098 			}
6099 		}
6100 
6101 		map->Unlock();
6102 
6103 		// All pages are unwired. Remove the area's wired range as well (to
6104 		// avoid deadlocks we delete the range after unlocking the cache).
6105 		VMAreaWiredRange* range = area->Unwire(areaStart,
6106 			areaEnd - areaStart, writable);
6107 
6108 		cacheChainLocker.Unlock();
6109 
6110 		if (range != NULL) {
6111 			range->~VMAreaWiredRange();
6112 			free_etc(range, mallocFlags);
6113 		}
6114 
6115 		if (error != B_OK)
6116 			break;
6117 	}
6118 
6119 	// get rid of the address space reference lock_memory_etc() acquired
6120 	addressSpace->Put();
6121 
6122 	return error;
6123 }
6124 
6125 
6126 status_t
6127 unlock_memory(void* address, size_t numBytes, uint32 flags)
6128 {
6129 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6130 }
6131 
6132 
6133 /*!	Similar to get_memory_map(), but also allows to specify the address space
6134 	for the memory in question and has a saner semantics.
6135 	Returns \c B_OK when the complete range could be translated or
6136 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6137 	case the actual number of entries is written to \c *_numEntries. Any other
6138 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6139 	in this case.
6140 */
6141 status_t
6142 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6143 	physical_entry* table, uint32* _numEntries)
6144 {
6145 	uint32 numEntries = *_numEntries;
6146 	*_numEntries = 0;
6147 
6148 	VMAddressSpace* addressSpace;
6149 	addr_t virtualAddress = (addr_t)address;
6150 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6151 	phys_addr_t physicalAddress;
6152 	status_t status = B_OK;
6153 	int32 index = -1;
6154 	addr_t offset = 0;
6155 	bool interrupts = are_interrupts_enabled();
6156 
6157 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6158 		"entries)\n", team, address, numBytes, numEntries));
6159 
6160 	if (numEntries == 0 || numBytes == 0)
6161 		return B_BAD_VALUE;
6162 
6163 	// in which address space is the address to be found?
6164 	if (IS_USER_ADDRESS(virtualAddress)) {
6165 		if (team == B_CURRENT_TEAM)
6166 			addressSpace = VMAddressSpace::GetCurrent();
6167 		else
6168 			addressSpace = VMAddressSpace::Get(team);
6169 	} else
6170 		addressSpace = VMAddressSpace::GetKernel();
6171 
6172 	if (addressSpace == NULL)
6173 		return B_ERROR;
6174 
6175 	VMTranslationMap* map = addressSpace->TranslationMap();
6176 
6177 	if (interrupts)
6178 		map->Lock();
6179 
6180 	while (offset < numBytes) {
6181 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6182 		uint32 flags;
6183 
6184 		if (interrupts) {
6185 			status = map->Query((addr_t)address + offset, &physicalAddress,
6186 				&flags);
6187 		} else {
6188 			status = map->QueryInterrupt((addr_t)address + offset,
6189 				&physicalAddress, &flags);
6190 		}
6191 		if (status < B_OK)
6192 			break;
6193 		if ((flags & PAGE_PRESENT) == 0) {
6194 			panic("get_memory_map() called on unmapped memory!");
6195 			return B_BAD_ADDRESS;
6196 		}
6197 
6198 		if (index < 0 && pageOffset > 0) {
6199 			physicalAddress += pageOffset;
6200 			if (bytes > B_PAGE_SIZE - pageOffset)
6201 				bytes = B_PAGE_SIZE - pageOffset;
6202 		}
6203 
6204 		// need to switch to the next physical_entry?
6205 		if (index < 0 || table[index].address
6206 				!= physicalAddress - table[index].size) {
6207 			if ((uint32)++index + 1 > numEntries) {
6208 				// table to small
6209 				break;
6210 			}
6211 			table[index].address = physicalAddress;
6212 			table[index].size = bytes;
6213 		} else {
6214 			// page does fit in current entry
6215 			table[index].size += bytes;
6216 		}
6217 
6218 		offset += bytes;
6219 	}
6220 
6221 	if (interrupts)
6222 		map->Unlock();
6223 
6224 	if (status != B_OK)
6225 		return status;
6226 
6227 	if ((uint32)index + 1 > numEntries) {
6228 		*_numEntries = index;
6229 		return B_BUFFER_OVERFLOW;
6230 	}
6231 
6232 	*_numEntries = index + 1;
6233 	return B_OK;
6234 }
6235 
6236 
6237 /*!	According to the BeBook, this function should always succeed.
6238 	This is no longer the case.
6239 */
6240 extern "C" int32
6241 __get_memory_map_haiku(const void* address, size_t numBytes,
6242 	physical_entry* table, int32 numEntries)
6243 {
6244 	uint32 entriesRead = numEntries;
6245 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6246 		table, &entriesRead);
6247 	if (error != B_OK)
6248 		return error;
6249 
6250 	// close the entry list
6251 
6252 	// if it's only one entry, we will silently accept the missing ending
6253 	if (numEntries == 1)
6254 		return B_OK;
6255 
6256 	if (entriesRead + 1 > (uint32)numEntries)
6257 		return B_BUFFER_OVERFLOW;
6258 
6259 	table[entriesRead].address = 0;
6260 	table[entriesRead].size = 0;
6261 
6262 	return B_OK;
6263 }
6264 
6265 
6266 area_id
6267 area_for(void* address)
6268 {
6269 	return vm_area_for((addr_t)address, true);
6270 }
6271 
6272 
6273 area_id
6274 find_area(const char* name)
6275 {
6276 	return VMAreas::Find(name);
6277 }
6278 
6279 
6280 status_t
6281 _get_area_info(area_id id, area_info* info, size_t size)
6282 {
6283 	if (size != sizeof(area_info) || info == NULL)
6284 		return B_BAD_VALUE;
6285 
6286 	AddressSpaceReadLocker locker;
6287 	VMArea* area;
6288 	status_t status = locker.SetFromArea(id, area);
6289 	if (status != B_OK)
6290 		return status;
6291 
6292 	fill_area_info(area, info, size);
6293 	return B_OK;
6294 }
6295 
6296 
6297 status_t
6298 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6299 {
6300 	addr_t nextBase = *(addr_t*)cookie;
6301 
6302 	// we're already through the list
6303 	if (nextBase == (addr_t)-1)
6304 		return B_ENTRY_NOT_FOUND;
6305 
6306 	if (team == B_CURRENT_TEAM)
6307 		team = team_get_current_team_id();
6308 
6309 	AddressSpaceReadLocker locker(team);
6310 	if (!locker.IsLocked())
6311 		return B_BAD_TEAM_ID;
6312 
6313 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6314 	if (area == NULL) {
6315 		nextBase = (addr_t)-1;
6316 		return B_ENTRY_NOT_FOUND;
6317 	}
6318 
6319 	fill_area_info(area, info, size);
6320 	*cookie = (ssize_t)(area->Base() + 1);
6321 
6322 	return B_OK;
6323 }
6324 
6325 
6326 status_t
6327 set_area_protection(area_id area, uint32 newProtection)
6328 {
6329 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6330 		newProtection, true);
6331 }
6332 
6333 
6334 status_t
6335 resize_area(area_id areaID, size_t newSize)
6336 {
6337 	return vm_resize_area(areaID, newSize, true);
6338 }
6339 
6340 
6341 /*!	Transfers the specified area to a new team. The caller must be the owner
6342 	of the area.
6343 */
6344 area_id
6345 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6346 	bool kernel)
6347 {
6348 	area_info info;
6349 	status_t status = get_area_info(id, &info);
6350 	if (status != B_OK)
6351 		return status;
6352 
6353 	if (info.team != thread_get_current_thread()->team->id)
6354 		return B_PERMISSION_DENIED;
6355 
6356 	// We need to mark the area cloneable so the following operations work.
6357 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6358 	if (status != B_OK)
6359 		return status;
6360 
6361 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6362 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6363 	if (clonedArea < 0)
6364 		return clonedArea;
6365 
6366 	status = vm_delete_area(info.team, id, kernel);
6367 	if (status != B_OK) {
6368 		vm_delete_area(target, clonedArea, kernel);
6369 		return status;
6370 	}
6371 
6372 	// Now we can reset the protection to whatever it was before.
6373 	set_area_protection(clonedArea, info.protection);
6374 
6375 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6376 
6377 	return clonedArea;
6378 }
6379 
6380 
6381 extern "C" area_id
6382 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6383 	size_t numBytes, uint32 addressSpec, uint32 protection,
6384 	void** _virtualAddress)
6385 {
6386 	if (!arch_vm_supports_protection(protection))
6387 		return B_NOT_SUPPORTED;
6388 
6389 	fix_protection(&protection);
6390 
6391 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6392 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6393 		false);
6394 }
6395 
6396 
6397 area_id
6398 clone_area(const char* name, void** _address, uint32 addressSpec,
6399 	uint32 protection, area_id source)
6400 {
6401 	if ((protection & B_KERNEL_PROTECTION) == 0)
6402 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6403 
6404 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6405 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6406 }
6407 
6408 
6409 area_id
6410 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6411 	uint32 protection, uint32 flags, uint32 guardSize,
6412 	const virtual_address_restrictions* virtualAddressRestrictions,
6413 	const physical_address_restrictions* physicalAddressRestrictions,
6414 	void** _address)
6415 {
6416 	fix_protection(&protection);
6417 
6418 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6419 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6420 		true, _address);
6421 }
6422 
6423 
6424 extern "C" area_id
6425 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6426 	size_t size, uint32 lock, uint32 protection)
6427 {
6428 	fix_protection(&protection);
6429 
6430 	virtual_address_restrictions virtualRestrictions = {};
6431 	virtualRestrictions.address = *_address;
6432 	virtualRestrictions.address_specification = addressSpec;
6433 	physical_address_restrictions physicalRestrictions = {};
6434 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6435 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6436 		true, _address);
6437 }
6438 
6439 
6440 status_t
6441 delete_area(area_id area)
6442 {
6443 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6444 }
6445 
6446 
6447 //	#pragma mark - Userland syscalls
6448 
6449 
6450 status_t
6451 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6452 	addr_t size)
6453 {
6454 	// filter out some unavailable values (for userland)
6455 	switch (addressSpec) {
6456 		case B_ANY_KERNEL_ADDRESS:
6457 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6458 			return B_BAD_VALUE;
6459 	}
6460 
6461 	addr_t address;
6462 
6463 	if (!IS_USER_ADDRESS(userAddress)
6464 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6465 		return B_BAD_ADDRESS;
6466 
6467 	status_t status = vm_reserve_address_range(
6468 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6469 		RESERVED_AVOID_BASE);
6470 	if (status != B_OK)
6471 		return status;
6472 
6473 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6474 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6475 			(void*)address, size);
6476 		return B_BAD_ADDRESS;
6477 	}
6478 
6479 	return B_OK;
6480 }
6481 
6482 
6483 status_t
6484 _user_unreserve_address_range(addr_t address, addr_t size)
6485 {
6486 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6487 		(void*)address, size);
6488 }
6489 
6490 
6491 area_id
6492 _user_area_for(void* address)
6493 {
6494 	return vm_area_for((addr_t)address, false);
6495 }
6496 
6497 
6498 area_id
6499 _user_find_area(const char* userName)
6500 {
6501 	char name[B_OS_NAME_LENGTH];
6502 
6503 	if (!IS_USER_ADDRESS(userName)
6504 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6505 		return B_BAD_ADDRESS;
6506 
6507 	return find_area(name);
6508 }
6509 
6510 
6511 status_t
6512 _user_get_area_info(area_id area, area_info* userInfo)
6513 {
6514 	if (!IS_USER_ADDRESS(userInfo))
6515 		return B_BAD_ADDRESS;
6516 
6517 	area_info info;
6518 	status_t status = get_area_info(area, &info);
6519 	if (status < B_OK)
6520 		return status;
6521 
6522 	// TODO: do we want to prevent userland from seeing kernel protections?
6523 	//info.protection &= B_USER_PROTECTION;
6524 
6525 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6526 		return B_BAD_ADDRESS;
6527 
6528 	return status;
6529 }
6530 
6531 
6532 status_t
6533 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6534 {
6535 	ssize_t cookie;
6536 
6537 	if (!IS_USER_ADDRESS(userCookie)
6538 		|| !IS_USER_ADDRESS(userInfo)
6539 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6540 		return B_BAD_ADDRESS;
6541 
6542 	area_info info;
6543 	status_t status = _get_next_area_info(team, &cookie, &info,
6544 		sizeof(area_info));
6545 	if (status != B_OK)
6546 		return status;
6547 
6548 	//info.protection &= B_USER_PROTECTION;
6549 
6550 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6551 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6552 		return B_BAD_ADDRESS;
6553 
6554 	return status;
6555 }
6556 
6557 
6558 status_t
6559 _user_set_area_protection(area_id area, uint32 newProtection)
6560 {
6561 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6562 		return B_BAD_VALUE;
6563 
6564 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6565 		newProtection, false);
6566 }
6567 
6568 
6569 status_t
6570 _user_resize_area(area_id area, size_t newSize)
6571 {
6572 	// TODO: Since we restrict deleting of areas to those owned by the team,
6573 	// we should also do that for resizing (check other functions, too).
6574 	return vm_resize_area(area, newSize, false);
6575 }
6576 
6577 
6578 area_id
6579 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6580 	team_id target)
6581 {
6582 	// filter out some unavailable values (for userland)
6583 	switch (addressSpec) {
6584 		case B_ANY_KERNEL_ADDRESS:
6585 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6586 			return B_BAD_VALUE;
6587 	}
6588 
6589 	void* address;
6590 	if (!IS_USER_ADDRESS(userAddress)
6591 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6592 		return B_BAD_ADDRESS;
6593 
6594 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6595 	if (newArea < B_OK)
6596 		return newArea;
6597 
6598 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6599 		return B_BAD_ADDRESS;
6600 
6601 	return newArea;
6602 }
6603 
6604 
6605 area_id
6606 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6607 	uint32 protection, area_id sourceArea)
6608 {
6609 	char name[B_OS_NAME_LENGTH];
6610 	void* address;
6611 
6612 	// filter out some unavailable values (for userland)
6613 	switch (addressSpec) {
6614 		case B_ANY_KERNEL_ADDRESS:
6615 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6616 			return B_BAD_VALUE;
6617 	}
6618 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6619 		return B_BAD_VALUE;
6620 
6621 	if (!IS_USER_ADDRESS(userName)
6622 		|| !IS_USER_ADDRESS(userAddress)
6623 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6624 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6625 		return B_BAD_ADDRESS;
6626 
6627 	fix_protection(&protection);
6628 
6629 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6630 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6631 		false);
6632 	if (clonedArea < B_OK)
6633 		return clonedArea;
6634 
6635 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6636 		delete_area(clonedArea);
6637 		return B_BAD_ADDRESS;
6638 	}
6639 
6640 	return clonedArea;
6641 }
6642 
6643 
6644 area_id
6645 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6646 	size_t size, uint32 lock, uint32 protection)
6647 {
6648 	char name[B_OS_NAME_LENGTH];
6649 	void* address;
6650 
6651 	// filter out some unavailable values (for userland)
6652 	switch (addressSpec) {
6653 		case B_ANY_KERNEL_ADDRESS:
6654 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6655 			return B_BAD_VALUE;
6656 	}
6657 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6658 		return B_BAD_VALUE;
6659 
6660 	if (!IS_USER_ADDRESS(userName)
6661 		|| !IS_USER_ADDRESS(userAddress)
6662 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6663 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6664 		return B_BAD_ADDRESS;
6665 
6666 	if (addressSpec == B_EXACT_ADDRESS
6667 		&& IS_KERNEL_ADDRESS(address))
6668 		return B_BAD_VALUE;
6669 
6670 	if (addressSpec == B_ANY_ADDRESS)
6671 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6672 	if (addressSpec == B_BASE_ADDRESS)
6673 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6674 
6675 	fix_protection(&protection);
6676 
6677 	virtual_address_restrictions virtualRestrictions = {};
6678 	virtualRestrictions.address = address;
6679 	virtualRestrictions.address_specification = addressSpec;
6680 	physical_address_restrictions physicalRestrictions = {};
6681 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6682 		size, lock, protection, 0, 0, &virtualRestrictions,
6683 		&physicalRestrictions, false, &address);
6684 
6685 	if (area >= B_OK
6686 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6687 		delete_area(area);
6688 		return B_BAD_ADDRESS;
6689 	}
6690 
6691 	return area;
6692 }
6693 
6694 
6695 status_t
6696 _user_delete_area(area_id area)
6697 {
6698 	// Unlike the BeOS implementation, you can now only delete areas
6699 	// that you have created yourself from userland.
6700 	// The documentation to delete_area() explicitly states that this
6701 	// will be restricted in the future, and so it will.
6702 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6703 }
6704 
6705 
6706 // TODO: create a BeOS style call for this!
6707 
6708 area_id
6709 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6710 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6711 	int fd, off_t offset)
6712 {
6713 	char name[B_OS_NAME_LENGTH];
6714 	void* address;
6715 	area_id area;
6716 
6717 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6718 		return B_BAD_VALUE;
6719 
6720 	fix_protection(&protection);
6721 
6722 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6723 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6724 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6725 		return B_BAD_ADDRESS;
6726 
6727 	if (addressSpec == B_EXACT_ADDRESS) {
6728 		if ((addr_t)address + size < (addr_t)address
6729 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6730 			return B_BAD_VALUE;
6731 		}
6732 		if (!IS_USER_ADDRESS(address)
6733 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6734 			return B_BAD_ADDRESS;
6735 		}
6736 	}
6737 
6738 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6739 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6740 		false);
6741 	if (area < B_OK)
6742 		return area;
6743 
6744 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6745 		return B_BAD_ADDRESS;
6746 
6747 	return area;
6748 }
6749 
6750 
6751 status_t
6752 _user_unmap_memory(void* _address, size_t size)
6753 {
6754 	addr_t address = (addr_t)_address;
6755 
6756 	// check params
6757 	if (size == 0 || (addr_t)address + size < (addr_t)address
6758 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6759 		return B_BAD_VALUE;
6760 	}
6761 
6762 	if (!IS_USER_ADDRESS(address)
6763 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6764 		return B_BAD_ADDRESS;
6765 	}
6766 
6767 	// Write lock the address space and ensure the address range is not wired.
6768 	AddressSpaceWriteLocker locker;
6769 	do {
6770 		status_t status = locker.SetTo(team_get_current_team_id());
6771 		if (status != B_OK)
6772 			return status;
6773 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6774 			size, &locker));
6775 
6776 	// unmap
6777 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6778 }
6779 
6780 
6781 status_t
6782 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6783 {
6784 	// check address range
6785 	addr_t address = (addr_t)_address;
6786 	size = PAGE_ALIGN(size);
6787 
6788 	if ((address % B_PAGE_SIZE) != 0)
6789 		return B_BAD_VALUE;
6790 	if (!is_user_address_range(_address, size)) {
6791 		// weird error code required by POSIX
6792 		return ENOMEM;
6793 	}
6794 
6795 	// extend and check protection
6796 	if ((protection & ~B_USER_PROTECTION) != 0)
6797 		return B_BAD_VALUE;
6798 
6799 	fix_protection(&protection);
6800 
6801 	// We need to write lock the address space, since we're going to play with
6802 	// the areas. Also make sure that none of the areas is wired and that we're
6803 	// actually allowed to change the protection.
6804 	AddressSpaceWriteLocker locker;
6805 
6806 	bool restart;
6807 	do {
6808 		restart = false;
6809 
6810 		status_t status = locker.SetTo(team_get_current_team_id());
6811 		if (status != B_OK)
6812 			return status;
6813 
6814 		// First round: Check whether the whole range is covered by areas and we
6815 		// are allowed to modify them.
6816 		addr_t currentAddress = address;
6817 		size_t sizeLeft = size;
6818 		while (sizeLeft > 0) {
6819 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6820 			if (area == NULL)
6821 				return B_NO_MEMORY;
6822 
6823 			if ((area->protection & B_KERNEL_AREA) != 0)
6824 				return B_NOT_ALLOWED;
6825 			if (area->protection_max != 0
6826 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6827 				return B_NOT_ALLOWED;
6828 			}
6829 
6830 			addr_t offset = currentAddress - area->Base();
6831 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6832 
6833 			AreaCacheLocker cacheLocker(area);
6834 
6835 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6836 					&locker, &cacheLocker)) {
6837 				restart = true;
6838 				break;
6839 			}
6840 
6841 			cacheLocker.Unlock();
6842 
6843 			currentAddress += rangeSize;
6844 			sizeLeft -= rangeSize;
6845 		}
6846 	} while (restart);
6847 
6848 	// Second round: If the protections differ from that of the area, create a
6849 	// page protection array and re-map mapped pages.
6850 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6851 	addr_t currentAddress = address;
6852 	size_t sizeLeft = size;
6853 	while (sizeLeft > 0) {
6854 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6855 		if (area == NULL)
6856 			return B_NO_MEMORY;
6857 
6858 		addr_t offset = currentAddress - area->Base();
6859 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6860 
6861 		currentAddress += rangeSize;
6862 		sizeLeft -= rangeSize;
6863 
6864 		if (area->page_protections == NULL) {
6865 			if (area->protection == protection)
6866 				continue;
6867 			if (offset == 0 && rangeSize == area->Size()) {
6868 				// The whole area is covered: let set_area_protection handle it.
6869 				status_t status = vm_set_area_protection(area->address_space->ID(),
6870 					area->id, protection, false);
6871 				if (status != B_OK)
6872 					return status;
6873 				continue;
6874 			}
6875 
6876 			status_t status = allocate_area_page_protections(area);
6877 			if (status != B_OK)
6878 				return status;
6879 		}
6880 
6881 		// We need to lock the complete cache chain, since we potentially unmap
6882 		// pages of lower caches.
6883 		VMCache* topCache = vm_area_get_locked_cache(area);
6884 		VMCacheChainLocker cacheChainLocker(topCache);
6885 		cacheChainLocker.LockAllSourceCaches();
6886 
6887 		// Adjust the committed size, if necessary.
6888 		if (topCache->source != NULL && topCache->temporary) {
6889 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6890 			ssize_t commitmentChange = 0;
6891 			for (addr_t pageAddress = area->Base() + offset;
6892 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6893 				if (topCache->LookupPage(pageAddress) != NULL) {
6894 					// This page should already be accounted for in the commitment.
6895 					continue;
6896 				}
6897 
6898 				const bool isWritable
6899 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6900 
6901 				if (becomesWritable && !isWritable)
6902 					commitmentChange += B_PAGE_SIZE;
6903 				else if (!becomesWritable && isWritable)
6904 					commitmentChange -= B_PAGE_SIZE;
6905 			}
6906 
6907 			if (commitmentChange != 0) {
6908 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6909 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6910 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6911 				if (status != B_OK)
6912 					return status;
6913 			}
6914 		}
6915 
6916 		for (addr_t pageAddress = area->Base() + offset;
6917 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6918 			map->Lock();
6919 
6920 			set_area_page_protection(area, pageAddress, protection);
6921 
6922 			phys_addr_t physicalAddress;
6923 			uint32 flags;
6924 
6925 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6926 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6927 				map->Unlock();
6928 				continue;
6929 			}
6930 
6931 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6932 			if (page == NULL) {
6933 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6934 					"\n", area, physicalAddress);
6935 				map->Unlock();
6936 				return B_ERROR;
6937 			}
6938 
6939 			// If the page is not in the topmost cache and write access is
6940 			// requested, we have to unmap it. Otherwise we can re-map it with
6941 			// the new protection.
6942 			bool unmapPage = page->Cache() != topCache
6943 				&& (protection & B_WRITE_AREA) != 0;
6944 
6945 			if (!unmapPage)
6946 				map->ProtectPage(area, pageAddress, protection);
6947 
6948 			map->Unlock();
6949 
6950 			if (unmapPage) {
6951 				DEBUG_PAGE_ACCESS_START(page);
6952 				unmap_page(area, pageAddress);
6953 				DEBUG_PAGE_ACCESS_END(page);
6954 			}
6955 		}
6956 	}
6957 
6958 	return B_OK;
6959 }
6960 
6961 
6962 status_t
6963 _user_sync_memory(void* _address, size_t size, uint32 flags)
6964 {
6965 	addr_t address = (addr_t)_address;
6966 	size = PAGE_ALIGN(size);
6967 
6968 	// check params
6969 	if ((address % B_PAGE_SIZE) != 0)
6970 		return B_BAD_VALUE;
6971 	if (!is_user_address_range(_address, size)) {
6972 		// weird error code required by POSIX
6973 		return ENOMEM;
6974 	}
6975 
6976 	bool writeSync = (flags & MS_SYNC) != 0;
6977 	bool writeAsync = (flags & MS_ASYNC) != 0;
6978 	if (writeSync && writeAsync)
6979 		return B_BAD_VALUE;
6980 
6981 	if (size == 0 || (!writeSync && !writeAsync))
6982 		return B_OK;
6983 
6984 	// iterate through the range and sync all concerned areas
6985 	while (size > 0) {
6986 		// read lock the address space
6987 		AddressSpaceReadLocker locker;
6988 		status_t error = locker.SetTo(team_get_current_team_id());
6989 		if (error != B_OK)
6990 			return error;
6991 
6992 		// get the first area
6993 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6994 		if (area == NULL)
6995 			return B_NO_MEMORY;
6996 
6997 		uint32 offset = address - area->Base();
6998 		size_t rangeSize = min_c(area->Size() - offset, size);
6999 		offset += area->cache_offset;
7000 
7001 		// lock the cache
7002 		AreaCacheLocker cacheLocker(area);
7003 		if (!cacheLocker)
7004 			return B_BAD_VALUE;
7005 		VMCache* cache = area->cache;
7006 
7007 		locker.Unlock();
7008 
7009 		uint32 firstPage = offset >> PAGE_SHIFT;
7010 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
7011 
7012 		// write the pages
7013 		if (cache->type == CACHE_TYPE_VNODE) {
7014 			if (writeSync) {
7015 				// synchronous
7016 				error = vm_page_write_modified_page_range(cache, firstPage,
7017 					endPage);
7018 				if (error != B_OK)
7019 					return error;
7020 			} else {
7021 				// asynchronous
7022 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
7023 				// TODO: This is probably not quite what is supposed to happen.
7024 				// Especially when a lot has to be written, it might take ages
7025 				// until it really hits the disk.
7026 			}
7027 		}
7028 
7029 		address += rangeSize;
7030 		size -= rangeSize;
7031 	}
7032 
7033 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
7034 	// synchronize multiple mappings of the same file. In our VM they never get
7035 	// out of sync, though, so we don't have to do anything.
7036 
7037 	return B_OK;
7038 }
7039 
7040 
7041 status_t
7042 _user_memory_advice(void* _address, size_t size, uint32 advice)
7043 {
7044 	addr_t address = (addr_t)_address;
7045 	if ((address % B_PAGE_SIZE) != 0)
7046 		return B_BAD_VALUE;
7047 
7048 	size = PAGE_ALIGN(size);
7049 	if (!is_user_address_range(_address, size)) {
7050 		// weird error code required by POSIX
7051 		return B_NO_MEMORY;
7052 	}
7053 
7054 	switch (advice) {
7055 		case MADV_NORMAL:
7056 		case MADV_SEQUENTIAL:
7057 		case MADV_RANDOM:
7058 		case MADV_WILLNEED:
7059 		case MADV_DONTNEED:
7060 			// TODO: Implement!
7061 			break;
7062 
7063 		case MADV_FREE:
7064 		{
7065 			AddressSpaceWriteLocker locker;
7066 			do {
7067 				status_t status = locker.SetTo(team_get_current_team_id());
7068 				if (status != B_OK)
7069 					return status;
7070 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7071 					address, size, &locker));
7072 
7073 			discard_address_range(locker.AddressSpace(), address, size, false);
7074 			break;
7075 		}
7076 
7077 		default:
7078 			return B_BAD_VALUE;
7079 	}
7080 
7081 	return B_OK;
7082 }
7083 
7084 
7085 status_t
7086 _user_get_memory_properties(team_id teamID, const void* address,
7087 	uint32* _protected, uint32* _lock)
7088 {
7089 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7090 		return B_BAD_ADDRESS;
7091 
7092 	AddressSpaceReadLocker locker;
7093 	status_t error = locker.SetTo(teamID);
7094 	if (error != B_OK)
7095 		return error;
7096 
7097 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7098 	if (area == NULL)
7099 		return B_NO_MEMORY;
7100 
7101 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7102 	uint32 wiring = area->wiring;
7103 
7104 	locker.Unlock();
7105 
7106 	error = user_memcpy(_protected, &protection, sizeof(protection));
7107 	if (error != B_OK)
7108 		return error;
7109 
7110 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7111 
7112 	return error;
7113 }
7114 
7115 
7116 static status_t
7117 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7118 {
7119 #if ENABLE_SWAP_SUPPORT
7120 	// check address range
7121 	addr_t address = (addr_t)_address;
7122 	size = PAGE_ALIGN(size);
7123 
7124 	if ((address % B_PAGE_SIZE) != 0)
7125 		return EINVAL;
7126 	if (!is_user_address_range(_address, size))
7127 		return EINVAL;
7128 
7129 	const addr_t endAddress = address + size;
7130 
7131 	AddressSpaceReadLocker addressSpaceLocker;
7132 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7133 	if (error != B_OK)
7134 		return error;
7135 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7136 
7137 	// iterate through all concerned areas
7138 	addr_t nextAddress = address;
7139 	while (nextAddress != endAddress) {
7140 		// get the next area
7141 		VMArea* area = addressSpace->LookupArea(nextAddress);
7142 		if (area == NULL) {
7143 			error = B_BAD_ADDRESS;
7144 			break;
7145 		}
7146 
7147 		const addr_t areaStart = nextAddress;
7148 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7149 		nextAddress = areaEnd;
7150 
7151 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7152 		if (error != B_OK) {
7153 			// We don't need to unset or reset things on failure.
7154 			break;
7155 		}
7156 
7157 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7158 		VMAnonymousCache* anonCache = NULL;
7159 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7160 			// This memory will aready never be swapped. Nothing to do.
7161 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7162 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7163 				areaEnd - areaStart, swappable);
7164 		} else {
7165 			// Some other cache type? We cannot affect anything here.
7166 			error = EINVAL;
7167 		}
7168 
7169 		cacheChainLocker.Unlock();
7170 
7171 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7172 		if (error != B_OK)
7173 			break;
7174 	}
7175 
7176 	return error;
7177 #else
7178 	// No swap support? Nothing to do.
7179 	return B_OK;
7180 #endif
7181 }
7182 
7183 
/*!	Syscall backend for mlock(): marks the given range of the calling team's
	address space as not swappable.
	Validation and error codes are those of user_set_memory_swappable().
*/
status_t
_user_mlock(const void* _address, size_t size)
{
	return user_set_memory_swappable(_address, size, false);
}
7189 
7190 
/*!	Syscall backend for munlock(): marks the given range of the calling
	team's address space as swappable again.
	Validation and error codes are those of user_set_memory_swappable().
*/
status_t
_user_munlock(const void* _address, size_t size)
{
	// TODO: B_SHARED_AREAs need to be handled a bit differently:
	// if multiple clones of an area had mlock() called on them,
	// munlock() must also be called on all of them to actually unlock.
	// (At present, the first munlock() will unlock all.)
	// TODO: fork() should automatically unlock memory in the child.
	return user_set_memory_swappable(_address, size, true);
}
7201 
7202 
7203 // #pragma mark -- compatibility
7204 
7205 
7206 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7207 
7208 
/*!	Old layout of a memory map table entry, with both fields 32 bits wide.
	__get_memory_map_beos() fills tables of these from physical_entry values.
*/
struct physical_entry_beos {
	uint32	address;	// physical address of the run; always below 4 GB
	uint32	size;		// size of the run
};
7213 
7214 
7215 /*!	The physical_entry structure has changed. We need to translate it to the
7216 	old one.
7217 */
7218 extern "C" int32
7219 __get_memory_map_beos(const void* _address, size_t numBytes,
7220 	physical_entry_beos* table, int32 numEntries)
7221 {
7222 	if (numEntries <= 0)
7223 		return B_BAD_VALUE;
7224 
7225 	const uint8* address = (const uint8*)_address;
7226 
7227 	int32 count = 0;
7228 	while (numBytes > 0 && count < numEntries) {
7229 		physical_entry entry;
7230 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7231 		if (result < 0) {
7232 			if (result != B_BUFFER_OVERFLOW)
7233 				return result;
7234 		}
7235 
7236 		if (entry.address >= (phys_addr_t)1 << 32) {
7237 			panic("get_memory_map(): Address is greater 4 GB!");
7238 			return B_ERROR;
7239 		}
7240 
7241 		table[count].address = entry.address;
7242 		table[count++].size = entry.size;
7243 
7244 		address += entry.size;
7245 		numBytes -= entry.size;
7246 	}
7247 
7248 	// null-terminate the table, if possible
7249 	if (count < numEntries) {
7250 		table[count].address = 0;
7251 		table[count].size = 0;
7252 	}
7253 
7254 	return B_OK;
7255 }
7256 
7257 
/*!	The type of the \a physicalAddress parameter has changed from void* to
	phys_addr_t.

	This compatibility version accepts the pointer typed address, converts it
	via addr_t, and forwards all other arguments unchanged to
	__map_physical_memory_haiku(), whose result it returns.
*/
extern "C" area_id
__map_physical_memory_beos(const char* name, void* physicalAddress,
	size_t numBytes, uint32 addressSpec, uint32 protection,
	void** _virtualAddress)
{
	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
		addressSpec, protection, _virtualAddress);
}
7269 
7270 
7271 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7272 	we meddle with the \a lock parameter to force 32 bit.
7273 */
7274 extern "C" area_id
7275 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7276 	size_t size, uint32 lock, uint32 protection)
7277 {
7278 	switch (lock) {
7279 		case B_NO_LOCK:
7280 			break;
7281 		case B_FULL_LOCK:
7282 		case B_LAZY_LOCK:
7283 			lock = B_32_BIT_FULL_LOCK;
7284 			break;
7285 		case B_CONTIGUOUS:
7286 			lock = B_32_BIT_CONTIGUOUS;
7287 			break;
7288 	}
7289 
7290 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7291 		protection);
7292 }
7293 
7294 
// Export the *_beos compatibility functions under the "BASE" version of the
// public names.
// NOTE(review): "@" vs. "@@" presumably follows the usual ELF symbol
// versioning convention (non-default vs. default version) -- confirm against
// symbol_versioning.h.
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
	"BASE");
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
	"map_physical_memory@", "BASE");
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
	"BASE");

// Export the current implementations under the "1_ALPHA3" version of the
// same public names.
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
	"get_memory_map@@", "1_ALPHA3");
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
	"map_physical_memory@@", "1_ALPHA3");
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
	"1_ALPHA3");
7308 
7309 
7310 #else
7311 
7312 
// No compatibility functions are needed in this configuration: the current
// implementations are exported directly under the "BASE" version of the
// public names.
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
	"get_memory_map@@", "BASE");
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
	"map_physical_memory@@", "BASE");
DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
	"BASE");
7319 
7320 
7321 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7322