xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4c07199d8201fcf267e90be0d24b76799d03cea6)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
117 
118 
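// Locks a chain of caches from the top (consumer) cache down towards the
// bottom (source) cache. While locking, each cache's UserData field is used
// to remember its consumer, so that Unlock() can later walk the chain and
// release the locks again in source -> consumer order.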
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
235 
236 } // namespace
237 
238 
239 // The memory reserve that an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 ObjectCache* gPageMappingsObjectCache;
248 
249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
250 
251 static off_t sAvailableMemory;
252 static off_t sNeededMemory;
253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
254 static uint32 sPageFaults;
255 
256 static VMPhysicalPageMapper* sPhysicalPageMapper;
257 
258 #if DEBUG_CACHE_LIST
259 
260 struct cache_info {
261 	VMCache*	cache;
262 	addr_t		page_count;
263 	addr_t		committed;
264 };
265 
266 static const int kCacheInfoTableCount = 100 * 1024;
267 static cache_info* sCacheInfoTable;
268 
269 #endif	// DEBUG_CACHE_LIST
270 
271 
272 // function declarations
273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
274 	bool addressSpaceCleanup);
275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
276 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
277 static status_t map_backing_store(VMAddressSpace* addressSpace,
278 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
279 	int protection, int protectionMax, int mapping, uint32 flags,
280 	const virtual_address_restrictions* addressRestrictions, bool kernel,
281 	VMArea** _area, void** _virtualAddress);
282 static void fix_protection(uint32* protection);
283 
284 
285 //	#pragma mark -
286 
287 
288 #if VM_PAGE_FAULT_TRACING
289 
290 namespace VMPageFaultTracing {
291 
292 class PageFaultStart : public AbstractTraceEntry {
293 public:
294 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
295 		:
296 		fAddress(address),
297 		fPC(pc),
298 		fWrite(write),
299 		fUser(user)
300 	{
301 		Initialized();
302 	}
303 
304 	virtual void AddDump(TraceOutput& out)
305 	{
306 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
307 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
308 	}
309 
310 private:
311 	addr_t	fAddress;
312 	addr_t	fPC;
313 	bool	fWrite;
314 	bool	fUser;
315 };
316 
317 
318 // page fault errors
319 enum {
320 	PAGE_FAULT_ERROR_NO_AREA		= 0,
321 	PAGE_FAULT_ERROR_KERNEL_ONLY,
322 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
323 	PAGE_FAULT_ERROR_READ_PROTECTED,
324 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
325 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
326 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
327 };
328 
329 
330 class PageFaultError : public AbstractTraceEntry {
331 public:
332 	PageFaultError(area_id area, status_t error)
333 		:
334 		fArea(area),
335 		fError(error)
336 	{
337 		Initialized();
338 	}
339 
340 	virtual void AddDump(TraceOutput& out)
341 	{
342 		switch (fError) {
343 			case PAGE_FAULT_ERROR_NO_AREA:
344 				out.Print("page fault error: no area");
345 				break;
346 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
347 				out.Print("page fault error: area: %ld, kernel only", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
350 				out.Print("page fault error: area: %ld, write protected",
351 					fArea);
352 				break;
353 			case PAGE_FAULT_ERROR_READ_PROTECTED:
354 				out.Print("page fault error: area: %ld, read protected", fArea);
355 				break;
356 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
357 				out.Print("page fault error: area: %ld, execute protected",
358 					fArea);
359 				break;
360 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
361 				out.Print("page fault error: kernel touching bad user memory");
362 				break;
363 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
364 				out.Print("page fault error: no address space");
365 				break;
366 			default:
367 				out.Print("page fault error: area: %ld, error: %s", fArea,
368 					strerror(fError));
369 				break;
370 		}
371 	}
372 
373 private:
374 	area_id		fArea;
375 	status_t	fError;
376 };
377 
378 
379 class PageFaultDone : public AbstractTraceEntry {
380 public:
381 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
382 			vm_page* page)
383 		:
384 		fArea(area),
385 		fTopCache(topCache),
386 		fCache(cache),
387 		fPage(page)
388 	{
389 		Initialized();
390 	}
391 
392 	virtual void AddDump(TraceOutput& out)
393 	{
394 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
395 			"page: %p", fArea, fTopCache, fCache, fPage);
396 	}
397 
398 private:
399 	area_id		fArea;
400 	VMCache*	fTopCache;
401 	VMCache*	fCache;
402 	vm_page*	fPage;
403 };
404 
405 }	// namespace VMPageFaultTracing
406 
407 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
408 #else
409 #	define TPF(x) ;
410 #endif	// VM_PAGE_FAULT_TRACING
411 
412 
413 //	#pragma mark -
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 increment_page_wired_count(vm_page* page)
420 {
421 	if (!page->IsMapped())
422 		atomic_add(&gMappedPagesCount, 1);
423 	page->IncrementWiredCount();
424 }
425 
426 
427 /*!	The page's cache must be locked.
428 */
429 static inline void
430 decrement_page_wired_count(vm_page* page)
431 {
432 	page->DecrementWiredCount();
433 	if (!page->IsMapped())
434 		atomic_add(&gMappedPagesCount, -1);
435 }
436 
437 
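// Computes the virtual address within the given area at which the given page
// is (or would be) mapped, from the page's cache offset relative to the
// area's cache offset. The result is only meaningful if the page actually
// falls within the area (see is_page_in_area() below).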
438 static inline addr_t
439 virtual_page_address(VMArea* area, vm_page* page)
440 {
441 	return area->Base()
442 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
443 }
444 
445 
446 static inline bool
447 is_page_in_area(VMArea* area, vm_page* page)
448 {
449 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
450 	return pageCacheOffsetBytes >= area->cache_offset
451 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
452 }
453 
454 
455 //! You need to have the address space locked when calling this function
456 static VMArea*
457 lookup_area(VMAddressSpace* addressSpace, area_id id)
458 {
459 	VMAreas::ReadLock();
460 
461 	VMArea* area = VMAreas::LookupLocked(id);
462 	if (area != NULL && area->address_space != addressSpace)
463 		area = NULL;
464 
465 	VMAreas::ReadUnlock();
466 
467 	return area;
468 }
469 
470 
471 static inline size_t
472 area_page_protections_size(size_t areaSize)
473 {
474 	// In the page protections we store only the three user protections,
475 	// so we use 4 bits per page.
476 	return (areaSize / B_PAGE_SIZE + 1) / 2;
477 }
478 
479 
480 static status_t
481 allocate_area_page_protections(VMArea* area)
482 {
483 	size_t bytes = area_page_protections_size(area->Size());
484 	area->page_protections = (uint8*)malloc_etc(bytes,
485 		area->address_space == VMAddressSpace::Kernel()
486 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
487 	if (area->page_protections == NULL)
488 		return B_NO_MEMORY;
489 
490 	// init the page protections for all pages to that of the area
491 	uint32 areaProtection = area->protection
492 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
493 	memset(area->page_protections, areaProtection | (areaProtection << 4),
494 		bytes);
495 	return B_OK;
496 }
497 
498 
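// A page protection entry occupies 4 bits: pages with an even index within
// the area use the low nibble of their byte, pages with an odd index the
// high nibble. This is why area_page_protections_size() computes
// (pageCount + 1) / 2 bytes.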
499 static inline void
500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
501 {
502 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
503 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
504 	uint8& entry = area->page_protections[pageIndex / 2];
505 	if (pageIndex % 2 == 0)
506 		entry = (entry & 0xf0) | protection;
507 	else
508 		entry = (entry & 0x0f) | (protection << 4);
509 }
510 
511 
512 static inline uint32
513 get_area_page_protection(VMArea* area, addr_t pageAddress)
514 {
515 	if (area->page_protections == NULL)
516 		return area->protection;
517 
518 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
519 	uint32 protection = area->page_protections[pageIndex / 2];
520 	if (pageIndex % 2 == 0)
521 		protection &= 0x0f;
522 	else
523 		protection >>= 4;
524 
525 	uint32 kernelProtection = 0;
526 	if ((protection & B_READ_AREA) != 0)
527 		kernelProtection |= B_KERNEL_READ_AREA;
528 	if ((protection & B_WRITE_AREA) != 0)
529 		kernelProtection |= B_KERNEL_WRITE_AREA;
530 
531 	// If this is a kernel area we return only the kernel flags.
532 	if (area->address_space == VMAddressSpace::Kernel())
533 		return kernelProtection;
534 
535 	return protection | kernelProtection;
536 }
537 
538 
539 static inline uint8*
540 realloc_page_protections(uint8* pageProtections, size_t areaSize,
541 	uint32 allocationFlags)
542 {
543 	size_t bytes = area_page_protections_size(areaSize);
544 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
545 }
546 
547 
548 /*!	The caller must have reserved as many pages as the translation map
549 	implementation might need to map this page.
550 	The page's cache must be locked.
551 */
552 static status_t
553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
554 	vm_page_reservation* reservation)
555 {
556 	VMTranslationMap* map = area->address_space->TranslationMap();
557 
558 	bool wasMapped = page->IsMapped();
559 
560 	if (area->wiring == B_NO_LOCK) {
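		// For pageable (B_NO_LOCK) areas every mapping is tracked with a
		// vm_page_mapping object (allocated from gPageMappingsObjectCache and
		// linked into both the page's and the area's mappings list), so the
		// mapping can be found and removed again later. Wired areas (the else
		// branch below) skip this bookkeeping and only bump the page's wired
		// count instead.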
561 		DEBUG_PAGE_ACCESS_CHECK(page);
562 
563 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
564 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
565 			gPageMappingsObjectCache,
566 			CACHE_DONT_WAIT_FOR_MEMORY
567 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
568 		if (mapping == NULL)
569 			return B_NO_MEMORY;
570 
571 		mapping->page = page;
572 		mapping->area = area;
573 
574 		map->Lock();
575 
576 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
577 			area->MemoryType(), reservation);
578 
579 		// insert mapping into lists
580 		if (!page->IsMapped())
581 			atomic_add(&gMappedPagesCount, 1);
582 
583 		page->mappings.Add(mapping);
584 		area->mappings.Add(mapping);
585 
586 		map->Unlock();
587 	} else {
588 		DEBUG_PAGE_ACCESS_CHECK(page);
589 
590 		map->Lock();
591 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
592 			area->MemoryType(), reservation);
593 		map->Unlock();
594 
595 		increment_page_wired_count(page);
596 	}
597 
598 	if (!wasMapped) {
599 		// The page is mapped now, so it must not remain in the cached queue.
600 		// It also makes sense to move it from the inactive to the active queue,
601 		// since otherwise the page daemon wouldn't come to keep track of it (in
602 		// idle mode) -- if the page isn't touched, it will be deactivated after
603 		// a full iteration through the queue at the latest.
604 		if (page->State() == PAGE_STATE_CACHED
605 				|| page->State() == PAGE_STATE_INACTIVE) {
606 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
607 		}
608 	}
609 
610 	return B_OK;
611 }
612 
613 
614 /*!	The caller must hold the lock of the page's cache.
615 */
617 static inline bool
618 unmap_page(VMArea* area, addr_t virtualAddress)
619 {
620 	return area->address_space->TranslationMap()->UnmapPage(area,
621 		virtualAddress, true);
622 }
623 
624 
625 /*!	The caller must hold the locks of all mapped pages' caches.
626 */
628 static inline void
629 unmap_pages(VMArea* area, addr_t base, size_t size)
630 {
631 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
632 }
633 
634 
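/*!	Clips the given address range to the given area. On return \a address and
	\a size describe the intersection of the original range with the area, and
	\a offset is the start of that intersection relative to the area's base.
	Returns \c false if the range does not intersect the area at all.
*/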
635 static inline bool
636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
637 {
638 	if (address < area->Base()) {
639 		offset = area->Base() - address;
640 		if (offset >= size)
641 			return false;
642 
643 		address = area->Base();
644 		size -= offset;
645 		offset = 0;
646 		if (size > area->Size())
647 			size = area->Size();
648 
649 		return true;
650 	}
651 
652 	offset = address - area->Base();
653 	if (offset >= area->Size())
654 		return false;
655 
656 	if (size >= area->Size() - offset)
657 		size = area->Size() - offset;
658 
659 	return true;
660 }
661 
662 
663 /*!	Cuts a piece out of an area. If the given cut range covers the complete
664 	area, it is deleted. If it covers the beginning or the end, the area is
665 	resized accordingly. If the range covers some part in the middle of the
666 	area, it is split in two; in this case the second area is returned via
667 	\a _secondArea (the variable is left untouched in the other cases).
668 	The address space must be write locked.
669 	The caller must ensure that no part of the given range is wired.
670 */
671 static status_t
672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
673 	addr_t size, VMArea** _secondArea, bool kernel)
674 {
675 	addr_t offset;
676 	if (!intersect_area(area, address, size, offset))
677 		return B_OK;
678 
679 	// Is the area fully covered?
680 	if (address == area->Base() && size == area->Size()) {
681 		delete_area(addressSpace, area, false);
682 		return B_OK;
683 	}
684 
685 	int priority;
686 	uint32 allocationFlags;
687 	if (addressSpace == VMAddressSpace::Kernel()) {
688 		priority = VM_PRIORITY_SYSTEM;
689 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
690 			| HEAP_DONT_LOCK_KERNEL_SPACE;
691 	} else {
692 		priority = VM_PRIORITY_USER;
693 		allocationFlags = 0;
694 	}
695 
696 	VMCache* cache = vm_area_get_locked_cache(area);
697 	VMCacheChainLocker cacheChainLocker(cache);
698 	cacheChainLocker.LockAllSourceCaches();
699 
700 	// If no one else uses the area's cache and it's an anonymous cache, we can
701 	// resize or split it, too.
702 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
703 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
704 
705 	const addr_t oldSize = area->Size();
706 
707 	// Cut the end only?
708 	if (offset > 0 && size == area->Size() - offset) {
709 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
710 			allocationFlags);
711 		if (error != B_OK)
712 			return error;
713 
714 		if (area->page_protections != NULL) {
715 			uint8* newProtections = realloc_page_protections(
716 				area->page_protections, area->Size(), allocationFlags);
717 
718 			if (newProtections == NULL) {
719 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 				return B_NO_MEMORY;
721 			}
722 
723 			area->page_protections = newProtections;
724 		}
725 
726 		// unmap pages
727 		unmap_pages(area, address, size);
728 
729 		if (onlyCacheUser) {
730 			// Since VMCache::Resize() can temporarily drop the lock, we must
731 			// unlock all lower caches to prevent locking order inversion.
732 			cacheChainLocker.Unlock(cache);
733 			cache->Resize(cache->virtual_base + offset, priority);
734 			cache->ReleaseRefAndUnlock();
735 		}
736 
737 		return B_OK;
738 	}
739 
740 	// Cut the beginning only?
741 	if (area->Base() == address) {
742 		uint8* newProtections = NULL;
743 		if (area->page_protections != NULL) {
744 			// Allocate all memory before shifting as the shift might lose some
745 			// bits.
746 			newProtections = realloc_page_protections(NULL, area->Size(),
747 				allocationFlags);
748 
749 			if (newProtections == NULL)
750 				return B_NO_MEMORY;
751 		}
752 
753 		// resize the area
754 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
755 			allocationFlags);
756 		if (error != B_OK) {
757 			if (newProtections != NULL)
758 				free_etc(newProtections, allocationFlags);
759 			return error;
760 		}
761 
762 		if (area->page_protections != NULL) {
763 			size_t oldBytes = area_page_protections_size(oldSize);
764 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
765 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
766 
767 			size_t bytes = area_page_protections_size(area->Size());
768 			memcpy(newProtections, area->page_protections, bytes);
769 			free_etc(area->page_protections, allocationFlags);
770 			area->page_protections = newProtections;
771 		}
772 
773 		// unmap pages
774 		unmap_pages(area, address, size);
775 
776 		if (onlyCacheUser) {
777 			// Since VMCache::Rebase() can temporarily drop the lock, we must
778 			// unlock all lower caches to prevent locking order inversion.
779 			cacheChainLocker.Unlock(cache);
780 			cache->Rebase(cache->virtual_base + size, priority);
781 			cache->ReleaseRefAndUnlock();
782 		}
783 		area->cache_offset += size;
784 
785 		return B_OK;
786 	}
787 
788 	// The tough part -- cut a piece out of the middle of the area.
789 	// We do that by shrinking the area to the beginning section and creating
790 	// a new area for the end section.
791 	addr_t firstNewSize = offset;
792 	addr_t secondBase = address + size;
793 	addr_t secondSize = area->Size() - offset - size;
794 
795 	// unmap pages
796 	unmap_pages(area, address, area->Size() - firstNewSize);
797 
798 	// resize the area
799 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
800 		allocationFlags);
801 	if (error != B_OK)
802 		return error;
803 
804 	uint8* areaNewProtections = NULL;
805 	uint8* secondAreaNewProtections = NULL;
806 
807 	// Try to allocate the new memory before making some hard to reverse
808 	// changes.
809 	if (area->page_protections != NULL) {
810 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
811 			allocationFlags);
812 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
813 			allocationFlags);
814 
815 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
816 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
817 			free_etc(areaNewProtections, allocationFlags);
818 			free_etc(secondAreaNewProtections, allocationFlags);
819 			return B_NO_MEMORY;
820 		}
821 	}
822 
823 	virtual_address_restrictions addressRestrictions = {};
824 	addressRestrictions.address = (void*)secondBase;
825 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
826 	VMArea* secondArea;
827 
828 	if (onlyCacheUser) {
829 		// Create a new cache for the second area.
830 		VMCache* secondCache;
831 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
832 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
833 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
834 		if (error != B_OK) {
835 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
836 			free_etc(areaNewProtections, allocationFlags);
837 			free_etc(secondAreaNewProtections, allocationFlags);
838 			return error;
839 		}
840 
841 		secondCache->Lock();
842 		secondCache->temporary = cache->temporary;
843 		secondCache->virtual_base = area->cache_offset;
844 		secondCache->virtual_end = area->cache_offset + secondSize;
845 
846 		// Transfer the concerned pages from the first cache.
847 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
848 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
849 			area->cache_offset);
850 
851 		if (error == B_OK) {
852 			// Since VMCache::Resize() can temporarily drop the lock, we must
853 			// unlock all lower caches to prevent locking order inversion.
854 			cacheChainLocker.Unlock(cache);
855 			cache->Resize(cache->virtual_base + firstNewSize, priority);
856 			// Don't unlock the cache yet because we might have to resize it
857 			// back.
858 
859 			// Map the second area.
860 			error = map_backing_store(addressSpace, secondCache,
861 				area->cache_offset, area->name, secondSize, area->wiring,
862 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
863 				&addressRestrictions, kernel, &secondArea, NULL);
864 		}
865 
866 		if (error != B_OK) {
867 			// Restore the original cache.
868 			cache->Resize(cache->virtual_base + oldSize, priority);
869 
870 			// Move the pages back.
871 			status_t readoptStatus = cache->Adopt(secondCache,
872 				area->cache_offset, secondSize, adoptOffset);
873 			if (readoptStatus != B_OK) {
874 				// Some (swap) pages have not been moved back and will be lost
875 				// once the second cache is deleted.
876 				panic("failed to restore cache range: %s",
877 					strerror(readoptStatus));
878 
879 				// TODO: Handle out of memory cases by freeing memory and
880 				// retrying.
881 			}
882 
883 			cache->ReleaseRefAndUnlock();
884 			secondCache->ReleaseRefAndUnlock();
885 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
886 			free_etc(areaNewProtections, allocationFlags);
887 			free_etc(secondAreaNewProtections, allocationFlags);
888 			return error;
889 		}
890 
891 		// Now we can unlock it.
892 		cache->ReleaseRefAndUnlock();
893 		secondCache->Unlock();
894 	} else {
895 		error = map_backing_store(addressSpace, cache, area->cache_offset
896 			+ (secondBase - area->Base()),
897 			area->name, secondSize, area->wiring, area->protection,
898 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
899 			&addressRestrictions, kernel, &secondArea, NULL);
900 		if (error != B_OK) {
901 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
902 			free_etc(areaNewProtections, allocationFlags);
903 			free_etc(secondAreaNewProtections, allocationFlags);
904 			return error;
905 		}
906 		// We need a cache reference for the new area.
907 		cache->AcquireRefLocked();
908 	}
909 
910 	if (area->page_protections != NULL) {
911 		// Copy the protection bits of the first area.
912 		size_t areaBytes = area_page_protections_size(area->Size());
913 		memcpy(areaNewProtections, area->page_protections, areaBytes);
914 		uint8* areaOldProtections = area->page_protections;
915 		area->page_protections = areaNewProtections;
916 
917 		// Shift the protection bits of the second area to the start of
918 		// the old array.
919 		size_t oldBytes = area_page_protections_size(oldSize);
920 		addr_t secondAreaOffset = secondBase - area->Base();
921 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
922 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
923 
924 		// Copy the protection bits of the second area.
925 		size_t secondAreaBytes = area_page_protections_size(secondSize);
926 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
927 		secondArea->page_protections = secondAreaNewProtections;
928 
929 		// We don't need this anymore.
930 		free_etc(areaOldProtections, allocationFlags);
931 
932 		// Set the correct page protections for the second area.
933 		VMTranslationMap* map = addressSpace->TranslationMap();
934 		map->Lock();
935 		for (VMCachePagesTree::Iterator it
936 				= secondArea->cache->pages.GetIterator();
937 				vm_page* page = it.Next();) {
938 			if (is_page_in_area(secondArea, page)) {
939 				addr_t address = virtual_page_address(secondArea, page);
940 				uint32 pageProtection
941 					= get_area_page_protection(secondArea, address);
942 				map->ProtectPage(secondArea, address, pageProtection);
943 			}
944 		}
945 		map->Unlock();
946 	}
947 
948 	if (_secondArea != NULL)
949 		*_secondArea = secondArea;
950 
951 	return B_OK;
952 }
953 
954 
955 /*!	Deletes or cuts all areas in the given address range.
956 	The address space must be write-locked.
957 	The caller must ensure that no part of the given range is wired.
958 */
959 static status_t
960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
961 	bool kernel)
962 {
963 	size = PAGE_ALIGN(size);
964 
965 	// Check whether the caller is allowed to modify the concerned areas.
966 	if (!kernel) {
967 		for (VMAddressSpace::AreaRangeIterator it
968 				= addressSpace->GetAreaRangeIterator(address, size);
969 			VMArea* area = it.Next();) {
970 
971 			if ((area->protection & B_KERNEL_AREA) != 0) {
972 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
973 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
974 					team_get_current_team_id(), area->id, area->name);
975 				return B_NOT_ALLOWED;
976 			}
977 		}
978 	}
979 
980 	for (VMAddressSpace::AreaRangeIterator it
981 			= addressSpace->GetAreaRangeIterator(address, size);
982 		VMArea* area = it.Next();) {
983 
984 		status_t error = cut_area(addressSpace, area, address, size, NULL,
985 			kernel);
986 		if (error != B_OK)
987 			return error;
988 			// Failing after already messing with areas is ugly, but we
989 			// can't do anything about it.
990 	}
991 
992 	return B_OK;
993 }
994 
995 
996 static status_t
997 discard_area_range(VMArea* area, addr_t address, addr_t size)
998 {
999 	addr_t offset;
1000 	if (!intersect_area(area, address, size, offset))
1001 		return B_OK;
1002 
1003 	// If someone else uses the area's cache or it's not an anonymous cache, we
1004 	// can't discard.
1005 	VMCache* cache = vm_area_get_locked_cache(area);
1006 	if (cache->areas != area || area->cache_next != NULL
1007 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1008 		return B_OK;
1009 	}
1010 
1011 	VMCacheChainLocker cacheChainLocker(cache);
1012 	cacheChainLocker.LockAllSourceCaches();
1013 
1014 	unmap_pages(area, address, size);
1015 
1016 	// Since VMCache::Discard() can temporarily drop the lock, we must
1017 	// unlock all lower caches to prevent locking order inversion.
1018 	cacheChainLocker.Unlock(cache);
1019 	cache->Discard(cache->virtual_base + offset, size);
1020 	cache->ReleaseRefAndUnlock();
1021 
1022 	return B_OK;
1023 }
1024 
1025 
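/*!	Applies discard_area_range() to every area intersecting with the given
	address range. The address space must be locked by the caller.
*/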
1026 static status_t
1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1028 	bool kernel)
1029 {
1030 	for (VMAddressSpace::AreaRangeIterator it
1031 		= addressSpace->GetAreaRangeIterator(address, size);
1032 			VMArea* area = it.Next();) {
1033 		status_t error = discard_area_range(area, address, size);
1034 		if (error != B_OK)
1035 			return error;
1036 	}
1037 
1038 	return B_OK;
1039 }
1040 
1041 
1042 /*! You need to hold the lock of the cache and the write lock of the address
1043 	space when calling this function.
1044 	Note that in case of error the cache will be temporarily unlocked.
1045 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1046 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1047 	that no part of the specified address range (base \c *_virtualAddress, size
1048 	\a size) is wired. The cache will also be temporarily unlocked.
1049 */
1050 static status_t
1051 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1052 	const char* areaName, addr_t size, int wiring, int protection,
1053 	int protectionMax, int mapping,
1054 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1055 	bool kernel, VMArea** _area, void** _virtualAddress)
1056 {
1057 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1058 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1059 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1060 		addressSpace, cache, addressRestrictions->address, offset, size,
1061 		addressRestrictions->address_specification, wiring, protection,
1062 		protectionMax, _area, areaName));
1063 	cache->AssertLocked();
1064 
1065 	if (size == 0) {
1066 #if KDEBUG
1067 		panic("map_backing_store(): called with size=0 for area '%s'!",
1068 			areaName);
1069 #endif
1070 		return B_BAD_VALUE;
1071 	}
1072 	if (offset < 0)
1073 		return B_BAD_VALUE;
1074 
1075 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1076 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1077 	int priority;
1078 	if (addressSpace != VMAddressSpace::Kernel()) {
1079 		priority = VM_PRIORITY_USER;
1080 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1081 		priority = VM_PRIORITY_VIP;
1082 		allocationFlags |= HEAP_PRIORITY_VIP;
1083 	} else
1084 		priority = VM_PRIORITY_SYSTEM;
1085 
1086 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1087 		allocationFlags);
1088 	if (area == NULL)
1089 		return B_NO_MEMORY;
1090 	if (mapping != REGION_PRIVATE_MAP)
1091 		area->protection_max = protectionMax & B_USER_PROTECTION;
1092 
1093 	status_t status;
1094 
1095 	// if this is a private map, we need to create a new cache
1096 	// to handle the private copies of pages as they are written to
1097 	VMCache* sourceCache = cache;
1098 	if (mapping == REGION_PRIVATE_MAP) {
1099 		VMCache* newCache;
1100 
1101 		// create an anonymous cache
1102 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1103 			(protection & B_STACK_AREA) != 0
1104 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1105 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1106 		if (status != B_OK)
1107 			goto err1;
1108 
1109 		newCache->Lock();
1110 		newCache->temporary = 1;
1111 		newCache->virtual_base = offset;
1112 		newCache->virtual_end = offset + size;
1113 
1114 		cache->AddConsumer(newCache);
1115 
1116 		cache = newCache;
1117 	}
1118 
1119 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1120 		status = cache->SetMinimalCommitment(size, priority);
1121 		if (status != B_OK)
1122 			goto err2;
1123 	}
1124 
1125 	// check to see if this address space has entered DELETE state
1126 	if (addressSpace->IsBeingDeleted()) {
1127 		// okay, someone is trying to delete this address space now, so we
1128 		// can't insert the area and have to back out
1129 		status = B_BAD_TEAM_ID;
1130 		goto err2;
1131 	}
1132 
1133 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1134 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1135 		// temporarily unlock the current cache since it might be mapped to
1136 		// some existing area, and unmap_address_range also needs to lock that
1137 		// cache to delete the area.
1138 		cache->Unlock();
1139 		status = unmap_address_range(addressSpace,
1140 			(addr_t)addressRestrictions->address, size, kernel);
1141 		cache->Lock();
1142 		if (status != B_OK)
1143 			goto err2;
1144 	}
1145 
1146 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1147 		allocationFlags, _virtualAddress);
1148 	if (status == B_NO_MEMORY
1149 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1150 		// Due to how many locks are held, we cannot wait here for space to be
1151 		// freed up, but we can at least notify the low_resource handler.
1152 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1153 	}
1154 	if (status != B_OK)
1155 		goto err2;
1156 
1157 	// attach the cache to the area
1158 	area->cache = cache;
1159 	area->cache_offset = offset;
1160 
1161 	// point the cache back to the area
1162 	cache->InsertAreaLocked(area);
1163 	if (mapping == REGION_PRIVATE_MAP)
1164 		cache->Unlock();
1165 
1166 	// insert the area in the global areas map
1167 	VMAreas::Insert(area);
1168 
1169 	// grab a ref to the address space (the area holds this)
1170 	addressSpace->Get();
1171 
1172 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1173 //		cache, sourceCache, areaName, area);
1174 
1175 	*_area = area;
1176 	return B_OK;
1177 
1178 err2:
1179 	if (mapping == REGION_PRIVATE_MAP) {
1180 		// We created this cache, so we must delete it again. Note, that we
1181 		// need to temporarily unlock the source cache or we'll otherwise
1182 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1183 		sourceCache->Unlock();
1184 		cache->ReleaseRefAndUnlock();
1185 		sourceCache->Lock();
1186 	}
1187 err1:
1188 	addressSpace->DeleteArea(area, allocationFlags);
1189 	return status;
1190 }
1191 
1192 
1193 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1194 	  locker1, locker2).
1195 */
1196 template<typename LockerType1, typename LockerType2>
1197 static inline bool
1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1199 {
1200 	area->cache->AssertLocked();
1201 
1202 	VMAreaUnwiredWaiter waiter;
1203 	if (!area->AddWaiterIfWired(&waiter))
1204 		return false;
1205 
1206 	// unlock everything and wait
1207 	if (locker1 != NULL)
1208 		locker1->Unlock();
1209 	if (locker2 != NULL)
1210 		locker2->Unlock();
1211 
1212 	waiter.waitEntry.Wait();
1213 
1214 	return true;
1215 }
1216 
1217 
1218 /*!	Checks whether the given area has any wired ranges intersecting with the
1219 	specified range and waits, if so.
1220 
1221 	When it has to wait, the function calls \c Unlock() on both \a locker1
1222 	and \a locker2, if given.
1223 	The area's top cache must be locked and must be unlocked as a side effect
1224 	of calling \c Unlock() on either \a locker1 or \a locker2.
1225 
1226 	If the function does not have to wait it does not modify or unlock any
1227 	object.
1228 
1229 	\param area The area to be checked.
1230 	\param base The base address of the range to check.
1231 	\param size The size of the address range to check.
1232 	\param locker1 An object to be unlocked before starting to wait (may
1233 		be \c NULL).
1234 	\param locker2 An object to be unlocked before starting to wait (may
1235 		be \c NULL).
1236 	\return \c true, if the function had to wait, \c false otherwise.
1237 */
1238 template<typename LockerType1, typename LockerType2>
1239 static inline bool
1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1241 	LockerType1* locker1, LockerType2* locker2)
1242 {
1243 	area->cache->AssertLocked();
1244 
1245 	VMAreaUnwiredWaiter waiter;
1246 	if (!area->AddWaiterIfWired(&waiter, base, size))
1247 		return false;
1248 
1249 	// unlock everything and wait
1250 	if (locker1 != NULL)
1251 		locker1->Unlock();
1252 	if (locker2 != NULL)
1253 		locker2->Unlock();
1254 
1255 	waiter.waitEntry.Wait();
1256 
1257 	return true;
1258 }
1259 
1260 
1261 /*!	Checks whether the given address space has any wired ranges intersecting
1262 	with the specified range and waits, if so.
1263 
1264 	Similar to wait_if_area_range_is_wired(), with the following differences:
1265 	- All areas intersecting with the range are checked (respectively all until
1266 	  one is found that contains a wired range intersecting with the given
1267 	  range).
1268 	- The given address space must at least be read-locked and must be unlocked
1269 	  when \c Unlock() is called on \a locker.
1270 	- None of the areas' caches are allowed to be locked.
1271 */
1272 template<typename LockerType>
1273 static inline bool
1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1275 	size_t size, LockerType* locker)
1276 {
1277 	for (VMAddressSpace::AreaRangeIterator it
1278 		= addressSpace->GetAreaRangeIterator(base, size);
1279 			VMArea* area = it.Next();) {
1280 
1281 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1282 
1283 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1284 			return true;
1285 	}
1286 
1287 	return false;
1288 }
1289 
1290 
1291 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1292 	It must be called in a situation where the kernel address space may be
1293 	locked.
1294 */
1295 status_t
1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1297 {
1298 	AddressSpaceReadLocker locker;
1299 	VMArea* area;
1300 	status_t status = locker.SetFromArea(id, area);
1301 	if (status != B_OK)
1302 		return status;
1303 
1304 	if (area->page_protections == NULL) {
1305 		status = allocate_area_page_protections(area);
1306 		if (status != B_OK)
1307 			return status;
1308 	}
1309 
1310 	*cookie = (void*)area;
1311 	return B_OK;
1312 }
1313 
1314 
1315 /*!	This is a debug helper function that is only intended for very specific
1316 	use cases.
1317 	Sets protection for the given address range to the protection specified.
1318 	If \a protection is 0 then the involved pages will be marked non-present
1319 	in the translation map to cause a fault on access. The pages aren't
1320 	actually unmapped however so that they can be marked present again with
1321 	additional calls to this function. For this to work the area must be
1322 	fully locked in memory so that the pages aren't otherwise touched.
1323 	This function does not lock the kernel address space and needs to be
1324 	supplied with a \a cookie retrieved from a successful call to
1325 	vm_prepare_kernel_area_debug_protection().
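
	A minimal usage sketch (illustrative only -- the area id, address and size
	are hypothetical):
		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
			// make the page fault on any access
			vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
			// ...
			// and later make it fully accessible again
			vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}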
1326 */
1327 status_t
1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1329 	uint32 protection)
1330 {
1331 	// check address range
1332 	addr_t address = (addr_t)_address;
1333 	size = PAGE_ALIGN(size);
1334 
1335 	if ((address % B_PAGE_SIZE) != 0
1336 		|| (addr_t)address + size < (addr_t)address
1337 		|| !IS_KERNEL_ADDRESS(address)
1338 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1339 		return B_BAD_VALUE;
1340 	}
1341 
1342 	// Translate the kernel protection to user protection as we only store that.
1343 	if ((protection & B_KERNEL_READ_AREA) != 0)
1344 		protection |= B_READ_AREA;
1345 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1346 		protection |= B_WRITE_AREA;
1347 
1348 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1349 	VMTranslationMap* map = addressSpace->TranslationMap();
1350 	VMArea* area = (VMArea*)cookie;
1351 
1352 	addr_t offset = address - area->Base();
1353 	if (area->Size() - offset < size) {
1354 		panic("protect range not fully within supplied area");
1355 		return B_BAD_VALUE;
1356 	}
1357 
1358 	if (area->page_protections == NULL) {
1359 		panic("area has no page protections");
1360 		return B_BAD_VALUE;
1361 	}
1362 
1363 	// Invalidate the mapping entries so any access to them will fault or
1364 	// restore the mapping entries unchanged so that lookup will succeed again.
1365 	map->Lock();
1366 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1367 	map->Unlock();
1368 
1369 	// And set the proper page protections so that the fault case will actually
1370 	// fail and not simply try to map a new page.
1371 	for (addr_t pageAddress = address; pageAddress < address + size;
1372 			pageAddress += B_PAGE_SIZE) {
1373 		set_area_page_protection(area, pageAddress, protection);
1374 	}
1375 
1376 	return B_OK;
1377 }
1378 
1379 
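/*!	Creates a kernel area with no access protections covering the given
	address range, backed by an empty anonymous cache, so that any access to
	the range faults. Returns the new area's id on success.
*/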
1380 status_t
1381 vm_block_address_range(const char* name, void* address, addr_t size)
1382 {
1383 	if (!arch_vm_supports_protection(0))
1384 		return B_NOT_SUPPORTED;
1385 
1386 	AddressSpaceWriteLocker locker;
1387 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1388 	if (status != B_OK)
1389 		return status;
1390 
1391 	VMAddressSpace* addressSpace = locker.AddressSpace();
1392 
1393 	// create an anonymous cache
1394 	VMCache* cache;
1395 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1396 		VM_PRIORITY_SYSTEM);
1397 	if (status != B_OK)
1398 		return status;
1399 
1400 	cache->temporary = 1;
1401 	cache->virtual_end = size;
1402 	cache->Lock();
1403 
1404 	VMArea* area;
1405 	virtual_address_restrictions addressRestrictions = {};
1406 	addressRestrictions.address = address;
1407 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1408 	status = map_backing_store(addressSpace, cache, 0, name, size,
1409 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1410 		true, &area, NULL);
1411 	if (status != B_OK) {
1412 		cache->ReleaseRefAndUnlock();
1413 		return status;
1414 	}
1415 
1416 	cache->Unlock();
1417 	area->cache_type = CACHE_TYPE_RAM;
1418 	return area->id;
1419 }
1420 
1421 
1422 status_t
1423 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1424 {
1425 	AddressSpaceWriteLocker locker(team);
1426 	if (!locker.IsLocked())
1427 		return B_BAD_TEAM_ID;
1428 
1429 	VMAddressSpace* addressSpace = locker.AddressSpace();
1430 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1431 		addressSpace == VMAddressSpace::Kernel()
1432 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1433 }
1434 
1435 
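/*!	Reserves a range of addresses in the given team's address space, so that
	it will not be picked automatically for new areas until it is unreserved
	again (see vm_unreserve_address_range()). On success the base of the
	reserved range is returned via \a _address.
*/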
1436 status_t
1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1438 	addr_t size, uint32 flags)
1439 {
1440 	if (size == 0)
1441 		return B_BAD_VALUE;
1442 
1443 	AddressSpaceWriteLocker locker(team);
1444 	if (!locker.IsLocked())
1445 		return B_BAD_TEAM_ID;
1446 
1447 	virtual_address_restrictions addressRestrictions = {};
1448 	addressRestrictions.address = *_address;
1449 	addressRestrictions.address_specification = addressSpec;
1450 	VMAddressSpace* addressSpace = locker.AddressSpace();
1451 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1452 		addressSpace == VMAddressSpace::Kernel()
1453 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1454 		_address);
1455 }
1456 
1457 
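/*!	Creates an area backed by an anonymous (RAM) cache. Depending on \a wiring
	the pages are mapped lazily on fault (B_NO_LOCK, B_LAZY_LOCK), allocated
	and mapped upfront (B_FULL_LOCK, B_CONTIGUOUS), or adopted from mappings
	that already exist (B_ALREADY_WIRED, only valid during kernel startup).
	Returns the id of the new area on success, or an error code otherwise.
*/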
1458 area_id
1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1460 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1461 	const virtual_address_restrictions* virtualAddressRestrictions,
1462 	const physical_address_restrictions* physicalAddressRestrictions,
1463 	bool kernel, void** _address)
1464 {
1465 	VMArea* area;
1466 	VMCache* cache;
1467 	vm_page* page = NULL;
1468 	bool isStack = (protection & B_STACK_AREA) != 0;
1469 	page_num_t guardPages;
1470 	bool canOvercommit = false;
1471 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1472 		? VM_PAGE_ALLOC_CLEAR : 0;
1473 
1474 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1475 		team, name, size));
1476 
1477 	size = PAGE_ALIGN(size);
1478 	guardSize = PAGE_ALIGN(guardSize);
1479 	guardPages = guardSize / B_PAGE_SIZE;
1480 
1481 	if (size == 0 || size < guardSize)
1482 		return B_BAD_VALUE;
1483 	if (!arch_vm_supports_protection(protection))
1484 		return B_NOT_SUPPORTED;
1485 
1486 	if (team == B_CURRENT_TEAM)
1487 		team = VMAddressSpace::CurrentID();
1488 	if (team < 0)
1489 		return B_BAD_TEAM_ID;
1490 
1491 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1492 		canOvercommit = true;
1493 
1494 #ifdef DEBUG_KERNEL_STACKS
1495 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1496 		isStack = true;
1497 #endif
1498 
1499 	// check parameters
1500 	switch (virtualAddressRestrictions->address_specification) {
1501 		case B_ANY_ADDRESS:
1502 		case B_EXACT_ADDRESS:
1503 		case B_BASE_ADDRESS:
1504 		case B_ANY_KERNEL_ADDRESS:
1505 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1506 		case B_RANDOMIZED_ANY_ADDRESS:
1507 		case B_RANDOMIZED_BASE_ADDRESS:
1508 			break;
1509 
1510 		default:
1511 			return B_BAD_VALUE;
1512 	}
1513 
1514 	// If low or high physical address restrictions are given, we force
1515 	// B_CONTIGUOUS wiring, since only then we'll use
1516 	// vm_page_allocate_page_run() which deals with those restrictions.
1517 	if (physicalAddressRestrictions->low_address != 0
1518 		|| physicalAddressRestrictions->high_address != 0) {
1519 		wiring = B_CONTIGUOUS;
1520 	}
1521 
1522 	physical_address_restrictions stackPhysicalRestrictions;
1523 	bool doReserveMemory = false;
1524 	switch (wiring) {
1525 		case B_NO_LOCK:
1526 			break;
1527 		case B_FULL_LOCK:
1528 		case B_LAZY_LOCK:
1529 		case B_CONTIGUOUS:
1530 			doReserveMemory = true;
1531 			break;
1532 		case B_ALREADY_WIRED:
1533 			break;
1534 		case B_LOMEM:
1535 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1536 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1537 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1538 			wiring = B_CONTIGUOUS;
1539 			doReserveMemory = true;
1540 			break;
1541 		case B_32_BIT_FULL_LOCK:
1542 			if (B_HAIKU_PHYSICAL_BITS <= 32
1543 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1544 				wiring = B_FULL_LOCK;
1545 				doReserveMemory = true;
1546 				break;
1547 			}
1548 			// TODO: We don't really support this mode efficiently. Just fall
1549 			// through for now ...
1550 		case B_32_BIT_CONTIGUOUS:
1551 			#if B_HAIKU_PHYSICAL_BITS > 32
1552 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1553 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1554 					stackPhysicalRestrictions.high_address
1555 						= (phys_addr_t)1 << 32;
1556 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1557 				}
1558 			#endif
1559 			wiring = B_CONTIGUOUS;
1560 			doReserveMemory = true;
1561 			break;
1562 		default:
1563 			return B_BAD_VALUE;
1564 	}
1565 
1566 	// Optimization: For a single-page contiguous allocation without low/high
1567 	// memory restriction B_FULL_LOCK wiring suffices.
1568 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1569 		&& physicalAddressRestrictions->low_address == 0
1570 		&& physicalAddressRestrictions->high_address == 0) {
1571 		wiring = B_FULL_LOCK;
1572 	}
1573 
1574 	// For full lock or contiguous areas we're also going to map the pages and
1575 	// thus need to reserve pages for the mapping backend upfront.
1576 	addr_t reservedMapPages = 0;
1577 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1578 		AddressSpaceWriteLocker locker;
1579 		status_t status = locker.SetTo(team);
1580 		if (status != B_OK)
1581 			return status;
1582 
1583 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1584 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1585 	}
1586 
1587 	int priority;
1588 	if (team != VMAddressSpace::KernelID())
1589 		priority = VM_PRIORITY_USER;
1590 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1591 		priority = VM_PRIORITY_VIP;
1592 	else
1593 		priority = VM_PRIORITY_SYSTEM;
1594 
1595 	// Reserve memory before acquiring the address space lock. This reduces the
1596 	// chances of failure, since while holding the write lock to the address
1597 	// space (if it is the kernel address space that is), the low memory handler
1598 	// won't be able to free anything for us.
1599 	addr_t reservedMemory = 0;
1600 	if (doReserveMemory) {
1601 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1602 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1603 			return B_NO_MEMORY;
1604 		reservedMemory = size;
1605 		// TODO: We don't reserve the memory for the pages for the page
1606 		// directories/tables. We actually need to do so, since we currently
1607 		// don't reclaim them (and probably can't reclaim all of them anyway).
1608 		// Thus there are actually fewer physical pages than there should be,
1609 		// which can get the VM into trouble in low memory situations.
1610 	}
1611 
1612 	AddressSpaceWriteLocker locker;
1613 	VMAddressSpace* addressSpace;
1614 	status_t status;
1615 
1616 	// For full lock areas reserve the pages before locking the address
1617 	// space. E.g. block caches can't release their memory while we hold the
1618 	// address space lock.
1619 	page_num_t reservedPages = reservedMapPages;
1620 	if (wiring == B_FULL_LOCK)
1621 		reservedPages += size / B_PAGE_SIZE;
1622 
1623 	vm_page_reservation reservation;
1624 	if (reservedPages > 0) {
1625 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1626 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1627 					priority)) {
1628 				reservedPages = 0;
1629 				status = B_WOULD_BLOCK;
1630 				goto err0;
1631 			}
1632 		} else
1633 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1634 	}
1635 
1636 	if (wiring == B_CONTIGUOUS) {
1637 		// we try to allocate the page run here upfront as this may easily
1638 		// fail for obvious reasons
1639 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1640 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1641 		if (page == NULL) {
1642 			status = B_NO_MEMORY;
1643 			goto err0;
1644 		}
1645 	}
1646 
1647 	// Lock the address space and, if B_EXACT_ADDRESS and
1648 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1649 	// is not wired.
1650 	do {
1651 		status = locker.SetTo(team);
1652 		if (status != B_OK)
1653 			goto err1;
1654 
1655 		addressSpace = locker.AddressSpace();
1656 	} while (virtualAddressRestrictions->address_specification
1657 			== B_EXACT_ADDRESS
1658 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1659 		&& wait_if_address_range_is_wired(addressSpace,
1660 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1661 
1662 	// create an anonymous cache
1663 	// if it's a stack, make sure that at least two pages are available
1664 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1665 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1666 		wiring == B_NO_LOCK, priority);
1667 	if (status != B_OK)
1668 		goto err1;
1669 
1670 	cache->temporary = 1;
1671 	cache->virtual_end = size;
1672 	cache->committed_size = reservedMemory;
1673 		// TODO: This should be done via a method.
1674 	reservedMemory = 0;
1675 
1676 	cache->Lock();
1677 
1678 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1679 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1680 		virtualAddressRestrictions, kernel, &area, _address);
1681 
1682 	if (status != B_OK) {
1683 		cache->ReleaseRefAndUnlock();
1684 		goto err1;
1685 	}
1686 
1687 	locker.DegradeToReadLock();
1688 
1689 	switch (wiring) {
1690 		case B_NO_LOCK:
1691 		case B_LAZY_LOCK:
1692 			// do nothing - the pages are mapped in as needed
1693 			break;
1694 
1695 		case B_FULL_LOCK:
1696 		{
1697 			// Allocate and map all pages for this area
1698 
1699 			off_t offset = 0;
1700 			for (addr_t address = area->Base();
1701 					address < area->Base() + (area->Size() - 1);
1702 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1703 #ifdef DEBUG_KERNEL_STACKS
1704 #	ifdef STACK_GROWS_DOWNWARDS
1705 				if (isStack && address < area->Base()
1706 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1707 #	else
1708 				if (isStack && address >= area->Base() + area->Size()
1709 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1710 #	endif
1711 					continue;
1712 #endif
1713 				vm_page* page = vm_page_allocate_page(&reservation,
1714 					PAGE_STATE_WIRED | pageAllocFlags);
1715 				cache->InsertPage(page, offset);
1716 				map_page(area, page, address, protection, &reservation);
1717 
1718 				DEBUG_PAGE_ACCESS_END(page);
1719 			}
1720 
1721 			break;
1722 		}
1723 
1724 		case B_ALREADY_WIRED:
1725 		{
1726 			// The pages should already be mapped. This is only really useful
1727 			// during boot time. Find the appropriate vm_page objects and stick
1728 			// them in the cache object.
1729 			VMTranslationMap* map = addressSpace->TranslationMap();
1730 			off_t offset = 0;
1731 
1732 			if (!gKernelStartup)
1733 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1734 
1735 			map->Lock();
1736 
1737 			for (addr_t virtualAddress = area->Base();
1738 					virtualAddress < area->Base() + (area->Size() - 1);
1739 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1740 				phys_addr_t physicalAddress;
1741 				uint32 flags;
1742 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1743 				if (status < B_OK) {
1744 					panic("looking up mapping failed for va 0x%lx\n",
1745 						virtualAddress);
1746 				}
1747 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1748 				if (page == NULL) {
1749 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1750 						"\n", physicalAddress);
1751 				}
1752 
1753 				DEBUG_PAGE_ACCESS_START(page);
1754 
1755 				cache->InsertPage(page, offset);
1756 				increment_page_wired_count(page);
1757 				vm_page_set_state(page, PAGE_STATE_WIRED);
1758 				page->busy = false;
1759 
1760 				DEBUG_PAGE_ACCESS_END(page);
1761 			}
1762 
1763 			map->Unlock();
1764 			break;
1765 		}
1766 
1767 		case B_CONTIGUOUS:
1768 		{
1769 			// We have already allocated our contiguous page run, so we can now
1770 			// just map them in the address space
1771 			VMTranslationMap* map = addressSpace->TranslationMap();
1772 			phys_addr_t physicalAddress
1773 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1774 			addr_t virtualAddress = area->Base();
1775 			off_t offset = 0;
1776 
1777 			map->Lock();
1778 
1779 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1780 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1781 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1782 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1783 				if (page == NULL)
1784 					panic("couldn't lookup physical page just allocated\n");
1785 
1786 				status = map->Map(virtualAddress, physicalAddress, protection,
1787 					area->MemoryType(), &reservation);
1788 				if (status < B_OK)
1789 					panic("couldn't map physical page in page run\n");
1790 
1791 				cache->InsertPage(page, offset);
1792 				increment_page_wired_count(page);
1793 
1794 				DEBUG_PAGE_ACCESS_END(page);
1795 			}
1796 
1797 			map->Unlock();
1798 			break;
1799 		}
1800 
1801 		default:
1802 			break;
1803 	}
1804 
1805 	cache->Unlock();
1806 
1807 	if (reservedPages > 0)
1808 		vm_page_unreserve_pages(&reservation);
1809 
1810 	TRACE(("vm_create_anonymous_area: done\n"));
1811 
1812 	area->cache_type = CACHE_TYPE_RAM;
1813 	return area->id;
1814 
1815 err1:
1816 	if (wiring == B_CONTIGUOUS) {
1817 		// we had reserved the area space upfront...
1818 		phys_addr_t pageNumber = page->physical_page_number;
1819 		int32 i;
1820 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1821 			page = vm_lookup_page(pageNumber);
1822 			if (page == NULL)
1823 				panic("couldn't lookup physical page just allocated\n");
1824 
1825 			vm_page_set_state(page, PAGE_STATE_FREE);
1826 		}
1827 	}
1828 
1829 err0:
1830 	if (reservedPages > 0)
1831 		vm_page_unreserve_pages(&reservation);
1832 	if (reservedMemory > 0)
1833 		vm_unreserve_memory(reservedMemory);
1834 
1835 	return status;
1836 }
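
/*
	Illustrative sketch (added for exposition; not part of the original
	source): how a kernel component might call vm_create_anonymous_area()
	above to obtain a wired, physically contiguous buffer, e.g. for DMA.
	The physical_address_restrictions field names used here (low_address,
	high_address) are assumptions; the call shape mirrors the one in
	_vm_map_file() below.

	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;

	physical_address_restrictions physicalRestrictions = {};
	physicalRestrictions.low_address = 0;
	physicalRestrictions.high_address = (phys_addr_t)1 << 32;
		// e.g. keep the buffer below 4 GiB for a 32-bit DMA engine

	void* address = NULL;
	area_id bufferArea = vm_create_anonymous_area(VMAddressSpace::KernelID(),
		"dma buffer", 64 * B_PAGE_SIZE, B_CONTIGUOUS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
		&physicalRestrictions, true, &address);
	if (bufferArea < 0)
		return bufferArea;
			// reservation, allocation, or mapping failed
*/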
1837 
1838 
1839 area_id
1840 vm_map_physical_memory(team_id team, const char* name, void** _address,
1841 	uint32 addressSpec, addr_t size, uint32 protection,
1842 	phys_addr_t physicalAddress, bool alreadyWired)
1843 {
1844 	VMArea* area;
1845 	VMCache* cache;
1846 	addr_t mapOffset;
1847 
1848 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1849 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1850 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1851 		addressSpec, size, protection, physicalAddress));
1852 
1853 	if (!arch_vm_supports_protection(protection))
1854 		return B_NOT_SUPPORTED;
1855 
1856 	AddressSpaceWriteLocker locker(team);
1857 	if (!locker.IsLocked())
1858 		return B_BAD_TEAM_ID;
1859 
1860 	// if the physical address is not page aligned,
1861 	// move the actual area down to align it on a page boundary
1862 	mapOffset = physicalAddress % B_PAGE_SIZE;
1863 	size += mapOffset;
1864 	physicalAddress -= mapOffset;
1865 
1866 	size = PAGE_ALIGN(size);
1867 
1868 	// create a device cache
1869 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1870 	if (status != B_OK)
1871 		return status;
1872 
1873 	cache->virtual_end = size;
1874 
1875 	cache->Lock();
1876 
1877 	virtual_address_restrictions addressRestrictions = {};
1878 	addressRestrictions.address = *_address;
1879 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1880 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1881 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1882 		true, &area, _address);
1883 
1884 	if (status < B_OK)
1885 		cache->ReleaseRefLocked();
1886 
1887 	cache->Unlock();
1888 
1889 	if (status == B_OK) {
1890 		// set requested memory type -- use uncached, if not given
1891 		uint32 memoryType = addressSpec & B_MTR_MASK;
1892 		if (memoryType == 0)
1893 			memoryType = B_MTR_UC;
1894 
1895 		area->SetMemoryType(memoryType);
1896 
1897 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1898 		if (status != B_OK)
1899 			delete_area(locker.AddressSpace(), area, false);
1900 	}
1901 
1902 	if (status != B_OK)
1903 		return status;
1904 
1905 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1906 
1907 	if (alreadyWired) {
1908 		// The area is already mapped, but possibly not with the right
1909 		// memory type.
1910 		map->Lock();
1911 		map->ProtectArea(area, area->protection);
1912 		map->Unlock();
1913 	} else {
1914 		// Map the area completely.
1915 
1916 		// reserve pages needed for the mapping
1917 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1918 			area->Base() + (size - 1));
1919 		vm_page_reservation reservation;
1920 		vm_page_reserve_pages(&reservation, reservePages,
1921 			team == VMAddressSpace::KernelID()
1922 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1923 
1924 		map->Lock();
1925 
1926 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1927 			map->Map(area->Base() + offset, physicalAddress + offset,
1928 				protection, area->MemoryType(), &reservation);
1929 		}
1930 
1931 		map->Unlock();
1932 
1933 		vm_page_unreserve_pages(&reservation);
1934 	}
1935 
1936 	// modify the returned pointer so that it is offset into the new area
1937 	// by the same amount the incoming physical address was offset
1938 	*_address = (void*)((addr_t)*_address + mapOffset);
1939 
1940 	area->cache_type = CACHE_TYPE_DEVICE;
1941 	return area->id;
1942 }
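
/*
	Illustrative sketch (added for exposition; not part of the original
	source): mapping a device's register page into the kernel address space
	with vm_map_physical_memory() above. The physical address is a
	placeholder. Since no B_MTR_* flag is passed in the address
	specification, the mapping defaults to uncached (B_MTR_UC).

	void* registers = NULL;
	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
		"device registers", &registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfebf0000, false);
	if (area < 0)
		return area;

	Because the physical address need not be page aligned, the returned
	"registers" pointer is adjusted by the same in-page offset, so it points
	at the requested physical address rather than at the page start.
*/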
1943 
1944 
1945 /*!	Don't use!
1946 	TODO: This function was introduced to map physical page vecs to
1947 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1948 	use a device cache and does not track vm_page::wired_count!
1949 */
1950 area_id
1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1952 	uint32 addressSpec, addr_t* _size, uint32 protection,
1953 	struct generic_io_vec* vecs, uint32 vecCount)
1954 {
1955 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1956 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1957 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1958 		addressSpec, _size, protection, vecs, vecCount));
1959 
1960 	if (!arch_vm_supports_protection(protection)
1961 		|| (addressSpec & B_MTR_MASK) != 0) {
1962 		return B_NOT_SUPPORTED;
1963 	}
1964 
1965 	AddressSpaceWriteLocker locker(team);
1966 	if (!locker.IsLocked())
1967 		return B_BAD_TEAM_ID;
1968 
1969 	if (vecCount == 0)
1970 		return B_BAD_VALUE;
1971 
1972 	addr_t size = 0;
1973 	for (uint32 i = 0; i < vecCount; i++) {
1974 		if (vecs[i].base % B_PAGE_SIZE != 0
1975 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1976 			return B_BAD_VALUE;
1977 		}
1978 
1979 		size += vecs[i].length;
1980 	}
1981 
1982 	// create a device cache
1983 	VMCache* cache;
1984 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1985 	if (result != B_OK)
1986 		return result;
1987 
1988 	cache->virtual_end = size;
1989 
1990 	cache->Lock();
1991 
1992 	VMArea* area;
1993 	virtual_address_restrictions addressRestrictions = {};
1994 	addressRestrictions.address = *_address;
1995 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1996 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1997 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1998 		&addressRestrictions, true, &area, _address);
1999 
2000 	if (result != B_OK)
2001 		cache->ReleaseRefLocked();
2002 
2003 	cache->Unlock();
2004 
2005 	if (result != B_OK)
2006 		return result;
2007 
2008 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2009 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2010 		area->Base() + (size - 1));
2011 
2012 	vm_page_reservation reservation;
2013 	vm_page_reserve_pages(&reservation, reservePages,
2014 		team == VMAddressSpace::KernelID()
2015 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2016 	map->Lock();
2017 
2018 	uint32 vecIndex = 0;
2019 	size_t vecOffset = 0;
2020 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2021 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2022 			vecOffset = 0;
2023 			vecIndex++;
2024 		}
2025 
2026 		if (vecIndex >= vecCount)
2027 			break;
2028 
2029 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2030 			protection, area->MemoryType(), &reservation);
2031 
2032 		vecOffset += B_PAGE_SIZE;
2033 	}
2034 
2035 	map->Unlock();
2036 	vm_page_unreserve_pages(&reservation);
2037 
2038 	if (_size != NULL)
2039 		*_size = size;
2040 
2041 	area->cache_type = CACHE_TYPE_DEVICE;
2042 	return area->id;
2043 }
2044 
2045 
2046 area_id
2047 vm_create_null_area(team_id team, const char* name, void** address,
2048 	uint32 addressSpec, addr_t size, uint32 flags)
2049 {
2050 	size = PAGE_ALIGN(size);
2051 
2052 	// Lock the address space and, if B_EXACT_ADDRESS and
2053 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2054 	// is not wired.
2055 	AddressSpaceWriteLocker locker;
2056 	do {
2057 		if (locker.SetTo(team) != B_OK)
2058 			return B_BAD_TEAM_ID;
2059 	} while (addressSpec == B_EXACT_ADDRESS
2060 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2061 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2062 			(addr_t)*address, size, &locker));
2063 
2064 	// create a null cache
2065 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2066 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2067 	VMCache* cache;
2068 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2069 	if (status != B_OK)
2070 		return status;
2071 
2072 	cache->temporary = 1;
2073 	cache->virtual_end = size;
2074 
2075 	cache->Lock();
2076 
2077 	VMArea* area;
2078 	virtual_address_restrictions addressRestrictions = {};
2079 	addressRestrictions.address = *address;
2080 	addressRestrictions.address_specification = addressSpec;
2081 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2082 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2083 		REGION_NO_PRIVATE_MAP, flags,
2084 		&addressRestrictions, true, &area, address);
2085 
2086 	if (status < B_OK) {
2087 		cache->ReleaseRefAndUnlock();
2088 		return status;
2089 	}
2090 
2091 	cache->Unlock();
2092 
2093 	area->cache_type = CACHE_TYPE_NULL;
2094 	return area->id;
2095 }
2096 
2097 
2098 /*!	Creates the vnode cache for the specified \a vnode.
2099 	The vnode has to be marked busy when calling this function.
2100 */
2101 status_t
2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2103 {
2104 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2105 }
2106 
2107 
2108 /*!	\a cache must be locked. The area's address space must be read-locked.
2109 */
2110 static void
2111 pre_map_area_pages(VMArea* area, VMCache* cache,
2112 	vm_page_reservation* reservation)
2113 {
2114 	addr_t baseAddress = area->Base();
2115 	addr_t cacheOffset = area->cache_offset;
2116 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2117 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2118 
2119 	for (VMCachePagesTree::Iterator it
2120 				= cache->pages.GetIterator(firstPage, true, true);
2121 			vm_page* page = it.Next();) {
2122 		if (page->cache_offset >= endPage)
2123 			break;
2124 
2125 		// skip busy and inactive pages
2126 		if (page->busy || page->usage_count == 0)
2127 			continue;
2128 
2129 		DEBUG_PAGE_ACCESS_START(page);
2130 		map_page(area, page,
2131 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2132 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2133 		DEBUG_PAGE_ACCESS_END(page);
2134 	}
2135 }
2136 
2137 
2138 /*!	Will map the file specified by \a fd to an area in memory.
2139 	The file will be mirrored beginning at the specified \a offset. The
2140 	\a offset and \a size arguments have to be page aligned.
2141 */
2142 static area_id
2143 _vm_map_file(team_id team, const char* name, void** _address,
2144 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2145 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2146 {
2147 	// TODO: for binary files, we want to make sure that the mapping
2148 	//	reflects the file's contents at a given point in time, i.e. later
2149 	//	changes should not make it into the mapped copy -- this will need
2150 	//	quite some changes to be done in a nice way
2151 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2152 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2153 
2154 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2155 	size = PAGE_ALIGN(size);
2156 
2157 	if (mapping == REGION_NO_PRIVATE_MAP)
2158 		protection |= B_SHARED_AREA;
2159 	if (addressSpec != B_EXACT_ADDRESS)
2160 		unmapAddressRange = false;
2161 
2162 	if (fd < 0) {
2163 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2164 		virtual_address_restrictions virtualRestrictions = {};
2165 		virtualRestrictions.address = *_address;
2166 		virtualRestrictions.address_specification = addressSpec;
2167 		physical_address_restrictions physicalRestrictions = {};
2168 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2169 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2170 			_address);
2171 	}
2172 
2173 	// get the open flags of the FD
2174 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2175 	if (descriptor == NULL)
2176 		return EBADF;
2177 	int32 openMode = descriptor->open_mode;
2178 	put_fd(descriptor);
2179 
2180 	// The FD must be open for reading in any case. For a shared mapping with
2181 	// write access, the FD must additionally be open for writing.
2182 	if ((openMode & O_ACCMODE) == O_WRONLY
2183 		|| (mapping == REGION_NO_PRIVATE_MAP
2184 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2185 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2186 		return EACCES;
2187 	}
2188 
2189 	uint32 protectionMax = 0;
2190 	if (mapping != REGION_PRIVATE_MAP) {
2191 		if ((openMode & O_ACCMODE) == O_RDWR)
2192 			protectionMax = protection | B_USER_PROTECTION;
2193 		else
2194 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2195 	}
2196 
2197 	// get the vnode for the object, this also grabs a ref to it
2198 	struct vnode* vnode = NULL;
2199 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2200 	if (status < B_OK)
2201 		return status;
2202 	VnodePutter vnodePutter(vnode);
2203 
2204 	// If we're going to pre-map pages, we need to reserve the pages needed by
2205 	// the mapping backend upfront.
2206 	page_num_t reservedPreMapPages = 0;
2207 	vm_page_reservation reservation;
2208 	if ((protection & B_READ_AREA) != 0) {
2209 		AddressSpaceWriteLocker locker;
2210 		status = locker.SetTo(team);
2211 		if (status != B_OK)
2212 			return status;
2213 
2214 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2215 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2216 
2217 		locker.Unlock();
2218 
2219 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2220 			team == VMAddressSpace::KernelID()
2221 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2222 	}
2223 
2224 	struct PageUnreserver {
2225 		PageUnreserver(vm_page_reservation* reservation)
2226 			:
2227 			fReservation(reservation)
2228 		{
2229 		}
2230 
2231 		~PageUnreserver()
2232 		{
2233 			if (fReservation != NULL)
2234 				vm_page_unreserve_pages(fReservation);
2235 		}
2236 
2237 		vm_page_reservation* fReservation;
2238 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2239 
2240 	// Lock the address space and, if the specified address range shall be
2241 	// unmapped, ensure it is not wired.
2242 	AddressSpaceWriteLocker locker;
2243 	do {
2244 		if (locker.SetTo(team) != B_OK)
2245 			return B_BAD_TEAM_ID;
2246 	} while (unmapAddressRange
2247 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2248 			(addr_t)*_address, size, &locker));
2249 
2250 	// TODO: this only works for file systems that use the file cache
2251 	VMCache* cache;
2252 	status = vfs_get_vnode_cache(vnode, &cache, false);
2253 	if (status < B_OK)
2254 		return status;
2255 
2256 	cache->Lock();
2257 
2258 	VMArea* area;
2259 	virtual_address_restrictions addressRestrictions = {};
2260 	addressRestrictions.address = *_address;
2261 	addressRestrictions.address_specification = addressSpec;
2262 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2263 		0, protection, protectionMax, mapping,
2264 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2265 		&addressRestrictions, kernel, &area, _address);
2266 
2267 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2268 		// map_backing_store() cannot know we no longer need the ref
2269 		cache->ReleaseRefLocked();
2270 	}
2271 
2272 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2273 		pre_map_area_pages(area, cache, &reservation);
2274 
2275 	cache->Unlock();
2276 
2277 	if (status == B_OK) {
2278 		// TODO: this probably deserves a smarter solution, i.e. don't always
2279 		// prefetch, and probably don't trigger it at this place either.
2280 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2281 			// prefetches at most 10 MB starting from "offset"
2282 	}
2283 
2284 	if (status != B_OK)
2285 		return status;
2286 
2287 	area->cache_type = CACHE_TYPE_VNODE;
2288 	return area->id;
2289 }
2290 
2291 
2292 area_id
2293 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2294 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2295 	int fd, off_t offset)
2296 {
2297 	if (!arch_vm_supports_protection(protection))
2298 		return B_NOT_SUPPORTED;
2299 
2300 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2301 		mapping, unmapAddressRange, fd, offset, true);
2302 }
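
/*
	Illustrative sketch (added for exposition; not part of the original
	source): mapping the first 1 MB of an already opened file read-only into
	the kernel address space via vm_map_file() above. "fd" is assumed to be
	a valid kernel file descriptor opened at least for reading.

	void* address = NULL;
	area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
		&address, B_ANY_KERNEL_ADDRESS, 1 * 1024 * 1024, B_KERNEL_READ_AREA,
		REGION_PRIVATE_MAP, false, fd, 0);
	if (area < 0)
		return area;

	With REGION_PRIVATE_MAP the area gets its own cache on top of the vnode
	cache (see also the TODO at the top of _vm_map_file()); a shared mapping
	(REGION_NO_PRIVATE_MAP) with write access would additionally require the
	FD to be open for writing.
*/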
2303 
2304 
2305 VMCache*
2306 vm_area_get_locked_cache(VMArea* area)
2307 {
2308 	rw_lock_read_lock(&sAreaCacheLock);
2309 
2310 	while (true) {
2311 		VMCache* cache = area->cache;
2312 
2313 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2314 			// cache has been deleted
2315 			rw_lock_read_lock(&sAreaCacheLock);
2316 			continue;
2317 		}
2318 
2319 		rw_lock_read_lock(&sAreaCacheLock);
2320 
2321 		if (cache == area->cache) {
2322 			cache->AcquireRefLocked();
2323 			rw_lock_read_unlock(&sAreaCacheLock);
2324 			return cache;
2325 		}
2326 
2327 		// the cache changed in the meantime
2328 		cache->Unlock();
2329 	}
2330 }
2331 
2332 
2333 void
2334 vm_area_put_locked_cache(VMCache* cache)
2335 {
2336 	cache->ReleaseRefAndUnlock();
2337 }
2338 
2339 
2340 area_id
2341 vm_clone_area(team_id team, const char* name, void** address,
2342 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2343 	bool kernel)
2344 {
2345 	VMArea* newArea = NULL;
2346 	VMArea* sourceArea;
2347 
2348 	// Check whether the source area exists and is cloneable. If so, mark it
2349 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2350 	{
2351 		AddressSpaceWriteLocker locker;
2352 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2353 		if (status != B_OK)
2354 			return status;
2355 
2356 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2357 			return B_NOT_ALLOWED;
2358 
2359 		sourceArea->protection |= B_SHARED_AREA;
2360 		protection |= B_SHARED_AREA;
2361 	}
2362 
2363 	// Now lock both address spaces and actually do the cloning.
2364 
2365 	MultiAddressSpaceLocker locker;
2366 	VMAddressSpace* sourceAddressSpace;
2367 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2368 	if (status != B_OK)
2369 		return status;
2370 
2371 	VMAddressSpace* targetAddressSpace;
2372 	status = locker.AddTeam(team, true, &targetAddressSpace);
2373 	if (status != B_OK)
2374 		return status;
2375 
2376 	status = locker.Lock();
2377 	if (status != B_OK)
2378 		return status;
2379 
2380 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2381 	if (sourceArea == NULL)
2382 		return B_BAD_VALUE;
2383 
2384 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2385 		return B_NOT_ALLOWED;
2386 
2387 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2388 
2389 	if (!kernel && sourceAddressSpace != targetAddressSpace
2390 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2391 #if KDEBUG
2392 		Team* team = thread_get_current_thread()->team;
2393 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2394 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2395 #endif
2396 		status = B_NOT_ALLOWED;
2397 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2398 		status = B_NOT_ALLOWED;
2399 	} else {
2400 		virtual_address_restrictions addressRestrictions = {};
2401 		addressRestrictions.address = *address;
2402 		addressRestrictions.address_specification = addressSpec;
2403 		status = map_backing_store(targetAddressSpace, cache,
2404 			sourceArea->cache_offset, name, sourceArea->Size(),
2405 			sourceArea->wiring, protection, sourceArea->protection_max,
2406 			mapping, 0, &addressRestrictions,
2407 			kernel, &newArea, address);
2408 	}
2409 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2410 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2411 		// to create a new cache, and has therefore already acquired a reference
2412 		// to the source cache - but otherwise it has no idea that we need
2413 		// one.
2414 		cache->AcquireRefLocked();
2415 	}
2416 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2417 		// we need to map in everything at this point
2418 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2419 			// we don't have actual pages to map but a physical area
2420 			VMTranslationMap* map
2421 				= sourceArea->address_space->TranslationMap();
2422 			map->Lock();
2423 
2424 			phys_addr_t physicalAddress;
2425 			uint32 oldProtection;
2426 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2427 
2428 			map->Unlock();
2429 
2430 			map = targetAddressSpace->TranslationMap();
2431 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2432 				newArea->Base() + (newArea->Size() - 1));
2433 
2434 			vm_page_reservation reservation;
2435 			vm_page_reserve_pages(&reservation, reservePages,
2436 				targetAddressSpace == VMAddressSpace::Kernel()
2437 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2438 			map->Lock();
2439 
2440 			for (addr_t offset = 0; offset < newArea->Size();
2441 					offset += B_PAGE_SIZE) {
2442 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2443 					protection, newArea->MemoryType(), &reservation);
2444 			}
2445 
2446 			map->Unlock();
2447 			vm_page_unreserve_pages(&reservation);
2448 		} else {
2449 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2450 			size_t reservePages = map->MaxPagesNeededToMap(
2451 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2452 			vm_page_reservation reservation;
2453 			vm_page_reserve_pages(&reservation, reservePages,
2454 				targetAddressSpace == VMAddressSpace::Kernel()
2455 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2456 
2457 			// map in all pages from source
2458 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2459 					vm_page* page = it.Next();) {
2460 				if (!page->busy) {
2461 					DEBUG_PAGE_ACCESS_START(page);
2462 					map_page(newArea, page,
2463 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2464 							- newArea->cache_offset),
2465 						protection, &reservation);
2466 					DEBUG_PAGE_ACCESS_END(page);
2467 				}
2468 			}
2469 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2470 			// ensuring that!
2471 
2472 			vm_page_unreserve_pages(&reservation);
2473 		}
2474 	}
2475 	if (status == B_OK)
2476 		newArea->cache_type = sourceArea->cache_type;
2477 
2478 	vm_area_put_locked_cache(cache);
2479 
2480 	if (status < B_OK)
2481 		return status;
2482 
2483 	return newArea->id;
2484 }
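
/*
	Illustrative sketch (added for exposition; not part of the original
	source): cloning an existing area -- for instance a driver-published
	frame buffer -- into another team's address space with vm_clone_area()
	above. "targetTeam" and "sourceArea" are placeholders.

	void* address = NULL;
	area_id clone = vm_clone_area(targetTeam, "frame buffer clone", &address,
		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceArea, false);
	if (clone < 0)
		return clone;

	With kernel == false the source must not be a B_KERNEL_AREA, and for a
	clone into a different address space it must have been marked
	B_CLONEABLE_AREA; the function itself marks both areas B_SHARED_AREA.
*/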
2485 
2486 
2487 /*!	Deletes the specified area of the given address space.
2488 
2489 	The address space must be write-locked.
2490 	The caller must ensure that the area does not have any wired ranges.
2491 
2492 	\param addressSpace The address space containing the area.
2493 	\param area The area to be deleted.
2494 	\param deletingAddressSpace \c true, if the address space is in the process
2495 		of being deleted.
2496 */
2497 static void
2498 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2499 	bool deletingAddressSpace)
2500 {
2501 	ASSERT(!area->IsWired());
2502 
2503 	VMAreas::Remove(area);
2504 
2505 	// At this point the area is removed from the global hash table, but
2506 	// still exists in the area list.
2507 
2508 	// Unmap the virtual address space the area occupied.
2509 	{
2510 		// We need to lock the complete cache chain.
2511 		VMCache* topCache = vm_area_get_locked_cache(area);
2512 		VMCacheChainLocker cacheChainLocker(topCache);
2513 		cacheChainLocker.LockAllSourceCaches();
2514 
2515 		// If the area's top cache is a temporary cache and the area is the only
2516 		// one referencing it (besides us currently holding a second reference),
2517 		// the unmapping code doesn't need to care about preserving the accessed
2518 		// and dirty flags of the top cache page mappings.
2519 		bool ignoreTopCachePageFlags
2520 			= topCache->temporary && topCache->RefCount() == 2;
2521 
2522 		area->address_space->TranslationMap()->UnmapArea(area,
2523 			deletingAddressSpace, ignoreTopCachePageFlags);
2524 	}
2525 
2526 	if (!area->cache->temporary)
2527 		area->cache->WriteModified();
2528 
2529 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2530 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2531 
2532 	arch_vm_unset_memory_type(area);
2533 	addressSpace->RemoveArea(area, allocationFlags);
2534 	addressSpace->Put();
2535 
2536 	area->cache->RemoveArea(area);
2537 	area->cache->ReleaseRef();
2538 
2539 	addressSpace->DeleteArea(area, allocationFlags);
2540 }
2541 
2542 
2543 status_t
2544 vm_delete_area(team_id team, area_id id, bool kernel)
2545 {
2546 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2547 		team, id));
2548 
2549 	// lock the address space and make sure the area isn't wired
2550 	AddressSpaceWriteLocker locker;
2551 	VMArea* area;
2552 	AreaCacheLocker cacheLocker;
2553 
2554 	do {
2555 		status_t status = locker.SetFromArea(team, id, area);
2556 		if (status != B_OK)
2557 			return status;
2558 
2559 		cacheLocker.SetTo(area);
2560 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2561 
2562 	cacheLocker.Unlock();
2563 
2564 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2565 		return B_NOT_ALLOWED;
2566 
2567 	delete_area(locker.AddressSpace(), area, false);
2568 	return B_OK;
2569 }
2570 
2571 
2572 /*!	Creates a new cache on top of the given cache, moves all areas from
2573 	the old cache to the new one, and changes the protection of all affected
2574 	areas' pages to read-only. If requested, wired pages are moved up to the
2575 	new cache and copies are added to the old cache in their place.
2576 	Preconditions:
2577 	- The given cache must be locked.
2578 	- All of the cache's areas' address spaces must be read locked.
2579 	- Either the cache must not have any wired ranges or a page reservation for
2580 	  all wired pages must be provided, so they can be copied.
2581 
2582 	\param lowerCache The cache on top of which a new cache shall be created.
2583 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2584 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2585 		has wired page. The wired pages are copied in this case.
2586 		has wired pages. The wired pages are copied in this case.
2587 static status_t
2588 vm_copy_on_write_area(VMCache* lowerCache,
2589 	vm_page_reservation* wiredPagesReservation)
2590 {
2591 	VMCache* upperCache;
2592 
2593 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2594 
2595 	// We need to separate the cache from its areas. The cache goes one level
2596 	// deeper and we create a new cache in between.
2597 
2598 	// create an anonymous cache
2599 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2600 		lowerCache->GuardSize() / B_PAGE_SIZE,
2601 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2602 		VM_PRIORITY_USER);
2603 	if (status != B_OK)
2604 		return status;
2605 
2606 	upperCache->Lock();
2607 
2608 	upperCache->temporary = 1;
2609 	upperCache->virtual_base = lowerCache->virtual_base;
2610 	upperCache->virtual_end = lowerCache->virtual_end;
2611 
2612 	// transfer the lower cache areas to the upper cache
2613 	rw_lock_write_lock(&sAreaCacheLock);
2614 	upperCache->TransferAreas(lowerCache);
2615 	rw_lock_write_unlock(&sAreaCacheLock);
2616 
2617 	lowerCache->AddConsumer(upperCache);
2618 
2619 	// We now need to remap all pages from all of the cache's areas read-only,
2620 	// so that a copy will be created on next write access. If there are wired
2621 	// pages, we keep their protection, move them to the upper cache and create
2622 	// copies for the lower cache.
2623 	if (wiredPagesReservation != NULL) {
2624 		// We need to handle wired pages -- iterate through the cache's pages.
2625 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2626 				vm_page* page = it.Next();) {
2627 			if (page->WiredCount() > 0) {
2628 				// allocate a new page and copy the wired one
2629 				vm_page* copiedPage = vm_page_allocate_page(
2630 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2631 
2632 				vm_memcpy_physical_page(
2633 					copiedPage->physical_page_number * B_PAGE_SIZE,
2634 					page->physical_page_number * B_PAGE_SIZE);
2635 
2636 				// move the wired page to the upper cache (note: removing is OK
2637 				// with the SplayTree iterator) and insert the copy
2638 				upperCache->MovePage(page);
2639 				lowerCache->InsertPage(copiedPage,
2640 					page->cache_offset * B_PAGE_SIZE);
2641 
2642 				DEBUG_PAGE_ACCESS_END(copiedPage);
2643 			} else {
2644 				// Change the protection of this page in all areas.
2645 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2646 						tempArea = tempArea->cache_next) {
2647 					if (!is_page_in_area(tempArea, page))
2648 						continue;
2649 
2650 					// The page must remain readable to the same extent it
2651 					// was before; write access is removed to force a copy.
2652 					addr_t address = virtual_page_address(tempArea, page);
2653 					uint32 protection = 0;
2654 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2655 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2656 						protection |= B_KERNEL_READ_AREA;
2657 					if ((pageProtection & B_READ_AREA) != 0)
2658 						protection |= B_READ_AREA;
2659 
2660 					VMTranslationMap* map
2661 						= tempArea->address_space->TranslationMap();
2662 					map->Lock();
2663 					map->ProtectPage(tempArea, address, protection);
2664 					map->Unlock();
2665 				}
2666 			}
2667 		}
2668 	} else {
2669 		ASSERT(lowerCache->WiredPagesCount() == 0);
2670 
2671 		// just change the protection of all areas
2672 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2673 				tempArea = tempArea->cache_next) {
2674 			if (tempArea->page_protections != NULL) {
2675 				// Change the protection of all pages in this area.
2676 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2677 				map->Lock();
2678 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2679 					vm_page* page = it.Next();) {
2680 					if (!is_page_in_area(tempArea, page))
2681 						continue;
2682 
2683 					// The page must remain readable to the same extent it
2684 					// was before; write access is removed to force a copy.
2685 					addr_t address = virtual_page_address(tempArea, page);
2686 					uint32 protection = 0;
2687 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2688 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2689 						protection |= B_KERNEL_READ_AREA;
2690 					if ((pageProtection & B_READ_AREA) != 0)
2691 						protection |= B_READ_AREA;
2692 
2693 					map->ProtectPage(tempArea, address, protection);
2694 				}
2695 				map->Unlock();
2696 				continue;
2697 			}
2698 			// The area must remain readable to the same extent it was
2699 			// before; write access is removed to force copy-on-write.
2700 			uint32 protection = 0;
2701 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2702 				protection |= B_KERNEL_READ_AREA;
2703 			if ((tempArea->protection & B_READ_AREA) != 0)
2704 				protection |= B_READ_AREA;
2705 
2706 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2707 			map->Lock();
2708 			map->ProtectArea(tempArea, protection);
2709 			map->Unlock();
2710 		}
2711 	}
2712 
2713 	vm_area_put_locked_cache(upperCache);
2714 
2715 	return B_OK;
2716 }
2717 
2718 
2719 area_id
2720 vm_copy_area(team_id team, const char* name, void** _address,
2721 	uint32 addressSpec, area_id sourceID)
2722 {
2723 	// Do the locking: target address space, all address spaces associated with
2724 	// the source cache, and the cache itself.
2725 	MultiAddressSpaceLocker locker;
2726 	VMAddressSpace* targetAddressSpace;
2727 	VMCache* cache;
2728 	VMArea* source;
2729 	AreaCacheLocker cacheLocker;
2730 	status_t status;
2731 	bool sharedArea;
2732 
2733 	page_num_t wiredPages = 0;
2734 	vm_page_reservation wiredPagesReservation;
2735 
2736 	bool restart;
2737 	do {
2738 		restart = false;
2739 
2740 		locker.Unset();
2741 		status = locker.AddTeam(team, true, &targetAddressSpace);
2742 		if (status == B_OK) {
2743 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2744 				&cache);
2745 		}
2746 		if (status != B_OK)
2747 			return status;
2748 
2749 		cacheLocker.SetTo(cache, true);	// already locked
2750 
2751 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2752 
2753 		page_num_t oldWiredPages = wiredPages;
2754 		wiredPages = 0;
2755 
2756 		// If the source area isn't shared, count the number of wired pages in
2757 		// the cache and reserve as many pages.
2758 		if (!sharedArea) {
2759 			wiredPages = cache->WiredPagesCount();
2760 
2761 			if (wiredPages > oldWiredPages) {
2762 				cacheLocker.Unlock();
2763 				locker.Unlock();
2764 
2765 				if (oldWiredPages > 0)
2766 					vm_page_unreserve_pages(&wiredPagesReservation);
2767 
2768 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2769 					VM_PRIORITY_USER);
2770 
2771 				restart = true;
2772 			}
2773 		} else if (oldWiredPages > 0)
2774 			vm_page_unreserve_pages(&wiredPagesReservation);
2775 	} while (restart);
2776 
2777 	// unreserve pages later
2778 	struct PagesUnreserver {
2779 		PagesUnreserver(vm_page_reservation* reservation)
2780 			:
2781 			fReservation(reservation)
2782 		{
2783 		}
2784 
2785 		~PagesUnreserver()
2786 		{
2787 			if (fReservation != NULL)
2788 				vm_page_unreserve_pages(fReservation);
2789 		}
2790 
2791 	private:
2792 		vm_page_reservation*	fReservation;
2793 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2794 
2795 	bool writableCopy
2796 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2797 	uint8* targetPageProtections = NULL;
2798 
2799 	if (source->page_protections != NULL) {
2800 		size_t bytes = area_page_protections_size(source->Size());
2801 		targetPageProtections = (uint8*)malloc_etc(bytes,
2802 			(source->address_space == VMAddressSpace::Kernel()
2803 					|| targetAddressSpace == VMAddressSpace::Kernel())
2804 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2805 		if (targetPageProtections == NULL)
2806 			return B_NO_MEMORY;
2807 
2808 		memcpy(targetPageProtections, source->page_protections, bytes);
2809 
2810 		if (!writableCopy) {
2811 			for (size_t i = 0; i < bytes; i++) {
2812 				if ((targetPageProtections[i]
2813 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2814 					writableCopy = true;
2815 					break;
2816 				}
2817 			}
2818 		}
2819 	}
2820 
2821 	if (addressSpec == B_CLONE_ADDRESS) {
2822 		addressSpec = B_EXACT_ADDRESS;
2823 		*_address = (void*)source->Base();
2824 	}
2825 
2826 	// First, create a cache on top of the source area, or use the existing
2827 	// one if this is a shared area.
2828 
2829 	VMArea* target;
2830 	virtual_address_restrictions addressRestrictions = {};
2831 	addressRestrictions.address = *_address;
2832 	addressRestrictions.address_specification = addressSpec;
2833 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2834 		name, source->Size(), source->wiring, source->protection,
2835 		source->protection_max,
2836 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2837 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2838 		&addressRestrictions, true, &target, _address);
2839 	if (status < B_OK) {
2840 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2841 		return status;
2842 	}
2843 
2844 	if (targetPageProtections != NULL)
2845 		target->page_protections = targetPageProtections;
2846 
2847 	if (sharedArea) {
2848 		// The new area uses the old area's cache, but map_backing_store()
2849 		// hasn't acquired a ref. So we have to do that now.
2850 		cache->AcquireRefLocked();
2851 	}
2852 
2853 	// If the source area is writable, we need to move it one layer up as well
2854 
2855 	if (!sharedArea) {
2856 		if (writableCopy) {
2857 			// TODO: do something more useful if this fails!
2858 			if (vm_copy_on_write_area(cache,
2859 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2860 				panic("vm_copy_on_write_area() failed!\n");
2861 			}
2862 		}
2863 	}
2864 
2865 	// we return the ID of the newly created area
2866 	return target->id;
2867 }
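
/*
	Illustrative sketch (added for exposition; not part of the original
	source): creating a copy of an area with vm_copy_area() above.
	"sourceArea" is a placeholder. For a non-shared, writable source the
	source cache is pushed one level up by vm_copy_on_write_area() and all
	affected pages are remapped read-only, so the first write on either side
	faults and copies only the touched page.

	void* address = NULL;
	area_id copy = vm_copy_area(VMAddressSpace::KernelID(), "area copy",
		&address, B_ANY_KERNEL_ADDRESS, sourceArea);
	if (copy < 0)
		return copy;
*/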
2868 
2869 
2870 status_t
2871 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2872 	bool kernel)
2873 {
2874 	fix_protection(&newProtection);
2875 
2876 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2877 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2878 
2879 	if (!arch_vm_supports_protection(newProtection))
2880 		return B_NOT_SUPPORTED;
2881 
2882 	bool becomesWritable
2883 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2884 
2885 	// lock address spaces and cache
2886 	MultiAddressSpaceLocker locker;
2887 	VMCache* cache;
2888 	VMArea* area;
2889 	status_t status;
2890 	AreaCacheLocker cacheLocker;
2891 	bool isWritable;
2892 
2893 	bool restart;
2894 	do {
2895 		restart = false;
2896 
2897 		locker.Unset();
2898 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2899 		if (status != B_OK)
2900 			return status;
2901 
2902 		cacheLocker.SetTo(cache, true);	// already locked
2903 
2904 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2905 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2906 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2907 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2908 				" (%s)\n", team, newProtection, areaID, area->name);
2909 			return B_NOT_ALLOWED;
2910 		}
2911 		if (!kernel && area->protection_max != 0
2912 			&& (newProtection & area->protection_max)
2913 				!= (newProtection & B_USER_PROTECTION)) {
2914 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2915 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2916 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2917 				area->protection_max, areaID, area->name);
2918 			return B_NOT_ALLOWED;
2919 		}
2920 
2921 		if (team != VMAddressSpace::KernelID()
2922 			&& area->address_space->ID() != team) {
2923 			// unless you're the kernel, you are only allowed to set
2924 			// the protection of your own areas
2925 			return B_NOT_ALLOWED;
2926 		}
2927 
2928 		if (area->protection == newProtection)
2929 			return B_OK;
2930 
2931 		isWritable
2932 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2933 
2934 		// Make sure the area (or, if we're going to call
2935 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2936 		// wired ranges.
2937 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2938 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2939 					otherArea = otherArea->cache_next) {
2940 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2941 					restart = true;
2942 					break;
2943 				}
2944 			}
2945 		} else {
2946 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2947 				restart = true;
2948 		}
2949 	} while (restart);
2950 
2951 	bool changePageProtection = true;
2952 	bool changeTopCachePagesOnly = false;
2953 
2954 	if (isWritable && !becomesWritable) {
2955 		// writable -> !writable
2956 
2957 		if (cache->source != NULL && cache->temporary) {
2958 			if (cache->CountWritableAreas(area) == 0) {
2959 				// Since this cache is now backed by the pages in its source
2960 				// cache, we can reduce the cache's commitment to account only
2961 				// for the pages that are actually in this cache.
2962 
2963 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2964 					team == VMAddressSpace::KernelID()
2965 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2966 
2967 				// TODO: we may be able to join with our source cache, if
2968 				// count == 0
2969 			}
2970 		}
2971 
2972 		// If only the writability changes, we can just remap the pages of the
2973 		// top cache, since the pages of lower caches are mapped read-only
2974 		// anyway. That is only advantageous, though, if the number of pages
2975 		// in the cache is significantly smaller than the number of pages in
2976 		// the area.
2977 		if (newProtection
2978 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2979 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2980 			changeTopCachePagesOnly = true;
2981 		}
2982 	} else if (!isWritable && becomesWritable) {
2983 		// !writable -> writable
2984 
2985 		if (!cache->consumers.IsEmpty()) {
2986 			// There are consumers -- we have to insert a new cache. Fortunately
2987 			// vm_copy_on_write_area() does everything that's needed.
2988 			changePageProtection = false;
2989 			status = vm_copy_on_write_area(cache, NULL);
2990 		} else {
2991 			// No consumers, so we don't need to insert a new one.
2992 			if (cache->source != NULL && cache->temporary) {
2993 				// the cache's commitment must contain all possible pages
2994 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2995 					team == VMAddressSpace::KernelID()
2996 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2997 			}
2998 
2999 			if (status == B_OK && cache->source != NULL) {
3000 				// There's a source cache, hence we can't just change all pages'
3001 				// protection or we might allow writing into pages belonging to
3002 				// a lower cache.
3003 				changeTopCachePagesOnly = true;
3004 			}
3005 		}
3006 	} else {
3007 		// we don't have anything special to do in all other cases
3008 	}
3009 
3010 	if (status == B_OK) {
3011 		// remap existing pages in this cache
3012 		if (changePageProtection) {
3013 			VMTranslationMap* map = area->address_space->TranslationMap();
3014 			map->Lock();
3015 
3016 			if (changeTopCachePagesOnly) {
3017 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3018 				page_num_t lastPageOffset
3019 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3020 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3021 						vm_page* page = it.Next();) {
3022 					if (page->cache_offset >= firstPageOffset
3023 						&& page->cache_offset <= lastPageOffset) {
3024 						addr_t address = virtual_page_address(area, page);
3025 						map->ProtectPage(area, address, newProtection);
3026 					}
3027 				}
3028 			} else
3029 				map->ProtectArea(area, newProtection);
3030 
3031 			map->Unlock();
3032 		}
3033 
3034 		area->protection = newProtection;
3035 	}
3036 
3037 	return status;
3038 }
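
/*
	Illustrative sketch (added for exposition; not part of the original
	source): write-protecting a kernel area once its contents have been
	initialized, using vm_set_area_protection() above. "area" is a
	placeholder.

	status_t status = vm_set_area_protection(VMAddressSpace::KernelID(),
		area, B_KERNEL_READ_AREA, true);
	if (status != B_OK)
		return status;

	Removing write access only remaps the pages of the top cache when that
	is cheaper than remapping the whole area; granting write access on a
	cache that has consumers inserts a new copy-on-write layer via
	vm_copy_on_write_area() instead.
*/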
3039 
3040 
3041 status_t
3042 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3043 {
3044 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3045 	if (addressSpace == NULL)
3046 		return B_BAD_TEAM_ID;
3047 
3048 	VMTranslationMap* map = addressSpace->TranslationMap();
3049 
3050 	map->Lock();
3051 	uint32 dummyFlags;
3052 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3053 	map->Unlock();
3054 
3055 	addressSpace->Put();
3056 	return status;
3057 }
3058 
3059 
3060 /*!	The page's cache must be locked.
3061 */
3062 bool
3063 vm_test_map_modification(vm_page* page)
3064 {
3065 	if (page->modified)
3066 		return true;
3067 
3068 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3069 	vm_page_mapping* mapping;
3070 	while ((mapping = iterator.Next()) != NULL) {
3071 		VMArea* area = mapping->area;
3072 		VMTranslationMap* map = area->address_space->TranslationMap();
3073 
3074 		phys_addr_t physicalAddress;
3075 		uint32 flags;
3076 		map->Lock();
3077 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3078 		map->Unlock();
3079 
3080 		if ((flags & PAGE_MODIFIED) != 0)
3081 			return true;
3082 	}
3083 
3084 	return false;
3085 }
3086 
3087 
3088 /*!	The page's cache must be locked.
3089 */
3090 void
3091 vm_clear_map_flags(vm_page* page, uint32 flags)
3092 {
3093 	if ((flags & PAGE_ACCESSED) != 0)
3094 		page->accessed = false;
3095 	if ((flags & PAGE_MODIFIED) != 0)
3096 		page->modified = false;
3097 
3098 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3099 	vm_page_mapping* mapping;
3100 	while ((mapping = iterator.Next()) != NULL) {
3101 		VMArea* area = mapping->area;
3102 		VMTranslationMap* map = area->address_space->TranslationMap();
3103 
3104 		map->Lock();
3105 		map->ClearFlags(virtual_page_address(area, page), flags);
3106 		map->Unlock();
3107 	}
3108 }
3109 
3110 
3111 /*!	Removes all mappings from a page.
3112 	After you've called this function, the page is unmapped from memory and
3113 	the page's \c accessed and \c modified flags have been updated according
3114 	to the state of the mappings.
3115 	The page's cache must be locked.
3116 */
3117 void
3118 vm_remove_all_page_mappings(vm_page* page)
3119 {
3120 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3121 		VMArea* area = mapping->area;
3122 		VMTranslationMap* map = area->address_space->TranslationMap();
3123 		addr_t address = virtual_page_address(area, page);
3124 		map->UnmapPage(area, address, false);
3125 	}
3126 }
3127 
3128 
3129 int32
3130 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3131 {
3132 	int32 count = 0;
3133 
3134 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3135 	vm_page_mapping* mapping;
3136 	while ((mapping = iterator.Next()) != NULL) {
3137 		VMArea* area = mapping->area;
3138 		VMTranslationMap* map = area->address_space->TranslationMap();
3139 
3140 		bool modified;
3141 		if (map->ClearAccessedAndModified(area,
3142 				virtual_page_address(area, page), false, modified)) {
3143 			count++;
3144 		}
3145 
3146 		page->modified |= modified;
3147 	}
3148 
3150 	if (page->accessed) {
3151 		count++;
3152 		page->accessed = false;
3153 	}
3154 
3155 	return count;
3156 }
3157 
3158 
3159 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3160 	mappings.
3161 	The function iterates through the page mappings and removes them until
3162 	encountering one that has been accessed. From then on it will continue to
3163 	iterate, but only clear the accessed flag of the mapping. The page's
3164 	\c modified bit will be updated accordingly, the \c accessed bit will be
3165 	cleared.
3166 	\return The number of mapping accessed bits encountered, including the
3167 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3168 		of the page have been removed.
3169 */
3170 int32
3171 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3172 {
3173 	ASSERT(page->WiredCount() == 0);
3174 
3175 	if (page->accessed)
3176 		return vm_clear_page_mapping_accessed_flags(page);
3177 
3178 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3179 		VMArea* area = mapping->area;
3180 		VMTranslationMap* map = area->address_space->TranslationMap();
3181 		addr_t address = virtual_page_address(area, page);
3182 		bool modified = false;
3183 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3184 			page->accessed = true;
3185 			page->modified |= modified;
3186 			return vm_clear_page_mapping_accessed_flags(page);
3187 		}
3188 		page->modified |= modified;
3189 	}
3190 
3191 	return 0;
3192 }
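
/*
	Illustrative sketch (added for exposition; not part of the original
	source): the intended calling pattern of the helper above for a
	page-aging pass over an unwired page whose cache is locked. The real
	page daemon logic lives elsewhere (vm_page.cpp); this only shows how the
	return value is meant to be interpreted.

	int32 usage = vm_remove_all_page_mappings_if_unaccessed(page);
	if (usage == 0) {
		// The page was not accessed since the last pass and all of its
		// mappings have been removed -- it is a candidate for reuse or
		// write-back.
	} else {
		// The page was accessed recently; the remaining mappings were kept
		// and only had their accessed flags cleared. Check again later.
	}
*/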
3193 
3194 
3195 static int
3196 display_mem(int argc, char** argv)
3197 {
3198 	bool physical = false;
3199 	addr_t copyAddress;
3200 	int32 displayWidth;
3201 	int32 itemSize;
3202 	int32 num = -1;
3203 	addr_t address;
3204 	int i = 1, j;
3205 
3206 	if (argc > 1 && argv[1][0] == '-') {
3207 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3208 			physical = true;
3209 			i++;
3210 		} else
3211 			i = 99;
3212 	}
3213 
3214 	if (argc < i + 1 || argc > i + 2) {
3215 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3216 			"\tdl - 8 bytes\n"
3217 			"\tdw - 4 bytes\n"
3218 			"\tds - 2 bytes\n"
3219 			"\tdb - 1 byte\n"
3220 			"\tstring - a whole string\n"
3221 			"  -p or --physical only allows memory from a single page to be "
3222 			"displayed.\n");
3223 		return 0;
3224 	}
3225 
3226 	address = parse_expression(argv[i]);
3227 
3228 	if (argc > i + 1)
3229 		num = parse_expression(argv[i + 1]);
3230 
3231 	// build the format string
3232 	if (strcmp(argv[0], "db") == 0) {
3233 		itemSize = 1;
3234 		displayWidth = 16;
3235 	} else if (strcmp(argv[0], "ds") == 0) {
3236 		itemSize = 2;
3237 		displayWidth = 8;
3238 	} else if (strcmp(argv[0], "dw") == 0) {
3239 		itemSize = 4;
3240 		displayWidth = 4;
3241 	} else if (strcmp(argv[0], "dl") == 0) {
3242 		itemSize = 8;
3243 		displayWidth = 2;
3244 	} else if (strcmp(argv[0], "string") == 0) {
3245 		itemSize = 1;
3246 		displayWidth = -1;
3247 	} else {
3248 		kprintf("display_mem called in an invalid way!\n");
3249 		return 0;
3250 	}
3251 
3252 	if (num <= 0)
3253 		num = displayWidth;
3254 
3255 	void* physicalPageHandle = NULL;
3256 
3257 	if (physical) {
3258 		int32 offset = address & (B_PAGE_SIZE - 1);
3259 		if (num * itemSize + offset > B_PAGE_SIZE) {
3260 			num = (B_PAGE_SIZE - offset) / itemSize;
3261 			kprintf("NOTE: number of bytes has been cut to page size\n");
3262 		}
3263 
3264 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3265 
3266 		if (vm_get_physical_page_debug(address, &copyAddress,
3267 				&physicalPageHandle) != B_OK) {
3268 			kprintf("getting the hardware page failed.\n");
3269 			return 0;
3270 		}
3271 
3272 		address += offset;
3273 		copyAddress += offset;
3274 	} else
3275 		copyAddress = address;
3276 
3277 	if (!strcmp(argv[0], "string")) {
3278 		kprintf("%p \"", (char*)copyAddress);
3279 
3280 		// string mode
3281 		for (i = 0; true; i++) {
3282 			char c;
3283 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3284 					!= B_OK
3285 				|| c == '\0') {
3286 				break;
3287 			}
3288 
3289 			if (c == '\n')
3290 				kprintf("\\n");
3291 			else if (c == '\t')
3292 				kprintf("\\t");
3293 			else {
3294 				if (!isprint(c))
3295 					c = '.';
3296 
3297 				kprintf("%c", c);
3298 			}
3299 		}
3300 
3301 		kprintf("\"\n");
3302 	} else {
3303 		// number mode
3304 		for (i = 0; i < num; i++) {
3305 			uint64 value;
3306 
3307 			if ((i % displayWidth) == 0) {
3308 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3309 				if (i != 0)
3310 					kprintf("\n");
3311 
3312 				kprintf("[0x%lx]  ", address + i * itemSize);
3313 
3314 				for (j = 0; j < displayed; j++) {
3315 					char c;
3316 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3317 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3318 						displayed = j;
3319 						break;
3320 					}
3321 					if (!isprint(c))
3322 						c = '.';
3323 
3324 					kprintf("%c", c);
3325 				}
3326 				if (num > displayWidth) {
3327 					// make sure the spacing in the last line is correct
3328 					for (j = displayed; j < displayWidth * itemSize; j++)
3329 						kprintf(" ");
3330 				}
3331 				kprintf("  ");
3332 			}
3333 
3334 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3335 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3336 				kprintf("read fault");
3337 				break;
3338 			}
3339 
3340 			switch (itemSize) {
3341 				case 1:
3342 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3343 					break;
3344 				case 2:
3345 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3346 					break;
3347 				case 4:
3348 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3349 					break;
3350 				case 8:
3351 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3352 					break;
3353 			}
3354 		}
3355 
3356 		kprintf("\n");
3357 	}
3358 
3359 	if (physical) {
3360 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3361 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3362 	}
3363 	return 0;
3364 }
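
/*
	Usage examples for the debugger command above (added for exposition; not
	part of the original source; the addresses are placeholders):

		dw 0x80001234 16      dump 16 32-bit values starting at the address
		db -p 0x9f000 32      dump 32 bytes of physical memory (one page max)
		string 0x80001234     print the NUL-terminated string at the address
*/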
3365 
3366 
3367 static void
3368 dump_cache_tree_recursively(VMCache* cache, int level,
3369 	VMCache* highlightCache)
3370 {
3371 	// print this cache
3372 	for (int i = 0; i < level; i++)
3373 		kprintf("  ");
3374 	if (cache == highlightCache)
3375 		kprintf("%p <--\n", cache);
3376 	else
3377 		kprintf("%p\n", cache);
3378 
3379 	// recursively print its consumers
3380 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3381 			VMCache* consumer = it.Next();) {
3382 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3383 	}
3384 }
3385 
3386 
3387 static int
3388 dump_cache_tree(int argc, char** argv)
3389 {
3390 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3391 		kprintf("usage: %s <address>\n", argv[0]);
3392 		return 0;
3393 	}
3394 
3395 	addr_t address = parse_expression(argv[1]);
3396 	if (address == 0)
3397 		return 0;
3398 
3399 	VMCache* cache = (VMCache*)address;
3400 	VMCache* root = cache;
3401 
3402 	// find the root cache (the transitive source)
3403 	while (root->source != NULL)
3404 		root = root->source;
3405 
3406 	dump_cache_tree_recursively(root, 0, cache);
3407 
3408 	return 0;
3409 }
3410 
3411 
3412 const char*
3413 vm_cache_type_to_string(int32 type)
3414 {
3415 	switch (type) {
3416 		case CACHE_TYPE_RAM:
3417 			return "RAM";
3418 		case CACHE_TYPE_DEVICE:
3419 			return "device";
3420 		case CACHE_TYPE_VNODE:
3421 			return "vnode";
3422 		case CACHE_TYPE_NULL:
3423 			return "null";
3424 
3425 		default:
3426 			return "unknown";
3427 	}
3428 }
3429 
3430 
3431 #if DEBUG_CACHE_LIST
3432 
3433 static void
3434 update_cache_info_recursively(VMCache* cache, cache_info& info)
3435 {
3436 	info.page_count += cache->page_count;
3437 	if (cache->type == CACHE_TYPE_RAM)
3438 		info.committed += cache->committed_size;
3439 
3440 	// recurse
3441 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3442 			VMCache* consumer = it.Next();) {
3443 		update_cache_info_recursively(consumer, info);
3444 	}
3445 }
3446 
3447 
3448 static int
3449 cache_info_compare_page_count(const void* _a, const void* _b)
3450 {
3451 	const cache_info* a = (const cache_info*)_a;
3452 	const cache_info* b = (const cache_info*)_b;
3453 	if (a->page_count == b->page_count)
3454 		return 0;
3455 	return a->page_count < b->page_count ? 1 : -1;
3456 }
3457 
3458 
3459 static int
3460 cache_info_compare_committed(const void* _a, const void* _b)
3461 {
3462 	const cache_info* a = (const cache_info*)_a;
3463 	const cache_info* b = (const cache_info*)_b;
3464 	if (a->committed == b->committed)
3465 		return 0;
3466 	return a->committed < b->committed ? 1 : -1;
3467 }
3468 
3469 
3470 static void
3471 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3472 {
3473 	for (int i = 0; i < level; i++)
3474 		kprintf("  ");
3475 
3476 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3477 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3478 		cache->virtual_base, cache->virtual_end, cache->page_count);
3479 
3480 	if (level == 0)
3481 		kprintf("/%lu", info.page_count);
3482 
3483 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3484 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3485 
3486 		if (level == 0)
3487 			kprintf("/%lu", info.committed);
3488 	}
3489 
3490 	// areas
3491 	if (cache->areas != NULL) {
3492 		VMArea* area = cache->areas;
3493 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3494 			area->name, area->address_space->ID());
3495 
3496 		while (area->cache_next != NULL) {
3497 			area = area->cache_next;
3498 			kprintf(", %" B_PRId32, area->id);
3499 		}
3500 	}
3501 
3502 	kputs("\n");
3503 
3504 	// recurse
3505 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3506 			VMCache* consumer = it.Next();) {
3507 		dump_caches_recursively(consumer, info, level + 1);
3508 	}
3509 }
3510 
3511 
3512 static int
3513 dump_caches(int argc, char** argv)
3514 {
3515 	if (sCacheInfoTable == NULL) {
3516 		kprintf("No cache info table!\n");
3517 		return 0;
3518 	}
3519 
3520 	bool sortByPageCount = true;
3521 
3522 	for (int32 i = 1; i < argc; i++) {
3523 		if (strcmp(argv[i], "-c") == 0) {
3524 			sortByPageCount = false;
3525 		} else {
3526 			print_debugger_command_usage(argv[0]);
3527 			return 0;
3528 		}
3529 	}
3530 
3531 	uint32 totalCount = 0;
3532 	uint32 rootCount = 0;
3533 	off_t totalCommitted = 0;
3534 	page_num_t totalPages = 0;
3535 
3536 	VMCache* cache = gDebugCacheList;
3537 	while (cache) {
3538 		totalCount++;
3539 		if (cache->source == NULL) {
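			// If the info table is already full, fall back to a stack-allocated
			// entry so that this tree still contributes to the totals; it just
			// won't be sorted and printed later.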
3540 			cache_info stackInfo;
3541 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3542 				? sCacheInfoTable[rootCount] : stackInfo;
3543 			rootCount++;
3544 			info.cache = cache;
3545 			info.page_count = 0;
3546 			info.committed = 0;
3547 			update_cache_info_recursively(cache, info);
3548 			totalCommitted += info.committed;
3549 			totalPages += info.page_count;
3550 		}
3551 
3552 		cache = cache->debug_next;
3553 	}
3554 
3555 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3556 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3557 			sortByPageCount
3558 				? &cache_info_compare_page_count
3559 				: &cache_info_compare_committed);
3560 	}
3561 
3562 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3563 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3564 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3565 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3566 			"page count" : "committed size");
3567 
3568 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3569 		for (uint32 i = 0; i < rootCount; i++) {
3570 			cache_info& info = sCacheInfoTable[i];
3571 			dump_caches_recursively(info.cache, info, 0);
3572 		}
3573 	} else
3574 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3575 
3576 	return 0;
3577 }
3578 
3579 #endif	// DEBUG_CACHE_LIST
3580 
3581 
3582 static int
3583 dump_cache(int argc, char** argv)
3584 {
3585 	VMCache* cache;
3586 	bool showPages = false;
3587 	int i = 1;
3588 
3589 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3590 		kprintf("usage: %s [-ps] <address>\n"
3591 			"  if -p is specified, all pages are shown, if -s is used\n"
3592 			"  only the cache info is shown respectively.\n", argv[0]);
3593 		return 0;
3594 	}
3595 	while (argv[i][0] == '-') {
3596 		char* arg = argv[i] + 1;
3597 		while (arg[0]) {
3598 			if (arg[0] == 'p')
3599 				showPages = true;
3600 			arg++;
3601 		}
3602 		i++;
3603 	}
3604 	if (argv[i] == NULL) {
3605 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3606 		return 0;
3607 	}
3608 
3609 	addr_t address = parse_expression(argv[i]);
3610 	if (address == 0)
3611 		return 0;
3612 
3613 	cache = (VMCache*)address;
3614 
3615 	cache->Dump(showPages);
3616 
3617 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3618 
3619 	return 0;
3620 }
3621 
3622 
3623 static void
3624 dump_area_struct(VMArea* area, bool mappings)
3625 {
3626 	kprintf("AREA: %p\n", area);
3627 	kprintf("name:\t\t'%s'\n", area->name);
3628 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3629 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3630 	kprintf("base:\t\t0x%lx\n", area->Base());
3631 	kprintf("size:\t\t0x%lx\n", area->Size());
3632 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3633 	kprintf("page_protection:%p\n", area->page_protections);
3634 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3635 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3636 	kprintf("cache:\t\t%p\n", area->cache);
3637 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3638 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3639 	kprintf("cache_next:\t%p\n", area->cache_next);
3640 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3641 
3642 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3643 	if (mappings) {
3644 		kprintf("page mappings:\n");
3645 		while (iterator.HasNext()) {
3646 			vm_page_mapping* mapping = iterator.Next();
3647 			kprintf("  %p", mapping->page);
3648 		}
3649 		kprintf("\n");
3650 	} else {
3651 		uint32 count = 0;
3652 		while (iterator.Next() != NULL) {
3653 			count++;
3654 		}
3655 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3656 	}
3657 }
3658 
3659 
3660 static int
3661 dump_area(int argc, char** argv)
3662 {
3663 	bool mappings = false;
3664 	bool found = false;
3665 	int32 index = 1;
3666 	VMArea* area;
3667 	addr_t num;
3668 
3669 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3670 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3671 			"All areas matching either id/address/name are listed. You can\n"
3672 			"force to check only a specific item by prefixing the specifier\n"
3673 			"with the id/contains/address/name keywords.\n"
3674 			"-m shows the area's mappings as well.\n");
3675 		return 0;
3676 	}
3677 
3678 	if (!strcmp(argv[1], "-m")) {
3679 		mappings = true;
3680 		index++;
3681 	}
3682 
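	// "mode" is a bit mask of the criteria to match against: 1 = id,
	// 2 = contains (the given address lies within the area), 4 = name.
	// 0xf matches any of them; 0 means the argument is the address of the
	// VMArea structure itself.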
3683 	int32 mode = 0xf;
3684 	if (!strcmp(argv[index], "id"))
3685 		mode = 1;
3686 	else if (!strcmp(argv[index], "contains"))
3687 		mode = 2;
3688 	else if (!strcmp(argv[index], "name"))
3689 		mode = 4;
3690 	else if (!strcmp(argv[index], "address"))
3691 		mode = 0;
3692 	if (mode != 0xf)
3693 		index++;
3694 
3695 	if (index >= argc) {
3696 		kprintf("No area specifier given.\n");
3697 		return 0;
3698 	}
3699 
3700 	num = parse_expression(argv[index]);
3701 
3702 	if (mode == 0) {
3703 		dump_area_struct((struct VMArea*)num, mappings);
3704 	} else {
3705 		// walk through the area list, looking for the arguments as a name
3706 
3707 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3708 		while ((area = it.Next()) != NULL) {
3709 			if (((mode & 4) != 0
3710 					&& !strcmp(argv[index], area->name))
3711 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3712 					|| (((mode & 2) != 0 && area->Base() <= num
3713 						&& area->Base() + area->Size() > num))))) {
3714 				dump_area_struct(area, mappings);
3715 				found = true;
3716 			}
3717 		}
3718 
3719 		if (!found)
3720 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3721 	}
3722 
3723 	return 0;
3724 }
3725 
3726 
3727 static int
3728 dump_area_list(int argc, char** argv)
3729 {
3730 	VMArea* area;
3731 	const char* name = NULL;
3732 	int32 id = 0;
3733 
3734 	if (argc > 1) {
3735 		id = parse_expression(argv[1]);
3736 		if (id == 0)
3737 			name = argv[1];
3738 	}
3739 
3740 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3741 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3742 		B_PRINTF_POINTER_WIDTH, "size");
3743 
3744 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3745 	while ((area = it.Next()) != NULL) {
3746 		if ((id != 0 && area->address_space->ID() != id)
3747 			|| (name != NULL && strstr(area->name, name) == NULL))
3748 			continue;
3749 
3750 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3751 			area->id, (void*)area->Base(), (void*)area->Size(),
3752 			area->protection, area->wiring, area->name);
3753 	}
3754 	return 0;
3755 }
3756 
3757 
3758 static int
3759 dump_available_memory(int argc, char** argv)
3760 {
3761 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3762 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3763 	return 0;
3764 }
3765 
3766 
3767 static int
3768 dump_mapping_info(int argc, char** argv)
3769 {
3770 	bool reverseLookup = false;
3771 	bool pageLookup = false;
3772 
3773 	int argi = 1;
3774 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3775 		const char* arg = argv[argi];
3776 		if (strcmp(arg, "-r") == 0) {
3777 			reverseLookup = true;
3778 		} else if (strcmp(arg, "-p") == 0) {
3779 			reverseLookup = true;
3780 			pageLookup = true;
3781 		} else {
3782 			print_debugger_command_usage(argv[0]);
3783 			return 0;
3784 		}
3785 	}
3786 
3787 	// We need at least one argument, the address. Optionally a thread ID can be
3788 	// specified.
3789 	if (argi >= argc || argi + 2 < argc) {
3790 		print_debugger_command_usage(argv[0]);
3791 		return 0;
3792 	}
3793 
3794 	uint64 addressValue;
3795 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3796 		return 0;
3797 
3798 	Team* team = NULL;
3799 	if (argi < argc) {
3800 		uint64 threadID;
3801 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3802 			return 0;
3803 
3804 		Thread* thread = Thread::GetDebug(threadID);
3805 		if (thread == NULL) {
3806 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3807 			return 0;
3808 		}
3809 
3810 		team = thread->team;
3811 	}
3812 
3813 	if (reverseLookup) {
3814 		phys_addr_t physicalAddress;
3815 		if (pageLookup) {
3816 			vm_page* page = (vm_page*)(addr_t)addressValue;
3817 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3818 		} else {
3819 			physicalAddress = (phys_addr_t)addressValue;
3820 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3821 		}
3822 
3823 		kprintf("    Team     Virtual Address      Area\n");
3824 		kprintf("--------------------------------------\n");
3825 
3826 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3827 			Callback()
3828 				:
3829 				fAddressSpace(NULL)
3830 			{
3831 			}
3832 
3833 			void SetAddressSpace(VMAddressSpace* addressSpace)
3834 			{
3835 				fAddressSpace = addressSpace;
3836 			}
3837 
3838 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3839 			{
3840 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3841 					virtualAddress);
3842 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3843 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3844 				else
3845 					kprintf("\n");
3846 				return false;
3847 			}
3848 
3849 		private:
3850 			VMAddressSpace*	fAddressSpace;
3851 		} callback;
3852 
3853 		if (team != NULL) {
3854 			// team specified -- get its address space
3855 			VMAddressSpace* addressSpace = team->address_space;
3856 			if (addressSpace == NULL) {
3857 				kprintf("Failed to get address space!\n");
3858 				return 0;
3859 			}
3860 
3861 			callback.SetAddressSpace(addressSpace);
3862 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3863 				physicalAddress, callback);
3864 		} else {
3865 			// no team specified -- iterate through all address spaces
3866 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3867 				addressSpace != NULL;
3868 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3869 				callback.SetAddressSpace(addressSpace);
3870 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3871 					physicalAddress, callback);
3872 			}
3873 		}
3874 	} else {
3875 		// get the address space
3876 		addr_t virtualAddress = (addr_t)addressValue;
3877 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3878 		VMAddressSpace* addressSpace;
3879 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3880 			addressSpace = VMAddressSpace::Kernel();
3881 		} else if (team != NULL) {
3882 			addressSpace = team->address_space;
3883 		} else {
3884 			Thread* thread = debug_get_debugged_thread();
3885 			if (thread == NULL || thread->team == NULL) {
3886 				kprintf("Failed to get team!\n");
3887 				return 0;
3888 			}
3889 
3890 			addressSpace = thread->team->address_space;
3891 		}
3892 
3893 		if (addressSpace == NULL) {
3894 			kprintf("Failed to get address space!\n");
3895 			return 0;
3896 		}
3897 
3898 		// let the translation map implementation do the job
3899 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3900 	}
3901 
3902 	return 0;
3903 }
3904 
3905 
3906 /*!	Deletes all areas and reserved regions in the given address space.
3907 
3908 	The caller must ensure that none of the areas has any wired ranges.
3909 
3910 	\param addressSpace The address space.
3911 	\param deletingAddressSpace \c true, if the address space is in the process
3912 		of being deleted.
3913 */
3914 void
3915 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3916 {
3917 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3918 		addressSpace->ID()));
3919 
3920 	addressSpace->WriteLock();
3921 
3922 	// remove all reserved areas in this address space
3923 	addressSpace->UnreserveAllAddressRanges(0);
3924 
3925 	// delete all the areas in this address space
3926 	while (VMArea* area = addressSpace->FirstArea()) {
3927 		ASSERT(!area->IsWired());
3928 		delete_area(addressSpace, area, deletingAddressSpace);
3929 	}
3930 
3931 	addressSpace->WriteUnlock();
3932 }
3933 
3934 
3935 static area_id
3936 vm_area_for(addr_t address, bool kernel)
3937 {
3938 	team_id team;
3939 	if (IS_USER_ADDRESS(address)) {
3940 		// we try the user team address space, if any
3941 		team = VMAddressSpace::CurrentID();
3942 		if (team < 0)
3943 			return team;
3944 	} else
3945 		team = VMAddressSpace::KernelID();
3946 
3947 	AddressSpaceReadLocker locker(team);
3948 	if (!locker.IsLocked())
3949 		return B_BAD_TEAM_ID;
3950 
3951 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3952 	if (area != NULL) {
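		// Areas that userland cannot access at all are not reported to
		// non-kernel callers.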
3953 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3954 				&& (area->protection & B_KERNEL_AREA) != 0)
3955 			return B_ERROR;
3956 
3957 		return area->id;
3958 	}
3959 
3960 	return B_ERROR;
3961 }
3962 
3963 
3964 /*!	Frees physical pages that were used during the boot process.
3965 	\a end is inclusive.
3966 */
3967 static void
3968 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3969 {
3970 	// free all physical pages in the specified range
3971 
3972 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3973 		phys_addr_t physicalAddress;
3974 		uint32 flags;
3975 
3976 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3977 			&& (flags & PAGE_PRESENT) != 0) {
3978 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3979 			if (page != NULL && page->State() != PAGE_STATE_FREE
3980 					&& page->State() != PAGE_STATE_CLEAR
3981 					&& page->State() != PAGE_STATE_UNUSED) {
3982 				DEBUG_PAGE_ACCESS_START(page);
3983 				vm_page_set_state(page, PAGE_STATE_FREE);
3984 			}
3985 		}
3986 	}
3987 
3988 	// unmap the memory
3989 	map->Unmap(start, end);
3990 }
3991 
3992 
3993 void
3994 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3995 {
3996 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3997 	addr_t end = start + (size - 1);
3998 	addr_t lastEnd = start;
3999 
4000 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4001 		(void*)start, (void*)end));
4002 
4003 	// The areas are sorted in virtual address space order, so
4004 	// we just have to find the holes between them that fall
4005 	// into the area we should dispose
4006 
4007 	map->Lock();
4008 
4009 	for (VMAddressSpace::AreaIterator it
4010 				= VMAddressSpace::Kernel()->GetAreaIterator();
4011 			VMArea* area = it.Next();) {
4012 		addr_t areaStart = area->Base();
4013 		addr_t areaEnd = areaStart + (area->Size() - 1);
4014 
4015 		if (areaEnd < start)
4016 			continue;
4017 
4018 		if (areaStart > end) {
4019 			// we are done, the area is already beyond what we have to free
4020 			break;
4021 		}
4022 
4023 		if (areaStart > lastEnd) {
4024 			// this is something we can free
4025 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4026 				(void*)areaStart));
4027 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4028 		}
4029 
4030 		if (areaEnd >= end) {
4031 			lastEnd = areaEnd;
4032 				// no +1 to prevent potential overflow
4033 			break;
4034 		}
4035 
4036 		lastEnd = areaEnd + 1;
4037 	}
4038 
4039 	if (lastEnd < end) {
4040 		// we can also get rid of some space at the end of the area
4041 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4042 			(void*)end));
4043 		unmap_and_free_physical_pages(map, lastEnd, end);
4044 	}
4045 
4046 	map->Unlock();
4047 }
4048 
4049 
4050 static void
4051 create_preloaded_image_areas(struct preloaded_image* _image)
4052 {
4053 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4054 	char name[B_OS_NAME_LENGTH];
4055 	void* address;
4056 	int32 length;
4057 
4058 	// use file name to create a good area name
4059 	char* fileName = strrchr(image->name, '/');
4060 	if (fileName == NULL)
4061 		fileName = image->name;
4062 	else
4063 		fileName++;
4064 
4065 	length = strlen(fileName);
4066 	// make sure there is enough space for the suffix
4067 	if (length > 25)
4068 		length = 25;
4069 
4070 	memcpy(name, fileName, length);
4071 	strcpy(name + length, "_text");
4072 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4073 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4074 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4075 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4076 		// this will later be remapped read-only/executable by the
4077 		// ELF initialization code
4078 
4079 	strcpy(name + length, "_data");
4080 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4081 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4082 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4083 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4084 }
4085 
4086 
4087 /*!	Frees all kernel args areas that were previously created from the
4088 	kernel_args structure. Any boot loader resources contained in those
4089 	ranges must not be accessed anymore past this point.
4090 */
4091 void
4092 vm_free_kernel_args(kernel_args* args)
4093 {
4094 	uint32 i;
4095 
4096 	TRACE(("vm_free_kernel_args()\n"));
4097 
4098 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4099 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4100 		if (area >= B_OK)
4101 			delete_area(area);
4102 	}
4103 }
4104 
4105 
4106 static void
4107 allocate_kernel_args(kernel_args* args)
4108 {
4109 	TRACE(("allocate_kernel_args()\n"));
4110 
4111 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4112 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4113 
4114 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4115 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4116 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4117 	}
4118 }
4119 
4120 
4121 static void
4122 unreserve_boot_loader_ranges(kernel_args* args)
4123 {
4124 	TRACE(("unreserve_boot_loader_ranges()\n"));
4125 
4126 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4127 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4128 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4129 			args->virtual_allocated_range[i].size);
4130 	}
4131 }
4132 
4133 
4134 static void
4135 reserve_boot_loader_ranges(kernel_args* args)
4136 {
4137 	TRACE(("reserve_boot_loader_ranges()\n"));
4138 
4139 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4140 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4141 
4142 		// If the address is not a kernel address, we just skip it. The
4143 		// architecture specific code has to deal with it.
4144 		if (!IS_KERNEL_ADDRESS(address)) {
4145 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4146 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4147 			continue;
4148 		}
4149 
4150 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4151 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4152 		if (status < B_OK)
4153 			panic("could not reserve boot loader ranges\n");
4154 	}
4155 }
4156 
4157 
4158 static addr_t
4159 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4160 {
4161 	size = PAGE_ALIGN(size);
4162 
4163 	// find a slot in the virtual allocation addr range
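	// (the ranges are sorted by address, so we look for the first gap that
	// is large enough -- between two ranges, after the last one, or before
	// the first one -- and extend the neighboring range's bookkeeping to
	// cover the new allocation)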
4164 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4165 		// check to see if the space between this one and the last is big enough
4166 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4167 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4168 			+ args->virtual_allocated_range[i - 1].size;
4169 
4170 		addr_t base = alignment > 0
4171 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4172 
4173 		if (base >= KERNEL_BASE && base < rangeStart
4174 				&& rangeStart - base >= size) {
4175 			args->virtual_allocated_range[i - 1].size
4176 				+= base + size - previousRangeEnd;
4177 			return base;
4178 		}
4179 	}
4180 
4181 	// We didn't find a gap between the allocated ranges. That's OK;
4182 	// see if there's a gap after the last one.
4183 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4184 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4185 		+ args->virtual_allocated_range[lastEntryIndex].size;
4186 	addr_t base = alignment > 0
4187 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4188 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4189 		args->virtual_allocated_range[lastEntryIndex].size
4190 			+= base + size - lastRangeEnd;
4191 		return base;
4192 	}
4193 
4194 	// see if there's a gap before the first one
4195 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4196 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4197 		base = rangeStart - size;
4198 		if (alignment > 0)
4199 			base = ROUNDDOWN(base, alignment);
4200 
4201 		if (base >= KERNEL_BASE) {
4202 			args->virtual_allocated_range[0].start = base;
4203 			args->virtual_allocated_range[0].size += rangeStart - base;
4204 			return base;
4205 		}
4206 	}
4207 
4208 	return 0;
4209 }
4210 
4211 
4212 static bool
4213 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4214 {
4215 	// TODO: horrible brute-force method of determining if the page can be
4216 	// allocated
4217 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4218 		if (address >= args->physical_memory_range[i].start
4219 			&& address < args->physical_memory_range[i].start
4220 				+ args->physical_memory_range[i].size)
4221 			return true;
4222 	}
4223 	return false;
4224 }
4225 
4226 
4227 page_num_t
4228 vm_allocate_early_physical_page(kernel_args* args)
4229 {
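	// First try to extend one of the already allocated physical ranges
	// upwards by one page; if none can grow that way, try extending one
	// downwards instead (see below).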
4230 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4231 		phys_addr_t nextPage;
4232 
4233 		nextPage = args->physical_allocated_range[i].start
4234 			+ args->physical_allocated_range[i].size;
4235 		// see if the page right after this allocated paddr run can be allocated
4236 		if (i + 1 < args->num_physical_allocated_ranges
4237 			&& args->physical_allocated_range[i + 1].size != 0) {
4238 			// see if the next page will collide with the next allocated range
4239 			if (nextPage >= args->physical_allocated_range[i+1].start)
4240 				continue;
4241 		}
4242 		// see if the next physical page fits in the memory block
4243 		if (is_page_in_physical_memory_range(args, nextPage)) {
4244 			// we got one!
4245 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4246 			return nextPage / B_PAGE_SIZE;
4247 		}
4248 	}
4249 
4250 	// Expanding upwards didn't work, try going downwards.
4251 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4252 		phys_addr_t nextPage;
4253 
4254 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4255 		// see if the page right before this allocated paddr run can be allocated
4256 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4257 			// see if that page would collide with the previous allocated range
4258 			if (nextPage < args->physical_allocated_range[i-1].start
4259 				+ args->physical_allocated_range[i-1].size)
4260 				continue;
4261 		}
4262 		// see if the next physical page fits in the memory block
4263 		if (is_page_in_physical_memory_range(args, nextPage)) {
4264 			// we got one!
4265 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4266 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4267 			return nextPage / B_PAGE_SIZE;
4268 		}
4269 	}
4270 
4271 	return 0;
4272 		// could not allocate a block
4273 }
4274 
4275 
4276 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4277 	allocate some pages before the VM is completely up.
4278 */
4279 addr_t
4280 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4281 	uint32 attributes, addr_t alignment)
4282 {
4283 	if (physicalSize > virtualSize)
4284 		physicalSize = virtualSize;
4285 
4286 	// find the vaddr to allocate at
4287 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4288 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4289 	if (virtualBase == 0) {
4290 		panic("vm_allocate_early: could not allocate virtual address\n");
4291 		return 0;
4292 	}
4293 
4294 	// map the pages
4295 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4296 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4297 		if (physicalAddress == 0)
4298 			panic("error allocating early page!\n");
4299 
4300 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4301 
4302 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4303 			physicalAddress * B_PAGE_SIZE, attributes,
4304 			&vm_allocate_early_physical_page);
4305 	}
4306 
4307 	return virtualBase;
4308 }
4309 
4310 
4311 /*!	The main entry point for initializing the VM. */
4312 status_t
4313 vm_init(kernel_args* args)
4314 {
4315 	struct preloaded_image* image;
4316 	void* address;
4317 	status_t err = 0;
4318 	uint32 i;
4319 
4320 	TRACE(("vm_init: entry\n"));
4321 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4322 	err = arch_vm_init(args);
4323 
4324 	// initialize some globals
4325 	vm_page_init_num_pages(args);
4326 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4327 
4328 	slab_init(args);
4329 
4330 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4331 	off_t heapSize = INITIAL_HEAP_SIZE;
4332 	// try to accommodate low memory systems
4333 	while (heapSize > sAvailableMemory / 8)
4334 		heapSize /= 2;
4335 	if (heapSize < 1024 * 1024)
4336 		panic("vm_init: go buy some RAM please.");
4337 
4338 	// map in the new heap and initialize it
4339 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4340 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4341 	TRACE(("heap at 0x%lx\n", heapBase));
4342 	heap_init(heapBase, heapSize);
4343 #endif
4344 
4345 	// initialize the free page list and physical page mapper
4346 	vm_page_init(args);
4347 
4348 	// initialize the cache allocators
4349 	vm_cache_init(args);
4350 
4351 	{
4352 		status_t error = VMAreas::Init();
4353 		if (error != B_OK)
4354 			panic("vm_init: error initializing areas map\n");
4355 	}
4356 
4357 	VMAddressSpace::Init();
4358 	reserve_boot_loader_ranges(args);
4359 
4360 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4361 	heap_init_post_area();
4362 #endif
4363 
4364 	// Do any further initialization that the architecture dependent layers may
4365 	// need now
4366 	arch_vm_translation_map_init_post_area(args);
4367 	arch_vm_init_post_area(args);
4368 	vm_page_init_post_area(args);
4369 	slab_init_post_area();
4370 
4371 	// allocate areas to represent stuff that already exists
4372 
4373 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4374 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4375 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4376 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4377 #endif
4378 
4379 	allocate_kernel_args(args);
4380 
4381 	create_preloaded_image_areas(args->kernel_image);
4382 
4383 	// allocate areas for preloaded images
4384 	for (image = args->preloaded_images; image != NULL; image = image->next)
4385 		create_preloaded_image_areas(image);
4386 
4387 	// allocate kernel stacks
4388 	for (i = 0; i < args->num_cpus; i++) {
4389 		char name[64];
4390 
4391 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4392 		address = (void*)args->cpu_kstack[i].start;
4393 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4394 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4395 	}
4396 
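	// Reserve the very last page of the address space ("overflow
	// protection"), so that computations whose end address would wrap
	// around the top of the address space can never reach a mapped page.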
4397 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4398 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4399 
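	// The blocked ranges below cover the 0xcccccccc and 0xdeadbeef patterns
	// used to mark uninitialized and freed heap memory; blocking them makes
	// a dereference of a pointer still containing such a pattern fault
	// immediately.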
4400 #if PARANOID_KERNEL_MALLOC
4401 	vm_block_address_range("uninitialized heap memory",
4402 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4403 #endif
4404 #if PARANOID_KERNEL_FREE
4405 	vm_block_address_range("freed heap memory",
4406 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4407 #endif
4408 
4409 	// create the object cache for the page mappings
4410 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4411 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4412 		NULL, NULL);
4413 	if (gPageMappingsObjectCache == NULL)
4414 		panic("failed to create page mappings object cache");
4415 
4416 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4417 
4418 #if DEBUG_CACHE_LIST
4419 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4420 		virtual_address_restrictions virtualRestrictions = {};
4421 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4422 		physical_address_restrictions physicalRestrictions = {};
4423 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4424 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4425 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4426 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4427 			&physicalRestrictions, (void**)&sCacheInfoTable);
4428 	}
4429 #endif	// DEBUG_CACHE_LIST
4430 
4431 	// add some debugger commands
4432 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4433 	add_debugger_command("area", &dump_area,
4434 		"Dump info about a particular area");
4435 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4436 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4437 #if DEBUG_CACHE_LIST
4438 	if (sCacheInfoTable != NULL) {
4439 		add_debugger_command_etc("caches", &dump_caches,
4440 			"List all VMCache trees",
4441 			"[ \"-c\" ]\n"
4442 			"All cache trees are listed sorted in decreasing order by number "
4443 				"of\n"
4444 			"used pages or, if \"-c\" is specified, by size of committed "
4445 				"memory.\n",
4446 			0);
4447 	}
4448 #endif
4449 	add_debugger_command("avail", &dump_available_memory,
4450 		"Dump available memory");
4451 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4452 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4453 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4454 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4455 	add_debugger_command("string", &display_mem, "dump strings");
4456 
4457 	add_debugger_command_etc("mapping", &dump_mapping_info,
4458 		"Print address mapping information",
4459 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4460 		"Prints low-level page mapping information for a given address. If\n"
4461 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4462 		"address that is looked up in the translation map of the current\n"
4463 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4464 		"\"-r\" is specified, <address> is a physical address that is\n"
4465 		"searched in the translation map of all teams, respectively the team\n"
4466 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4467 		"<address> is the address of a vm_page structure. The behavior is\n"
4468 		"equivalent to specifying \"-r\" with the physical address of that\n"
4469 		"page.\n",
4470 		0);
4471 
4472 	TRACE(("vm_init: exit\n"));
4473 
4474 	vm_cache_init_post_heap();
4475 
4476 	return err;
4477 }
4478 
4479 
4480 status_t
4481 vm_init_post_sem(kernel_args* args)
4482 {
4483 	// This frees all unused boot loader resources and makes their space
4484 	// available again
4485 	arch_vm_init_end(args);
4486 	unreserve_boot_loader_ranges(args);
4487 
4488 	// Fill in all of the semaphores that were not allocated before.
4489 	// Since we're still single threaded and only the kernel address space
4490 	// exists, it isn't that hard to find all of the ones we need to create.
4491 
4492 	arch_vm_translation_map_init_post_sem(args);
4493 
4494 	slab_init_post_sem();
4495 
4496 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4497 	heap_init_post_sem();
4498 #endif
4499 
4500 	return B_OK;
4501 }
4502 
4503 
4504 status_t
4505 vm_init_post_thread(kernel_args* args)
4506 {
4507 	vm_page_init_post_thread(args);
4508 	slab_init_post_thread();
4509 	return heap_init_post_thread();
4510 }
4511 
4512 
4513 status_t
4514 vm_init_post_modules(kernel_args* args)
4515 {
4516 	return arch_vm_init_post_modules(args);
4517 }
4518 
4519 
4520 void
4521 permit_page_faults(void)
4522 {
4523 	Thread* thread = thread_get_current_thread();
4524 	if (thread != NULL)
4525 		atomic_add(&thread->page_faults_allowed, 1);
4526 }
4527 
4528 
4529 void
4530 forbid_page_faults(void)
4531 {
4532 	Thread* thread = thread_get_current_thread();
4533 	if (thread != NULL)
4534 		atomic_add(&thread->page_faults_allowed, -1);
4535 }
4536 
4537 
4538 status_t
4539 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4540 	bool isUser, addr_t* newIP)
4541 {
4542 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4543 		faultAddress));
4544 
4545 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4546 
4547 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4548 	VMAddressSpace* addressSpace = NULL;
4549 
4550 	status_t status = B_OK;
4551 	*newIP = 0;
4552 	atomic_add((int32*)&sPageFaults, 1);
4553 
4554 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4555 		addressSpace = VMAddressSpace::GetKernel();
4556 	} else if (IS_USER_ADDRESS(pageAddress)) {
4557 		addressSpace = VMAddressSpace::GetCurrent();
4558 		if (addressSpace == NULL) {
4559 			if (!isUser) {
4560 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4561 					"memory!\n");
4562 				status = B_BAD_ADDRESS;
4563 				TPF(PageFaultError(-1,
4564 					VMPageFaultTracing
4565 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4566 			} else {
4567 				// XXX weird state.
4568 				panic("vm_page_fault: non kernel thread accessing user memory "
4569 					"that doesn't exist!\n");
4570 				status = B_BAD_ADDRESS;
4571 			}
4572 		}
4573 	} else {
4574 		// the hit was probably in the 64k DMZ between kernel and user space
4575 		// this keeps a user space thread from passing a buffer that crosses
4576 		// into kernel space
4577 		status = B_BAD_ADDRESS;
4578 		TPF(PageFaultError(-1,
4579 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4580 	}
4581 
4582 	if (status == B_OK) {
4583 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4584 			isUser, NULL);
4585 	}
4586 
4587 	if (status < B_OK) {
4588 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4589 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4590 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4591 			thread_get_current_thread_id());
4592 		if (!isUser) {
4593 			Thread* thread = thread_get_current_thread();
4594 			if (thread != NULL && thread->fault_handler != 0) {
4595 				// this will cause the arch dependant page fault handler to
4596 				// modify the IP on the interrupt frame or whatever to return
4597 				// to this address
4598 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4599 			} else {
4600 				// unhandled page fault in the kernel
4601 				panic("vm_page_fault: unhandled page fault in kernel space at "
4602 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4603 			}
4604 		} else {
4605 			Thread* thread = thread_get_current_thread();
4606 
4607 #ifdef TRACE_FAULTS
4608 			VMArea* area = NULL;
4609 			if (addressSpace != NULL) {
4610 				addressSpace->ReadLock();
4611 				area = addressSpace->LookupArea(faultAddress);
4612 			}
4613 
4614 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4615 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4616 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4617 				thread->team->Name(), thread->team->id,
4618 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4619 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4620 					area->Base() : 0x0));
4621 
4622 			if (addressSpace != NULL)
4623 				addressSpace->ReadUnlock();
4624 #endif
4625 
4626 			// If the thread has a signal handler for SIGSEGV, we simply
4627 			// send it the signal. Otherwise we notify the user debugger
4628 			// first.
4629 			struct sigaction action;
4630 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4631 					&& action.sa_handler != SIG_DFL
4632 					&& action.sa_handler != SIG_IGN)
4633 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4634 					SIGSEGV)) {
4635 				Signal signal(SIGSEGV,
4636 					status == B_PERMISSION_DENIED
4637 						? SEGV_ACCERR : SEGV_MAPERR,
4638 					EFAULT, thread->team->id);
4639 				signal.SetAddress((void*)address);
4640 				send_signal_to_thread(thread, signal, 0);
4641 			}
4642 		}
4643 	}
4644 
4645 	if (addressSpace != NULL)
4646 		addressSpace->Put();
4647 
4648 	return B_HANDLED_INTERRUPT;
4649 }
4650 
4651 
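/*!	Bundles the state needed while resolving a single page fault: the read
	lock on the faulting address space, the lock on the cache chain being
	inspected, and a page reservation. The destructor unlocks everything and
	returns any unused reserved pages.
*/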
4652 struct PageFaultContext {
4653 	AddressSpaceReadLocker	addressSpaceLocker;
4654 	VMCacheChainLocker		cacheChainLocker;
4655 
4656 	VMTranslationMap*		map;
4657 	VMCache*				topCache;
4658 	off_t					cacheOffset;
4659 	vm_page_reservation		reservation;
4660 	bool					isWrite;
4661 
4662 	// return values
4663 	vm_page*				page;
4664 	bool					restart;
4665 	bool					pageAllocated;
4666 
4667 
4668 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4669 		:
4670 		addressSpaceLocker(addressSpace, true),
4671 		map(addressSpace->TranslationMap()),
4672 		isWrite(isWrite)
4673 	{
4674 	}
4675 
4676 	~PageFaultContext()
4677 	{
4678 		UnlockAll();
4679 		vm_page_unreserve_pages(&reservation);
4680 	}
4681 
4682 	void Prepare(VMCache* topCache, off_t cacheOffset)
4683 	{
4684 		this->topCache = topCache;
4685 		this->cacheOffset = cacheOffset;
4686 		page = NULL;
4687 		restart = false;
4688 		pageAllocated = false;
4689 
4690 		cacheChainLocker.SetTo(topCache);
4691 	}
4692 
4693 	void UnlockAll(VMCache* exceptCache = NULL)
4694 	{
4695 		topCache = NULL;
4696 		addressSpaceLocker.Unlock();
4697 		cacheChainLocker.Unlock(exceptCache);
4698 	}
4699 };
4700 
4701 
4702 /*!	Gets the page that should be mapped into the area.
4703 	Returns an error code other than \c B_OK, if the page couldn't be found or
4704 	paged in. The locking state of the address space and the caches is undefined
4705 	in that case.
4706 	Returns \c B_OK with \c context.restart set to \c true, if the function
4707 	had to unlock the address space and all caches and is supposed to be called
4708 	again.
4709 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4710 	found. It is returned in \c context.page. The address space will still be
4711 	locked as well as all caches starting from the top cache to at least the
4712 	cache the page lives in.
4713 */
4714 static status_t
4715 fault_get_page(PageFaultContext& context)
4716 {
4717 	VMCache* cache = context.topCache;
4718 	VMCache* lastCache = NULL;
4719 	vm_page* page = NULL;
4720 
4721 	while (cache != NULL) {
4722 		// We already hold the lock of the cache at this point.
4723 
4724 		lastCache = cache;
4725 
4726 		page = cache->LookupPage(context.cacheOffset);
4727 		if (page != NULL && page->busy) {
4728 			// the page is busy -- wait for it to become unbusy
4729 			context.UnlockAll(cache);
4730 			cache->ReleaseRefLocked();
4731 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4732 
4733 			// restart the whole process
4734 			context.restart = true;
4735 			return B_OK;
4736 		}
4737 
4738 		if (page != NULL)
4739 			break;
4740 
4741 		// The current cache does not contain the page we're looking for.
4742 
4743 		// see if the backing store has it
4744 		if (cache->HasPage(context.cacheOffset)) {
4745 			// insert a fresh page and mark it busy -- we're going to read it in
4746 			page = vm_page_allocate_page(&context.reservation,
4747 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4748 			cache->InsertPage(page, context.cacheOffset);
4749 
4750 			// We need to unlock all caches and the address space while reading
4751 			// the page in. Keep a reference to the cache around.
4752 			cache->AcquireRefLocked();
4753 			context.UnlockAll();
4754 
4755 			// read the page in
4756 			generic_io_vec vec;
4757 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4758 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4759 
4760 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4761 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4762 
4763 			cache->Lock();
4764 
4765 			if (status < B_OK) {
4766 				// on error remove and free the page
4767 				dprintf("reading page from cache %p returned: %s!\n",
4768 					cache, strerror(status));
4769 
4770 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4771 				cache->RemovePage(page);
4772 				vm_page_set_state(page, PAGE_STATE_FREE);
4773 
4774 				cache->ReleaseRefAndUnlock();
4775 				return status;
4776 			}
4777 
4778 			// mark the page unbusy again
4779 			cache->MarkPageUnbusy(page);
4780 
4781 			DEBUG_PAGE_ACCESS_END(page);
4782 
4783 			// Since we needed to unlock everything temporarily, the area
4784 			// situation might have changed. So we need to restart the whole
4785 			// process.
4786 			cache->ReleaseRefAndUnlock();
4787 			context.restart = true;
4788 			return B_OK;
4789 		}
4790 
4791 		cache = context.cacheChainLocker.LockSourceCache();
4792 	}
4793 
4794 	if (page == NULL) {
4795 		// There was no adequate page; determine the cache for a clean one.
4796 		// Read-only pages go into the deepest cache; only the topmost cache
4797 		// may have direct write access.
4798 		cache = context.isWrite ? context.topCache : lastCache;
4799 
4800 		// allocate a clean page
4801 		page = vm_page_allocate_page(&context.reservation,
4802 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4803 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4804 			page->physical_page_number));
4805 
4806 		// insert the new page into our cache
4807 		cache->InsertPage(page, context.cacheOffset);
4808 		context.pageAllocated = true;
4809 	} else if (page->Cache() != context.topCache && context.isWrite) {
4810 		// We have a page that has the data we want, but in the wrong cache
4811 		// object so we need to copy it and stick it into the top cache.
4812 		vm_page* sourcePage = page;
4813 
4814 		// TODO: If memory is low, it might be a good idea to steal the page
4815 		// from our source cache -- if possible, that is.
4816 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4817 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4818 
4819 		// To not needlessly kill concurrency we unlock all caches but the top
4820 		// one while copying the page. Lacking another mechanism to ensure that
4821 		// the source page doesn't disappear, we mark it busy.
4822 		sourcePage->busy = true;
4823 		context.cacheChainLocker.UnlockKeepRefs(true);
4824 
4825 		// copy the page
4826 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4827 			sourcePage->physical_page_number * B_PAGE_SIZE);
4828 
4829 		context.cacheChainLocker.RelockCaches(true);
4830 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4831 
4832 		// insert the new page into our cache
4833 		context.topCache->InsertPage(page, context.cacheOffset);
4834 		context.pageAllocated = true;
4835 	} else
4836 		DEBUG_PAGE_ACCESS_START(page);
4837 
4838 	context.page = page;
4839 	return B_OK;
4840 }
4841 
4842 
4843 /*!	Makes sure the address in the given address space is mapped.
4844 
4845 	\param addressSpace The address space.
4846 	\param originalAddress The address. Doesn't need to be page aligned.
4847 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4848 	\param isUser If \c true the access is requested by a userland team.
4849 	\param wirePage On success, if non \c NULL, the wired count of the page
4850 		mapped at the given address is incremented and the page is returned
4851 		via this parameter.
4852 	\return \c B_OK on success, another error code otherwise.
4853 */
4854 static status_t
4855 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4856 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4857 {
4858 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4859 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4860 		originalAddress, isWrite, isUser));
4861 
4862 	PageFaultContext context(addressSpace, isWrite);
4863 
4864 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4865 	status_t status = B_OK;
4866 
4867 	addressSpace->IncrementFaultCount();
4868 
4869 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4870 	// the pages upfront makes sure we don't have any cache locked, so that the
4871 	// page daemon/thief can do their job without problems.
4872 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4873 		originalAddress);
4874 	context.addressSpaceLocker.Unlock();
4875 	vm_page_reserve_pages(&context.reservation, reservePages,
4876 		addressSpace == VMAddressSpace::Kernel()
4877 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4878 
4879 	while (true) {
4880 		context.addressSpaceLocker.Lock();
4881 
4882 		// get the area the fault was in
4883 		VMArea* area = addressSpace->LookupArea(address);
4884 		if (area == NULL) {
4885 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4886 				"space\n", originalAddress);
4887 			TPF(PageFaultError(-1,
4888 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4889 			status = B_BAD_ADDRESS;
4890 			break;
4891 		}
4892 
4893 		// check permissions
4894 		uint32 protection = get_area_page_protection(area, address);
4895 		if (isUser && (protection & B_USER_PROTECTION) == 0
4896 				&& (area->protection & B_KERNEL_AREA) != 0) {
4897 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4898 				area->id, (void*)originalAddress);
4899 			TPF(PageFaultError(area->id,
4900 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4901 			status = B_PERMISSION_DENIED;
4902 			break;
4903 		}
4904 		if (isWrite && (protection
4905 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4906 			dprintf("write access attempted on write-protected area 0x%"
4907 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4908 			TPF(PageFaultError(area->id,
4909 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4910 			status = B_PERMISSION_DENIED;
4911 			break;
4912 		} else if (isExecute && (protection
4913 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4914 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4915 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4916 			TPF(PageFaultError(area->id,
4917 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4918 			status = B_PERMISSION_DENIED;
4919 			break;
4920 		} else if (!isWrite && !isExecute && (protection
4921 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4922 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4923 				" at %p\n", area->id, (void*)originalAddress);
4924 			TPF(PageFaultError(area->id,
4925 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4926 			status = B_PERMISSION_DENIED;
4927 			break;
4928 		}
4929 
4930 		// We have the area, it was a valid access, so let's try to resolve the
4931 		// page fault now.
4932 		// First, the topmost cache of the area is investigated.
4933 
4934 		context.Prepare(vm_area_get_locked_cache(area),
4935 			address - area->Base() + area->cache_offset);
4936 
4937 		// See if this cache has a fault handler -- this will do all the work
4938 		// for us.
4939 		{
4940 			// Note, since the page fault is resolved with interrupts enabled,
4941 			// the fault handler could be called more than once for the same
4942 			// reason -- the store must take this into account.
4943 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4944 			if (status != B_BAD_HANDLER)
4945 				break;
4946 		}
4947 
4948 		// The top most cache has no fault handler, so let's see if the cache or
4949 		// its sources already have the page we're searching for (we're going
4950 		// from top to bottom).
4951 		status = fault_get_page(context);
4952 		if (status != B_OK) {
4953 			TPF(PageFaultError(area->id, status));
4954 			break;
4955 		}
4956 
4957 		if (context.restart)
4958 			continue;
4959 
4960 		// All went fine; all that is left to do is to map the page into the
4961 		// address space.
4962 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4963 			context.page));
4964 
4965 		// If the page doesn't reside in the area's cache, we need to make sure
4966 		// it's mapped read-only, so that we cannot overwrite someone else's
4967 		// data (copy-on-write)
4968 		uint32 newProtection = protection;
4969 		if (context.page->Cache() != context.topCache && !isWrite)
4970 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4971 
4972 		bool unmapPage = false;
4973 		bool mapPage = true;
4974 
4975 		// check whether there's already a page mapped at the address
4976 		context.map->Lock();
4977 
4978 		phys_addr_t physicalAddress;
4979 		uint32 flags;
4980 		vm_page* mappedPage = NULL;
4981 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4982 			&& (flags & PAGE_PRESENT) != 0
4983 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4984 				!= NULL) {
4985 			// Yep there's already a page. If it's ours, we can simply adjust
4986 			// its protection. Otherwise we have to unmap it.
4987 			if (mappedPage == context.page) {
4988 				context.map->ProtectPage(area, address, newProtection);
4989 					// Note: We assume that ProtectPage() is atomic (i.e.
4990 					// the page isn't temporarily unmapped), otherwise we'd have
4991 					// to make sure it isn't wired.
4992 				mapPage = false;
4993 			} else
4994 				unmapPage = true;
4995 		}
4996 
4997 		context.map->Unlock();
4998 
4999 		if (unmapPage) {
5000 			// If the page is wired, we can't unmap it. Wait until it is unwired
5001 			// again and restart. Note that the page cannot be wired for
5002 			// writing, since it isn't in the topmost cache. So we can safely
5003 			// ignore ranges wired for writing (our own and other concurrent
5004 			// wiring attempts in progress) and in fact have to do that to avoid
5005 			// a deadlock.
5006 			VMAreaUnwiredWaiter waiter;
5007 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5008 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5009 				// unlock everything and wait
5010 				if (context.pageAllocated) {
5011 					// ... but since we allocated a page and inserted it into
5012 					// the top cache, remove and free it first. Otherwise we'd
5013 					// have a page from a lower cache mapped while an upper
5014 					// cache has a page that would shadow it.
5015 					context.topCache->RemovePage(context.page);
5016 					vm_page_free_etc(context.topCache, context.page,
5017 						&context.reservation);
5018 				} else
5019 					DEBUG_PAGE_ACCESS_END(context.page);
5020 
5021 				context.UnlockAll();
5022 				waiter.waitEntry.Wait();
5023 				continue;
5024 			}
5025 
5026 			// Note: The mapped page is a page of a lower cache. We are
5027 			// guaranteed to have that cache locked, our new page is a copy of
5028 			// that page, and the page is not busy. The logic for that guarantee
5029 			// is as follows: Since the page is mapped, it must live in the top
5030 			// cache (ruled out above) or any of its lower caches, and there is
5031 			// (was before the new page was inserted) no other page in any
5032 			// cache between the top cache and the page's cache (otherwise that
5033 			// would be mapped instead). That in turn means that our algorithm
5034 			// must have found it and therefore it cannot be busy either.
5035 			DEBUG_PAGE_ACCESS_START(mappedPage);
5036 			unmap_page(area, address);
5037 			DEBUG_PAGE_ACCESS_END(mappedPage);
5038 		}
5039 
5040 		if (mapPage) {
5041 			if (map_page(area, context.page, address, newProtection,
5042 					&context.reservation) != B_OK) {
5043 				// Mapping can only fail when the page mapping object couldn't
5044 				// be allocated. Save for the missing mapping everything is
5045 				// fine, though. If this was a regular page fault, we'll simply
5046 				// leave and probably fault again. To make sure we'll have more
5047 				// luck then, we ensure that the minimum object reserve is
5048 				// available.
5049 				DEBUG_PAGE_ACCESS_END(context.page);
5050 
5051 				context.UnlockAll();
5052 
5053 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
5054 						!= B_OK) {
5055 					// Apparently the situation is serious. Let's get ourselves
5056 					// killed.
5057 					status = B_NO_MEMORY;
5058 				} else if (wirePage != NULL) {
5059 					// The caller expects us to wire the page. Since
5060 					// object_cache_reserve() succeeded, we should now be able
5061 					// to allocate a mapping structure. Restart.
5062 					continue;
5063 				}
5064 
5065 				break;
5066 			}
5067 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5068 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5069 
5070 		// also wire the page, if requested
5071 		if (wirePage != NULL && status == B_OK) {
5072 			increment_page_wired_count(context.page);
5073 			*wirePage = context.page;
5074 		}
5075 
5076 		DEBUG_PAGE_ACCESS_END(context.page);
5077 
5078 		break;
5079 	}
5080 
5081 	return status;
5082 }
5083 
5084 
5085 status_t
5086 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5087 {
5088 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5089 }
5090 
5091 status_t
5092 vm_put_physical_page(addr_t vaddr, void* handle)
5093 {
5094 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5095 }
5096 
5097 
5098 status_t
5099 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5100 	void** _handle)
5101 {
5102 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5103 }
5104 
5105 status_t
5106 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5107 {
5108 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5109 }
5110 
5111 
5112 status_t
5113 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5114 {
5115 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5116 }
5117 
5118 status_t
5119 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5120 {
5121 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5122 }
5123 
5124 
5125 void
5126 vm_get_info(system_info* info)
5127 {
5128 	swap_get_info(info);
5129 
5130 	MutexLocker locker(sAvailableMemoryLock);
5131 	info->needed_memory = sNeededMemory;
5132 	info->free_memory = sAvailableMemory;
5133 }
5134 
5135 
5136 uint32
5137 vm_num_page_faults(void)
5138 {
5139 	return sPageFaults;
5140 }
5141 
5142 
5143 off_t
5144 vm_available_memory(void)
5145 {
5146 	MutexLocker locker(sAvailableMemoryLock);
5147 	return sAvailableMemory;
5148 }
5149 
5150 
5151 off_t
5152 vm_available_not_needed_memory(void)
5153 {
5154 	MutexLocker locker(sAvailableMemoryLock);
5155 	return sAvailableMemory - sNeededMemory;
5156 }
5157 
5158 
5159 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5160 	debugger.
5161 */
5162 off_t
5163 vm_available_not_needed_memory_debug(void)
5164 {
5165 	return sAvailableMemory - sNeededMemory;
5166 }
5167 
5168 
5169 size_t
5170 vm_kernel_address_space_left(void)
5171 {
5172 	return VMAddressSpace::Kernel()->FreeSpace();
5173 }
5174 
5175 
5176 void
5177 vm_unreserve_memory(size_t amount)
5178 {
5179 	mutex_lock(&sAvailableMemoryLock);
5180 
5181 	sAvailableMemory += amount;
5182 
5183 	mutex_unlock(&sAvailableMemoryLock);
5184 }
5185 
5186 
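     /*!	Tries to reserve \a amount bytes of memory, keeping at least the
     	reserve associated with \a priority untouched. If the memory isn't
     	available immediately and \a timeout is positive, the low resource
     	manager is notified and the function waits (up to the absolute deadline)
     	for memory to become available again.
     	A successful reservation is balanced by vm_unreserve_memory().
     */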
5187 status_t
5188 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5189 {
5190 	size_t reserve = kMemoryReserveForPriority[priority];
5191 
5192 	MutexLocker locker(sAvailableMemoryLock);
5193 
5194 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5195 
5196 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5197 		sAvailableMemory -= amount;
5198 		return B_OK;
5199 	}
5200 
5201 	if (timeout <= 0)
5202 		return B_NO_MEMORY;
5203 
5204 	// turn timeout into an absolute timeout
5205 	timeout += system_time();
5206 
5207 	// loop until we've got the memory or the timeout occurs
5208 	do {
5209 		sNeededMemory += amount;
5210 
5211 		// call the low resource manager
5212 		locker.Unlock();
5213 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5214 			B_ABSOLUTE_TIMEOUT, timeout);
5215 		locker.Lock();
5216 
5217 		sNeededMemory -= amount;
5218 
5219 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5220 			sAvailableMemory -= amount;
5221 			return B_OK;
5222 		}
5223 	} while (timeout > system_time());
5224 
5225 	return B_NO_MEMORY;
5226 }
5227 
5228 
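     /*!	Sets the memory type of the given area, both for the already mapped
     	pages (via the translation map) and, through arch_vm_set_memory_type(),
     	for the physical range starting at \a physicalBase. On failure the
     	previous memory type is restored.
     */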
5229 status_t
5230 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5231 {
5232 	// NOTE: The caller is responsible for synchronizing calls to this function!
5233 
5234 	AddressSpaceReadLocker locker;
5235 	VMArea* area;
5236 	status_t status = locker.SetFromArea(id, area);
5237 	if (status != B_OK)
5238 		return status;
5239 
5240 	// nothing to do, if the type doesn't change
5241 	uint32 oldType = area->MemoryType();
5242 	if (type == oldType)
5243 		return B_OK;
5244 
5245 	// set the memory type of the area and the mapped pages
5246 	VMTranslationMap* map = area->address_space->TranslationMap();
5247 	map->Lock();
5248 	area->SetMemoryType(type);
5249 	map->ProtectArea(area, area->protection);
5250 	map->Unlock();
5251 
5252 	// set the physical memory type
5253 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5254 	if (error != B_OK) {
5255 		// reset the memory type of the area and the mapped pages
5256 		map->Lock();
5257 		area->SetMemoryType(oldType);
5258 		map->ProtectArea(area, area->protection);
5259 		map->Unlock();
5260 		return error;
5261 	}
5262 
5263 	return B_OK;
5264 
5265 }
5266 
5267 
5268 /*!	This function enforces some protection properties:
5269 	 - kernel areas must be W^X (after kernel startup)
5270 	 - if no kernel protection is given, B_KERNEL_WRITE_AREA is set when
5271 	   B_WRITE_AREA is, and B_KERNEL_READ_AREA when B_READ_AREA is
5272 */
5273 static void
5274 fix_protection(uint32* protection)
5275 {
5276 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5277 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5278 			|| (*protection & B_WRITE_AREA) != 0)
5279 		&& !gKernelStartup)
5280 		panic("kernel areas cannot be both writable and executable!");
5281 
5282 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5283 		if ((*protection & B_WRITE_AREA) != 0)
5284 			*protection |= B_KERNEL_WRITE_AREA;
5285 		if ((*protection & B_READ_AREA) != 0)
5286 			*protection |= B_KERNEL_READ_AREA;
5287 	}
5288 }
5289 
5290 
5291 static void
5292 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5293 {
5294 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5295 	info->area = area->id;
5296 	info->address = (void*)area->Base();
5297 	info->size = area->Size();
5298 	info->protection = area->protection;
5299 	info->lock = area->wiring;
5300 	info->team = area->address_space->ID();
5301 	info->copy_count = 0;
5302 	info->in_count = 0;
5303 	info->out_count = 0;
5304 		// TODO: retrieve real values here!
5305 
5306 	VMCache* cache = vm_area_get_locked_cache(area);
5307 
5308 	// Note, this is a simplification; the cache could be larger than this area
5309 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5310 
5311 	vm_area_put_locked_cache(cache);
5312 }
5313 
5314 
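     /*!	Resizes the area with ID \a areaID to \a newSize. All areas attached
     	to the area's (RAM) cache are resized along with it: growing requires
     	that every attached area can be enlarged, shrinking requires that the
     	range being cut off is not wired. \a kernel specifies whether the
     	caller is the kernel, which is required for resizing kernel areas.
     */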
5315 static status_t
5316 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5317 {
5318 	// is newSize a multiple of B_PAGE_SIZE?
5319 	if (newSize & (B_PAGE_SIZE - 1))
5320 		return B_BAD_VALUE;
5321 
5322 	// lock all affected address spaces and the cache
5323 	VMArea* area;
5324 	VMCache* cache;
5325 
5326 	MultiAddressSpaceLocker locker;
5327 	AreaCacheLocker cacheLocker;
5328 
5329 	status_t status;
5330 	size_t oldSize;
5331 	bool anyKernelArea;
5332 	bool restart;
5333 
5334 	do {
5335 		anyKernelArea = false;
5336 		restart = false;
5337 
5338 		locker.Unset();
5339 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5340 		if (status != B_OK)
5341 			return status;
5342 		cacheLocker.SetTo(cache, true);	// already locked
5343 
5344 		// enforce restrictions
5345 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5346 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5347 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5348 				"resize kernel area %" B_PRId32 " (%s)\n",
5349 				team_get_current_team_id(), areaID, area->name);
5350 			return B_NOT_ALLOWED;
5351 		}
5352 		// TODO: Enforce all restrictions (team, etc.)!
5353 
5354 		oldSize = area->Size();
5355 		if (newSize == oldSize)
5356 			return B_OK;
5357 
5358 		if (cache->type != CACHE_TYPE_RAM)
5359 			return B_NOT_ALLOWED;
5360 
5361 		if (oldSize < newSize) {
5362 			// We need to check if all areas of this cache can be resized.
5363 			for (VMArea* current = cache->areas; current != NULL;
5364 					current = current->cache_next) {
5365 				if (!current->address_space->CanResizeArea(current, newSize))
5366 					return B_ERROR;
5367 				anyKernelArea
5368 					|= current->address_space == VMAddressSpace::Kernel();
5369 			}
5370 		} else {
5371 			// We're shrinking the areas, so we must make sure the affected
5372 			// ranges are not wired.
5373 			for (VMArea* current = cache->areas; current != NULL;
5374 					current = current->cache_next) {
5375 				anyKernelArea
5376 					|= current->address_space == VMAddressSpace::Kernel();
5377 
5378 				if (wait_if_area_range_is_wired(current,
5379 						current->Base() + newSize, oldSize - newSize, &locker,
5380 						&cacheLocker)) {
5381 					restart = true;
5382 					break;
5383 				}
5384 			}
5385 		}
5386 	} while (restart);
5387 
5388 	// Okay, looks good so far, so let's do it
5389 
5390 	int priority = kernel && anyKernelArea
5391 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5392 	uint32 allocationFlags = kernel && anyKernelArea
5393 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5394 
5395 	if (oldSize < newSize) {
5396 		// Growing the cache can fail, so we do it first.
5397 		status = cache->Resize(cache->virtual_base + newSize, priority);
5398 		if (status != B_OK)
5399 			return status;
5400 	}
5401 
5402 	for (VMArea* current = cache->areas; current != NULL;
5403 			current = current->cache_next) {
5404 		status = current->address_space->ResizeArea(current, newSize,
5405 			allocationFlags);
5406 		if (status != B_OK)
5407 			break;
5408 
5409 		// If the area has shrunk, we also need to unmap all pages beyond the
5410 		// new size.
5411 		if (newSize < oldSize) {
5412 			VMCacheChainLocker cacheChainLocker(cache);
5413 			cacheChainLocker.LockAllSourceCaches();
5414 
5415 			unmap_pages(current, current->Base() + newSize,
5416 				oldSize - newSize);
5417 
5418 			cacheChainLocker.Unlock(cache);
5419 		}
5420 	}
5421 
5422 	if (status == B_OK) {
5423 		// Shrink or grow individual page protections if in use.
5424 		if (area->page_protections != NULL) {
5425 			size_t bytes = area_page_protections_size(newSize);
5426 			uint8* newProtections
5427 				= (uint8*)realloc(area->page_protections, bytes);
5428 			if (newProtections == NULL)
5429 				status = B_NO_MEMORY;
5430 			else {
5431 				area->page_protections = newProtections;
5432 
5433 				if (oldSize < newSize) {
5434 					// init the additional page protections to that of the area
5435 					uint32 offset = area_page_protections_size(oldSize);
5436 					uint32 areaProtection = area->protection
5437 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5438 					memset(area->page_protections + offset,
5439 						areaProtection | (areaProtection << 4), bytes - offset);
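     					// Page protections are stored as 4 bit values, two pages
     					// per byte. If the old size ended in the middle of a byte,
     					// the high nibble of that byte belongs to the first added
     					// page and is fixed up separately below.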
5440 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5441 						uint8& entry = area->page_protections[offset - 1];
5442 						entry = (entry & 0x0f) | (areaProtection << 4);
5443 					}
5444 				}
5445 			}
5446 		}
5447 	}
5448 
5449 	// shrinking the cache can't fail, so we do it now
5450 	if (status == B_OK && newSize < oldSize)
5451 		status = cache->Resize(cache->virtual_base + newSize, priority);
5452 
5453 	if (status != B_OK) {
5454 		// Something failed -- resize the areas back to their original size.
5455 		// This can fail, too, in which case we're seriously screwed.
5456 		for (VMArea* current = cache->areas; current != NULL;
5457 				current = current->cache_next) {
5458 			if (current->address_space->ResizeArea(current, oldSize,
5459 					allocationFlags) != B_OK) {
5460 				panic("vm_resize_area(): Failed and unable to restore the "
5461 					"original state.");
5462 			}
5463 		}
5464 
5465 		cache->Resize(cache->virtual_base + oldSize, priority);
5466 	}
5467 
5468 	// TODO: we must honour the lock restrictions of this area
5469 	return status;
5470 }
5471 
5472 
5473 status_t
5474 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5475 {
5476 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5477 }
5478 
5479 
5480 status_t
5481 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5482 {
5483 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5484 }
5485 
5486 
5487 status_t
5488 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5489 	bool user)
5490 {
5491 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5492 }
5493 
5494 
5495 void
5496 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5497 {
5498 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5499 }
5500 
5501 
5502 /*!	Copies a range of memory directly from/to a page that might not be mapped
5503 	at the moment.
5504 
5505 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5506 	walks through the respective area's cache chain to find the physical page
5507 	and copies from/to it directly.
5508 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5509 	must not cross a page boundary.
5510 
5511 	\param teamID The team ID identifying the address space \a unsafeMemory is
5512 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5513 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5514 		is passed, the address space of the thread returned by
5515 		debug_get_debugged_thread() is used.
5516 	\param unsafeMemory The start of the unsafe memory range to be copied
5517 		from/to.
5518 	\param buffer A safely accessible kernel buffer to be copied from/to.
5519 	\param size The number of bytes to be copied.
5520 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5521 		\a unsafeMemory, the other way around otherwise.
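
     	A minimal usage sketch (hypothetical, reading 16 bytes at a debugged
     	address that does not cross a page boundary):
     		char buffer[16];
     		status_t error = vm_debug_copy_page_memory(B_CURRENT_TEAM,
     			(void*)debuggedAddress, buffer, sizeof(buffer), false);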
5522 */
5523 status_t
5524 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5525 	size_t size, bool copyToUnsafe)
5526 {
5527 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5528 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5529 		return B_BAD_VALUE;
5530 	}
5531 
5532 	// get the address space for the debugged thread
5533 	VMAddressSpace* addressSpace;
5534 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5535 		addressSpace = VMAddressSpace::Kernel();
5536 	} else if (teamID == B_CURRENT_TEAM) {
5537 		Thread* thread = debug_get_debugged_thread();
5538 		if (thread == NULL || thread->team == NULL)
5539 			return B_BAD_ADDRESS;
5540 
5541 		addressSpace = thread->team->address_space;
5542 	} else
5543 		addressSpace = VMAddressSpace::DebugGet(teamID);
5544 
5545 	if (addressSpace == NULL)
5546 		return B_BAD_ADDRESS;
5547 
5548 	// get the area
5549 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5550 	if (area == NULL)
5551 		return B_BAD_ADDRESS;
5552 
5553 	// search the page
5554 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5555 		+ area->cache_offset;
5556 	VMCache* cache = area->cache;
5557 	vm_page* page = NULL;
5558 	while (cache != NULL) {
5559 		page = cache->DebugLookupPage(cacheOffset);
5560 		if (page != NULL)
5561 			break;
5562 
5563 		// Page not found in this cache -- if it is paged out, we must not try
5564 		// to get it from lower caches.
5565 		if (cache->DebugHasPage(cacheOffset))
5566 			break;
5567 
5568 		cache = cache->source;
5569 	}
5570 
5571 	if (page == NULL)
5572 		return B_UNSUPPORTED;
5573 
5574 	// copy from/to physical memory
5575 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5576 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5577 
5578 	if (copyToUnsafe) {
5579 		if (page->Cache() != area->cache)
5580 			return B_UNSUPPORTED;
5581 
5582 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5583 	}
5584 
5585 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5586 }
5587 
5588 
5589 /*!	Validates that a memory range is either fully in kernel space, or fully
5590 	in userspace. */
5591 static inline bool
5592 validate_memory_range(const void* addr, size_t size)
5593 {
5594 	addr_t address = (addr_t)addr;
5595 
5596 	// Check for overflows on all addresses.
5597 	if ((address + size) < address)
5598 		return false;
5599 
5600 	// Validate that the address range does not cross the kernel/user boundary.
5601 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5602 }
5603 
5604 
5605 //	#pragma mark - kernel public API
5606 
5607 
5608 status_t
5609 user_memcpy(void* to, const void* from, size_t size)
5610 {
5611 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5612 		return B_BAD_ADDRESS;
5613 
5614 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5615 		return B_BAD_ADDRESS;
5616 
5617 	return B_OK;
5618 }
5619 
5620 
5621 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5622 	the string in \a to, NULL-terminating the result.
5623 
5624 	\param to Pointer to the destination C-string.
5625 	\param from Pointer to the source C-string.
5626 	\param size Size in bytes of the string buffer pointed to by \a to.
5627 
5628 	\return strlen(\a from).
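
     	Typical use in a syscall (sketch, mirroring callers elsewhere in this
     	file):
     		char name[B_OS_NAME_LENGTH];
     		if (user_strlcpy(name, userName, sizeof(name)) < B_OK)
     			return B_BAD_ADDRESS;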
5629 */
5630 ssize_t
5631 user_strlcpy(char* to, const char* from, size_t size)
5632 {
5633 	if (to == NULL && size != 0)
5634 		return B_BAD_VALUE;
5635 	if (from == NULL)
5636 		return B_BAD_ADDRESS;
5637 
5638 	// Protect the source address from overflows.
5639 	size_t maxSize = size;
5640 	if ((addr_t)from + maxSize < (addr_t)from)
5641 		maxSize -= (addr_t)from + maxSize;
5642 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5643 		maxSize = USER_TOP - (addr_t)from;
5644 
5645 	if (!validate_memory_range(to, maxSize))
5646 		return B_BAD_ADDRESS;
5647 
5648 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5649 	if (result < 0)
5650 		return result;
5651 
5652 	// If we hit the address overflow boundary, fail.
5653 	if ((size_t)result >= maxSize && maxSize < size)
5654 		return B_BAD_ADDRESS;
5655 
5656 	return result;
5657 }
5658 
5659 
5660 status_t
5661 user_memset(void* s, char c, size_t count)
5662 {
5663 	if (!validate_memory_range(s, count))
5664 		return B_BAD_ADDRESS;
5665 
5666 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5667 		return B_BAD_ADDRESS;
5668 
5669 	return B_OK;
5670 }
5671 
5672 
5673 /*!	Wires a single page at the given address.
5674 
5675 	\param team The team whose address space the address belongs to. Supports
5676 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5677 		parameter is ignored.
5678 	\param address The virtual address to wire down. Does not need to
5679 		be page aligned.
5680 	\param writable If \c true the page shall be writable.
5681 	\param info On success filled in with, among other things, the physical
5682 		address the given virtual address translates to.
5683 	\return \c B_OK if the page could be wired, another error code otherwise.
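
     	Usage sketch (hypothetical driver code; balanced by vm_unwire_page()):
     		VMPageWiringInfo info;
     		if (vm_wire_page(B_CURRENT_TEAM, (addr_t)userBuffer, true,
     				&info) == B_OK) {
     			// info.physicalAddress now refers to the wired page
     			vm_unwire_page(&info);
     		}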
5684 */
5685 status_t
5686 vm_wire_page(team_id team, addr_t address, bool writable,
5687 	VMPageWiringInfo* info)
5688 {
5689 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5690 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5691 
5692 	// compute the page protection that is required
5693 	bool isUser = IS_USER_ADDRESS(address);
5694 	uint32 requiredProtection = PAGE_PRESENT
5695 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5696 	if (writable)
5697 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5698 
5699 	// get and read lock the address space
5700 	VMAddressSpace* addressSpace = NULL;
5701 	if (isUser) {
5702 		if (team == B_CURRENT_TEAM)
5703 			addressSpace = VMAddressSpace::GetCurrent();
5704 		else
5705 			addressSpace = VMAddressSpace::Get(team);
5706 	} else
5707 		addressSpace = VMAddressSpace::GetKernel();
5708 	if (addressSpace == NULL)
5709 		return B_ERROR;
5710 
5711 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5712 
5713 	VMTranslationMap* map = addressSpace->TranslationMap();
5714 	status_t error = B_OK;
5715 
5716 	// get the area
5717 	VMArea* area = addressSpace->LookupArea(pageAddress);
5718 	if (area == NULL) {
5719 		addressSpace->Put();
5720 		return B_BAD_ADDRESS;
5721 	}
5722 
5723 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5724 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5725 
5726 	// mark the area range wired
5727 	area->Wire(&info->range);
5728 
5729 	// Lock the area's cache chain and the translation map. Needed to look
5730 	// up the page and play with its wired count.
5731 	cacheChainLocker.LockAllSourceCaches();
5732 	map->Lock();
5733 
5734 	phys_addr_t physicalAddress;
5735 	uint32 flags;
5736 	vm_page* page;
5737 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5738 		&& (flags & requiredProtection) == requiredProtection
5739 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5740 			!= NULL) {
5741 		// Already mapped with the correct permissions -- just increment
5742 		// the page's wired count.
5743 		increment_page_wired_count(page);
5744 
5745 		map->Unlock();
5746 		cacheChainLocker.Unlock();
5747 		addressSpaceLocker.Unlock();
5748 	} else {
5749 		// Let vm_soft_fault() map the page for us, if possible. We need
5750 		// to fully unlock to avoid deadlocks. Since we have already
5751 		// wired the area itself, nothing disturbing will happen with it
5752 		// in the meantime.
5753 		map->Unlock();
5754 		cacheChainLocker.Unlock();
5755 		addressSpaceLocker.Unlock();
5756 
5757 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5758 			isUser, &page);
5759 
5760 		if (error != B_OK) {
5761 			// The page could not be mapped -- clean up.
5762 			VMCache* cache = vm_area_get_locked_cache(area);
5763 			area->Unwire(&info->range);
5764 			cache->ReleaseRefAndUnlock();
5765 			addressSpace->Put();
5766 			return error;
5767 		}
5768 	}
5769 
5770 	info->physicalAddress
5771 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5772 			+ address % B_PAGE_SIZE;
5773 	info->page = page;
5774 
5775 	return B_OK;
5776 }
5777 
5778 
5779 /*!	Unwires a single page previously wired via vm_wire_page().
5780 
5781 	\param info The same object passed to vm_wire_page() before.
5782 */
5783 void
5784 vm_unwire_page(VMPageWiringInfo* info)
5785 {
5786 	// lock the address space
5787 	VMArea* area = info->range.area;
5788 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5789 		// takes over our reference
5790 
5791 	// lock the top cache
5792 	VMCache* cache = vm_area_get_locked_cache(area);
5793 	VMCacheChainLocker cacheChainLocker(cache);
5794 
5795 	if (info->page->Cache() != cache) {
5796 		// The page is not in the top cache, so we lock the whole cache chain
5797 		// before touching the page's wired count.
5798 		cacheChainLocker.LockAllSourceCaches();
5799 	}
5800 
5801 	decrement_page_wired_count(info->page);
5802 
5803 	// remove the wired range from the range
5804 	area->Unwire(&info->range);
5805 
5806 	cacheChainLocker.Unlock();
5807 }
5808 
5809 
5810 /*!	Wires down the given address range in the specified team's address space.
5811 
5812 	If successful the function
5813 	- acquires a reference to the specified team's address space,
5814 	- adds respective wired ranges to all areas that intersect with the given
5815 	  address range,
5816 	- makes sure all pages in the given address range are mapped with the
5817 	  requested access permissions and increments their wired count.
5818 
5819 	It fails when \a team doesn't specify a valid address space, when any part
5820 	of the specified address range is not covered by areas, when the concerned
5821 	areas don't allow mapping with the requested permissions, or when mapping
5822 	failed for another reason.
5823 
5824 	When successful, the call must be balanced by an unlock_memory_etc() call
5825 	with the exact same parameters.
5826 
5827 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5828 		is supported.
5829 	\param address The start of the address range to be wired.
5830 	\param numBytes The size of the address range to be wired.
5831 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5832 		requests that the range must be wired writable ("read from device
5833 		into memory").
5834 	\return \c B_OK on success, another error code otherwise.
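
     	Usage sketch (hypothetical driver code; \c flags is 0 or
     	\c B_READ_DEVICE as described above):
     		if (lock_memory_etc(team, buffer, length, flags) == B_OK) {
     			// ... perform the I/O on the now wired range ...
     			unlock_memory_etc(team, buffer, length, flags);
     		}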
5835 */
5836 status_t
5837 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5838 {
5839 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5840 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5841 
5842 	// compute the page protection that is required
5843 	bool isUser = IS_USER_ADDRESS(address);
5844 	bool writable = (flags & B_READ_DEVICE) == 0;
5845 	uint32 requiredProtection = PAGE_PRESENT
5846 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5847 	if (writable)
5848 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5849 
5850 	uint32 mallocFlags = isUser
5851 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5852 
5853 	// get and read lock the address space
5854 	VMAddressSpace* addressSpace = NULL;
5855 	if (isUser) {
5856 		if (team == B_CURRENT_TEAM)
5857 			addressSpace = VMAddressSpace::GetCurrent();
5858 		else
5859 			addressSpace = VMAddressSpace::Get(team);
5860 	} else
5861 		addressSpace = VMAddressSpace::GetKernel();
5862 	if (addressSpace == NULL)
5863 		return B_ERROR;
5864 
5865 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5866 		// We get a new address space reference here. The one we got above will
5867 		// be freed by unlock_memory_etc().
5868 
5869 	VMTranslationMap* map = addressSpace->TranslationMap();
5870 	status_t error = B_OK;
5871 
5872 	// iterate through all concerned areas
5873 	addr_t nextAddress = lockBaseAddress;
5874 	while (nextAddress != lockEndAddress) {
5875 		// get the next area
5876 		VMArea* area = addressSpace->LookupArea(nextAddress);
5877 		if (area == NULL) {
5878 			error = B_BAD_ADDRESS;
5879 			break;
5880 		}
5881 
5882 		addr_t areaStart = nextAddress;
5883 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5884 
5885 		// allocate the wired range (do that before locking the cache to avoid
5886 		// deadlocks)
5887 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5888 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5889 		if (range == NULL) {
5890 			error = B_NO_MEMORY;
5891 			break;
5892 		}
5893 
5894 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5895 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5896 
5897 		// mark the area range wired
5898 		area->Wire(range);
5899 
5900 		// Depending on the area cache type and the wiring, we may not need to
5901 		// look at the individual pages.
5902 		if (area->cache_type == CACHE_TYPE_NULL
5903 			|| area->cache_type == CACHE_TYPE_DEVICE
5904 			|| area->wiring == B_FULL_LOCK
5905 			|| area->wiring == B_CONTIGUOUS) {
5906 			nextAddress = areaEnd;
5907 			continue;
5908 		}
5909 
5910 		// Lock the area's cache chain and the translation map. Needed to look
5911 		// up pages and play with their wired count.
5912 		cacheChainLocker.LockAllSourceCaches();
5913 		map->Lock();
5914 
5915 		// iterate through the pages and wire them
5916 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5917 			phys_addr_t physicalAddress;
5918 			uint32 flags;
5919 
5920 			vm_page* page;
5921 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5922 				&& (flags & requiredProtection) == requiredProtection
5923 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5924 					!= NULL) {
5925 				// Already mapped with the correct permissions -- just increment
5926 				// the page's wired count.
5927 				increment_page_wired_count(page);
5928 			} else {
5929 				// Let vm_soft_fault() map the page for us, if possible. We need
5930 				// to fully unlock to avoid deadlocks. Since we have already
5931 				// wired the area itself, nothing disturbing will happen with it
5932 				// in the meantime.
5933 				map->Unlock();
5934 				cacheChainLocker.Unlock();
5935 				addressSpaceLocker.Unlock();
5936 
5937 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5938 					false, isUser, &page);
5939 
5940 				addressSpaceLocker.Lock();
5941 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5942 				cacheChainLocker.LockAllSourceCaches();
5943 				map->Lock();
5944 			}
5945 
5946 			if (error != B_OK)
5947 				break;
5948 		}
5949 
5950 		map->Unlock();
5951 
5952 		if (error == B_OK) {
5953 			cacheChainLocker.Unlock();
5954 		} else {
5955 			// An error occurred, so abort right here. If the current address
5956 			// is the first in this area, unwire the area, since we won't get
5957 			// to it when reverting what we've done so far.
5958 			if (nextAddress == areaStart) {
5959 				area->Unwire(range);
5960 				cacheChainLocker.Unlock();
5961 				range->~VMAreaWiredRange();
5962 				free_etc(range, mallocFlags);
5963 			} else
5964 				cacheChainLocker.Unlock();
5965 
5966 			break;
5967 		}
5968 	}
5969 
5970 	if (error != B_OK) {
5971 		// An error occurred, so unwire all that we've already wired. Note that
5972 		// even if not a single page was wired, unlock_memory_etc() is called
5973 		// to put the address space reference.
5974 		addressSpaceLocker.Unlock();
5975 		unlock_memory_etc(team, (void*)lockBaseAddress,
5976 			nextAddress - lockBaseAddress, flags);
5977 	}
5978 
5979 	return error;
5980 }
5981 
5982 
5983 status_t
5984 lock_memory(void* address, size_t numBytes, uint32 flags)
5985 {
5986 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5987 }
5988 
5989 
5990 /*!	Unwires an address range previously wired with lock_memory_etc().
5991 
5992 	Note that a call to this function must balance a previous lock_memory_etc()
5993 	call with exactly the same parameters.
5994 */
5995 status_t
5996 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5997 {
5998 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5999 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6000 
6001 	// compute the page protection that is required
6002 	bool isUser = IS_USER_ADDRESS(address);
6003 	bool writable = (flags & B_READ_DEVICE) == 0;
6004 	uint32 requiredProtection = PAGE_PRESENT
6005 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6006 	if (writable)
6007 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6008 
6009 	uint32 mallocFlags = isUser
6010 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6011 
6012 	// get and read lock the address space
6013 	VMAddressSpace* addressSpace = NULL;
6014 	if (isUser) {
6015 		if (team == B_CURRENT_TEAM)
6016 			addressSpace = VMAddressSpace::GetCurrent();
6017 		else
6018 			addressSpace = VMAddressSpace::Get(team);
6019 	} else
6020 		addressSpace = VMAddressSpace::GetKernel();
6021 	if (addressSpace == NULL)
6022 		return B_ERROR;
6023 
6024 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6025 		// Take over the address space reference. We don't unlock until we're
6026 		// done.
6027 
6028 	VMTranslationMap* map = addressSpace->TranslationMap();
6029 	status_t error = B_OK;
6030 
6031 	// iterate through all concerned areas
6032 	addr_t nextAddress = lockBaseAddress;
6033 	while (nextAddress != lockEndAddress) {
6034 		// get the next area
6035 		VMArea* area = addressSpace->LookupArea(nextAddress);
6036 		if (area == NULL) {
6037 			error = B_BAD_ADDRESS;
6038 			break;
6039 		}
6040 
6041 		addr_t areaStart = nextAddress;
6042 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6043 
6044 		// Lock the area's top cache. This is a requirement for
6045 		// VMArea::Unwire().
6046 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6047 
6048 		// Depending on the area cache type and the wiring, we may not need to
6049 		// look at the individual pages.
6050 		if (area->cache_type == CACHE_TYPE_NULL
6051 			|| area->cache_type == CACHE_TYPE_DEVICE
6052 			|| area->wiring == B_FULL_LOCK
6053 			|| area->wiring == B_CONTIGUOUS) {
6054 			// unwire the range (to avoid deadlocks we delete the range after
6055 			// unlocking the cache)
6056 			nextAddress = areaEnd;
6057 			VMAreaWiredRange* range = area->Unwire(areaStart,
6058 				areaEnd - areaStart, writable);
6059 			cacheChainLocker.Unlock();
6060 			if (range != NULL) {
6061 				range->~VMAreaWiredRange();
6062 				free_etc(range, mallocFlags);
6063 			}
6064 			continue;
6065 		}
6066 
6067 		// Lock the area's cache chain and the translation map. Needed to look
6068 		// up pages and play with their wired count.
6069 		cacheChainLocker.LockAllSourceCaches();
6070 		map->Lock();
6071 
6072 		// iterate through the pages and unwire them
6073 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6074 			phys_addr_t physicalAddress;
6075 			uint32 flags;
6076 
6077 			vm_page* page;
6078 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6079 				&& (flags & PAGE_PRESENT) != 0
6080 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6081 					!= NULL) {
6082 				// The page is still mapped -- just decrement
6083 				// the page's wired count.
6084 				decrement_page_wired_count(page);
6085 			} else {
6086 				panic("unlock_memory_etc(): Failed to unwire page: address "
6087 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6088 					nextAddress);
6089 				error = B_BAD_VALUE;
6090 				break;
6091 			}
6092 		}
6093 
6094 		map->Unlock();
6095 
6096 		// All pages are unwired. Remove the area's wired range as well (to
6097 		// avoid deadlocks we delete the range after unlocking the cache).
6098 		VMAreaWiredRange* range = area->Unwire(areaStart,
6099 			areaEnd - areaStart, writable);
6100 
6101 		cacheChainLocker.Unlock();
6102 
6103 		if (range != NULL) {
6104 			range->~VMAreaWiredRange();
6105 			free_etc(range, mallocFlags);
6106 		}
6107 
6108 		if (error != B_OK)
6109 			break;
6110 	}
6111 
6112 	// get rid of the address space reference lock_memory_etc() acquired
6113 	addressSpace->Put();
6114 
6115 	return error;
6116 }
6117 
6118 
6119 status_t
6120 unlock_memory(void* address, size_t numBytes, uint32 flags)
6121 {
6122 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6123 }
6124 
6125 
6126 /*!	Similar to get_memory_map(), but also allows to specify the address space
6127 	for the memory in question and has saner semantics.
6128 	Returns \c B_OK when the complete range could be translated or
6129 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6130 	case the actual number of entries is written to \c *_numEntries. Any other
6131 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6132 	in this case.
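
     	Usage sketch (hypothetical, translating a locked buffer into physical
     	runs):
     		physical_entry table[8];
     		uint32 numEntries = 8;
     		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
     			table, &numEntries);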
6133 */
6134 status_t
6135 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6136 	physical_entry* table, uint32* _numEntries)
6137 {
6138 	uint32 numEntries = *_numEntries;
6139 	*_numEntries = 0;
6140 
6141 	VMAddressSpace* addressSpace;
6142 	addr_t virtualAddress = (addr_t)address;
6143 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6144 	phys_addr_t physicalAddress;
6145 	status_t status = B_OK;
6146 	int32 index = -1;
6147 	addr_t offset = 0;
6148 	bool interrupts = are_interrupts_enabled();
6149 
6150 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6151 		"entries)\n", team, address, numBytes, numEntries));
6152 
6153 	if (numEntries == 0 || numBytes == 0)
6154 		return B_BAD_VALUE;
6155 
6156 	// in which address space is the address to be found?
6157 	if (IS_USER_ADDRESS(virtualAddress)) {
6158 		if (team == B_CURRENT_TEAM)
6159 			addressSpace = VMAddressSpace::GetCurrent();
6160 		else
6161 			addressSpace = VMAddressSpace::Get(team);
6162 	} else
6163 		addressSpace = VMAddressSpace::GetKernel();
6164 
6165 	if (addressSpace == NULL)
6166 		return B_ERROR;
6167 
6168 	VMTranslationMap* map = addressSpace->TranslationMap();
6169 
6170 	if (interrupts)
6171 		map->Lock();
6172 
6173 	while (offset < numBytes) {
6174 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6175 		uint32 flags;
6176 
6177 		if (interrupts) {
6178 			status = map->Query((addr_t)address + offset, &physicalAddress,
6179 				&flags);
6180 		} else {
6181 			status = map->QueryInterrupt((addr_t)address + offset,
6182 				&physicalAddress, &flags);
6183 		}
6184 		if (status < B_OK)
6185 			break;
6186 		if ((flags & PAGE_PRESENT) == 0) {
6187 			panic("get_memory_map() called on unmapped memory!");
6188 			return B_BAD_ADDRESS;
6189 		}
6190 
6191 		if (index < 0 && pageOffset > 0) {
6192 			physicalAddress += pageOffset;
6193 			if (bytes > B_PAGE_SIZE - pageOffset)
6194 				bytes = B_PAGE_SIZE - pageOffset;
6195 		}
6196 
6197 		// need to switch to the next physical_entry?
6198 		if (index < 0 || table[index].address
6199 				!= physicalAddress - table[index].size) {
6200 			if ((uint32)++index + 1 > numEntries) {
6201 				// table too small
6202 				break;
6203 			}
6204 			table[index].address = physicalAddress;
6205 			table[index].size = bytes;
6206 		} else {
6207 			// page fits in the current entry
6208 			table[index].size += bytes;
6209 		}
6210 
6211 		offset += bytes;
6212 	}
6213 
6214 	if (interrupts)
6215 		map->Unlock();
6216 
6217 	if (status != B_OK)
6218 		return status;
6219 
6220 	if ((uint32)index + 1 > numEntries) {
6221 		*_numEntries = index;
6222 		return B_BUFFER_OVERFLOW;
6223 	}
6224 
6225 	*_numEntries = index + 1;
6226 	return B_OK;
6227 }
6228 
6229 
6230 /*!	According to the BeBook, this function should always succeed.
6231 	This is no longer the case.
6232 */
6233 extern "C" int32
6234 __get_memory_map_haiku(const void* address, size_t numBytes,
6235 	physical_entry* table, int32 numEntries)
6236 {
6237 	uint32 entriesRead = numEntries;
6238 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6239 		table, &entriesRead);
6240 	if (error != B_OK)
6241 		return error;
6242 
6243 	// close the entry list
6244 
6245 	// if it's only one entry, we will silently accept the missing ending
6246 	if (numEntries == 1)
6247 		return B_OK;
6248 
6249 	if (entriesRead + 1 > (uint32)numEntries)
6250 		return B_BUFFER_OVERFLOW;
6251 
6252 	table[entriesRead].address = 0;
6253 	table[entriesRead].size = 0;
6254 
6255 	return B_OK;
6256 }
6257 
6258 
6259 area_id
6260 area_for(void* address)
6261 {
6262 	return vm_area_for((addr_t)address, true);
6263 }
6264 
6265 
6266 area_id
6267 find_area(const char* name)
6268 {
6269 	return VMAreas::Find(name);
6270 }
6271 
6272 
6273 status_t
6274 _get_area_info(area_id id, area_info* info, size_t size)
6275 {
6276 	if (size != sizeof(area_info) || info == NULL)
6277 		return B_BAD_VALUE;
6278 
6279 	AddressSpaceReadLocker locker;
6280 	VMArea* area;
6281 	status_t status = locker.SetFromArea(id, area);
6282 	if (status != B_OK)
6283 		return status;
6284 
6285 	fill_area_info(area, info, size);
6286 	return B_OK;
6287 }
6288 
6289 
6290 status_t
6291 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6292 {
6293 	addr_t nextBase = *(addr_t*)cookie;
6294 
6295 	// we're already through the list
6296 	if (nextBase == (addr_t)-1)
6297 		return B_ENTRY_NOT_FOUND;
6298 
6299 	if (team == B_CURRENT_TEAM)
6300 		team = team_get_current_team_id();
6301 
6302 	AddressSpaceReadLocker locker(team);
6303 	if (!locker.IsLocked())
6304 		return B_BAD_TEAM_ID;
6305 
6306 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6307 	if (area == NULL) {
6308 		nextBase = (addr_t)-1;
6309 		return B_ENTRY_NOT_FOUND;
6310 	}
6311 
6312 	fill_area_info(area, info, size);
6313 	*cookie = (ssize_t)(area->Base() + 1);
6314 
6315 	return B_OK;
6316 }
6317 
6318 
6319 status_t
6320 set_area_protection(area_id area, uint32 newProtection)
6321 {
6322 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6323 		newProtection, true);
6324 }
6325 
6326 
6327 status_t
6328 resize_area(area_id areaID, size_t newSize)
6329 {
6330 	return vm_resize_area(areaID, newSize, true);
6331 }
6332 
6333 
6334 /*!	Transfers the specified area to a new team. The caller must be the owner
6335 	of the area.
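
	Usage sketch (hypothetical, handing an area over to another team):
     		void* address = NULL;
     		area_id newArea = transfer_area(sourceArea, &address, B_ANY_ADDRESS,
     			targetTeam, true);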
6336 */
6337 area_id
6338 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6339 	bool kernel)
6340 {
6341 	area_info info;
6342 	status_t status = get_area_info(id, &info);
6343 	if (status != B_OK)
6344 		return status;
6345 
6346 	if (info.team != thread_get_current_thread()->team->id)
6347 		return B_PERMISSION_DENIED;
6348 
6349 	// We need to mark the area cloneable so the following operations work.
6350 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6351 	if (status != B_OK)
6352 		return status;
6353 
6354 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6355 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6356 	if (clonedArea < 0)
6357 		return clonedArea;
6358 
6359 	status = vm_delete_area(info.team, id, kernel);
6360 	if (status != B_OK) {
6361 		vm_delete_area(target, clonedArea, kernel);
6362 		return status;
6363 	}
6364 
6365 	// Now we can reset the protection to whatever it was before.
6366 	set_area_protection(clonedArea, info.protection);
6367 
6368 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6369 
6370 	return clonedArea;
6371 }
6372 
6373 
6374 extern "C" area_id
6375 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6376 	size_t numBytes, uint32 addressSpec, uint32 protection,
6377 	void** _virtualAddress)
6378 {
6379 	if (!arch_vm_supports_protection(protection))
6380 		return B_NOT_SUPPORTED;
6381 
6382 	fix_protection(&protection);
6383 
6384 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6385 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6386 		false);
6387 }
6388 
6389 
6390 area_id
6391 clone_area(const char* name, void** _address, uint32 addressSpec,
6392 	uint32 protection, area_id source)
6393 {
6394 	if ((protection & B_KERNEL_PROTECTION) == 0)
6395 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6396 
6397 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6398 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6399 }
6400 
6401 
6402 area_id
6403 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6404 	uint32 protection, uint32 flags, uint32 guardSize,
6405 	const virtual_address_restrictions* virtualAddressRestrictions,
6406 	const physical_address_restrictions* physicalAddressRestrictions,
6407 	void** _address)
6408 {
6409 	fix_protection(&protection);
6410 
6411 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6412 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6413 		true, _address);
6414 }
6415 
6416 
6417 extern "C" area_id
6418 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6419 	size_t size, uint32 lock, uint32 protection)
6420 {
6421 	fix_protection(&protection);
6422 
6423 	virtual_address_restrictions virtualRestrictions = {};
6424 	virtualRestrictions.address = *_address;
6425 	virtualRestrictions.address_specification = addressSpec;
6426 	physical_address_restrictions physicalRestrictions = {};
6427 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6428 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6429 		true, _address);
6430 }
6431 
6432 
6433 status_t
6434 delete_area(area_id area)
6435 {
6436 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6437 }
6438 
6439 
6440 //	#pragma mark - Userland syscalls
6441 
6442 
6443 status_t
6444 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6445 	addr_t size)
6446 {
6447 	// filter out some unavailable values (for userland)
6448 	switch (addressSpec) {
6449 		case B_ANY_KERNEL_ADDRESS:
6450 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6451 			return B_BAD_VALUE;
6452 	}
6453 
6454 	addr_t address;
6455 
6456 	if (!IS_USER_ADDRESS(userAddress)
6457 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6458 		return B_BAD_ADDRESS;
6459 
6460 	status_t status = vm_reserve_address_range(
6461 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6462 		RESERVED_AVOID_BASE);
6463 	if (status != B_OK)
6464 		return status;
6465 
6466 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6467 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6468 			(void*)address, size);
6469 		return B_BAD_ADDRESS;
6470 	}
6471 
6472 	return B_OK;
6473 }
6474 
6475 
6476 status_t
6477 _user_unreserve_address_range(addr_t address, addr_t size)
6478 {
6479 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6480 		(void*)address, size);
6481 }
6482 
6483 
6484 area_id
6485 _user_area_for(void* address)
6486 {
6487 	return vm_area_for((addr_t)address, false);
6488 }
6489 
6490 
6491 area_id
6492 _user_find_area(const char* userName)
6493 {
6494 	char name[B_OS_NAME_LENGTH];
6495 
6496 	if (!IS_USER_ADDRESS(userName)
6497 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6498 		return B_BAD_ADDRESS;
6499 
6500 	return find_area(name);
6501 }
6502 
6503 
6504 status_t
6505 _user_get_area_info(area_id area, area_info* userInfo)
6506 {
6507 	if (!IS_USER_ADDRESS(userInfo))
6508 		return B_BAD_ADDRESS;
6509 
6510 	area_info info;
6511 	status_t status = get_area_info(area, &info);
6512 	if (status < B_OK)
6513 		return status;
6514 
6515 	// TODO: do we want to prevent userland from seeing kernel protections?
6516 	//info.protection &= B_USER_PROTECTION;
6517 
6518 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6519 		return B_BAD_ADDRESS;
6520 
6521 	return status;
6522 }
6523 
6524 
6525 status_t
6526 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6527 {
6528 	ssize_t cookie;
6529 
6530 	if (!IS_USER_ADDRESS(userCookie)
6531 		|| !IS_USER_ADDRESS(userInfo)
6532 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6533 		return B_BAD_ADDRESS;
6534 
6535 	area_info info;
6536 	status_t status = _get_next_area_info(team, &cookie, &info,
6537 		sizeof(area_info));
6538 	if (status != B_OK)
6539 		return status;
6540 
6541 	//info.protection &= B_USER_PROTECTION;
6542 
6543 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6544 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6545 		return B_BAD_ADDRESS;
6546 
6547 	return status;
6548 }
6549 
6550 
6551 status_t
6552 _user_set_area_protection(area_id area, uint32 newProtection)
6553 {
6554 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6555 		return B_BAD_VALUE;
6556 
6557 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6558 		newProtection, false);
6559 }
6560 
6561 
6562 status_t
6563 _user_resize_area(area_id area, size_t newSize)
6564 {
6565 	// TODO: Since we restrict deleting of areas to those owned by the team,
6566 	// we should also do that for resizing (check other functions, too).
6567 	return vm_resize_area(area, newSize, false);
6568 }
6569 
6570 
6571 area_id
6572 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6573 	team_id target)
6574 {
6575 	// filter out some unavailable values (for userland)
6576 	switch (addressSpec) {
6577 		case B_ANY_KERNEL_ADDRESS:
6578 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6579 			return B_BAD_VALUE;
6580 	}
6581 
6582 	void* address;
6583 	if (!IS_USER_ADDRESS(userAddress)
6584 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6585 		return B_BAD_ADDRESS;
6586 
6587 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6588 	if (newArea < B_OK)
6589 		return newArea;
6590 
6591 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6592 		return B_BAD_ADDRESS;
6593 
6594 	return newArea;
6595 }
6596 
6597 
6598 area_id
6599 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6600 	uint32 protection, area_id sourceArea)
6601 {
6602 	char name[B_OS_NAME_LENGTH];
6603 	void* address;
6604 
6605 	// filter out some unavailable values (for userland)
6606 	switch (addressSpec) {
6607 		case B_ANY_KERNEL_ADDRESS:
6608 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6609 			return B_BAD_VALUE;
6610 	}
6611 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6612 		return B_BAD_VALUE;
6613 
6614 	if (!IS_USER_ADDRESS(userName)
6615 		|| !IS_USER_ADDRESS(userAddress)
6616 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6617 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6618 		return B_BAD_ADDRESS;
6619 
6620 	fix_protection(&protection);
6621 
6622 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6623 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6624 		false);
6625 	if (clonedArea < B_OK)
6626 		return clonedArea;
6627 
6628 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6629 		delete_area(clonedArea);
6630 		return B_BAD_ADDRESS;
6631 	}
6632 
6633 	return clonedArea;
6634 }
6635 
6636 
6637 area_id
6638 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6639 	size_t size, uint32 lock, uint32 protection)
6640 {
6641 	char name[B_OS_NAME_LENGTH];
6642 	void* address;
6643 
6644 	// filter out some unavailable values (for userland)
6645 	switch (addressSpec) {
6646 		case B_ANY_KERNEL_ADDRESS:
6647 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6648 			return B_BAD_VALUE;
6649 	}
6650 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6651 		return B_BAD_VALUE;
6652 
6653 	if (!IS_USER_ADDRESS(userName)
6654 		|| !IS_USER_ADDRESS(userAddress)
6655 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6656 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6657 		return B_BAD_ADDRESS;
6658 
6659 	if (addressSpec == B_EXACT_ADDRESS
6660 		&& IS_KERNEL_ADDRESS(address))
6661 		return B_BAD_VALUE;
6662 
6663 	if (addressSpec == B_ANY_ADDRESS)
6664 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6665 	if (addressSpec == B_BASE_ADDRESS)
6666 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6667 
6668 	fix_protection(&protection);
6669 
6670 	virtual_address_restrictions virtualRestrictions = {};
6671 	virtualRestrictions.address = address;
6672 	virtualRestrictions.address_specification = addressSpec;
6673 	physical_address_restrictions physicalRestrictions = {};
6674 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6675 		size, lock, protection, 0, 0, &virtualRestrictions,
6676 		&physicalRestrictions, false, &address);
6677 
6678 	if (area >= B_OK
6679 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6680 		delete_area(area);
6681 		return B_BAD_ADDRESS;
6682 	}
6683 
6684 	return area;
6685 }
6686 
6687 
6688 status_t
6689 _user_delete_area(area_id area)
6690 {
6691 	// Unlike the BeOS implementation, you can now only delete areas
6692 	// that you have created yourself from userland.
6693 	// The documentation of delete_area() explicitly states that this
6694 	// will be restricted in the future, and so it will.
6695 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6696 }
6697 
6698 
6699 // TODO: create a BeOS style call for this!
6700 
6701 area_id
6702 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6703 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6704 	int fd, off_t offset)
6705 {
6706 	char name[B_OS_NAME_LENGTH];
6707 	void* address;
6708 	area_id area;
6709 
6710 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6711 		return B_BAD_VALUE;
6712 
6713 	fix_protection(&protection);
6714 
6715 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6716 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6717 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6718 		return B_BAD_ADDRESS;
6719 
6720 	if (addressSpec == B_EXACT_ADDRESS) {
6721 		if ((addr_t)address + size < (addr_t)address
6722 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6723 			return B_BAD_VALUE;
6724 		}
6725 		if (!IS_USER_ADDRESS(address)
6726 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6727 			return B_BAD_ADDRESS;
6728 		}
6729 	}
6730 
6731 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6732 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6733 		false);
6734 	if (area < B_OK)
6735 		return area;
6736 
6737 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6738 		return B_BAD_ADDRESS;
6739 
6740 	return area;
6741 }
6742 
6743 
6744 status_t
6745 _user_unmap_memory(void* _address, size_t size)
6746 {
6747 	addr_t address = (addr_t)_address;
6748 
6749 	// check params
6750 	if (size == 0 || (addr_t)address + size < (addr_t)address
6751 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6752 		return B_BAD_VALUE;
6753 	}
6754 
6755 	if (!IS_USER_ADDRESS(address)
6756 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6757 		return B_BAD_ADDRESS;
6758 	}
6759 
6760 	// Write lock the address space and ensure the address range is not wired.
6761 	AddressSpaceWriteLocker locker;
6762 	do {
6763 		status_t status = locker.SetTo(team_get_current_team_id());
6764 		if (status != B_OK)
6765 			return status;
6766 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6767 			size, &locker));
6768 
6769 	// unmap
6770 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6771 }
6772 
6773 
6774 status_t
6775 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6776 {
6777 	// check address range
6778 	addr_t address = (addr_t)_address;
6779 	size = PAGE_ALIGN(size);
6780 
6781 	if ((address % B_PAGE_SIZE) != 0)
6782 		return B_BAD_VALUE;
6783 	if (!is_user_address_range(_address, size)) {
6784 		// weird error code required by POSIX
6785 		return ENOMEM;
6786 	}
6787 
6788 	// extend and check protection
6789 	if ((protection & ~B_USER_PROTECTION) != 0)
6790 		return B_BAD_VALUE;
6791 
6792 	fix_protection(&protection);
6793 
6794 	// We need to write lock the address space, since we're going to play with
6795 	// the areas. Also make sure that none of the areas is wired and that we're
6796 	// actually allowed to change the protection.
6797 	AddressSpaceWriteLocker locker;
6798 
6799 	bool restart;
6800 	do {
6801 		restart = false;
6802 
6803 		status_t status = locker.SetTo(team_get_current_team_id());
6804 		if (status != B_OK)
6805 			return status;
6806 
6807 		// First round: Check whether the whole range is covered by areas and we
6808 		// are allowed to modify them.
6809 		addr_t currentAddress = address;
6810 		size_t sizeLeft = size;
6811 		while (sizeLeft > 0) {
6812 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6813 			if (area == NULL)
6814 				return B_NO_MEMORY;
6815 
6816 			if ((area->protection & B_KERNEL_AREA) != 0)
6817 				return B_NOT_ALLOWED;
6818 			if (area->protection_max != 0
6819 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6820 				return B_NOT_ALLOWED;
6821 			}
6822 
6823 			addr_t offset = currentAddress - area->Base();
6824 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6825 
6826 			AreaCacheLocker cacheLocker(area);
6827 
6828 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6829 					&locker, &cacheLocker)) {
6830 				restart = true;
6831 				break;
6832 			}
6833 
6834 			cacheLocker.Unlock();
6835 
6836 			currentAddress += rangeSize;
6837 			sizeLeft -= rangeSize;
6838 		}
6839 	} while (restart);
6840 
6841 	// Second round: If the protections differ from that of the area, create a
6842 	// page protection array and re-map mapped pages.
6843 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6844 	addr_t currentAddress = address;
6845 	size_t sizeLeft = size;
6846 	while (sizeLeft > 0) {
6847 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6848 		if (area == NULL)
6849 			return B_NO_MEMORY;
6850 
6851 		addr_t offset = currentAddress - area->Base();
6852 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6853 
6854 		currentAddress += rangeSize;
6855 		sizeLeft -= rangeSize;
6856 
6857 		if (area->page_protections == NULL) {
6858 			if (area->protection == protection)
6859 				continue;
6860 			if (offset == 0 && rangeSize == area->Size()) {
6861 				status_t status = vm_set_area_protection(area->address_space->ID(),
6862 					area->id, protection, false);
6863 				if (status != B_OK)
6864 					return status;
6865 				continue;
6866 			}
6867 
6868 			status_t status = allocate_area_page_protections(area);
6869 			if (status != B_OK)
6870 				return status;
6871 		}
6872 
6873 		// We need to lock the complete cache chain, since we potentially unmap
6874 		// pages of lower caches.
6875 		VMCache* topCache = vm_area_get_locked_cache(area);
6876 		VMCacheChainLocker cacheChainLocker(topCache);
6877 		cacheChainLocker.LockAllSourceCaches();
6878 
6879 		for (addr_t pageAddress = area->Base() + offset;
6880 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6881 			map->Lock();
6882 
6883 			set_area_page_protection(area, pageAddress, protection);
6884 
6885 			phys_addr_t physicalAddress;
6886 			uint32 flags;
6887 
6888 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6889 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6890 				map->Unlock();
6891 				continue;
6892 			}
6893 
6894 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6895 			if (page == NULL) {
6896 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6897 					"\n", area, physicalAddress);
6898 				map->Unlock();
6899 				return B_ERROR;
6900 			}
6901 
6902 			// If the page is not in the topmost cache and write access is
6903 			// requested, we have to unmap it. Otherwise we can re-map it with
6904 			// the new protection.
6905 			bool unmapPage = page->Cache() != topCache
6906 				&& (protection & B_WRITE_AREA) != 0;
6907 
6908 			if (!unmapPage)
6909 				map->ProtectPage(area, pageAddress, protection);
6910 
6911 			map->Unlock();
6912 
6913 			if (unmapPage) {
6914 				DEBUG_PAGE_ACCESS_START(page);
6915 				unmap_page(area, pageAddress);
6916 				DEBUG_PAGE_ACCESS_END(page);
6917 			}
6918 		}
6919 	}
6920 
6921 	return B_OK;
6922 }
6923 
6924 
6925 status_t
6926 _user_sync_memory(void* _address, size_t size, uint32 flags)
6927 {
6928 	addr_t address = (addr_t)_address;
6929 	size = PAGE_ALIGN(size);
6930 
6931 	// check params
6932 	if ((address % B_PAGE_SIZE) != 0)
6933 		return B_BAD_VALUE;
6934 	if (!is_user_address_range(_address, size)) {
6935 		// weird error code required by POSIX
6936 		return ENOMEM;
6937 	}
6938 
6939 	bool writeSync = (flags & MS_SYNC) != 0;
6940 	bool writeAsync = (flags & MS_ASYNC) != 0;
6941 	if (writeSync && writeAsync)
6942 		return B_BAD_VALUE;
6943 
6944 	if (size == 0 || (!writeSync && !writeAsync))
6945 		return B_OK;
6946 
6947 	// iterate through the range and sync all concerned areas
6948 	while (size > 0) {
6949 		// read lock the address space
6950 		AddressSpaceReadLocker locker;
6951 		status_t error = locker.SetTo(team_get_current_team_id());
6952 		if (error != B_OK)
6953 			return error;
6954 
6955 		// get the first area
6956 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6957 		if (area == NULL)
6958 			return B_NO_MEMORY;
6959 
6960 		off_t offset = address - area->Base();
6961 		size_t rangeSize = min_c(area->Size() - offset, size);
6962 		offset += area->cache_offset;
6963 
6964 		// lock the cache
6965 		AreaCacheLocker cacheLocker(area);
6966 		if (!cacheLocker)
6967 			return B_BAD_VALUE;
6968 		VMCache* cache = area->cache;
6969 
6970 		locker.Unlock();
6971 
6972 		uint32 firstPage = offset >> PAGE_SHIFT;
6973 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6974 
6975 		// write the pages
6976 		if (cache->type == CACHE_TYPE_VNODE) {
6977 			if (writeSync) {
6978 				// synchronous
6979 				error = vm_page_write_modified_page_range(cache, firstPage,
6980 					endPage);
6981 				if (error != B_OK)
6982 					return error;
6983 			} else {
6984 				// asynchronous
6985 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6986 				// TODO: This is probably not quite what is supposed to happen.
6987 				// Especially when a lot has to be written, it might take ages
6988 				// until it really hits the disk.
6989 			}
6990 		}
6991 
6992 		address += rangeSize;
6993 		size -= rangeSize;
6994 	}
6995 
6996 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6997 	// synchronize multiple mappings of the same file. In our VM they never get
6998 	// out of sync, though, so we don't have to do anything.
6999 
7000 	return B_OK;
7001 }
7002 
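// A minimal userland sketch (assuming libroot's msync() ends up in
// _user_sync_memory(); "fd" and "fileSize" stand for an already opened file
// and its page-aligned size):
//
//	#include <sys/mman.h>
//
//	void* mapped = mmap(NULL, fileSize, PROT_READ | PROT_WRITE, MAP_SHARED,
//		fd, 0);
//	// ... modify the mapping ...
//	msync(mapped, fileSize, MS_SYNC);
//		// MS_SYNC waits for vm_page_write_modified_page_range() to finish;
//		// MS_ASYNC merely schedules the write-back, as seen above.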
7003 
7004 status_t
7005 _user_memory_advice(void* _address, size_t size, uint32 advice)
7006 {
7007 	addr_t address = (addr_t)_address;
7008 	if ((address % B_PAGE_SIZE) != 0)
7009 		return B_BAD_VALUE;
7010 
7011 	size = PAGE_ALIGN(size);
7012 	if (!is_user_address_range(_address, size)) {
7013 		// weird error code required by POSIX
7014 		return B_NO_MEMORY;
7015 	}
7016 
7017 	switch (advice) {
7018 		case MADV_NORMAL:
7019 		case MADV_SEQUENTIAL:
7020 		case MADV_RANDOM:
7021 		case MADV_WILLNEED:
7022 		case MADV_DONTNEED:
7023 			// TODO: Implement!
7024 			break;
7025 
7026 		case MADV_FREE:
7027 		{
7028 			AddressSpaceWriteLocker locker;
7029 			do {
7030 				status_t status = locker.SetTo(team_get_current_team_id());
7031 				if (status != B_OK)
7032 					return status;
7033 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7034 					address, size, &locker));
7035 
7036 			discard_address_range(locker.AddressSpace(), address, size, false);
7037 			break;
7038 		}
7039 
7040 		default:
7041 			return B_BAD_VALUE;
7042 	}
7043 
7044 	return B_OK;
7045 }
7046 
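// Hedged sketch of the only advice that currently changes anything here
// (assuming MADV_FREE is exposed through <sys/mman.h> and madvise() reaches
// this syscall; "scratch"/"scratchSize" are a hypothetical anonymous buffer):
//
//	#include <sys/mman.h>
//
//	// The backing pages of the range are discarded without being written
//	// anywhere; the mapping itself stays valid.
//	madvise(scratch, scratchSize, MADV_FREE);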
7047 
7048 status_t
7049 _user_get_memory_properties(team_id teamID, const void* address,
7050 	uint32* _protected, uint32* _lock)
7051 {
7052 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7053 		return B_BAD_ADDRESS;
7054 
7055 	AddressSpaceReadLocker locker;
7056 	status_t error = locker.SetTo(teamID);
7057 	if (error != B_OK)
7058 		return error;
7059 
7060 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7061 	if (area == NULL)
7062 		return B_NO_MEMORY;
7063 
7064 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7065 	uint32 wiring = area->wiring;
7066 
7067 	locker.Unlock();
7068 
7069 	error = user_memcpy(_protected, &protection, sizeof(protection));
7070 	if (error != B_OK)
7071 		return error;
7072 
7073 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7074 
7075 	return error;
7076 }
7077 
7078 
7079 static status_t
7080 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7081 {
7082 #if ENABLE_SWAP_SUPPORT
7083 	// check address range
7084 	addr_t address = (addr_t)_address;
7085 	size = PAGE_ALIGN(size);
7086 
7087 	if ((address % B_PAGE_SIZE) != 0)
7088 		return EINVAL;
7089 	if (!is_user_address_range(_address, size))
7090 		return EINVAL;
7091 
7092 	const addr_t endAddress = address + size;
7093 
7094 	AddressSpaceReadLocker addressSpaceLocker;
7095 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7096 	if (error != B_OK)
7097 		return error;
7098 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7099 
7100 	// iterate through all concerned areas
7101 	addr_t nextAddress = address;
7102 	while (nextAddress != endAddress) {
7103 		// get the next area
7104 		VMArea* area = addressSpace->LookupArea(nextAddress);
7105 		if (area == NULL) {
7106 			error = B_BAD_ADDRESS;
7107 			break;
7108 		}
7109 
7110 		const addr_t areaStart = nextAddress;
7111 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7112 		nextAddress = areaEnd;
7113 
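		// lock_memory_etc() faults in and wires the whole range, so every
		// page is resident before the swappable flag is changed below; it is
		// unwired again via unlock_memory_etc() afterwards.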
7114 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7115 		if (error != B_OK) {
7116 			// We don't need to unset or reset things on failure.
7117 			break;
7118 		}
7119 
7120 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7121 		VMAnonymousCache* anonCache = NULL;
7122 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7123 			// This memory can never be swapped anyway. Nothing to do.
7124 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7125 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7126 				areaEnd - areaStart, swappable);
7127 		} else {
7128 			// Some other cache type? We cannot affect anything here.
7129 			error = EINVAL;
7130 		}
7131 
7132 		cacheChainLocker.Unlock();
7133 
7134 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7135 		if (error != B_OK)
7136 			break;
7137 	}
7138 
7139 	return error;
7140 #else
7141 	// No swap support? Nothing to do.
7142 	return B_OK;
7143 #endif
7144 }
7145 
7146 
7147 status_t
7148 _user_mlock(const void* _address, size_t size)
7149 {
7150 	return user_set_memory_swappable(_address, size, false);
7151 }
7152 
7153 
7154 status_t
7155 _user_munlock(const void* _address, size_t size)
7156 {
7157 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7158 	// if multiple clones of an area had mlock() called on them,
7159 	// munlock() must also be called on all of them to actually unlock.
7160 	// (At present, the first munlock() will unlock all.)
7161 	// TODO: fork() should automatically unlock memory in the child.
7162 	return user_set_memory_swappable(_address, size, true);
7163 }
7164 
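// Userland view, as a hedged sketch (mlock()/munlock() in libroot map onto
// the two syscalls above; "secret"/"secretSize" are a hypothetical buffer
// holding key material): on Haiku this marks the range's anonymous cache
// pages as non-swappable rather than keeping them permanently wired.
//
//	#include <sys/mman.h>
//
//	mlock(secret, secretSize);		// keep the key material out of swap
//	// ... use the memory ...
//	munlock(secret, secretSize);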
7165 
7166 // #pragma mark -- compatibility
7167 
7168 
7169 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7170 
7171 
7172 struct physical_entry_beos {
7173 	uint32	address;
7174 	uint32	size;
7175 };
7176 
7177 
7178 /*!	The physical_entry structure has changed. We need to translate it to the
7179 	old one.
7180 */
7181 extern "C" int32
7182 __get_memory_map_beos(const void* _address, size_t numBytes,
7183 	physical_entry_beos* table, int32 numEntries)
7184 {
7185 	if (numEntries <= 0)
7186 		return B_BAD_VALUE;
7187 
7188 	const uint8* address = (const uint8*)_address;
7189 
7190 	int32 count = 0;
7191 	while (numBytes > 0 && count < numEntries) {
7192 		physical_entry entry;
7193 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7194 		if (result < 0) {
7195 			if (result != B_BUFFER_OVERFLOW)
7196 				return result;
7197 		}
7198 
7199 		if (entry.address >= (phys_addr_t)1 << 32) {
7200 			panic("get_memory_map(): Address is greater than 4 GB!");
7201 			return B_ERROR;
7202 		}
7203 
7204 		table[count].address = entry.address;
7205 		table[count++].size = entry.size;
7206 
7207 		address += entry.size;
7208 		numBytes -= entry.size;
7209 	}
7210 
7211 	// null-terminate the table, if possible
7212 	if (count < numEntries) {
7213 		table[count].address = 0;
7214 		table[count].size = 0;
7215 	}
7216 
7217 	return B_OK;
7218 }
7219 
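// What this shim services, as a hedged sketch: a binary built against the
// BeOS ABI calls get_memory_map() with the old 32-bit physical_entry layout
// and, through the symbol versioning below, ends up here instead of in
// __get_memory_map_haiku() ("ioBuffer"/"ioSize" are hypothetical):
//
//	physical_entry table[4];	// old layout: 32-bit address and size
//	status_t status = get_memory_map(ioBuffer, ioSize, table, 4);
//	// Entries are translated one at a time; when fewer than 4 entries were
//	// needed, the table is null-terminated as done above.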
7220 
7221 /*!	The type of the \a physicalAddress parameter has changed from void* to
7222 	phys_addr_t.
7223 */
7224 extern "C" area_id
7225 __map_physical_memory_beos(const char* name, void* physicalAddress,
7226 	size_t numBytes, uint32 addressSpec, uint32 protection,
7227 	void** _virtualAddress)
7228 {
7229 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7230 		addressSpec, protection, _virtualAddress);
7231 }
7232 
7233 
7234 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7235 	we meddle with the \a lock parameter to force 32 bit.
7236 */
7237 extern "C" area_id
7238 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7239 	size_t size, uint32 lock, uint32 protection)
7240 {
7241 	switch (lock) {
7242 		case B_NO_LOCK:
7243 			break;
7244 		case B_FULL_LOCK:
7245 		case B_LAZY_LOCK:
7246 			lock = B_32_BIT_FULL_LOCK;
7247 			break;
7248 		case B_CONTIGUOUS:
7249 			lock = B_32_BIT_CONTIGUOUS;
7250 			break;
7251 	}
7252 
7253 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7254 		protection);
7255 }
7256 
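// Hedged sketch of the effect for a BeOS-era caller: it asks for fully
// locked memory and transparently gets memory guaranteed to lie below 4 GB,
// since it may well store the physical addresses in 32-bit fields:
//
//	void* base;
//	area_id area = create_area("dma buffer", &base, B_ANY_ADDRESS,
//		16 * B_PAGE_SIZE, B_FULL_LOCK /* becomes B_32_BIT_FULL_LOCK */,
//		B_READ_AREA | B_WRITE_AREA);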
7257 
7258 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7259 	"BASE");
7260 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7261 	"map_physical_memory@", "BASE");
7262 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7263 	"BASE");
7264 
7265 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7266 	"get_memory_map@@", "1_ALPHA3");
7267 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7268 	"map_physical_memory@@", "1_ALPHA3");
7269 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7270 	"1_ALPHA3");
7271 
7272 
7273 #else
7274 
7275 
7276 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7277 	"get_memory_map@@", "BASE");
7278 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7279 	"map_physical_memory@@", "BASE");
7280 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7281 	"BASE");
7282 
7283 
7284 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7285