xref: /haiku/src/system/kernel/vm/vm.cpp (revision 13581b3d2a71545960b98fefebc5225b5bf29072)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
117 
118 
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
235 
236 } // namespace
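
// Editorial usage sketch (not part of the original source), mirroring how
// cut_area() and discard_area_range() below drive VMCacheChainLocker: lock the
// area's top cache, lock the whole source chain, then unlock everything except
// the top cache before an operation that may temporarily drop its lock.
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the chain ...
//	cacheChainLocker.Unlock(cache);		// keep only the top cache locked
//	cache->Resize(newEnd, priority);	// may temporarily drop the lock
//	cache->ReleaseRefAndUnlock();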
237 
238 
239 // The memory reserve that an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
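
// Editorial note (assumption about the priority constants): the table above is
// indexed by the allocation priority, so kMemoryReserveForPriority[priority]
// yields the reserve that an allocation of that priority must leave untouched,
// matching the user/system/VIP order of the entries.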
245 
246 
247 ObjectCache* gPageMappingsObjectCache;
248 
249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
250 
251 static off_t sAvailableMemory;
252 static off_t sNeededMemory;
253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
254 static uint32 sPageFaults;
255 
256 static VMPhysicalPageMapper* sPhysicalPageMapper;
257 
258 #if DEBUG_CACHE_LIST
259 
260 struct cache_info {
261 	VMCache*	cache;
262 	addr_t		page_count;
263 	addr_t		committed;
264 };
265 
266 static const int kCacheInfoTableCount = 100 * 1024;
267 static cache_info* sCacheInfoTable;
268 
269 #endif	// DEBUG_CACHE_LIST
270 
271 
272 // function declarations
273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
274 	bool addressSpaceCleanup);
275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
276 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
277 static status_t map_backing_store(VMAddressSpace* addressSpace,
278 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
279 	int protection, int protectionMax, int mapping, uint32 flags,
280 	const virtual_address_restrictions* addressRestrictions, bool kernel,
281 	VMArea** _area, void** _virtualAddress);
282 static void fix_protection(uint32* protection);
283 
284 
285 //	#pragma mark -
286 
287 
288 #if VM_PAGE_FAULT_TRACING
289 
290 namespace VMPageFaultTracing {
291 
292 class PageFaultStart : public AbstractTraceEntry {
293 public:
294 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
295 		:
296 		fAddress(address),
297 		fPC(pc),
298 		fWrite(write),
299 		fUser(user)
300 	{
301 		Initialized();
302 	}
303 
304 	virtual void AddDump(TraceOutput& out)
305 	{
306 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
307 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
308 	}
309 
310 private:
311 	addr_t	fAddress;
312 	addr_t	fPC;
313 	bool	fWrite;
314 	bool	fUser;
315 };
316 
317 
318 // page fault errors
319 enum {
320 	PAGE_FAULT_ERROR_NO_AREA		= 0,
321 	PAGE_FAULT_ERROR_KERNEL_ONLY,
322 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
323 	PAGE_FAULT_ERROR_READ_PROTECTED,
324 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
325 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
326 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
327 };
328 
329 
330 class PageFaultError : public AbstractTraceEntry {
331 public:
332 	PageFaultError(area_id area, status_t error)
333 		:
334 		fArea(area),
335 		fError(error)
336 	{
337 		Initialized();
338 	}
339 
340 	virtual void AddDump(TraceOutput& out)
341 	{
342 		switch (fError) {
343 			case PAGE_FAULT_ERROR_NO_AREA:
344 				out.Print("page fault error: no area");
345 				break;
346 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
347 				out.Print("page fault error: area: %ld, kernel only", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
350 				out.Print("page fault error: area: %ld, write protected",
351 					fArea);
352 				break;
353 			case PAGE_FAULT_ERROR_READ_PROTECTED:
354 				out.Print("page fault error: area: %ld, read protected", fArea);
355 				break;
356 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
357 				out.Print("page fault error: area: %ld, execute protected",
358 					fArea);
359 				break;
360 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
361 				out.Print("page fault error: kernel touching bad user memory");
362 				break;
363 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
364 				out.Print("page fault error: no address space");
365 				break;
366 			default:
367 				out.Print("page fault error: area: %ld, error: %s", fArea,
368 					strerror(fError));
369 				break;
370 		}
371 	}
372 
373 private:
374 	area_id		fArea;
375 	status_t	fError;
376 };
377 
378 
379 class PageFaultDone : public AbstractTraceEntry {
380 public:
381 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
382 			vm_page* page)
383 		:
384 		fArea(area),
385 		fTopCache(topCache),
386 		fCache(cache),
387 		fPage(page)
388 	{
389 		Initialized();
390 	}
391 
392 	virtual void AddDump(TraceOutput& out)
393 	{
394 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
395 			"page: %p", fArea, fTopCache, fCache, fPage);
396 	}
397 
398 private:
399 	area_id		fArea;
400 	VMCache*	fTopCache;
401 	VMCache*	fCache;
402 	vm_page*	fPage;
403 };
404 
405 }	// namespace VMPageFaultTracing
406 
407 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
408 #else
409 #	define TPF(x) ;
410 #endif	// VM_PAGE_FAULT_TRACING
411 
412 
413 //	#pragma mark -
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 increment_page_wired_count(vm_page* page)
420 {
421 	if (!page->IsMapped())
422 		atomic_add(&gMappedPagesCount, 1);
423 	page->IncrementWiredCount();
424 }
425 
426 
427 /*!	The page's cache must be locked.
428 */
429 static inline void
430 decrement_page_wired_count(vm_page* page)
431 {
432 	page->DecrementWiredCount();
433 	if (!page->IsMapped())
434 		atomic_add(&gMappedPagesCount, -1);
435 }
436 
437 
438 static inline addr_t
439 virtual_page_address(VMArea* area, vm_page* page)
440 {
441 	return area->Base()
442 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
443 }
444 
445 
446 static inline bool
447 is_page_in_area(VMArea* area, vm_page* page)
448 {
449 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
450 	return pageCacheOffsetBytes >= area->cache_offset
451 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
452 }
453 
454 
455 //! You need to have the address space locked when calling this function
456 static VMArea*
457 lookup_area(VMAddressSpace* addressSpace, area_id id)
458 {
459 	VMAreas::ReadLock();
460 
461 	VMArea* area = VMAreas::LookupLocked(id);
462 	if (area != NULL && area->address_space != addressSpace)
463 		area = NULL;
464 
465 	VMAreas::ReadUnlock();
466 
467 	return area;
468 }
469 
470 
471 static inline size_t
472 area_page_protections_size(size_t areaSize)
473 {
474 	// In the page protections we store only the three user protections,
475 	// so we use 4 bits per page.
476 	return (areaSize / B_PAGE_SIZE + 1) / 2;
477 }
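
// Editorial example: for a 16-page area the function above yields
// (16 + 1) / 2 = 8 bytes, i.e. one nibble per page, rounded up so that an odd
// page count still fits.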
478 
479 
480 static status_t
481 allocate_area_page_protections(VMArea* area)
482 {
483 	size_t bytes = area_page_protections_size(area->Size());
484 	area->page_protections = (uint8*)malloc_etc(bytes,
485 		area->address_space == VMAddressSpace::Kernel()
486 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
487 	if (area->page_protections == NULL)
488 		return B_NO_MEMORY;
489 
490 	// init the page protections for all pages to that of the area
491 	uint32 areaProtection = area->protection
492 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
493 	memset(area->page_protections, areaProtection | (areaProtection << 4),
494 		bytes);
495 	return B_OK;
496 }
497 
498 
499 static inline void
500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
501 {
502 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
503 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
504 	uint8& entry = area->page_protections[pageIndex / 2];
505 	if (pageIndex % 2 == 0)
506 		entry = (entry & 0xf0) | protection;
507 	else
508 		entry = (entry & 0x0f) | (protection << 4);
509 }
510 
511 
512 static inline uint32
513 get_area_page_protection(VMArea* area, addr_t pageAddress)
514 {
515 	if (area->page_protections == NULL)
516 		return area->protection;
517 
518 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
519 	uint32 protection = area->page_protections[pageIndex / 2];
520 	if (pageIndex % 2 == 0)
521 		protection &= 0x0f;
522 	else
523 		protection >>= 4;
524 
525 	uint32 kernelProtection = 0;
526 	if ((protection & B_READ_AREA) != 0)
527 		kernelProtection |= B_KERNEL_READ_AREA;
528 	if ((protection & B_WRITE_AREA) != 0)
529 		kernelProtection |= B_KERNEL_WRITE_AREA;
530 
531 	// If this is a kernel area we return only the kernel flags.
532 	if (area->address_space == VMAddressSpace::Kernel())
533 		return kernelProtection;
534 
535 	return protection | kernelProtection;
536 }
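
// Editorial sketch of the nibble packing used by the two accessors above:
// page N's user protection bits live in page_protections[N / 2]; even pages
// occupy the low nibble, odd pages the high nibble. With hypothetical
// protections P4 and P5 for pages 4 and 5, the byte at index 2 thus holds
// (P5 << 4) | P4.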
537 
538 
539 static inline uint8*
540 realloc_page_protections(uint8* pageProtections, size_t areaSize,
541 	uint32 allocationFlags)
542 {
543 	size_t bytes = area_page_protections_size(areaSize);
544 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
545 }
546 
547 
548 /*!	The caller must have reserved as many pages as the translation map
549 	implementation might need to map this page.
550 	The page's cache must be locked.
551 */
552 static status_t
553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
554 	vm_page_reservation* reservation)
555 {
556 	VMTranslationMap* map = area->address_space->TranslationMap();
557 
558 	bool wasMapped = page->IsMapped();
559 
560 	if (area->wiring == B_NO_LOCK) {
561 		DEBUG_PAGE_ACCESS_CHECK(page);
562 
563 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
564 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
565 			gPageMappingsObjectCache,
566 			CACHE_DONT_WAIT_FOR_MEMORY
567 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
568 		if (mapping == NULL)
569 			return B_NO_MEMORY;
570 
571 		mapping->page = page;
572 		mapping->area = area;
573 
574 		map->Lock();
575 
576 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
577 			area->MemoryType(), reservation);
578 
579 		// insert mapping into lists
580 		if (!page->IsMapped())
581 			atomic_add(&gMappedPagesCount, 1);
582 
583 		page->mappings.Add(mapping);
584 		area->mappings.Add(mapping);
585 
586 		map->Unlock();
587 	} else {
588 		DEBUG_PAGE_ACCESS_CHECK(page);
589 
590 		map->Lock();
591 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
592 			area->MemoryType(), reservation);
593 		map->Unlock();
594 
595 		increment_page_wired_count(page);
596 	}
597 
598 	if (!wasMapped) {
599 		// The page is mapped now, so it must not remain in the cached queue.
600 		// It also makes sense to move it from the inactive to the active queue, since
601 		// otherwise the page daemon wouldn't come to keep track of it (in idle
602 		// mode) -- if the page isn't touched, it will be deactivated after a
603 		// full iteration through the queue at the latest.
604 		if (page->State() == PAGE_STATE_CACHED
605 				|| page->State() == PAGE_STATE_INACTIVE) {
606 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
607 		}
608 	}
609 
610 	return B_OK;
611 }
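
// Editorial usage sketch, mirroring the B_FULL_LOCK path in
// vm_create_anonymous_area() below: reserve pages for the translation map
// before locking the cache, then map and release the reservation afterwards.
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, reservedPages, priority);
//	// ... with the cache locked:
//	map_page(area, page, address, protection, &reservation);
//	// ...
//	vm_page_unreserve_pages(&reservation);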
612 
613 
614 /*!	The caller must hold the lock of the page's cache.
616 */
617 static inline bool
618 unmap_page(VMArea* area, addr_t virtualAddress)
619 {
620 	return area->address_space->TranslationMap()->UnmapPage(area,
621 		virtualAddress, true);
622 }
623 
624 
625 /*!	The caller must hold the locks of all mapped pages' caches.
627 */
628 static inline void
629 unmap_pages(VMArea* area, addr_t base, size_t size)
630 {
631 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
632 }
633 
634 
635 static inline bool
636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
637 {
638 	if (address < area->Base()) {
639 		offset = area->Base() - address;
640 		if (offset >= size)
641 			return false;
642 
643 		address = area->Base();
644 		size -= offset;
645 		offset = 0;
646 		if (size > area->Size())
647 			size = area->Size();
648 
649 		return true;
650 	}
651 
652 	offset = address - area->Base();
653 	if (offset >= area->Size())
654 		return false;
655 
656 	if (size >= area->Size() - offset)
657 		size = area->Size() - offset;
658 
659 	return true;
660 }
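
// Editorial example with hypothetical numbers: for an area with Base() 0x2000
// and Size() 0x4000, intersect_area() called with address 0x1000 and size
// 0x3000 clips the range to the area, returning true with address = 0x2000,
// size = 0x2000 and offset = 0.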
661 
662 
663 /*!	Cuts a piece out of an area. If the given cut range covers the complete
664 	area, it is deleted. If it covers the beginning or the end, the area is
665 	resized accordingly. If the range covers some part in the middle of the
666 	area, it is split in two; in this case the second area is returned via
667 	\a _secondArea (the variable is left untouched in the other cases).
668 	The address space must be write locked.
669 	The caller must ensure that no part of the given range is wired.
670 */
671 static status_t
672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
673 	addr_t size, VMArea** _secondArea, bool kernel)
674 {
675 	addr_t offset;
676 	if (!intersect_area(area, address, size, offset))
677 		return B_OK;
678 
679 	// Is the area fully covered?
680 	if (address == area->Base() && size == area->Size()) {
681 		delete_area(addressSpace, area, false);
682 		return B_OK;
683 	}
684 
685 	int priority;
686 	uint32 allocationFlags;
687 	if (addressSpace == VMAddressSpace::Kernel()) {
688 		priority = VM_PRIORITY_SYSTEM;
689 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
690 			| HEAP_DONT_LOCK_KERNEL_SPACE;
691 	} else {
692 		priority = VM_PRIORITY_USER;
693 		allocationFlags = 0;
694 	}
695 
696 	VMCache* cache = vm_area_get_locked_cache(area);
697 	VMCacheChainLocker cacheChainLocker(cache);
698 	cacheChainLocker.LockAllSourceCaches();
699 
700 	// If no one else uses the area's cache and it's an anonymous cache, we can
701 	// resize or split it, too.
702 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
703 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
704 
705 	const addr_t oldSize = area->Size();
706 
707 	// Cut the end only?
708 	if (offset > 0 && size == area->Size() - offset) {
709 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
710 			allocationFlags);
711 		if (error != B_OK)
712 			return error;
713 
714 		if (area->page_protections != NULL) {
715 			uint8* newProtections = realloc_page_protections(
716 				area->page_protections, area->Size(), allocationFlags);
717 
718 			if (newProtections == NULL) {
719 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 				return B_NO_MEMORY;
721 			}
722 
723 			area->page_protections = newProtections;
724 		}
725 
726 		// unmap pages
727 		unmap_pages(area, address, size);
728 
729 		if (onlyCacheUser) {
730 			// Since VMCache::Resize() can temporarily drop the lock, we must
731 			// unlock all lower caches to prevent locking order inversion.
732 			cacheChainLocker.Unlock(cache);
733 			cache->Resize(cache->virtual_base + offset, priority);
734 			cache->ReleaseRefAndUnlock();
735 		}
736 
737 		return B_OK;
738 	}
739 
740 	// Cut the beginning only?
741 	if (area->Base() == address) {
742 		uint8* newProtections = NULL;
743 		if (area->page_protections != NULL) {
744 			// Allocate all memory before shifting as the shift might lose some
745 			// bits.
746 			newProtections = realloc_page_protections(NULL, area->Size(),
747 				allocationFlags);
748 
749 			if (newProtections == NULL)
750 				return B_NO_MEMORY;
751 		}
752 
753 		// resize the area
754 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
755 			allocationFlags);
756 		if (error != B_OK) {
757 			if (newProtections != NULL)
758 				free_etc(newProtections, allocationFlags);
759 			return error;
760 		}
761 
762 		if (area->page_protections != NULL) {
763 			size_t oldBytes = area_page_protections_size(oldSize);
764 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
765 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
766 
767 			size_t bytes = area_page_protections_size(area->Size());
768 			memcpy(newProtections, area->page_protections, bytes);
769 			free_etc(area->page_protections, allocationFlags);
770 			area->page_protections = newProtections;
771 		}
772 
773 		// unmap pages
774 		unmap_pages(area, address, size);
775 
776 		if (onlyCacheUser) {
777 			// Since VMCache::Rebase() can temporarily drop the lock, we must
778 			// unlock all lower caches to prevent locking order inversion.
779 			cacheChainLocker.Unlock(cache);
780 			cache->Rebase(cache->virtual_base + size, priority);
781 			cache->ReleaseRefAndUnlock();
782 		}
783 		area->cache_offset += size;
784 
785 		return B_OK;
786 	}
787 
788 	// The tough part -- cut a piece out of the middle of the area.
789 	// We do that by shrinking the area to the beginning section and creating a
790 	// new area for the end section.
791 	addr_t firstNewSize = offset;
792 	addr_t secondBase = address + size;
793 	addr_t secondSize = area->Size() - offset - size;
794 
795 	// unmap pages
796 	unmap_pages(area, address, area->Size() - firstNewSize);
797 
798 	// resize the area
799 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
800 		allocationFlags);
801 	if (error != B_OK)
802 		return error;
803 
804 	uint8* areaNewProtections = NULL;
805 	uint8* secondAreaNewProtections = NULL;
806 
807 	// Try to allocate the new memory before making some hard to reverse
808 	// changes.
809 	if (area->page_protections != NULL) {
810 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
811 			allocationFlags);
812 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
813 			allocationFlags);
814 
815 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
816 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
817 			free_etc(areaNewProtections, allocationFlags);
818 			free_etc(secondAreaNewProtections, allocationFlags);
819 			return B_NO_MEMORY;
820 		}
821 	}
822 
823 	virtual_address_restrictions addressRestrictions = {};
824 	addressRestrictions.address = (void*)secondBase;
825 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
826 	VMArea* secondArea;
827 
828 	if (onlyCacheUser) {
829 		// Create a new cache for the second area.
830 		VMCache* secondCache;
831 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
832 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
833 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
834 		if (error != B_OK) {
835 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
836 			free_etc(areaNewProtections, allocationFlags);
837 			free_etc(secondAreaNewProtections, allocationFlags);
838 			return error;
839 		}
840 
841 		secondCache->Lock();
842 		secondCache->temporary = cache->temporary;
843 		secondCache->virtual_base = area->cache_offset;
844 		secondCache->virtual_end = area->cache_offset + secondSize;
845 
846 		// Transfer the concerned pages from the first cache.
847 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
848 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
849 			area->cache_offset);
850 
851 		if (error == B_OK) {
852 			// Since VMCache::Resize() can temporarily drop the lock, we must
853 			// unlock all lower caches to prevent locking order inversion.
854 			cacheChainLocker.Unlock(cache);
855 			cache->Resize(cache->virtual_base + firstNewSize, priority);
856 			// Don't unlock the cache yet because we might have to resize it
857 			// back.
858 
859 			// Map the second area.
860 			error = map_backing_store(addressSpace, secondCache,
861 				area->cache_offset, area->name, secondSize, area->wiring,
862 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
863 				&addressRestrictions, kernel, &secondArea, NULL);
864 		}
865 
866 		if (error != B_OK) {
867 			// Restore the original cache.
868 			cache->Resize(cache->virtual_base + oldSize, priority);
869 
870 			// Move the pages back.
871 			status_t readoptStatus = cache->Adopt(secondCache,
872 				area->cache_offset, secondSize, adoptOffset);
873 			if (readoptStatus != B_OK) {
874 				// Some (swap) pages have not been moved back and will be lost
875 				// once the second cache is deleted.
876 				panic("failed to restore cache range: %s",
877 					strerror(readoptStatus));
878 
879 				// TODO: Handle out of memory cases by freeing memory and
880 				// retrying.
881 			}
882 
883 			cache->ReleaseRefAndUnlock();
884 			secondCache->ReleaseRefAndUnlock();
885 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
886 			free_etc(areaNewProtections, allocationFlags);
887 			free_etc(secondAreaNewProtections, allocationFlags);
888 			return error;
889 		}
890 
891 		// Now we can unlock it.
892 		cache->ReleaseRefAndUnlock();
893 		secondCache->Unlock();
894 	} else {
895 		error = map_backing_store(addressSpace, cache, area->cache_offset
896 			+ (secondBase - area->Base()),
897 			area->name, secondSize, area->wiring, area->protection,
898 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
899 			&addressRestrictions, kernel, &secondArea, NULL);
900 		if (error != B_OK) {
901 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
902 			free_etc(areaNewProtections, allocationFlags);
903 			free_etc(secondAreaNewProtections, allocationFlags);
904 			return error;
905 		}
906 		// We need a cache reference for the new area.
907 		cache->AcquireRefLocked();
908 	}
909 
910 	if (area->page_protections != NULL) {
911 		// Copy the protection bits of the first area.
912 		size_t areaBytes = area_page_protections_size(area->Size());
913 		memcpy(areaNewProtections, area->page_protections, areaBytes);
914 		uint8* areaOldProtections = area->page_protections;
915 		area->page_protections = areaNewProtections;
916 
917 		// Shift the protection bits of the second area to the start of
918 		// the old array.
919 		size_t oldBytes = area_page_protections_size(oldSize);
920 		addr_t secondAreaOffset = secondBase - area->Base();
921 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
922 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
923 
924 		// Copy the protection bits of the second area.
925 		size_t secondAreaBytes = area_page_protections_size(secondSize);
926 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
927 		secondArea->page_protections = secondAreaNewProtections;
928 
929 		// We don't need this anymore.
930 		free_etc(areaOldProtections, allocationFlags);
931 
932 		// Set the correct page protections for the second area.
933 		VMTranslationMap* map = addressSpace->TranslationMap();
934 		map->Lock();
935 		for (VMCachePagesTree::Iterator it
936 				= secondArea->cache->pages.GetIterator();
937 				vm_page* page = it.Next();) {
938 			if (is_page_in_area(secondArea, page)) {
939 				addr_t address = virtual_page_address(secondArea, page);
940 				uint32 pageProtection
941 					= get_area_page_protection(secondArea, address);
942 				map->ProtectPage(secondArea, address, pageProtection);
943 			}
944 		}
945 		map->Unlock();
946 	}
947 
948 	if (_secondArea != NULL)
949 		*_secondArea = secondArea;
950 
951 	return B_OK;
952 }
953 
954 
955 /*!	Deletes or cuts all areas in the given address range.
956 	The address space must be write-locked.
957 	The caller must ensure that no part of the given range is wired.
958 */
959 static status_t
960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
961 	bool kernel)
962 {
963 	size = PAGE_ALIGN(size);
964 
965 	// Check whether the caller is allowed to modify the concerned areas.
966 	if (!kernel) {
967 		for (VMAddressSpace::AreaRangeIterator it
968 				= addressSpace->GetAreaRangeIterator(address, size);
969 			VMArea* area = it.Next();) {
970 
971 			if ((area->protection & B_KERNEL_AREA) != 0) {
972 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
973 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
974 					team_get_current_team_id(), area->id, area->name);
975 				return B_NOT_ALLOWED;
976 			}
977 		}
978 	}
979 
980 	for (VMAddressSpace::AreaRangeIterator it
981 			= addressSpace->GetAreaRangeIterator(address, size);
982 		VMArea* area = it.Next();) {
983 
984 		status_t error = cut_area(addressSpace, area, address, size, NULL,
985 			kernel);
986 		if (error != B_OK)
987 			return error;
988 			// Failing after already messing with areas is ugly, but we
989 			// can't do anything about it.
990 	}
991 
992 	return B_OK;
993 }
994 
995 
996 static status_t
997 discard_area_range(VMArea* area, addr_t address, addr_t size)
998 {
999 	addr_t offset;
1000 	if (!intersect_area(area, address, size, offset))
1001 		return B_OK;
1002 
1003 	// If someone else uses the area's cache or it's not an anonymous cache, we
1004 	// can't discard.
1005 	VMCache* cache = vm_area_get_locked_cache(area);
1006 	if (cache->areas != area || area->cache_next != NULL
1007 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1008 		return B_OK;
1009 	}
1010 
1011 	VMCacheChainLocker cacheChainLocker(cache);
1012 	cacheChainLocker.LockAllSourceCaches();
1013 
1014 	unmap_pages(area, address, size);
1015 
1016 	// Since VMCache::Discard() can temporarily drop the lock, we must
1017 	// unlock all lower caches to prevent locking order inversion.
1018 	cacheChainLocker.Unlock(cache);
1019 	cache->Discard(cache->virtual_base + offset, size);
1020 	cache->ReleaseRefAndUnlock();
1021 
1022 	return B_OK;
1023 }
1024 
1025 
1026 static status_t
1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1028 	bool kernel)
1029 {
1030 	for (VMAddressSpace::AreaRangeIterator it
1031 		= addressSpace->GetAreaRangeIterator(address, size);
1032 			VMArea* area = it.Next();) {
1033 		status_t error = discard_area_range(area, address, size);
1034 		if (error != B_OK)
1035 			return error;
1036 	}
1037 
1038 	return B_OK;
1039 }
1040 
1041 
1042 /*! You need to hold the lock of the cache and the write lock of the address
1043 	space when calling this function.
1044 	Note that in case of error your cache will be temporarily unlocked.
1045 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1046 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1047 	that no part of the specified address range (base \c *_virtualAddress, size
1048 	\a size) is wired. The cache will also be temporarily unlocked.
1049 */
1050 static status_t
1051 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1052 	const char* areaName, addr_t size, int wiring, int protection,
1053 	int protectionMax, int mapping,
1054 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1055 	bool kernel, VMArea** _area, void** _virtualAddress)
1056 {
1057 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1058 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1059 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1060 		addressSpace, cache, addressRestrictions->address, offset, size,
1061 		addressRestrictions->address_specification, wiring, protection,
1062 		protectionMax, _area, areaName));
1063 	cache->AssertLocked();
1064 
1065 	if (size == 0) {
1066 #if KDEBUG
1067 		panic("map_backing_store(): called with size=0 for area '%s'!",
1068 			areaName);
1069 #endif
1070 		return B_BAD_VALUE;
1071 	}
1072 	if (offset < 0)
1073 		return B_BAD_VALUE;
1074 
1075 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1076 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1077 	int priority;
1078 	if (addressSpace != VMAddressSpace::Kernel()) {
1079 		priority = VM_PRIORITY_USER;
1080 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1081 		priority = VM_PRIORITY_VIP;
1082 		allocationFlags |= HEAP_PRIORITY_VIP;
1083 	} else
1084 		priority = VM_PRIORITY_SYSTEM;
1085 
1086 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1087 		allocationFlags);
1088 	if (area == NULL)
1089 		return B_NO_MEMORY;
1090 	if (mapping != REGION_PRIVATE_MAP)
1091 		area->protection_max = protectionMax & B_USER_PROTECTION;
1092 
1093 	status_t status;
1094 
1095 	// if this is a private map, we need to create a new cache
1096 	// to handle the private copies of pages as they are written to
1097 	VMCache* sourceCache = cache;
1098 	if (mapping == REGION_PRIVATE_MAP) {
1099 		VMCache* newCache;
1100 
1101 		// create an anonymous cache
1102 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1103 			(protection & B_STACK_AREA) != 0
1104 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1105 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1106 		if (status != B_OK)
1107 			goto err1;
1108 
1109 		newCache->Lock();
1110 		newCache->temporary = 1;
1111 		newCache->virtual_base = offset;
1112 		newCache->virtual_end = offset + size;
1113 
1114 		cache->AddConsumer(newCache);
1115 
1116 		cache = newCache;
1117 	}
1118 
1119 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1120 		status = cache->SetMinimalCommitment(size, priority);
1121 		if (status != B_OK)
1122 			goto err2;
1123 	}
1124 
1125 	// check to see if this address space has entered DELETE state
1126 	if (addressSpace->IsBeingDeleted()) {
1127 		// okay, someone is trying to delete this address space now, so we can't
1128 		// insert the area and have to back out
1129 		status = B_BAD_TEAM_ID;
1130 		goto err2;
1131 	}
1132 
1133 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1134 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1135 		// temporarily unlock the current cache since it might be mapped to
1136 		// some existing area, and unmap_address_range also needs to lock that
1137 		// cache to delete the area.
1138 		cache->Unlock();
1139 		status = unmap_address_range(addressSpace,
1140 			(addr_t)addressRestrictions->address, size, kernel);
1141 		cache->Lock();
1142 		if (status != B_OK)
1143 			goto err2;
1144 	}
1145 
1146 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1147 		allocationFlags, _virtualAddress);
1148 	if (status == B_NO_MEMORY
1149 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1150 		// Due to how many locks are held, we cannot wait here for space to be
1151 		// freed up, but we can at least notify the low_resource handler.
1152 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1153 	}
1154 	if (status != B_OK)
1155 		goto err2;
1156 
1157 	// attach the cache to the area
1158 	area->cache = cache;
1159 	area->cache_offset = offset;
1160 
1161 	// point the cache back to the area
1162 	cache->InsertAreaLocked(area);
1163 	if (mapping == REGION_PRIVATE_MAP)
1164 		cache->Unlock();
1165 
1166 	// insert the area in the global areas map
1167 	VMAreas::Insert(area);
1168 
1169 	// grab a ref to the address space (the area holds this)
1170 	addressSpace->Get();
1171 
1172 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1173 //		cache, sourceCache, areaName, area);
1174 
1175 	*_area = area;
1176 	return B_OK;
1177 
1178 err2:
1179 	if (mapping == REGION_PRIVATE_MAP) {
1180 		// We created this cache, so we must delete it again. Note that we
1181 		// need to temporarily unlock the source cache or we'll otherwise
1182 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1183 		sourceCache->Unlock();
1184 		cache->ReleaseRefAndUnlock();
1185 		sourceCache->Lock();
1186 	}
1187 err1:
1188 	addressSpace->DeleteArea(area, allocationFlags);
1189 	return status;
1190 }
1191 
1192 
1193 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1194 	  locker1, locker2).
1195 */
1196 template<typename LockerType1, typename LockerType2>
1197 static inline bool
1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1199 {
1200 	area->cache->AssertLocked();
1201 
1202 	VMAreaUnwiredWaiter waiter;
1203 	if (!area->AddWaiterIfWired(&waiter))
1204 		return false;
1205 
1206 	// unlock everything and wait
1207 	if (locker1 != NULL)
1208 		locker1->Unlock();
1209 	if (locker2 != NULL)
1210 		locker2->Unlock();
1211 
1212 	waiter.waitEntry.Wait();
1213 
1214 	return true;
1215 }
1216 
1217 
1218 /*!	Checks whether the given area has any wired ranges intersecting with the
1219 	specified range and waits, if so.
1220 
1221 	When it has to wait, the function calls \c Unlock() on both \a locker1
1222 	and \a locker2, if given.
1223 	The area's top cache must be locked and must be unlocked as a side effect
1224 	of calling \c Unlock() on either \a locker1 or \a locker2.
1225 
1226 	If the function does not have to wait it does not modify or unlock any
1227 	object.
1228 
1229 	\param area The area to be checked.
1230 	\param base The base address of the range to check.
1231 	\param size The size of the address range to check.
1232 	\param locker1 An object to be unlocked before starting to wait (may
1233 		be \c NULL).
1234 	\param locker2 An object to be unlocked before starting to wait (may
1235 		be \c NULL).
1236 	\return \c true, if the function had to wait, \c false otherwise.
1237 */
1238 template<typename LockerType1, typename LockerType2>
1239 static inline bool
1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1241 	LockerType1* locker1, LockerType2* locker2)
1242 {
1243 	area->cache->AssertLocked();
1244 
1245 	VMAreaUnwiredWaiter waiter;
1246 	if (!area->AddWaiterIfWired(&waiter, base, size))
1247 		return false;
1248 
1249 	// unlock everything and wait
1250 	if (locker1 != NULL)
1251 		locker1->Unlock();
1252 	if (locker2 != NULL)
1253 		locker2->Unlock();
1254 
1255 	waiter.waitEntry.Wait();
1256 
1257 	return true;
1258 }
1259 
1260 
1261 /*!	Checks whether the given address space has any wired ranges intersecting
1262 	with the specified range and waits, if so.
1263 
1264 	Similar to wait_if_area_range_is_wired(), with the following differences:
1265 	- All areas intersecting with the range are checked (respectively all until
1266 	  one is found that contains a wired range intersecting with the given
1267 	  range).
1268 	- The given address space must at least be read-locked and must be unlocked
1269 	  when \c Unlock() is called on \a locker.
1270 	- None of the areas' caches are allowed to be locked.
1271 */
1272 template<typename LockerType>
1273 static inline bool
1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1275 	size_t size, LockerType* locker)
1276 {
1277 	for (VMAddressSpace::AreaRangeIterator it
1278 		= addressSpace->GetAreaRangeIterator(base, size);
1279 			VMArea* area = it.Next();) {
1280 
1281 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1282 
1283 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1284 			return true;
1285 	}
1286 
1287 	return false;
1288 }
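
// Editorial note: because waiting unlocks the address space, callers typically
// re-acquire it and retry, as in vm_create_anonymous_area() below:
//
//	do {
//		status = locker.SetTo(team);
//		...
//		addressSpace = locker.AddressSpace();
//	} while (wait_if_address_range_is_wired(addressSpace, base, size,
//		&locker));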
1289 
1290 
1291 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1292 	It must be called in a situation where the kernel address space may be
1293 	locked.
1294 */
1295 status_t
1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1297 {
1298 	AddressSpaceReadLocker locker;
1299 	VMArea* area;
1300 	status_t status = locker.SetFromArea(id, area);
1301 	if (status != B_OK)
1302 		return status;
1303 
1304 	if (area->page_protections == NULL) {
1305 		status = allocate_area_page_protections(area);
1306 		if (status != B_OK)
1307 			return status;
1308 	}
1309 
1310 	*cookie = (void*)area;
1311 	return B_OK;
1312 }
1313 
1314 
1315 /*!	This is a debug helper function that can only be used with very specific
1316 	use cases.
1317 	Sets protection for the given address range to the protection specified.
1318 	If \a protection is 0 then the involved pages will be marked non-present
1319 	in the translation map to cause a fault on access. The pages aren't
1320 	actually unmapped however so that they can be marked present again with
1321 	additional calls to this function. For this to work the area must be
1322 	fully locked in memory so that the pages aren't otherwise touched.
1323 	This function does not lock the kernel address space and needs to be
1324 	supplied with a \a cookie retrieved from a successful call to
1325 	vm_prepare_kernel_area_debug_protection().
1326 */
1327 status_t
1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1329 	uint32 protection)
1330 {
1331 	// check address range
1332 	addr_t address = (addr_t)_address;
1333 	size = PAGE_ALIGN(size);
1334 
1335 	if ((address % B_PAGE_SIZE) != 0
1336 		|| (addr_t)address + size < (addr_t)address
1337 		|| !IS_KERNEL_ADDRESS(address)
1338 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1339 		return B_BAD_VALUE;
1340 	}
1341 
1342 	// Translate the kernel protection to user protection as we only store that.
1343 	if ((protection & B_KERNEL_READ_AREA) != 0)
1344 		protection |= B_READ_AREA;
1345 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1346 		protection |= B_WRITE_AREA;
1347 
1348 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1349 	VMTranslationMap* map = addressSpace->TranslationMap();
1350 	VMArea* area = (VMArea*)cookie;
1351 
1352 	addr_t offset = address - area->Base();
1353 	if (area->Size() - offset < size) {
1354 		panic("protect range not fully within supplied area");
1355 		return B_BAD_VALUE;
1356 	}
1357 
1358 	if (area->page_protections == NULL) {
1359 		panic("area has no page protections");
1360 		return B_BAD_VALUE;
1361 	}
1362 
1363 	// Invalidate the mapping entries so any access to them will fault, or
1364 	// restore the mapping entries unchanged so that lookups will succeed again.
1365 	map->Lock();
1366 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1367 	map->Unlock();
1368 
1369 	// And set the proper page protections so that the fault case will actually
1370 	// fail and not simply try to map a new page.
1371 	for (addr_t pageAddress = address; pageAddress < address + size;
1372 			pageAddress += B_PAGE_SIZE) {
1373 		set_area_page_protection(area, pageAddress, protection);
1374 	}
1375 
1376 	return B_OK;
1377 }
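
// Editorial usage sketch (hypothetical area id and address), combining the two
// debug helpers above: obtain a cookie once, then toggle the page protections
// of a page-aligned sub-range.
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the page fault on any access
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//		// ... later, restore read/write access
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}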
1378 
1379 
1380 status_t
1381 vm_block_address_range(const char* name, void* address, addr_t size)
1382 {
1383 	if (!arch_vm_supports_protection(0))
1384 		return B_NOT_SUPPORTED;
1385 
1386 	AddressSpaceWriteLocker locker;
1387 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1388 	if (status != B_OK)
1389 		return status;
1390 
1391 	VMAddressSpace* addressSpace = locker.AddressSpace();
1392 
1393 	// create an anonymous cache
1394 	VMCache* cache;
1395 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1396 		VM_PRIORITY_SYSTEM);
1397 	if (status != B_OK)
1398 		return status;
1399 
1400 	cache->temporary = 1;
1401 	cache->virtual_end = size;
1402 	cache->Lock();
1403 
1404 	VMArea* area;
1405 	virtual_address_restrictions addressRestrictions = {};
1406 	addressRestrictions.address = address;
1407 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1408 	status = map_backing_store(addressSpace, cache, 0, name, size,
1409 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1410 		true, &area, NULL);
1411 	if (status != B_OK) {
1412 		cache->ReleaseRefAndUnlock();
1413 		return status;
1414 	}
1415 
1416 	cache->Unlock();
1417 	area->cache_type = CACHE_TYPE_RAM;
1418 	return area->id;
1419 }
1420 
1421 
1422 status_t
1423 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1424 {
1425 	AddressSpaceWriteLocker locker(team);
1426 	if (!locker.IsLocked())
1427 		return B_BAD_TEAM_ID;
1428 
1429 	VMAddressSpace* addressSpace = locker.AddressSpace();
1430 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1431 		addressSpace == VMAddressSpace::Kernel()
1432 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1433 }
1434 
1435 
1436 status_t
1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1438 	addr_t size, uint32 flags)
1439 {
1440 	if (size == 0)
1441 		return B_BAD_VALUE;
1442 
1443 	AddressSpaceWriteLocker locker(team);
1444 	if (!locker.IsLocked())
1445 		return B_BAD_TEAM_ID;
1446 
1447 	virtual_address_restrictions addressRestrictions = {};
1448 	addressRestrictions.address = *_address;
1449 	addressRestrictions.address_specification = addressSpec;
1450 	VMAddressSpace* addressSpace = locker.AddressSpace();
1451 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1452 		addressSpace == VMAddressSpace::Kernel()
1453 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1454 		_address);
1455 }
1456 
1457 
1458 area_id
1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1460 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1461 	const virtual_address_restrictions* virtualAddressRestrictions,
1462 	const physical_address_restrictions* physicalAddressRestrictions,
1463 	bool kernel, void** _address)
1464 {
1465 	VMArea* area;
1466 	VMCache* cache;
1467 	vm_page* page = NULL;
1468 	bool isStack = (protection & B_STACK_AREA) != 0;
1469 	page_num_t guardPages;
1470 	bool canOvercommit = false;
1471 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1472 		? VM_PAGE_ALLOC_CLEAR : 0;
1473 
1474 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1475 		team, name, size));
1476 
1477 	size = PAGE_ALIGN(size);
1478 	guardSize = PAGE_ALIGN(guardSize);
1479 	guardPages = guardSize / B_PAGE_SIZE;
1480 
1481 	if (size == 0 || size < guardSize)
1482 		return B_BAD_VALUE;
1483 	if (!arch_vm_supports_protection(protection))
1484 		return B_NOT_SUPPORTED;
1485 
1486 	if (team == B_CURRENT_TEAM)
1487 		team = VMAddressSpace::CurrentID();
1488 	if (team < 0)
1489 		return B_BAD_TEAM_ID;
1490 
1491 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1492 		canOvercommit = true;
1493 
1494 #ifdef DEBUG_KERNEL_STACKS
1495 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1496 		isStack = true;
1497 #endif
1498 
1499 	// check parameters
1500 	switch (virtualAddressRestrictions->address_specification) {
1501 		case B_ANY_ADDRESS:
1502 		case B_EXACT_ADDRESS:
1503 		case B_BASE_ADDRESS:
1504 		case B_ANY_KERNEL_ADDRESS:
1505 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1506 		case B_RANDOMIZED_ANY_ADDRESS:
1507 		case B_RANDOMIZED_BASE_ADDRESS:
1508 			break;
1509 
1510 		default:
1511 			return B_BAD_VALUE;
1512 	}
1513 
1514 	// If low or high physical address restrictions are given, we force
1515 	// B_CONTIGUOUS wiring, since only then we'll use
1516 	// vm_page_allocate_page_run() which deals with those restrictions.
1517 	if (physicalAddressRestrictions->low_address != 0
1518 		|| physicalAddressRestrictions->high_address != 0) {
1519 		wiring = B_CONTIGUOUS;
1520 	}
1521 
1522 	physical_address_restrictions stackPhysicalRestrictions;
1523 	bool doReserveMemory = false;
1524 	switch (wiring) {
1525 		case B_NO_LOCK:
1526 			break;
1527 		case B_FULL_LOCK:
1528 		case B_LAZY_LOCK:
1529 		case B_CONTIGUOUS:
1530 			doReserveMemory = true;
1531 			break;
1532 		case B_ALREADY_WIRED:
1533 			break;
1534 		case B_LOMEM:
1535 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1536 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1537 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1538 			wiring = B_CONTIGUOUS;
1539 			doReserveMemory = true;
1540 			break;
1541 		case B_32_BIT_FULL_LOCK:
1542 			if (B_HAIKU_PHYSICAL_BITS <= 32
1543 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1544 				wiring = B_FULL_LOCK;
1545 				doReserveMemory = true;
1546 				break;
1547 			}
1548 			// TODO: We don't really support this mode efficiently. Just fall
1549 			// through for now ...
1550 		case B_32_BIT_CONTIGUOUS:
1551 			#if B_HAIKU_PHYSICAL_BITS > 32
1552 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1553 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1554 					stackPhysicalRestrictions.high_address
1555 						= (phys_addr_t)1 << 32;
1556 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1557 				}
1558 			#endif
1559 			wiring = B_CONTIGUOUS;
1560 			doReserveMemory = true;
1561 			break;
1562 		default:
1563 			return B_BAD_VALUE;
1564 	}
1565 
1566 	// Optimization: For a single-page contiguous allocation without low/high
1567 	// memory restriction B_FULL_LOCK wiring suffices.
1568 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1569 		&& physicalAddressRestrictions->low_address == 0
1570 		&& physicalAddressRestrictions->high_address == 0) {
1571 		wiring = B_FULL_LOCK;
1572 	}
1573 
1574 	// For full lock or contiguous areas we're also going to map the pages and
1575 	// thus need to reserve pages for the mapping backend upfront.
1576 	addr_t reservedMapPages = 0;
1577 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1578 		AddressSpaceWriteLocker locker;
1579 		status_t status = locker.SetTo(team);
1580 		if (status != B_OK)
1581 			return status;
1582 
1583 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1584 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1585 	}
1586 
1587 	int priority;
1588 	if (team != VMAddressSpace::KernelID())
1589 		priority = VM_PRIORITY_USER;
1590 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1591 		priority = VM_PRIORITY_VIP;
1592 	else
1593 		priority = VM_PRIORITY_SYSTEM;
1594 
1595 	// Reserve memory before acquiring the address space lock. This reduces the
1596 	// chances of failure, since while holding the write lock to the address
1597 	// space (if it is the kernel address space that is), the low memory handler
1598 	// won't be able to free anything for us.
1599 	addr_t reservedMemory = 0;
1600 	if (doReserveMemory) {
1601 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1602 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1603 			return B_NO_MEMORY;
1604 		reservedMemory = size;
1605 		// TODO: We don't reserve the memory for the pages for the page
1606 		// directories/tables. We actually need to, since we currently don't
1607 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1608 		// there are actually fewer physical pages than there should be, which
1609 		// can get the VM into trouble in low memory situations.
1610 	}
1611 
1612 	AddressSpaceWriteLocker locker;
1613 	VMAddressSpace* addressSpace;
1614 	status_t status;
1615 
1616 	// For full lock areas reserve the pages before locking the address
1617 	// space. E.g. block caches can't release their memory while we hold the
1618 	// address space lock.
1619 	page_num_t reservedPages = reservedMapPages;
1620 	if (wiring == B_FULL_LOCK)
1621 		reservedPages += size / B_PAGE_SIZE;
1622 
1623 	vm_page_reservation reservation;
1624 	if (reservedPages > 0) {
1625 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1626 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1627 					priority)) {
1628 				reservedPages = 0;
1629 				status = B_WOULD_BLOCK;
1630 				goto err0;
1631 			}
1632 		} else
1633 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1634 	}
1635 
1636 	if (wiring == B_CONTIGUOUS) {
1637 		// we try to allocate the page run here upfront as this may easily
1638 		// fail for obvious reasons
1639 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1640 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1641 		if (page == NULL) {
1642 			status = B_NO_MEMORY;
1643 			goto err0;
1644 		}
1645 	}
1646 
1647 	// Lock the address space and, if B_EXACT_ADDRESS and
1648 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1649 	// is not wired.
1650 	do {
1651 		status = locker.SetTo(team);
1652 		if (status != B_OK)
1653 			goto err1;
1654 
1655 		addressSpace = locker.AddressSpace();
1656 	} while (virtualAddressRestrictions->address_specification
1657 			== B_EXACT_ADDRESS
1658 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1659 		&& wait_if_address_range_is_wired(addressSpace,
1660 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1661 
1662 	// create an anonymous cache
1663 	// if it's a stack, make sure that at least two pages are available
1664 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1665 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1666 		wiring == B_NO_LOCK, priority);
1667 	if (status != B_OK)
1668 		goto err1;
1669 
1670 	cache->temporary = 1;
1671 	cache->virtual_end = size;
1672 	cache->committed_size = reservedMemory;
1673 		// TODO: This should be done via a method.
1674 	reservedMemory = 0;
1675 
1676 	cache->Lock();
1677 
1678 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1679 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1680 		virtualAddressRestrictions, kernel, &area, _address);
1681 
1682 	if (status != B_OK) {
1683 		cache->ReleaseRefAndUnlock();
1684 		goto err1;
1685 	}
1686 
1687 	locker.DegradeToReadLock();
1688 
1689 	switch (wiring) {
1690 		case B_NO_LOCK:
1691 		case B_LAZY_LOCK:
1692 			// do nothing - the pages are mapped in as needed
1693 			break;
1694 
1695 		case B_FULL_LOCK:
1696 		{
1697 			// Allocate and map all pages for this area
1698 
1699 			off_t offset = 0;
1700 			for (addr_t address = area->Base();
1701 					address < area->Base() + (area->Size() - 1);
1702 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1703 #ifdef DEBUG_KERNEL_STACKS
1704 #	ifdef STACK_GROWS_DOWNWARDS
1705 				if (isStack && address < area->Base()
1706 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1707 #	else
1708 				if (isStack && address >= area->Base() + area->Size()
1709 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1710 #	endif
1711 					continue;
1712 #endif
1713 				vm_page* page = vm_page_allocate_page(&reservation,
1714 					PAGE_STATE_WIRED | pageAllocFlags);
1715 				cache->InsertPage(page, offset);
1716 				map_page(area, page, address, protection, &reservation);
1717 
1718 				DEBUG_PAGE_ACCESS_END(page);
1719 			}
1720 
1721 			break;
1722 		}
1723 
1724 		case B_ALREADY_WIRED:
1725 		{
1726 			// The pages should already be mapped. This is only really useful
1727 			// during boot time. Find the appropriate vm_page objects and stick
1728 			// them in the cache object.
1729 			VMTranslationMap* map = addressSpace->TranslationMap();
1730 			off_t offset = 0;
1731 
1732 			if (!gKernelStartup)
1733 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1734 
1735 			map->Lock();
1736 
1737 			for (addr_t virtualAddress = area->Base();
1738 					virtualAddress < area->Base() + (area->Size() - 1);
1739 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1740 				phys_addr_t physicalAddress;
1741 				uint32 flags;
1742 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1743 				if (status < B_OK) {
1744 					panic("looking up mapping failed for va 0x%lx\n",
1745 						virtualAddress);
1746 				}
1747 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1748 				if (page == NULL) {
1749 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1750 						"\n", physicalAddress);
1751 				}
1752 
1753 				DEBUG_PAGE_ACCESS_START(page);
1754 
1755 				cache->InsertPage(page, offset);
1756 				increment_page_wired_count(page);
1757 				vm_page_set_state(page, PAGE_STATE_WIRED);
1758 				page->busy = false;
1759 
1760 				DEBUG_PAGE_ACCESS_END(page);
1761 			}
1762 
1763 			map->Unlock();
1764 			break;
1765 		}
1766 
1767 		case B_CONTIGUOUS:
1768 		{
1769 			// We have already allocated our contiguous page run, so we can now
1770 			// just map them in the address space
1771 			VMTranslationMap* map = addressSpace->TranslationMap();
1772 			phys_addr_t physicalAddress
1773 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1774 			addr_t virtualAddress = area->Base();
1775 			off_t offset = 0;
1776 
1777 			map->Lock();
1778 
1779 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1780 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1781 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1782 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1783 				if (page == NULL)
1784 					panic("couldn't lookup physical page just allocated\n");
1785 
1786 				status = map->Map(virtualAddress, physicalAddress, protection,
1787 					area->MemoryType(), &reservation);
1788 				if (status < B_OK)
1789 					panic("couldn't map physical page in page run\n");
1790 
1791 				cache->InsertPage(page, offset);
1792 				increment_page_wired_count(page);
1793 
1794 				DEBUG_PAGE_ACCESS_END(page);
1795 			}
1796 
1797 			map->Unlock();
1798 			break;
1799 		}
1800 
1801 		default:
1802 			break;
1803 	}
1804 
1805 	cache->Unlock();
1806 
1807 	if (reservedPages > 0)
1808 		vm_page_unreserve_pages(&reservation);
1809 
1810 	TRACE(("vm_create_anonymous_area: done\n"));
1811 
1812 	area->cache_type = CACHE_TYPE_RAM;
1813 	return area->id;
1814 
1815 err1:
1816 	if (wiring == B_CONTIGUOUS) {
1817 		// we had reserved the area space upfront...
1818 		phys_addr_t pageNumber = page->physical_page_number;
1819 		int32 i;
1820 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1821 			page = vm_lookup_page(pageNumber);
1822 			if (page == NULL)
1823 				panic("couldn't lookup physical page just allocated\n");
1824 
1825 			vm_page_set_state(page, PAGE_STATE_FREE);
1826 		}
1827 	}
1828 
1829 err0:
1830 	if (reservedPages > 0)
1831 		vm_page_unreserve_pages(&reservation);
1832 	if (reservedMemory > 0)
1833 		vm_unreserve_memory(reservedMemory);
1834 
1835 	return status;
1836 }
1837 
1838 
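/*!	Maps the given range of physical memory into the team's address space,
	backed by a device cache. \a physicalAddress need not be page aligned; the
	returned \c *_address is offset into the area accordingly. The memory type
	can be passed in the upper bits of \a addressSpec (B_MTR_*); if none is
	given, the memory is mapped uncached (B_MTR_UC). If \a alreadyWired is
	\c true, the physical range is assumed to be mapped already and only its
	protection/memory type is updated.
*/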
1839 area_id
1840 vm_map_physical_memory(team_id team, const char* name, void** _address,
1841 	uint32 addressSpec, addr_t size, uint32 protection,
1842 	phys_addr_t physicalAddress, bool alreadyWired)
1843 {
1844 	VMArea* area;
1845 	VMCache* cache;
1846 	addr_t mapOffset;
1847 
1848 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1849 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1850 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1851 		addressSpec, size, protection, physicalAddress));
1852 
1853 	if (!arch_vm_supports_protection(protection))
1854 		return B_NOT_SUPPORTED;
1855 
1856 	AddressSpaceWriteLocker locker(team);
1857 	if (!locker.IsLocked())
1858 		return B_BAD_TEAM_ID;
1859 
1860 	// if the physical address is not page aligned,
1861 	// move the actual area down to align on a page boundary
1862 	mapOffset = physicalAddress % B_PAGE_SIZE;
1863 	size += mapOffset;
1864 	physicalAddress -= mapOffset;
1865 
1866 	size = PAGE_ALIGN(size);
1867 
1868 	// create a device cache
1869 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1870 	if (status != B_OK)
1871 		return status;
1872 
1873 	cache->virtual_end = size;
1874 
1875 	cache->Lock();
1876 
1877 	virtual_address_restrictions addressRestrictions = {};
1878 	addressRestrictions.address = *_address;
1879 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1880 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1881 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1882 		true, &area, _address);
1883 
1884 	if (status < B_OK)
1885 		cache->ReleaseRefLocked();
1886 
1887 	cache->Unlock();
1888 
1889 	if (status == B_OK) {
1890 		// set requested memory type -- use uncached, if not given
1891 		uint32 memoryType = addressSpec & B_MTR_MASK;
1892 		if (memoryType == 0)
1893 			memoryType = B_MTR_UC;
1894 
1895 		area->SetMemoryType(memoryType);
1896 
1897 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1898 		if (status != B_OK)
1899 			delete_area(locker.AddressSpace(), area, false);
1900 	}
1901 
1902 	if (status != B_OK)
1903 		return status;
1904 
1905 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1906 
1907 	if (alreadyWired) {
1908 		// The area is already mapped, but possibly not with the right
1909 		// memory type.
1910 		map->Lock();
1911 		map->ProtectArea(area, area->protection);
1912 		map->Unlock();
1913 	} else {
1914 		// Map the area completely.
1915 
1916 		// reserve pages needed for the mapping
1917 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1918 			area->Base() + (size - 1));
1919 		vm_page_reservation reservation;
1920 		vm_page_reserve_pages(&reservation, reservePages,
1921 			team == VMAddressSpace::KernelID()
1922 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1923 
1924 		map->Lock();
1925 
1926 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1927 			map->Map(area->Base() + offset, physicalAddress + offset,
1928 				protection, area->MemoryType(), &reservation);
1929 		}
1930 
1931 		map->Unlock();
1932 
1933 		vm_page_unreserve_pages(&reservation);
1934 	}
1935 
1936 	// modify the pointer returned to be offset back into the new area
1937 	// the same way the physical address in was offset
1938 	*_address = (void*)((addr_t)*_address + mapOffset);
1939 
1940 	area->cache_type = CACHE_TYPE_DEVICE;
1941 	return area->id;
1942 }
1943 
1944 
1945 /*!	Don't use!
1946 	TODO: This function was introduced to map physical page vecs to
1947 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1948 	use a device cache and does not track vm_page::wired_count!
1949 */
1950 area_id
1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1952 	uint32 addressSpec, addr_t* _size, uint32 protection,
1953 	struct generic_io_vec* vecs, uint32 vecCount)
1954 {
1955 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1956 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1957 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1958 		addressSpec, _size, protection, vecs, vecCount));
1959 
1960 	if (!arch_vm_supports_protection(protection)
1961 		|| (addressSpec & B_MTR_MASK) != 0) {
1962 		return B_NOT_SUPPORTED;
1963 	}
1964 
1965 	AddressSpaceWriteLocker locker(team);
1966 	if (!locker.IsLocked())
1967 		return B_BAD_TEAM_ID;
1968 
1969 	if (vecCount == 0)
1970 		return B_BAD_VALUE;
1971 
1972 	addr_t size = 0;
1973 	for (uint32 i = 0; i < vecCount; i++) {
1974 		if (vecs[i].base % B_PAGE_SIZE != 0
1975 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1976 			return B_BAD_VALUE;
1977 		}
1978 
1979 		size += vecs[i].length;
1980 	}
1981 
1982 	// create a device cache
1983 	VMCache* cache;
1984 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1985 	if (result != B_OK)
1986 		return result;
1987 
1988 	cache->virtual_end = size;
1989 
1990 	cache->Lock();
1991 
1992 	VMArea* area;
1993 	virtual_address_restrictions addressRestrictions = {};
1994 	addressRestrictions.address = *_address;
1995 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1996 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1997 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1998 		&addressRestrictions, true, &area, _address);
1999 
2000 	if (result != B_OK)
2001 		cache->ReleaseRefLocked();
2002 
2003 	cache->Unlock();
2004 
2005 	if (result != B_OK)
2006 		return result;
2007 
2008 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2009 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2010 		area->Base() + (size - 1));
2011 
2012 	vm_page_reservation reservation;
2013 	vm_page_reserve_pages(&reservation, reservePages,
2014 			team == VMAddressSpace::KernelID()
2015 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2016 	map->Lock();
2017 
2018 	uint32 vecIndex = 0;
2019 	size_t vecOffset = 0;
2020 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2021 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2022 			vecOffset = 0;
2023 			vecIndex++;
2024 		}
2025 
2026 		if (vecIndex >= vecCount)
2027 			break;
2028 
2029 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2030 			protection, area->MemoryType(), &reservation);
2031 
2032 		vecOffset += B_PAGE_SIZE;
2033 	}
2034 
2035 	map->Unlock();
2036 	vm_page_unreserve_pages(&reservation);
2037 
2038 	if (_size != NULL)
2039 		*_size = size;
2040 
2041 	area->cache_type = CACHE_TYPE_DEVICE;
2042 	return area->id;
2043 }
2044 
2045 
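/*!	Creates an area backed by a null cache. Such an area merely reserves a
	range of address space and is not backed by any memory.
*/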
2046 area_id
2047 vm_create_null_area(team_id team, const char* name, void** address,
2048 	uint32 addressSpec, addr_t size, uint32 flags)
2049 {
2050 	size = PAGE_ALIGN(size);
2051 
2052 	// Lock the address space and, if B_EXACT_ADDRESS and
2053 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2054 	// is not wired.
2055 	AddressSpaceWriteLocker locker;
2056 	do {
2057 		if (locker.SetTo(team) != B_OK)
2058 			return B_BAD_TEAM_ID;
2059 	} while (addressSpec == B_EXACT_ADDRESS
2060 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2061 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2062 			(addr_t)*address, size, &locker));
2063 
2064 	// create a null cache
2065 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2066 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2067 	VMCache* cache;
2068 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2069 	if (status != B_OK)
2070 		return status;
2071 
2072 	cache->temporary = 1;
2073 	cache->virtual_end = size;
2074 
2075 	cache->Lock();
2076 
2077 	VMArea* area;
2078 	virtual_address_restrictions addressRestrictions = {};
2079 	addressRestrictions.address = *address;
2080 	addressRestrictions.address_specification = addressSpec;
2081 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2082 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2083 		REGION_NO_PRIVATE_MAP, flags,
2084 		&addressRestrictions, true, &area, address);
2085 
2086 	if (status < B_OK) {
2087 		cache->ReleaseRefAndUnlock();
2088 		return status;
2089 	}
2090 
2091 	cache->Unlock();
2092 
2093 	area->cache_type = CACHE_TYPE_NULL;
2094 	return area->id;
2095 }
2096 
2097 
2098 /*!	Creates the vnode cache for the specified \a vnode.
2099 	The vnode has to be marked busy when calling this function.
2100 */
2101 status_t
2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2103 {
2104 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2105 }
2106 
2107 
2108 /*!	\a cache must be locked. The area's address space must be read-locked.
2109 */
2110 static void
2111 pre_map_area_pages(VMArea* area, VMCache* cache,
2112 	vm_page_reservation* reservation)
2113 {
2114 	addr_t baseAddress = area->Base();
2115 	addr_t cacheOffset = area->cache_offset;
2116 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2117 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2118 
2119 	for (VMCachePagesTree::Iterator it
2120 				= cache->pages.GetIterator(firstPage, true, true);
2121 			vm_page* page = it.Next();) {
2122 		if (page->cache_offset >= endPage)
2123 			break;
2124 
2125 		// skip busy and inactive pages
2126 		if (page->busy || page->usage_count == 0)
2127 			continue;
2128 
2129 		DEBUG_PAGE_ACCESS_START(page);
2130 		map_page(area, page,
2131 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2132 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2133 		DEBUG_PAGE_ACCESS_END(page);
2134 	}
2135 }
2136 
2137 
2138 /*!	Will map the file specified by \a fd to an area in memory.
2139 	The file will be mirrored beginning at the specified \a offset. The
2140 	\a offset and \a size arguments have to be page aligned.
2141 */
2142 static area_id
2143 _vm_map_file(team_id team, const char* name, void** _address,
2144 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2145 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2146 {
2147 	// TODO: for binary files, we want to make sure that they get a copy
2148 	//	of the file as it is at mapping time, i.e. later changes should
2149 	//	not make it into the mapped copy -- this will need quite some
2150 	//	changes to be done in a nice way
2151 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2152 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2153 
2154 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2155 	size = PAGE_ALIGN(size);
2156 
2157 	if (mapping == REGION_NO_PRIVATE_MAP)
2158 		protection |= B_SHARED_AREA;
2159 	if (addressSpec != B_EXACT_ADDRESS)
2160 		unmapAddressRange = false;
2161 
2162 	uint32 mappingFlags = 0;
2163 	if (unmapAddressRange)
2164 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2165 
2166 	if (fd < 0) {
2167 		virtual_address_restrictions virtualRestrictions = {};
2168 		virtualRestrictions.address = *_address;
2169 		virtualRestrictions.address_specification = addressSpec;
2170 		physical_address_restrictions physicalRestrictions = {};
2171 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2172 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2173 			_address);
2174 	}
2175 
2176 	// get the open flags of the FD
2177 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2178 	if (descriptor == NULL)
2179 		return EBADF;
2180 	int32 openMode = descriptor->open_mode;
2181 	put_fd(descriptor);
2182 
2183 	// The FD must be open for reading in any case. For a shared mapping with
2184 	// write access, the FD must additionally be open for writing.
2185 	if ((openMode & O_ACCMODE) == O_WRONLY
2186 		|| (mapping == REGION_NO_PRIVATE_MAP
2187 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2188 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2189 		return EACCES;
2190 	}
2191 
2192 	uint32 protectionMax = 0;
2193 	if (mapping == REGION_NO_PRIVATE_MAP) {
2194 		if ((openMode & O_ACCMODE) == O_RDWR)
2195 			protectionMax = protection | B_USER_PROTECTION;
2196 		else
2197 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2198 	} else if (mapping == REGION_PRIVATE_MAP) {
2199 		// For privately mapped read-only regions, skip committing memory.
2200 		// (If protections are changed later on, memory will be committed then.)
2201 		if ((protection & B_WRITE_AREA) == 0)
2202 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2203 	}
2204 
2205 	// get the vnode for the object, this also grabs a ref to it
2206 	struct vnode* vnode = NULL;
2207 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2208 	if (status < B_OK)
2209 		return status;
2210 	VnodePutter vnodePutter(vnode);
2211 
2212 	// If we're going to pre-map pages, we need to reserve the pages needed by
2213 	// the mapping backend upfront.
2214 	page_num_t reservedPreMapPages = 0;
2215 	vm_page_reservation reservation;
2216 	if ((protection & B_READ_AREA) != 0) {
2217 		AddressSpaceWriteLocker locker;
2218 		status = locker.SetTo(team);
2219 		if (status != B_OK)
2220 			return status;
2221 
2222 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2223 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2224 
2225 		locker.Unlock();
2226 
2227 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2228 			team == VMAddressSpace::KernelID()
2229 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2230 	}
2231 
2232 	struct PageUnreserver {
2233 		PageUnreserver(vm_page_reservation* reservation)
2234 			:
2235 			fReservation(reservation)
2236 		{
2237 		}
2238 
2239 		~PageUnreserver()
2240 		{
2241 			if (fReservation != NULL)
2242 				vm_page_unreserve_pages(fReservation);
2243 		}
2244 
2245 		vm_page_reservation* fReservation;
2246 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2247 
2248 	// Lock the address space and, if the specified address range shall be
2249 	// unmapped, ensure it is not wired.
2250 	AddressSpaceWriteLocker locker;
2251 	do {
2252 		if (locker.SetTo(team) != B_OK)
2253 			return B_BAD_TEAM_ID;
2254 	} while (unmapAddressRange
2255 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2256 			(addr_t)*_address, size, &locker));
2257 
2258 	// TODO: this only works for file systems that use the file cache
2259 	VMCache* cache;
2260 	status = vfs_get_vnode_cache(vnode, &cache, false);
2261 	if (status < B_OK)
2262 		return status;
2263 
2264 	cache->Lock();
2265 
2266 	VMArea* area;
2267 	virtual_address_restrictions addressRestrictions = {};
2268 	addressRestrictions.address = *_address;
2269 	addressRestrictions.address_specification = addressSpec;
2270 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2271 		0, protection, protectionMax, mapping, mappingFlags,
2272 		&addressRestrictions, kernel, &area, _address);
2273 
2274 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2275 		// map_backing_store() cannot know we no longer need the ref
2276 		cache->ReleaseRefLocked();
2277 	}
2278 
2279 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2280 		pre_map_area_pages(area, cache, &reservation);
2281 
2282 	cache->Unlock();
2283 
2284 	if (status == B_OK) {
2285 		// TODO: this probably deserves a smarter solution, i.e. don't always
2286 		// prefetch stuff, and probably don't trigger it at this place either.
2287 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2288 			// prefetches at max 10 MB starting from "offset"
2289 	}
2290 
2291 	if (status != B_OK)
2292 		return status;
2293 
2294 	area->cache_type = CACHE_TYPE_VNODE;
2295 	return area->id;
2296 }
2297 
2298 
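/*!	Maps the file specified by \a fd into the address space of team \a aid.
	Kernel-internal wrapper around _vm_map_file(); see there for the details.
*/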
2299 area_id
2300 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2301 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2302 	int fd, off_t offset)
2303 {
2304 	if (!arch_vm_supports_protection(protection))
2305 		return B_NOT_SUPPORTED;
2306 
2307 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2308 		mapping, unmapAddressRange, fd, offset, true);
2309 }
2310 
2311 
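/*!	Returns the given area's cache, locked and with a reference acquired.
	If the area's cache changes while waiting for the cache lock, the
	function retries with the new cache.
*/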
2312 VMCache*
2313 vm_area_get_locked_cache(VMArea* area)
2314 {
2315 	rw_lock_read_lock(&sAreaCacheLock);
2316 
2317 	while (true) {
2318 		VMCache* cache = area->cache;
2319 
2320 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2321 			// cache has been deleted
2322 			rw_lock_read_lock(&sAreaCacheLock);
2323 			continue;
2324 		}
2325 
2326 		rw_lock_read_lock(&sAreaCacheLock);
2327 
2328 		if (cache == area->cache) {
2329 			cache->AcquireRefLocked();
2330 			rw_lock_read_unlock(&sAreaCacheLock);
2331 			return cache;
2332 		}
2333 
2334 		// the cache changed in the meantime
2335 		cache->Unlock();
2336 	}
2337 }
2338 
2339 
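/*!	Releases the reference and the lock acquired by
	vm_area_get_locked_cache().
*/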
2340 void
2341 vm_area_put_locked_cache(VMCache* cache)
2342 {
2343 	cache->ReleaseRefAndUnlock();
2344 }
2345 
2346 
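/*!	Clones the area with ID \a sourceID into the address space of \a team.
	Both the source area and the clone are marked B_SHARED_AREA. Unless
	\a mapping is REGION_PRIVATE_MAP, the new area shares the source area's
	cache. For B_FULL_LOCK areas all pages are mapped in right away.
*/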
2347 area_id
2348 vm_clone_area(team_id team, const char* name, void** address,
2349 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2350 	bool kernel)
2351 {
2352 	VMArea* newArea = NULL;
2353 	VMArea* sourceArea;
2354 
2355 	// Check whether the source area exists and is cloneable. If so, mark it
2356 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2357 	{
2358 		AddressSpaceWriteLocker locker;
2359 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2360 		if (status != B_OK)
2361 			return status;
2362 
2363 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2364 			return B_NOT_ALLOWED;
2365 
2366 		sourceArea->protection |= B_SHARED_AREA;
2367 		protection |= B_SHARED_AREA;
2368 	}
2369 
2370 	// Now lock both address spaces and actually do the cloning.
2371 
2372 	MultiAddressSpaceLocker locker;
2373 	VMAddressSpace* sourceAddressSpace;
2374 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2375 	if (status != B_OK)
2376 		return status;
2377 
2378 	VMAddressSpace* targetAddressSpace;
2379 	status = locker.AddTeam(team, true, &targetAddressSpace);
2380 	if (status != B_OK)
2381 		return status;
2382 
2383 	status = locker.Lock();
2384 	if (status != B_OK)
2385 		return status;
2386 
2387 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2388 	if (sourceArea == NULL)
2389 		return B_BAD_VALUE;
2390 
2391 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2392 		return B_NOT_ALLOWED;
2393 
2394 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2395 
2396 	if (!kernel && sourceAddressSpace != targetAddressSpace
2397 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2398 #if KDEBUG
2399 		Team* team = thread_get_current_thread()->team;
2400 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2401 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2402 #endif
2403 		status = B_NOT_ALLOWED;
2404 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2405 		status = B_NOT_ALLOWED;
2406 	} else {
2407 		virtual_address_restrictions addressRestrictions = {};
2408 		addressRestrictions.address = *address;
2409 		addressRestrictions.address_specification = addressSpec;
2410 		status = map_backing_store(targetAddressSpace, cache,
2411 			sourceArea->cache_offset, name, sourceArea->Size(),
2412 			sourceArea->wiring, protection, sourceArea->protection_max,
2413 			mapping, 0, &addressRestrictions,
2414 			kernel, &newArea, address);
2415 	}
2416 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2417 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2418 		// to create a new cache, and has therefore already acquired a reference
2419 		// to the source cache - but otherwise it has no idea that we need
2420 		// one.
2421 		cache->AcquireRefLocked();
2422 	}
2423 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2424 		// we need to map in everything at this point
2425 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2426 			// we don't have actual pages to map but a physical area
2427 			VMTranslationMap* map
2428 				= sourceArea->address_space->TranslationMap();
2429 			map->Lock();
2430 
2431 			phys_addr_t physicalAddress;
2432 			uint32 oldProtection;
2433 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2434 
2435 			map->Unlock();
2436 
2437 			map = targetAddressSpace->TranslationMap();
2438 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2439 				newArea->Base() + (newArea->Size() - 1));
2440 
2441 			vm_page_reservation reservation;
2442 			vm_page_reserve_pages(&reservation, reservePages,
2443 				targetAddressSpace == VMAddressSpace::Kernel()
2444 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2445 			map->Lock();
2446 
2447 			for (addr_t offset = 0; offset < newArea->Size();
2448 					offset += B_PAGE_SIZE) {
2449 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2450 					protection, newArea->MemoryType(), &reservation);
2451 			}
2452 
2453 			map->Unlock();
2454 			vm_page_unreserve_pages(&reservation);
2455 		} else {
2456 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2457 			size_t reservePages = map->MaxPagesNeededToMap(
2458 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2459 			vm_page_reservation reservation;
2460 			vm_page_reserve_pages(&reservation, reservePages,
2461 				targetAddressSpace == VMAddressSpace::Kernel()
2462 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2463 
2464 			// map in all pages from source
2465 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2466 					vm_page* page  = it.Next();) {
2467 				if (!page->busy) {
2468 					DEBUG_PAGE_ACCESS_START(page);
2469 					map_page(newArea, page,
2470 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2471 							- newArea->cache_offset),
2472 						protection, &reservation);
2473 					DEBUG_PAGE_ACCESS_END(page);
2474 				}
2475 			}
2476 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2477 			// ensuring that!
2478 
2479 			vm_page_unreserve_pages(&reservation);
2480 		}
2481 	}
2482 	if (status == B_OK)
2483 		newArea->cache_type = sourceArea->cache_type;
2484 
2485 	vm_area_put_locked_cache(cache);
2486 
2487 	if (status < B_OK)
2488 		return status;
2489 
2490 	return newArea->id;
2491 }
2492 
2493 
2494 /*!	Deletes the specified area of the given address space.
2495 
2496 	The address space must be write-locked.
2497 	The caller must ensure that the area does not have any wired ranges.
2498 
2499 	\param addressSpace The address space containing the area.
2500 	\param area The area to be deleted.
2501 	\param deletingAddressSpace \c true, if the address space is in the process
2502 		of being deleted.
2503 */
2504 static void
2505 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2506 	bool deletingAddressSpace)
2507 {
2508 	ASSERT(!area->IsWired());
2509 
2510 	VMAreas::Remove(area);
2511 
2512 	// At this point the area is removed from the global hash table, but
2513 	// still exists in the area list.
2514 
2515 	// Unmap the virtual address space the area occupied.
2516 	{
2517 		// We need to lock the complete cache chain.
2518 		VMCache* topCache = vm_area_get_locked_cache(area);
2519 		VMCacheChainLocker cacheChainLocker(topCache);
2520 		cacheChainLocker.LockAllSourceCaches();
2521 
2522 		// If the area's top cache is a temporary cache and the area is the only
2523 		// one referencing it (besides us currently holding a second reference),
2524 		// the unmapping code doesn't need to care about preserving the accessed
2525 		// and dirty flags of the top cache page mappings.
2526 		bool ignoreTopCachePageFlags
2527 			= topCache->temporary && topCache->RefCount() == 2;
2528 
2529 		area->address_space->TranslationMap()->UnmapArea(area,
2530 			deletingAddressSpace, ignoreTopCachePageFlags);
2531 	}
2532 
2533 	if (!area->cache->temporary)
2534 		area->cache->WriteModified();
2535 
2536 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2537 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2538 
2539 	arch_vm_unset_memory_type(area);
2540 	addressSpace->RemoveArea(area, allocationFlags);
2541 	addressSpace->Put();
2542 
2543 	area->cache->RemoveArea(area);
2544 	area->cache->ReleaseRef();
2545 
2546 	addressSpace->DeleteArea(area, allocationFlags);
2547 }
2548 
2549 
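/*!	Deletes the area with ID \a id in the address space of \a team, after
	waiting for any wired ranges in it to be unwired. Unless \a kernel is
	\c true, areas protected with B_KERNEL_AREA cannot be deleted.
*/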
2550 status_t
2551 vm_delete_area(team_id team, area_id id, bool kernel)
2552 {
2553 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2554 		team, id));
2555 
2556 	// lock the address space and make sure the area isn't wired
2557 	AddressSpaceWriteLocker locker;
2558 	VMArea* area;
2559 	AreaCacheLocker cacheLocker;
2560 
2561 	do {
2562 		status_t status = locker.SetFromArea(team, id, area);
2563 		if (status != B_OK)
2564 			return status;
2565 
2566 		cacheLocker.SetTo(area);
2567 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2568 
2569 	cacheLocker.Unlock();
2570 
2571 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2572 		return B_NOT_ALLOWED;
2573 
2574 	delete_area(locker.AddressSpace(), area, false);
2575 	return B_OK;
2576 }
2577 
2578 
2579 /*!	Creates a new cache on top of the given cache, moves all areas from
2580 	the old cache to the new one, and changes the protection of all affected
2581 	areas' pages to read-only. If requested, wired pages are moved up to the
2582 	new cache and copies are added to the old cache in their place.
2583 	Preconditions:
2584 	- The given cache must be locked.
2585 	- All of the cache's areas' address spaces must be read locked.
2586 	- Either the cache must not have any wired ranges or a page reservation for
2587 	  all wired pages must be provided, so they can be copied.
2588 
2589 	\param lowerCache The cache on top of which a new cache shall be created.
2590 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2591 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2592 		has wired page. The wired pages are copied in this case.
2593 		has wired pages. The wired pages are copied in this case.
2594 static status_t
2595 vm_copy_on_write_area(VMCache* lowerCache,
2596 	vm_page_reservation* wiredPagesReservation)
2597 {
2598 	VMCache* upperCache;
2599 
2600 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2601 
2602 	// We need to separate the cache from its areas. The cache goes one level
2603 	// deeper and we create a new cache in between.
2604 
2605 	// create an anonymous cache
2606 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2607 		lowerCache->GuardSize() / B_PAGE_SIZE,
2608 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2609 		VM_PRIORITY_USER);
2610 	if (status != B_OK)
2611 		return status;
2612 
2613 	upperCache->Lock();
2614 
2615 	upperCache->temporary = 1;
2616 	upperCache->virtual_base = lowerCache->virtual_base;
2617 	upperCache->virtual_end = lowerCache->virtual_end;
2618 
2619 	// transfer the lower cache areas to the upper cache
2620 	rw_lock_write_lock(&sAreaCacheLock);
2621 	upperCache->TransferAreas(lowerCache);
2622 	rw_lock_write_unlock(&sAreaCacheLock);
2623 
2624 	lowerCache->AddConsumer(upperCache);
2625 
2626 	// We now need to remap all pages from all of the cache's areas read-only,
2627 	// so that a copy will be created on next write access. If there are wired
2628 	// pages, we keep their protection, move them to the upper cache and create
2629 	// copies for the lower cache.
2630 	if (wiredPagesReservation != NULL) {
2631 		// We need to handle wired pages -- iterate through the cache's pages.
2632 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2633 				vm_page* page = it.Next();) {
2634 			if (page->WiredCount() > 0) {
2635 				// allocate a new page and copy the wired one
2636 				vm_page* copiedPage = vm_page_allocate_page(
2637 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2638 
2639 				vm_memcpy_physical_page(
2640 					copiedPage->physical_page_number * B_PAGE_SIZE,
2641 					page->physical_page_number * B_PAGE_SIZE);
2642 
2643 				// move the wired page to the upper cache (note: removing is OK
2644 				// with the SplayTree iterator) and insert the copy
2645 				upperCache->MovePage(page);
2646 				lowerCache->InsertPage(copiedPage,
2647 					page->cache_offset * B_PAGE_SIZE);
2648 
2649 				DEBUG_PAGE_ACCESS_END(copiedPage);
2650 			} else {
2651 				// Change the protection of this page in all areas.
2652 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2653 						tempArea = tempArea->cache_next) {
2654 					if (!is_page_in_area(tempArea, page))
2655 						continue;
2656 
2657 					// The area must be readable in the same way it was
2658 					// previously writable.
2659 					addr_t address = virtual_page_address(tempArea, page);
2660 					uint32 protection = 0;
2661 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2662 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2663 						protection |= B_KERNEL_READ_AREA;
2664 					if ((pageProtection & B_READ_AREA) != 0)
2665 						protection |= B_READ_AREA;
2666 
2667 					VMTranslationMap* map
2668 						= tempArea->address_space->TranslationMap();
2669 					map->Lock();
2670 					map->ProtectPage(tempArea, address, protection);
2671 					map->Unlock();
2672 				}
2673 			}
2674 		}
2675 	} else {
2676 		ASSERT(lowerCache->WiredPagesCount() == 0);
2677 
2678 		// just change the protection of all areas
2679 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2680 				tempArea = tempArea->cache_next) {
2681 			if (tempArea->page_protections != NULL) {
2682 				// Change the protection of all pages in this area.
2683 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2684 				map->Lock();
2685 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2686 					vm_page* page = it.Next();) {
2687 					if (!is_page_in_area(tempArea, page))
2688 						continue;
2689 
2690 					// The area must be readable in the same way it was
2691 					// previously writable.
2692 					addr_t address = virtual_page_address(tempArea, page);
2693 					uint32 protection = 0;
2694 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2695 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2696 						protection |= B_KERNEL_READ_AREA;
2697 					if ((pageProtection & B_READ_AREA) != 0)
2698 						protection |= B_READ_AREA;
2699 
2700 					map->ProtectPage(tempArea, address, protection);
2701 				}
2702 				map->Unlock();
2703 				continue;
2704 			}
2705 			// The area must be readable in the same way it was previously
2706 			// writable.
2707 			uint32 protection = 0;
2708 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2709 				protection |= B_KERNEL_READ_AREA;
2710 			if ((tempArea->protection & B_READ_AREA) != 0)
2711 				protection |= B_READ_AREA;
2712 
2713 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2714 			map->Lock();
2715 			map->ProtectArea(tempArea, protection);
2716 			map->Unlock();
2717 		}
2718 	}
2719 
2720 	vm_area_put_locked_cache(upperCache);
2721 
2722 	return B_OK;
2723 }
2724 
2725 
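/*!	Copies the area with ID \a sourceID into the address space of \a team.
	A shared source area keeps using the same cache; a private, writable one
	is set up for copy-on-write via vm_copy_on_write_area(), with wired pages
	being copied right away. Returns the ID of the new area on success.
*/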
2726 area_id
2727 vm_copy_area(team_id team, const char* name, void** _address,
2728 	uint32 addressSpec, area_id sourceID)
2729 {
2730 	// Do the locking: target address space, all address spaces associated with
2731 	// the source cache, and the cache itself.
2732 	MultiAddressSpaceLocker locker;
2733 	VMAddressSpace* targetAddressSpace;
2734 	VMCache* cache;
2735 	VMArea* source;
2736 	AreaCacheLocker cacheLocker;
2737 	status_t status;
2738 	bool sharedArea;
2739 
2740 	page_num_t wiredPages = 0;
2741 	vm_page_reservation wiredPagesReservation;
2742 
2743 	bool restart;
2744 	do {
2745 		restart = false;
2746 
2747 		locker.Unset();
2748 		status = locker.AddTeam(team, true, &targetAddressSpace);
2749 		if (status == B_OK) {
2750 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2751 				&cache);
2752 		}
2753 		if (status != B_OK)
2754 			return status;
2755 
2756 		cacheLocker.SetTo(cache, true);	// already locked
2757 
2758 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2759 
2760 		page_num_t oldWiredPages = wiredPages;
2761 		wiredPages = 0;
2762 
2763 		// If the source area isn't shared, count the number of wired pages in
2764 		// the cache and reserve as many pages.
2765 		if (!sharedArea) {
2766 			wiredPages = cache->WiredPagesCount();
2767 
2768 			if (wiredPages > oldWiredPages) {
2769 				cacheLocker.Unlock();
2770 				locker.Unlock();
2771 
2772 				if (oldWiredPages > 0)
2773 					vm_page_unreserve_pages(&wiredPagesReservation);
2774 
2775 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2776 					VM_PRIORITY_USER);
2777 
2778 				restart = true;
2779 			}
2780 		} else if (oldWiredPages > 0)
2781 			vm_page_unreserve_pages(&wiredPagesReservation);
2782 	} while (restart);
2783 
2784 	// unreserve pages later
2785 	struct PagesUnreserver {
2786 		PagesUnreserver(vm_page_reservation* reservation)
2787 			:
2788 			fReservation(reservation)
2789 		{
2790 		}
2791 
2792 		~PagesUnreserver()
2793 		{
2794 			if (fReservation != NULL)
2795 				vm_page_unreserve_pages(fReservation);
2796 		}
2797 
2798 	private:
2799 		vm_page_reservation*	fReservation;
2800 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2801 
2802 	bool writableCopy
2803 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2804 	uint8* targetPageProtections = NULL;
2805 
2806 	if (source->page_protections != NULL) {
2807 		size_t bytes = area_page_protections_size(source->Size());
2808 		targetPageProtections = (uint8*)malloc_etc(bytes,
2809 			(source->address_space == VMAddressSpace::Kernel()
2810 					|| targetAddressSpace == VMAddressSpace::Kernel())
2811 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2812 		if (targetPageProtections == NULL)
2813 			return B_NO_MEMORY;
2814 
2815 		memcpy(targetPageProtections, source->page_protections, bytes);
2816 
2817 		if (!writableCopy) {
2818 			for (size_t i = 0; i < bytes; i++) {
2819 				if ((targetPageProtections[i]
2820 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2821 					writableCopy = true;
2822 					break;
2823 				}
2824 			}
2825 		}
2826 	}
2827 
2828 	if (addressSpec == B_CLONE_ADDRESS) {
2829 		addressSpec = B_EXACT_ADDRESS;
2830 		*_address = (void*)source->Base();
2831 	}
2832 
2833 	// First, create a cache on top of the source area, or use the existing
2834 	// one if this is a shared area.
2835 
2836 	VMArea* target;
2837 	virtual_address_restrictions addressRestrictions = {};
2838 	addressRestrictions.address = *_address;
2839 	addressRestrictions.address_specification = addressSpec;
2840 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2841 		name, source->Size(), source->wiring, source->protection,
2842 		source->protection_max,
2843 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2844 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2845 		&addressRestrictions, true, &target, _address);
2846 	if (status < B_OK) {
2847 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2848 		return status;
2849 	}
2850 
2851 	if (targetPageProtections != NULL)
2852 		target->page_protections = targetPageProtections;
2853 
2854 	if (sharedArea) {
2855 		// The new area uses the old area's cache, but map_backing_store()
2856 		// hasn't acquired a ref. So we have to do that now.
2857 		cache->AcquireRefLocked();
2858 	}
2859 
2860 	// If the source area is writable, we need to move it one layer up as well
2861 
2862 	if (!sharedArea) {
2863 		if (writableCopy) {
2864 			// TODO: do something more useful if this fails!
2865 			if (vm_copy_on_write_area(cache,
2866 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2867 				panic("vm_copy_on_write_area() failed!\n");
2868 			}
2869 		}
2870 	}
2871 
2872 	// we return the ID of the newly created area
2873 	return target->id;
2874 }
2875 
2876 
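/*!	Changes the protection of the area with ID \a areaID to \a newProtection.
	Userland callers (\a kernel == \c false) may only change their own
	non-kernel areas, and only within the limits of the area's protection_max.
	If a read-only area becomes writable and its cache has consumers, the
	cache is split via vm_copy_on_write_area() first.
*/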
2877 status_t
2878 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2879 	bool kernel)
2880 {
2881 	fix_protection(&newProtection);
2882 
2883 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2884 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2885 
2886 	if (!arch_vm_supports_protection(newProtection))
2887 		return B_NOT_SUPPORTED;
2888 
2889 	bool becomesWritable
2890 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2891 
2892 	// lock address spaces and cache
2893 	MultiAddressSpaceLocker locker;
2894 	VMCache* cache;
2895 	VMArea* area;
2896 	status_t status;
2897 	AreaCacheLocker cacheLocker;
2898 	bool isWritable;
2899 
2900 	bool restart;
2901 	do {
2902 		restart = false;
2903 
2904 		locker.Unset();
2905 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2906 		if (status != B_OK)
2907 			return status;
2908 
2909 		cacheLocker.SetTo(cache, true);	// already locked
2910 
2911 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2912 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2913 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2914 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2915 				" (%s)\n", team, newProtection, areaID, area->name);
2916 			return B_NOT_ALLOWED;
2917 		}
2918 		if (!kernel && area->protection_max != 0
2919 			&& (newProtection & area->protection_max)
2920 				!= (newProtection & B_USER_PROTECTION)) {
2921 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2922 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2923 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2924 				area->protection_max, areaID, area->name);
2925 			return B_NOT_ALLOWED;
2926 		}
2927 
2928 		if (team != VMAddressSpace::KernelID()
2929 			&& area->address_space->ID() != team) {
2930 			// unless you're the kernel, you are only allowed to set
2931 			// the protection of your own areas
2932 			return B_NOT_ALLOWED;
2933 		}
2934 
2935 		if (area->protection == newProtection)
2936 			return B_OK;
2937 
2938 		isWritable
2939 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2940 
2941 		// Make sure the area (or, if we're going to call
2942 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2943 		// wired ranges.
2944 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2945 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2946 					otherArea = otherArea->cache_next) {
2947 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2948 					restart = true;
2949 					break;
2950 				}
2951 			}
2952 		} else {
2953 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2954 				restart = true;
2955 		}
2956 	} while (restart);
2957 
2958 	bool changePageProtection = true;
2959 	bool changeTopCachePagesOnly = false;
2960 
2961 	if (isWritable && !becomesWritable) {
2962 		// writable -> !writable
2963 
2964 		if (cache->source != NULL && cache->temporary) {
2965 			if (cache->CountWritableAreas(area) == 0) {
2966 				// Since this cache is now backed by the pages in its source cache,
2967 				// we can change the cache's commitment to take only those pages
2968 				// into account that really are in this cache.
2969 
2970 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2971 					team == VMAddressSpace::KernelID()
2972 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2973 
2974 				// TODO: we may be able to join with our source cache, if
2975 				// count == 0
2976 			}
2977 		}
2978 
2979 		// If only the writability changes, we can just remap the pages of the
2980 		// top cache, since the pages of lower caches are mapped read-only
2981 		// anyway. That's only advantageous if the number of pages in the cache
2982 		// is significantly smaller than the number of pages in the area,
2983 		// though.
2984 		if (newProtection
2985 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2986 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2987 			changeTopCachePagesOnly = true;
2988 		}
2989 	} else if (!isWritable && becomesWritable) {
2990 		// !writable -> writable
2991 
2992 		if (!cache->consumers.IsEmpty()) {
2993 			// There are consumers -- we have to insert a new cache. Fortunately
2994 			// vm_copy_on_write_area() does everything that's needed.
2995 			changePageProtection = false;
2996 			status = vm_copy_on_write_area(cache, NULL);
2997 		} else {
2998 			// No consumers, so we don't need to insert a new one.
2999 			if (cache->source != NULL && cache->temporary) {
3000 				// the cache's commitment must contain all possible pages
3001 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3002 					team == VMAddressSpace::KernelID()
3003 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3004 			}
3005 
3006 			if (status == B_OK && cache->source != NULL) {
3007 				// There's a source cache, hence we can't just change all pages'
3008 				// protection or we might allow writing into pages belonging to
3009 				// a lower cache.
3010 				changeTopCachePagesOnly = true;
3011 			}
3012 		}
3013 	} else {
3014 		// we don't have anything special to do in all other cases
3015 	}
3016 
3017 	if (status == B_OK) {
3018 		// remap existing pages in this cache
3019 		if (changePageProtection) {
3020 			VMTranslationMap* map = area->address_space->TranslationMap();
3021 			map->Lock();
3022 
3023 			if (changeTopCachePagesOnly) {
3024 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3025 				page_num_t lastPageOffset
3026 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3027 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3028 						vm_page* page = it.Next();) {
3029 					if (page->cache_offset >= firstPageOffset
3030 						&& page->cache_offset <= lastPageOffset) {
3031 						addr_t address = virtual_page_address(area, page);
3032 						map->ProtectPage(area, address, newProtection);
3033 					}
3034 				}
3035 			} else
3036 				map->ProtectArea(area, newProtection);
3037 
3038 			map->Unlock();
3039 		}
3040 
3041 		area->protection = newProtection;
3042 	}
3043 
3044 	return status;
3045 }
3046 
3047 
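/*!	Looks up the physical address that the given virtual address is mapped to
	in the address space of \a team and stores it in \a paddr.
*/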
3048 status_t
3049 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3050 {
3051 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3052 	if (addressSpace == NULL)
3053 		return B_BAD_TEAM_ID;
3054 
3055 	VMTranslationMap* map = addressSpace->TranslationMap();
3056 
3057 	map->Lock();
3058 	uint32 dummyFlags;
3059 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3060 	map->Unlock();
3061 
3062 	addressSpace->Put();
3063 	return status;
3064 }
3065 
3066 
3067 /*!	The page's cache must be locked.
3068 */
3069 bool
3070 vm_test_map_modification(vm_page* page)
3071 {
3072 	if (page->modified)
3073 		return true;
3074 
3075 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3076 	vm_page_mapping* mapping;
3077 	while ((mapping = iterator.Next()) != NULL) {
3078 		VMArea* area = mapping->area;
3079 		VMTranslationMap* map = area->address_space->TranslationMap();
3080 
3081 		phys_addr_t physicalAddress;
3082 		uint32 flags;
3083 		map->Lock();
3084 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3085 		map->Unlock();
3086 
3087 		if ((flags & PAGE_MODIFIED) != 0)
3088 			return true;
3089 	}
3090 
3091 	return false;
3092 }
3093 
3094 
3095 /*!	The page's cache must be locked.
3096 */
3097 void
3098 vm_clear_map_flags(vm_page* page, uint32 flags)
3099 {
3100 	if ((flags & PAGE_ACCESSED) != 0)
3101 		page->accessed = false;
3102 	if ((flags & PAGE_MODIFIED) != 0)
3103 		page->modified = false;
3104 
3105 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3106 	vm_page_mapping* mapping;
3107 	while ((mapping = iterator.Next()) != NULL) {
3108 		VMArea* area = mapping->area;
3109 		VMTranslationMap* map = area->address_space->TranslationMap();
3110 
3111 		map->Lock();
3112 		map->ClearFlags(virtual_page_address(area, page), flags);
3113 		map->Unlock();
3114 	}
3115 }
3116 
3117 
3118 /*!	Removes all mappings from a page.
3119 	After you've called this function, the page is unmapped from memory and
3120 	the page's \c accessed and \c modified flags have been updated according
3121 	to the state of the mappings.
3122 	The page's cache must be locked.
3123 */
3124 void
3125 vm_remove_all_page_mappings(vm_page* page)
3126 {
3127 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3128 		VMArea* area = mapping->area;
3129 		VMTranslationMap* map = area->address_space->TranslationMap();
3130 		addr_t address = virtual_page_address(area, page);
3131 		map->UnmapPage(area, address, false);
3132 	}
3133 }
3134 
3135 
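/*!	Clears the accessed flag of the page and of all its mappings, transferring
	the mappings' modified flags to the page. Returns the number of accessed
	flags that were set (including the page's own).
	The page's cache must be locked.
*/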
3136 int32
3137 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3138 {
3139 	int32 count = 0;
3140 
3141 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3142 	vm_page_mapping* mapping;
3143 	while ((mapping = iterator.Next()) != NULL) {
3144 		VMArea* area = mapping->area;
3145 		VMTranslationMap* map = area->address_space->TranslationMap();
3146 
3147 		bool modified;
3148 		if (map->ClearAccessedAndModified(area,
3149 				virtual_page_address(area, page), false, modified)) {
3150 			count++;
3151 		}
3152 
3153 		page->modified |= modified;
3154 	}
3155 
3156 
3157 	if (page->accessed) {
3158 		count++;
3159 		page->accessed = false;
3160 	}
3161 
3162 	return count;
3163 }
3164 
3165 
3166 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3167 	mappings.
3168 	The function iterates through the page mappings and removes them until
3169 	encountering one that has been accessed. From then on it will continue to
3170 	iterate, but only clear the accessed flag of the mapping. The page's
3171 	\c modified bit will be updated accordingly, the \c accessed bit will be
3172 	cleared.
3173 	\return The number of mapping accessed bits encountered, including the
3174 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3175 		of the page have been removed.
3176 */
3177 int32
3178 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3179 {
3180 	ASSERT(page->WiredCount() == 0);
3181 
3182 	if (page->accessed)
3183 		return vm_clear_page_mapping_accessed_flags(page);
3184 
3185 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3186 		VMArea* area = mapping->area;
3187 		VMTranslationMap* map = area->address_space->TranslationMap();
3188 		addr_t address = virtual_page_address(area, page);
3189 		bool modified = false;
3190 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3191 			page->accessed = true;
3192 			page->modified |= modified;
3193 			return vm_clear_page_mapping_accessed_flags(page);
3194 		}
3195 		page->modified |= modified;
3196 	}
3197 
3198 	return 0;
3199 }
3200 
3201 
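/*!	Debugger command that displays memory at the given virtual or physical
	address; backs the "dl", "dw", "ds", "db" and "string" commands.
*/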
3202 static int
3203 display_mem(int argc, char** argv)
3204 {
3205 	bool physical = false;
3206 	addr_t copyAddress;
3207 	int32 displayWidth;
3208 	int32 itemSize;
3209 	int32 num = -1;
3210 	addr_t address;
3211 	int i = 1, j;
3212 
3213 	if (argc > 1 && argv[1][0] == '-') {
3214 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3215 			physical = true;
3216 			i++;
3217 		} else
3218 			i = 99;
3219 	}
3220 
3221 	if (argc < i + 1 || argc > i + 2) {
3222 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3223 			"\tdl - 8 bytes\n"
3224 			"\tdw - 4 bytes\n"
3225 			"\tds - 2 bytes\n"
3226 			"\tdb - 1 byte\n"
3227 			"\tstring - a whole string\n"
3228 			"  -p or --physical only allows memory from a single page to be "
3229 			"displayed.\n");
3230 		return 0;
3231 	}
3232 
3233 	address = parse_expression(argv[i]);
3234 
3235 	if (argc > i + 1)
3236 		num = parse_expression(argv[i + 1]);
3237 
3238 	// build the format string
3239 	if (strcmp(argv[0], "db") == 0) {
3240 		itemSize = 1;
3241 		displayWidth = 16;
3242 	} else if (strcmp(argv[0], "ds") == 0) {
3243 		itemSize = 2;
3244 		displayWidth = 8;
3245 	} else if (strcmp(argv[0], "dw") == 0) {
3246 		itemSize = 4;
3247 		displayWidth = 4;
3248 	} else if (strcmp(argv[0], "dl") == 0) {
3249 		itemSize = 8;
3250 		displayWidth = 2;
3251 	} else if (strcmp(argv[0], "string") == 0) {
3252 		itemSize = 1;
3253 		displayWidth = -1;
3254 	} else {
3255 		kprintf("display_mem called in an invalid way!\n");
3256 		return 0;
3257 	}
3258 
3259 	if (num <= 0)
3260 		num = displayWidth;
3261 
3262 	void* physicalPageHandle = NULL;
3263 
3264 	if (physical) {
3265 		int32 offset = address & (B_PAGE_SIZE - 1);
3266 		if (num * itemSize + offset > B_PAGE_SIZE) {
3267 			num = (B_PAGE_SIZE - offset) / itemSize;
3268 			kprintf("NOTE: number of bytes has been cut to page size\n");
3269 		}
3270 
3271 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3272 
3273 		if (vm_get_physical_page_debug(address, &copyAddress,
3274 				&physicalPageHandle) != B_OK) {
3275 			kprintf("getting the hardware page failed.\n");
3276 			return 0;
3277 		}
3278 
3279 		address += offset;
3280 		copyAddress += offset;
3281 	} else
3282 		copyAddress = address;
3283 
3284 	if (!strcmp(argv[0], "string")) {
3285 		kprintf("%p \"", (char*)copyAddress);
3286 
3287 		// string mode
3288 		for (i = 0; true; i++) {
3289 			char c;
3290 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3291 					!= B_OK
3292 				|| c == '\0') {
3293 				break;
3294 			}
3295 
3296 			if (c == '\n')
3297 				kprintf("\\n");
3298 			else if (c == '\t')
3299 				kprintf("\\t");
3300 			else {
3301 				if (!isprint(c))
3302 					c = '.';
3303 
3304 				kprintf("%c", c);
3305 			}
3306 		}
3307 
3308 		kprintf("\"\n");
3309 	} else {
3310 		// number mode
3311 		for (i = 0; i < num; i++) {
3312 			uint64 value;
3313 
3314 			if ((i % displayWidth) == 0) {
3315 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3316 				if (i != 0)
3317 					kprintf("\n");
3318 
3319 				kprintf("[0x%lx]  ", address + i * itemSize);
3320 
3321 				for (j = 0; j < displayed; j++) {
3322 					char c;
3323 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3324 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3325 						displayed = j;
3326 						break;
3327 					}
3328 					if (!isprint(c))
3329 						c = '.';
3330 
3331 					kprintf("%c", c);
3332 				}
3333 				if (num > displayWidth) {
3334 					// make sure the spacing in the last line is correct
3335 					for (j = displayed; j < displayWidth * itemSize; j++)
3336 						kprintf(" ");
3337 				}
3338 				kprintf("  ");
3339 			}
3340 
3341 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3342 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3343 				kprintf("read fault");
3344 				break;
3345 			}
3346 
3347 			switch (itemSize) {
3348 				case 1:
3349 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3350 					break;
3351 				case 2:
3352 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3353 					break;
3354 				case 4:
3355 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3356 					break;
3357 				case 8:
3358 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3359 					break;
3360 			}
3361 		}
3362 
3363 		kprintf("\n");
3364 	}
3365 
3366 	if (physical) {
3367 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3368 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3369 	}
3370 	return 0;
3371 }
3372 
3373 
3374 static void
3375 dump_cache_tree_recursively(VMCache* cache, int level,
3376 	VMCache* highlightCache)
3377 {
3378 	// print this cache
3379 	for (int i = 0; i < level; i++)
3380 		kprintf("  ");
3381 	if (cache == highlightCache)
3382 		kprintf("%p <--\n", cache);
3383 	else
3384 		kprintf("%p\n", cache);
3385 
3386 	// recursively print its consumers
3387 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3388 			VMCache* consumer = it.Next();) {
3389 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3390 	}
3391 }
3392 
3393 
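/*!	Debugger command that dumps the cache tree the given cache belongs to,
	starting from the root (the transitive source) cache.
*/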
3394 static int
3395 dump_cache_tree(int argc, char** argv)
3396 {
3397 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3398 		kprintf("usage: %s <address>\n", argv[0]);
3399 		return 0;
3400 	}
3401 
3402 	addr_t address = parse_expression(argv[1]);
3403 	if (address == 0)
3404 		return 0;
3405 
3406 	VMCache* cache = (VMCache*)address;
3407 	VMCache* root = cache;
3408 
3409 	// find the root cache (the transitive source)
3410 	while (root->source != NULL)
3411 		root = root->source;
3412 
3413 	dump_cache_tree_recursively(root, 0, cache);
3414 
3415 	return 0;
3416 }
3417 
3418 
3419 const char*
3420 vm_cache_type_to_string(int32 type)
3421 {
3422 	switch (type) {
3423 		case CACHE_TYPE_RAM:
3424 			return "RAM";
3425 		case CACHE_TYPE_DEVICE:
3426 			return "device";
3427 		case CACHE_TYPE_VNODE:
3428 			return "vnode";
3429 		case CACHE_TYPE_NULL:
3430 			return "null";
3431 
3432 		default:
3433 			return "unknown";
3434 	}
3435 }
3436 
3437 
3438 #if DEBUG_CACHE_LIST
3439 
3440 static void
3441 update_cache_info_recursively(VMCache* cache, cache_info& info)
3442 {
3443 	info.page_count += cache->page_count;
3444 	if (cache->type == CACHE_TYPE_RAM)
3445 		info.committed += cache->committed_size;
3446 
3447 	// recurse
3448 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3449 			VMCache* consumer = it.Next();) {
3450 		update_cache_info_recursively(consumer, info);
3451 	}
3452 }
3453 
3454 
3455 static int
3456 cache_info_compare_page_count(const void* _a, const void* _b)
3457 {
3458 	const cache_info* a = (const cache_info*)_a;
3459 	const cache_info* b = (const cache_info*)_b;
3460 	if (a->page_count == b->page_count)
3461 		return 0;
3462 	return a->page_count < b->page_count ? 1 : -1;
3463 }
3464 
3465 
3466 static int
3467 cache_info_compare_committed(const void* _a, const void* _b)
3468 {
3469 	const cache_info* a = (const cache_info*)_a;
3470 	const cache_info* b = (const cache_info*)_b;
3471 	if (a->committed == b->committed)
3472 		return 0;
3473 	return a->committed < b->committed ? 1 : -1;
3474 }
3475 
3476 
3477 static void
3478 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3479 {
3480 	for (int i = 0; i < level; i++)
3481 		kprintf("  ");
3482 
3483 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3484 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3485 		cache->virtual_base, cache->virtual_end, cache->page_count);
3486 
3487 	if (level == 0)
3488 		kprintf("/%lu", info.page_count);
3489 
3490 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3491 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3492 
3493 		if (level == 0)
3494 			kprintf("/%lu", info.committed);
3495 	}
3496 
3497 	// areas
3498 	if (cache->areas != NULL) {
3499 		VMArea* area = cache->areas;
3500 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3501 			area->name, area->address_space->ID());
3502 
3503 		while (area->cache_next != NULL) {
3504 			area = area->cache_next;
3505 			kprintf(", %" B_PRId32, area->id);
3506 		}
3507 	}
3508 
3509 	kputs("\n");
3510 
3511 	// recurse
3512 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3513 			VMCache* consumer = it.Next();) {
3514 		dump_caches_recursively(consumer, info, level + 1);
3515 	}
3516 }
3517 
3518 
3519 static int
3520 dump_caches(int argc, char** argv)
3521 {
3522 	if (sCacheInfoTable == NULL) {
3523 		kprintf("No cache info table!\n");
3524 		return 0;
3525 	}
3526 
3527 	bool sortByPageCount = true;
3528 
3529 	for (int32 i = 1; i < argc; i++) {
3530 		if (strcmp(argv[i], "-c") == 0) {
3531 			sortByPageCount = false;
3532 		} else {
3533 			print_debugger_command_usage(argv[0]);
3534 			return 0;
3535 		}
3536 	}
3537 
3538 	uint32 totalCount = 0;
3539 	uint32 rootCount = 0;
3540 	off_t totalCommitted = 0;
3541 	page_num_t totalPages = 0;
3542 
3543 	VMCache* cache = gDebugCacheList;
3544 	while (cache) {
3545 		totalCount++;
3546 		if (cache->source == NULL) {
3547 			cache_info stackInfo;
3548 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3549 				? sCacheInfoTable[rootCount] : stackInfo;
3550 			rootCount++;
3551 			info.cache = cache;
3552 			info.page_count = 0;
3553 			info.committed = 0;
3554 			update_cache_info_recursively(cache, info);
3555 			totalCommitted += info.committed;
3556 			totalPages += info.page_count;
3557 		}
3558 
3559 		cache = cache->debug_next;
3560 	}
3561 
3562 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3563 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3564 			sortByPageCount
3565 				? &cache_info_compare_page_count
3566 				: &cache_info_compare_committed);
3567 	}
3568 
3569 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3570 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3571 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3572 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3573 			"page count" : "committed size");
3574 
3575 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3576 		for (uint32 i = 0; i < rootCount; i++) {
3577 			cache_info& info = sCacheInfoTable[i];
3578 			dump_caches_recursively(info.cache, info, 0);
3579 		}
3580 	} else
3581 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3582 
3583 	return 0;
3584 }
3585 
3586 #endif	// DEBUG_CACHE_LIST
3587 
3588 
3589 static int
3590 dump_cache(int argc, char** argv)
3591 {
3592 	VMCache* cache;
3593 	bool showPages = false;
3594 	int i = 1;
3595 
3596 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3597 		kprintf("usage: %s [-ps] <address>\n"
3598 			"  if -p is specified, all pages are shown;\n"
3599 			"  if -s is used, only the cache info is shown.\n", argv[0]);
3600 		return 0;
3601 	}
3602 	while (argv[i][0] == '-') {
3603 		char* arg = argv[i] + 1;
3604 		while (arg[0]) {
3605 			if (arg[0] == 'p')
3606 				showPages = true;
3607 			arg++;
3608 		}
3609 		i++;
3610 	}
3611 	if (argv[i] == NULL) {
3612 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3613 		return 0;
3614 	}
3615 
3616 	addr_t address = parse_expression(argv[i]);
3617 	if (address == 0)
3618 		return 0;
3619 
3620 	cache = (VMCache*)address;
3621 
3622 	cache->Dump(showPages);
3623 
3624 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3625 
3626 	return 0;
3627 }
3628 
3629 
3630 static void
3631 dump_area_struct(VMArea* area, bool mappings)
3632 {
3633 	kprintf("AREA: %p\n", area);
3634 	kprintf("name:\t\t'%s'\n", area->name);
3635 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3636 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3637 	kprintf("base:\t\t0x%lx\n", area->Base());
3638 	kprintf("size:\t\t0x%lx\n", area->Size());
3639 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3640 	kprintf("page_protection:%p\n", area->page_protections);
3641 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3642 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3643 	kprintf("cache:\t\t%p\n", area->cache);
3644 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3645 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3646 	kprintf("cache_next:\t%p\n", area->cache_next);
3647 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3648 
3649 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3650 	if (mappings) {
3651 		kprintf("page mappings:\n");
3652 		while (iterator.HasNext()) {
3653 			vm_page_mapping* mapping = iterator.Next();
3654 			kprintf("  %p", mapping->page);
3655 		}
3656 		kprintf("\n");
3657 	} else {
3658 		uint32 count = 0;
3659 		while (iterator.Next() != NULL) {
3660 			count++;
3661 		}
3662 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3663 	}
3664 }
3665 
3666 
3667 static int
3668 dump_area(int argc, char** argv)
3669 {
3670 	bool mappings = false;
3671 	bool found = false;
3672 	int32 index = 1;
3673 	VMArea* area;
3674 	addr_t num;
3675 
3676 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3677 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3678 			"All areas matching either id/address/name are listed. You can\n"
3679 			"restrict the check to a specific item by prefixing the specifier\n"
3680 			"with the id/contains/address/name keywords.\n"
3681 			"-m shows the area's mappings as well.\n");
3682 		return 0;
3683 	}
3684 
3685 	if (!strcmp(argv[1], "-m")) {
3686 		mappings = true;
3687 		index++;
3688 	}
3689 
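	// mode is a bit mask: 1 matches by area id, 2 by containing address,
	// 4 by name; 0 treats the argument as a VMArea pointer, 0xf tries all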
3690 	int32 mode = 0xf;
3691 	if (!strcmp(argv[index], "id"))
3692 		mode = 1;
3693 	else if (!strcmp(argv[index], "contains"))
3694 		mode = 2;
3695 	else if (!strcmp(argv[index], "name"))
3696 		mode = 4;
3697 	else if (!strcmp(argv[index], "address"))
3698 		mode = 0;
3699 	if (mode != 0xf)
3700 		index++;
3701 
3702 	if (index >= argc) {
3703 		kprintf("No area specifier given.\n");
3704 		return 0;
3705 	}
3706 
3707 	num = parse_expression(argv[index]);
3708 
3709 	if (mode == 0) {
3710 		dump_area_struct((struct VMArea*)num, mappings);
3711 	} else {
3712 		// walk through the area list, looking for the arguments as a name
3713 
3714 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3715 		while ((area = it.Next()) != NULL) {
3716 			if (((mode & 4) != 0
3717 					&& !strcmp(argv[index], area->name))
3718 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3719 					|| (((mode & 2) != 0 && area->Base() <= num
3720 						&& area->Base() + area->Size() > num))))) {
3721 				dump_area_struct(area, mappings);
3722 				found = true;
3723 			}
3724 		}
3725 
3726 		if (!found)
3727 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3728 	}
3729 
3730 	return 0;
3731 }
3732 
3733 
3734 static int
3735 dump_area_list(int argc, char** argv)
3736 {
3737 	VMArea* area;
3738 	const char* name = NULL;
3739 	int32 id = 0;
3740 
3741 	if (argc > 1) {
3742 		id = parse_expression(argv[1]);
3743 		if (id == 0)
3744 			name = argv[1];
3745 	}
3746 
3747 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3748 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3749 		B_PRINTF_POINTER_WIDTH, "size");
3750 
3751 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3752 	while ((area = it.Next()) != NULL) {
3753 		if ((id != 0 && area->address_space->ID() != id)
3754 			|| (name != NULL && strstr(area->name, name) == NULL))
3755 			continue;
3756 
3757 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3758 			area->id, (void*)area->Base(), (void*)area->Size(),
3759 			area->protection, area->wiring, area->name);
3760 	}
3761 	return 0;
3762 }
3763 
3764 
3765 static int
3766 dump_available_memory(int argc, char** argv)
3767 {
3768 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3769 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3770 	return 0;
3771 }
3772 
3773 
3774 static int
3775 dump_mapping_info(int argc, char** argv)
3776 {
3777 	bool reverseLookup = false;
3778 	bool pageLookup = false;
3779 
3780 	int argi = 1;
3781 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3782 		const char* arg = argv[argi];
3783 		if (strcmp(arg, "-r") == 0) {
3784 			reverseLookup = true;
3785 		} else if (strcmp(arg, "-p") == 0) {
3786 			reverseLookup = true;
3787 			pageLookup = true;
3788 		} else {
3789 			print_debugger_command_usage(argv[0]);
3790 			return 0;
3791 		}
3792 	}
3793 
3794 	// We need at least one argument, the address. Optionally a thread ID can be
3795 	// specified.
3796 	if (argi >= argc || argi + 2 < argc) {
3797 		print_debugger_command_usage(argv[0]);
3798 		return 0;
3799 	}
3800 
3801 	uint64 addressValue;
3802 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3803 		return 0;
3804 
3805 	Team* team = NULL;
3806 	if (argi < argc) {
3807 		uint64 threadID;
3808 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3809 			return 0;
3810 
3811 		Thread* thread = Thread::GetDebug(threadID);
3812 		if (thread == NULL) {
3813 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3814 			return 0;
3815 		}
3816 
3817 		team = thread->team;
3818 	}
3819 
3820 	if (reverseLookup) {
3821 		phys_addr_t physicalAddress;
3822 		if (pageLookup) {
3823 			vm_page* page = (vm_page*)(addr_t)addressValue;
3824 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3825 		} else {
3826 			physicalAddress = (phys_addr_t)addressValue;
3827 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3828 		}
3829 
3830 		kprintf("    Team     Virtual Address      Area\n");
3831 		kprintf("--------------------------------------\n");
3832 
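		// prints one table row (team, virtual address, area) for every
		// virtual mapping of the physical page found in an address space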
3833 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3834 			Callback()
3835 				:
3836 				fAddressSpace(NULL)
3837 			{
3838 			}
3839 
3840 			void SetAddressSpace(VMAddressSpace* addressSpace)
3841 			{
3842 				fAddressSpace = addressSpace;
3843 			}
3844 
3845 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3846 			{
3847 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3848 					virtualAddress);
3849 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3850 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3851 				else
3852 					kprintf("\n");
3853 				return false;
3854 			}
3855 
3856 		private:
3857 			VMAddressSpace*	fAddressSpace;
3858 		} callback;
3859 
3860 		if (team != NULL) {
3861 			// team specified -- get its address space
3862 			VMAddressSpace* addressSpace = team->address_space;
3863 			if (addressSpace == NULL) {
3864 				kprintf("Failed to get address space!\n");
3865 				return 0;
3866 			}
3867 
3868 			callback.SetAddressSpace(addressSpace);
3869 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3870 				physicalAddress, callback);
3871 		} else {
3872 			// no team specified -- iterate through all address spaces
3873 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3874 				addressSpace != NULL;
3875 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3876 				callback.SetAddressSpace(addressSpace);
3877 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3878 					physicalAddress, callback);
3879 			}
3880 		}
3881 	} else {
3882 		// get the address space
3883 		addr_t virtualAddress = (addr_t)addressValue;
3884 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3885 		VMAddressSpace* addressSpace;
3886 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3887 			addressSpace = VMAddressSpace::Kernel();
3888 		} else if (team != NULL) {
3889 			addressSpace = team->address_space;
3890 		} else {
3891 			Thread* thread = debug_get_debugged_thread();
3892 			if (thread == NULL || thread->team == NULL) {
3893 				kprintf("Failed to get team!\n");
3894 				return 0;
3895 			}
3896 
3897 			addressSpace = thread->team->address_space;
3898 		}
3899 
3900 		if (addressSpace == NULL) {
3901 			kprintf("Failed to get address space!\n");
3902 			return 0;
3903 		}
3904 
3905 		// let the translation map implementation do the job
3906 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3907 	}
3908 
3909 	return 0;
3910 }
3911 
3912 
3913 /*!	Deletes all areas and reserved regions in the given address space.
3914 
3915 	The caller must ensure that none of the areas has any wired ranges.
3916 
3917 	\param addressSpace The address space.
3918 	\param deletingAddressSpace \c true, if the address space is in the process
3919 		of being deleted.
3920 */
3921 void
3922 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3923 {
3924 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3925 		addressSpace->ID()));
3926 
3927 	addressSpace->WriteLock();
3928 
3929 	// remove all reserved areas in this address space
3930 	addressSpace->UnreserveAllAddressRanges(0);
3931 
3932 	// delete all the areas in this address space
3933 	while (VMArea* area = addressSpace->FirstArea()) {
3934 		ASSERT(!area->IsWired());
3935 		delete_area(addressSpace, area, deletingAddressSpace);
3936 	}
3937 
3938 	addressSpace->WriteUnlock();
3939 }
3940 
3941 
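/*!	Returns the ID of the area containing \a address in the calling team's
	address space (or in the kernel address space for kernel addresses).
	For userland callers (\a kernel == \c false), areas that are marked
	B_KERNEL_AREA and grant no user read/write access yield \c B_ERROR.
*/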
3942 static area_id
3943 vm_area_for(addr_t address, bool kernel)
3944 {
3945 	team_id team;
3946 	if (IS_USER_ADDRESS(address)) {
3947 		// we try the user team address space, if any
3948 		team = VMAddressSpace::CurrentID();
3949 		if (team < 0)
3950 			return team;
3951 	} else
3952 		team = VMAddressSpace::KernelID();
3953 
3954 	AddressSpaceReadLocker locker(team);
3955 	if (!locker.IsLocked())
3956 		return B_BAD_TEAM_ID;
3957 
3958 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3959 	if (area != NULL) {
3960 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3961 				&& (area->protection & B_KERNEL_AREA) != 0)
3962 			return B_ERROR;
3963 
3964 		return area->id;
3965 	}
3966 
3967 	return B_ERROR;
3968 }
3969 
3970 
3971 /*!	Frees physical pages that were used during the boot process.
3972 	\a end is inclusive.
3973 */
3974 static void
3975 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3976 {
3977 	// free all physical pages in the specified range
3978 
3979 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3980 		phys_addr_t physicalAddress;
3981 		uint32 flags;
3982 
3983 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3984 			&& (flags & PAGE_PRESENT) != 0) {
3985 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3986 			if (page != NULL && page->State() != PAGE_STATE_FREE
3987 					&& page->State() != PAGE_STATE_CLEAR
3988 					&& page->State() != PAGE_STATE_UNUSED) {
3989 				DEBUG_PAGE_ACCESS_START(page);
3990 				vm_page_set_state(page, PAGE_STATE_FREE);
3991 			}
3992 		}
3993 	}
3994 
3995 	// unmap the memory
3996 	map->Unmap(start, end);
3997 }
3998 
3999 
4000 void
4001 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4002 {
4003 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
4004 	addr_t end = start + (size - 1);
4005 	addr_t lastEnd = start;
4006 
4007 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4008 		(void*)start, (void*)end));
4009 
4010 	// The areas are sorted in virtual address space order, so
4011 	// we just have to find the holes between them that fall
4012 	// into the area we should dispose of
4013 
4014 	map->Lock();
4015 
4016 	for (VMAddressSpace::AreaIterator it
4017 				= VMAddressSpace::Kernel()->GetAreaIterator();
4018 			VMArea* area = it.Next();) {
4019 		addr_t areaStart = area->Base();
4020 		addr_t areaEnd = areaStart + (area->Size() - 1);
4021 
4022 		if (areaEnd < start)
4023 			continue;
4024 
4025 		if (areaStart > end) {
4026 			// we are done, the area is already beyond what we have to free
4027 			break;
4028 		}
4029 
4030 		if (areaStart > lastEnd) {
4031 			// this is something we can free
4032 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4033 				(void*)areaStart));
4034 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4035 		}
4036 
4037 		if (areaEnd >= end) {
4038 			lastEnd = areaEnd;
4039 				// no +1 to prevent potential overflow
4040 			break;
4041 		}
4042 
4043 		lastEnd = areaEnd + 1;
4044 	}
4045 
4046 	if (lastEnd < end) {
4047 		// we can also get rid of some space at the end of the area
4048 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4049 			(void*)end));
4050 		unmap_and_free_physical_pages(map, lastEnd, end);
4051 	}
4052 
4053 	map->Unlock();
4054 }
4055 
4056 
4057 static void
4058 create_preloaded_image_areas(struct preloaded_image* _image)
4059 {
4060 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4061 	char name[B_OS_NAME_LENGTH];
4062 	void* address;
4063 	int32 length;
4064 
4065 	// use file name to create a good area name
4066 	char* fileName = strrchr(image->name, '/');
4067 	if (fileName == NULL)
4068 		fileName = image->name;
4069 	else
4070 		fileName++;
4071 
4072 	length = strlen(fileName);
4073 	// make sure there is enough space for the suffix
4074 	if (length > 25)
4075 		length = 25;
4076 
4077 	memcpy(name, fileName, length);
4078 	strcpy(name + length, "_text");
4079 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4080 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4081 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4082 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4083 		// this will later be remapped read-only/executable by the
4084 		// ELF initialization code
4085 
4086 	strcpy(name + length, "_data");
4087 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4088 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4089 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4090 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4091 }
4092 
4093 
4094 /*!	Frees all previously allocated kernel arguments areas from the kernel_args
4095 	structure. Any boot loader resources contained in these arguments must not
4096 	be accessed anymore past this point.
4097 */
4098 void
4099 vm_free_kernel_args(kernel_args* args)
4100 {
4101 	uint32 i;
4102 
4103 	TRACE(("vm_free_kernel_args()\n"));
4104 
4105 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4106 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4107 		if (area >= B_OK)
4108 			delete_area(area);
4109 	}
4110 }
4111 
4112 
4113 static void
4114 allocate_kernel_args(kernel_args* args)
4115 {
4116 	TRACE(("allocate_kernel_args()\n"));
4117 
4118 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4119 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4120 
4121 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4122 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4123 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4124 	}
4125 }
4126 
4127 
4128 static void
4129 unreserve_boot_loader_ranges(kernel_args* args)
4130 {
4131 	TRACE(("unreserve_boot_loader_ranges()\n"));
4132 
4133 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4134 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4135 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4136 			args->virtual_allocated_range[i].size);
4137 	}
4138 }
4139 
4140 
4141 static void
4142 reserve_boot_loader_ranges(kernel_args* args)
4143 {
4144 	TRACE(("reserve_boot_loader_ranges()\n"));
4145 
4146 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4147 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4148 
4149 		// If the address is not a kernel address, we just skip it. The
4150 		// architecture specific code has to deal with it.
4151 		if (!IS_KERNEL_ADDRESS(address)) {
4152 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4153 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4154 			continue;
4155 		}
4156 
4157 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4158 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4159 		if (status < B_OK)
4160 			panic("could not reserve boot loader ranges\n");
4161 	}
4162 }
4163 
4164 
4165 static addr_t
4166 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4167 {
4168 	size = PAGE_ALIGN(size);
4169 
4170 	// find a slot in the virtual allocation addr range
4171 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4172 		// check to see if the space between this one and the last is big enough
4173 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4174 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4175 			+ args->virtual_allocated_range[i - 1].size;
4176 
4177 		addr_t base = alignment > 0
4178 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4179 
4180 		if (base >= KERNEL_BASE && base < rangeStart
4181 				&& rangeStart - base >= size) {
4182 			args->virtual_allocated_range[i - 1].size
4183 				+= base + size - previousRangeEnd;
4184 			return base;
4185 		}
4186 	}
4187 
4188 	// we didn't find one between the allocation ranges. this is ok.
4189 	// see if there's a gap after the last one
4190 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4191 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4192 		+ args->virtual_allocated_range[lastEntryIndex].size;
4193 	addr_t base = alignment > 0
4194 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4195 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4196 		args->virtual_allocated_range[lastEntryIndex].size
4197 			+= base + size - lastRangeEnd;
4198 		return base;
4199 	}
4200 
4201 	// see if there's a gap before the first one
4202 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4203 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4204 		base = rangeStart - size;
4205 		if (alignment > 0)
4206 			base = ROUNDDOWN(base, alignment);
4207 
4208 		if (base >= KERNEL_BASE) {
4209 			args->virtual_allocated_range[0].start = base;
4210 			args->virtual_allocated_range[0].size += rangeStart - base;
4211 			return base;
4212 		}
4213 	}
4214 
4215 	return 0;
4216 }
4217 
4218 
4219 static bool
4220 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4221 {
4222 	// TODO: horrible brute-force method of determining if the page can be
4223 	// allocated
4224 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4225 		if (address >= args->physical_memory_range[i].start
4226 			&& address < args->physical_memory_range[i].start
4227 				+ args->physical_memory_range[i].size)
4228 			return true;
4229 	}
4230 	return false;
4231 }
4232 
4233 
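/*!	Allocates a single physical page during early boot by growing one of the
	kernel_args' physical_allocated_range entries up- or downwards.
	Returns the page number of the allocated page, or 0 on failure.
*/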
4234 page_num_t
4235 vm_allocate_early_physical_page(kernel_args* args)
4236 {
4237 	if (args->num_physical_allocated_ranges == 0) {
4238 		panic("early physical page allocations no longer possible!");
4239 		return 0;
4240 	}
4241 
4242 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4243 		phys_addr_t nextPage;
4244 
4245 		nextPage = args->physical_allocated_range[i].start
4246 			+ args->physical_allocated_range[i].size;
4247 		// see if the page right after this allocated paddr run can be allocated
4248 		if (i + 1 < args->num_physical_allocated_ranges
4249 			&& args->physical_allocated_range[i + 1].size != 0) {
4250 			// see if the next page will collide with the next allocated range
4251 			if (nextPage >= args->physical_allocated_range[i+1].start)
4252 				continue;
4253 		}
4254 		// see if the next physical page fits in the memory block
4255 		if (is_page_in_physical_memory_range(args, nextPage)) {
4256 			// we got one!
4257 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4258 			return nextPage / B_PAGE_SIZE;
4259 		}
4260 	}
4261 
4262 	// Expanding upwards didn't work, try going downwards.
4263 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4264 		phys_addr_t nextPage;
4265 
4266 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4267 		// see if the page right before this allocated paddr run can be allocated
4268 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4269 			// see if the page will collide with the previous allocated range
4270 			if (nextPage < args->physical_allocated_range[i-1].start
4271 				+ args->physical_allocated_range[i-1].size)
4272 				continue;
4273 		}
4274 		// see if the next physical page fits in the memory block
4275 		if (is_page_in_physical_memory_range(args, nextPage)) {
4276 			// we got one!
4277 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4278 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4279 			return nextPage / B_PAGE_SIZE;
4280 		}
4281 	}
4282 
4283 	return 0;
4284 		// could not allocate a block
4285 }
4286 
4287 
4288 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4289 	allocate some pages before the VM is completely up.
4290 */
4291 addr_t
4292 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4293 	uint32 attributes, addr_t alignment)
4294 {
4295 	if (physicalSize > virtualSize)
4296 		physicalSize = virtualSize;
4297 
4298 	// find the vaddr to allocate at
4299 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4300 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4301 	if (virtualBase == 0) {
4302 		panic("vm_allocate_early: could not allocate virtual address\n");
4303 		return 0;
4304 	}
4305 
4306 	// map the pages
4307 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4308 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4309 		if (physicalAddress == 0)
4310 			panic("error allocating early page!\n");
4311 
4312 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4313 
4314 		status_t status = arch_vm_translation_map_early_map(args,
4315 			virtualBase + i * B_PAGE_SIZE,
4316 			physicalAddress * B_PAGE_SIZE, attributes,
4317 			&vm_allocate_early_physical_page);
4318 		if (status != B_OK)
4319 			panic("error mapping early page!");
4320 	}
4321 
4322 	return virtualBase;
4323 }
4324 
4325 
4326 /*!	The main entrance point to initialize the VM. */
4327 status_t
4328 vm_init(kernel_args* args)
4329 {
4330 	struct preloaded_image* image;
4331 	void* address;
4332 	status_t err = 0;
4333 	uint32 i;
4334 
4335 	TRACE(("vm_init: entry\n"));
4336 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4337 	err = arch_vm_init(args);
4338 
4339 	// initialize some globals
4340 	vm_page_init_num_pages(args);
4341 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4342 
4343 	slab_init(args);
4344 
4345 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4346 	off_t heapSize = INITIAL_HEAP_SIZE;
4347 	// try to accommodate low memory systems
4348 	while (heapSize > sAvailableMemory / 8)
4349 		heapSize /= 2;
4350 	if (heapSize < 1024 * 1024)
4351 		panic("vm_init: go buy some RAM please.");
4352 
4353 	// map in the new heap and initialize it
4354 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4355 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4356 	TRACE(("heap at 0x%lx\n", heapBase));
4357 	heap_init(heapBase, heapSize);
4358 #endif
4359 
4360 	// initialize the free page list and physical page mapper
4361 	vm_page_init(args);
4362 
4363 	// initialize the cache allocators
4364 	vm_cache_init(args);
4365 
4366 	{
4367 		status_t error = VMAreas::Init();
4368 		if (error != B_OK)
4369 			panic("vm_init: error initializing areas map\n");
4370 	}
4371 
4372 	VMAddressSpace::Init();
4373 	reserve_boot_loader_ranges(args);
4374 
4375 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4376 	heap_init_post_area();
4377 #endif
4378 
4379 	// Do any further initialization that the architecture dependent layers may
4380 	// need now
4381 	arch_vm_translation_map_init_post_area(args);
4382 	arch_vm_init_post_area(args);
4383 	vm_page_init_post_area(args);
4384 	slab_init_post_area();
4385 
4386 	// allocate areas to represent stuff that already exists
4387 
4388 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4389 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4390 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4391 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4392 #endif
4393 
4394 	allocate_kernel_args(args);
4395 
4396 	create_preloaded_image_areas(args->kernel_image);
4397 
4398 	// allocate areas for preloaded images
4399 	for (image = args->preloaded_images; image != NULL; image = image->next)
4400 		create_preloaded_image_areas(image);
4401 
4402 	// allocate kernel stacks
4403 	for (i = 0; i < args->num_cpus; i++) {
4404 		char name[64];
4405 
4406 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4407 		address = (void*)args->cpu_kstack[i].start;
4408 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4409 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4410 	}
4411 
4412 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4413 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4414 
4415 #if PARANOID_KERNEL_MALLOC
4416 	vm_block_address_range("uninitialized heap memory",
4417 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4418 #endif
4419 #if PARANOID_KERNEL_FREE
4420 	vm_block_address_range("freed heap memory",
4421 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4422 #endif
4423 
4424 	// create the object cache for the page mappings
4425 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4426 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4427 		NULL, NULL);
4428 	if (gPageMappingsObjectCache == NULL)
4429 		panic("failed to create page mappings object cache");
4430 
4431 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4432 
4433 #if DEBUG_CACHE_LIST
4434 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4435 		virtual_address_restrictions virtualRestrictions = {};
4436 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4437 		physical_address_restrictions physicalRestrictions = {};
4438 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4439 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4440 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4441 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4442 			&physicalRestrictions, (void**)&sCacheInfoTable);
4443 	}
4444 #endif	// DEBUG_CACHE_LIST
4445 
4446 	// add some debugger commands
4447 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4448 	add_debugger_command("area", &dump_area,
4449 		"Dump info about a particular area");
4450 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4451 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4452 #if DEBUG_CACHE_LIST
4453 	if (sCacheInfoTable != NULL) {
4454 		add_debugger_command_etc("caches", &dump_caches,
4455 			"List all VMCache trees",
4456 			"[ \"-c\" ]\n"
4457 			"All cache trees are listed sorted in decreasing order by number "
4458 				"of\n"
4459 			"used pages or, if \"-c\" is specified, by size of committed "
4460 				"memory.\n",
4461 			0);
4462 	}
4463 #endif
4464 	add_debugger_command("avail", &dump_available_memory,
4465 		"Dump available memory");
4466 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4467 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4468 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4469 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4470 	add_debugger_command("string", &display_mem, "dump strings");
4471 
4472 	add_debugger_command_etc("mapping", &dump_mapping_info,
4473 		"Print address mapping information",
4474 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4475 		"Prints low-level page mapping information for a given address. If\n"
4476 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4477 		"address that is looked up in the translation map of the current\n"
4478 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4479 		"\"-r\" is specified, <address> is a physical address that is\n"
4480 		"searched in the translation map of all teams, respectively the team\n"
4481 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4482 		"<address> is the address of a vm_page structure. The behavior is\n"
4483 		"equivalent to specifying \"-r\" with the physical address of that\n"
4484 		"page.\n",
4485 		0);
4486 
4487 	TRACE(("vm_init: exit\n"));
4488 
4489 	vm_cache_init_post_heap();
4490 
4491 	return err;
4492 }
4493 
4494 
4495 status_t
4496 vm_init_post_sem(kernel_args* args)
4497 {
4498 	// This frees all unused boot loader resources and makes their space
4499 	// available again
4500 	arch_vm_init_end(args);
4501 	unreserve_boot_loader_ranges(args);
4502 
4503 	// fill in all of the semaphores that were not allocated before
4504 	// since we're still single threaded and only the kernel address space
4505 	// exists, it isn't that hard to find all of the ones we need to create
4506 
4507 	arch_vm_translation_map_init_post_sem(args);
4508 
4509 	slab_init_post_sem();
4510 
4511 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4512 	heap_init_post_sem();
4513 #endif
4514 
4515 	return B_OK;
4516 }
4517 
4518 
4519 status_t
4520 vm_init_post_thread(kernel_args* args)
4521 {
4522 	vm_page_init_post_thread(args);
4523 	slab_init_post_thread();
4524 	return heap_init_post_thread();
4525 }
4526 
4527 
4528 status_t
4529 vm_init_post_modules(kernel_args* args)
4530 {
4531 	return arch_vm_init_post_modules(args);
4532 }
4533 
4534 
4535 void
4536 permit_page_faults(void)
4537 {
4538 	Thread* thread = thread_get_current_thread();
4539 	if (thread != NULL)
4540 		atomic_add(&thread->page_faults_allowed, 1);
4541 }
4542 
4543 
4544 void
4545 forbid_page_faults(void)
4546 {
4547 	Thread* thread = thread_get_current_thread();
4548 	if (thread != NULL)
4549 		atomic_add(&thread->page_faults_allowed, -1);
4550 }
4551 
4552 
4553 status_t
4554 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4555 	bool isUser, addr_t* newIP)
4556 {
4557 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4558 		faultAddress));
4559 
4560 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4561 
4562 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4563 	VMAddressSpace* addressSpace = NULL;
4564 
4565 	status_t status = B_OK;
4566 	*newIP = 0;
4567 	atomic_add((int32*)&sPageFaults, 1);
4568 
4569 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4570 		addressSpace = VMAddressSpace::GetKernel();
4571 	} else if (IS_USER_ADDRESS(pageAddress)) {
4572 		addressSpace = VMAddressSpace::GetCurrent();
4573 		if (addressSpace == NULL) {
4574 			if (!isUser) {
4575 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4576 					"memory!\n");
4577 				status = B_BAD_ADDRESS;
4578 				TPF(PageFaultError(-1,
4579 					VMPageFaultTracing
4580 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4581 			} else {
4582 				// XXX weird state.
4583 				panic("vm_page_fault: non kernel thread accessing user memory "
4584 					"that doesn't exist!\n");
4585 				status = B_BAD_ADDRESS;
4586 			}
4587 		}
4588 	} else {
4589 		// the hit was probably in the 64k DMZ between kernel and user space
4590 		// this keeps a user space thread from passing a buffer that crosses
4591 		// into kernel space
4592 		status = B_BAD_ADDRESS;
4593 		TPF(PageFaultError(-1,
4594 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4595 	}
4596 
4597 	if (status == B_OK) {
4598 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4599 			isUser, NULL);
4600 	}
4601 
4602 	if (status < B_OK) {
4603 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4604 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4605 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4606 			thread_get_current_thread_id());
4607 		if (!isUser) {
4608 			Thread* thread = thread_get_current_thread();
4609 			if (thread != NULL && thread->fault_handler != 0) {
4610 				// this will cause the arch dependent page fault handler to
4611 				// modify the IP on the interrupt frame or whatever to return
4612 				// to this address
4613 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4614 			} else {
4615 				// unhandled page fault in the kernel
4616 				panic("vm_page_fault: unhandled page fault in kernel space at "
4617 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4618 			}
4619 		} else {
4620 			Thread* thread = thread_get_current_thread();
4621 
4622 #ifdef TRACE_FAULTS
4623 			VMArea* area = NULL;
4624 			if (addressSpace != NULL) {
4625 				addressSpace->ReadLock();
4626 				area = addressSpace->LookupArea(faultAddress);
4627 			}
4628 
4629 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4630 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4631 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4632 				thread->team->Name(), thread->team->id,
4633 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4634 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4635 					area->Base() : 0x0));
4636 
4637 			if (addressSpace != NULL)
4638 				addressSpace->ReadUnlock();
4639 #endif
4640 
4641 			// If the thread has a signal handler for SIGSEGV, we simply
4642 			// send it the signal. Otherwise we notify the user debugger
4643 			// first.
4644 			struct sigaction action;
4645 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4646 					&& action.sa_handler != SIG_DFL
4647 					&& action.sa_handler != SIG_IGN)
4648 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4649 					SIGSEGV)) {
4650 				Signal signal(SIGSEGV,
4651 					status == B_PERMISSION_DENIED
4652 						? SEGV_ACCERR : SEGV_MAPERR,
4653 					EFAULT, thread->team->id);
4654 				signal.SetAddress((void*)address);
4655 				send_signal_to_thread(thread, signal, 0);
4656 			}
4657 		}
4658 	}
4659 
4660 	if (addressSpace != NULL)
4661 		addressSpace->Put();
4662 
4663 	return B_HANDLED_INTERRUPT;
4664 }
4665 
4666 
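/*!	Bundles the state shared between vm_soft_fault() and fault_get_page():
	the locks held while resolving a fault, the page reservation, and the
	lookup results (the page found or allocated, and whether the fault
	handling has to be restarted).
*/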
4667 struct PageFaultContext {
4668 	AddressSpaceReadLocker	addressSpaceLocker;
4669 	VMCacheChainLocker		cacheChainLocker;
4670 
4671 	VMTranslationMap*		map;
4672 	VMCache*				topCache;
4673 	off_t					cacheOffset;
4674 	vm_page_reservation		reservation;
4675 	bool					isWrite;
4676 
4677 	// return values
4678 	vm_page*				page;
4679 	bool					restart;
4680 	bool					pageAllocated;
4681 
4682 
4683 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4684 		:
4685 		addressSpaceLocker(addressSpace, true),
4686 		map(addressSpace->TranslationMap()),
4687 		isWrite(isWrite)
4688 	{
4689 	}
4690 
4691 	~PageFaultContext()
4692 	{
4693 		UnlockAll();
4694 		vm_page_unreserve_pages(&reservation);
4695 	}
4696 
4697 	void Prepare(VMCache* topCache, off_t cacheOffset)
4698 	{
4699 		this->topCache = topCache;
4700 		this->cacheOffset = cacheOffset;
4701 		page = NULL;
4702 		restart = false;
4703 		pageAllocated = false;
4704 
4705 		cacheChainLocker.SetTo(topCache);
4706 	}
4707 
4708 	void UnlockAll(VMCache* exceptCache = NULL)
4709 	{
4710 		topCache = NULL;
4711 		addressSpaceLocker.Unlock();
4712 		cacheChainLocker.Unlock(exceptCache);
4713 	}
4714 };
4715 
4716 
4717 /*!	Gets the page that should be mapped into the area.
4718 	Returns an error code other than \c B_OK, if the page couldn't be found or
4719 	paged in. The locking state of the address space and the caches is undefined
4720 	in that case.
4721 	Returns \c B_OK with \c context.restart set to \c true, if the function
4722 	had to unlock the address space and all caches and is supposed to be called
4723 	again.
4724 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4725 	found. It is returned in \c context.page. The address space will still be
4726 	locked as well as all caches starting from the top cache to at least the
4727 	cache the page lives in.
4728 */
4729 static status_t
4730 fault_get_page(PageFaultContext& context)
4731 {
4732 	VMCache* cache = context.topCache;
4733 	VMCache* lastCache = NULL;
4734 	vm_page* page = NULL;
4735 
4736 	while (cache != NULL) {
4737 		// We already hold the lock of the cache at this point.
4738 
4739 		lastCache = cache;
4740 
4741 		page = cache->LookupPage(context.cacheOffset);
4742 		if (page != NULL && page->busy) {
4743 			// page must be busy -- wait for it to become unbusy
4744 			context.UnlockAll(cache);
4745 			cache->ReleaseRefLocked();
4746 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4747 
4748 			// restart the whole process
4749 			context.restart = true;
4750 			return B_OK;
4751 		}
4752 
4753 		if (page != NULL)
4754 			break;
4755 
4756 		// The current cache does not contain the page we're looking for.
4757 
4758 		// see if the backing store has it
4759 		if (cache->HasPage(context.cacheOffset)) {
4760 			// insert a fresh page and mark it busy -- we're going to read it in
4761 			page = vm_page_allocate_page(&context.reservation,
4762 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4763 			cache->InsertPage(page, context.cacheOffset);
4764 
4765 			// We need to unlock all caches and the address space while reading
4766 			// the page in. Keep a reference to the cache around.
4767 			cache->AcquireRefLocked();
4768 			context.UnlockAll();
4769 
4770 			// read the page in
4771 			generic_io_vec vec;
4772 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4773 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4774 
4775 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4776 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4777 
4778 			cache->Lock();
4779 
4780 			if (status < B_OK) {
4781 				// on error remove and free the page
4782 				dprintf("reading page from cache %p returned: %s!\n",
4783 					cache, strerror(status));
4784 
4785 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4786 				cache->RemovePage(page);
4787 				vm_page_set_state(page, PAGE_STATE_FREE);
4788 
4789 				cache->ReleaseRefAndUnlock();
4790 				return status;
4791 			}
4792 
4793 			// mark the page unbusy again
4794 			cache->MarkPageUnbusy(page);
4795 
4796 			DEBUG_PAGE_ACCESS_END(page);
4797 
4798 			// Since we needed to unlock everything temporarily, the area
4799 			// situation might have changed. So we need to restart the whole
4800 			// process.
4801 			cache->ReleaseRefAndUnlock();
4802 			context.restart = true;
4803 			return B_OK;
4804 		}
4805 
4806 		cache = context.cacheChainLocker.LockSourceCache();
4807 	}
4808 
4809 	if (page == NULL) {
4810 		// There was no adequate page, determine the cache for a clean one.
4811 		// Read-only pages go into the deepest cache; only the topmost cache
4812 		// may have direct write access.
4813 		cache = context.isWrite ? context.topCache : lastCache;
4814 
4815 		// allocate a clean page
4816 		page = vm_page_allocate_page(&context.reservation,
4817 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4818 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4819 			page->physical_page_number));
4820 
4821 		// insert the new page into our cache
4822 		cache->InsertPage(page, context.cacheOffset);
4823 		context.pageAllocated = true;
4824 	} else if (page->Cache() != context.topCache && context.isWrite) {
4825 		// We have a page that has the data we want, but in the wrong cache
4826 		// object so we need to copy it and stick it into the top cache.
4827 		vm_page* sourcePage = page;
4828 
4829 		// TODO: If memory is low, it might be a good idea to steal the page
4830 		// from our source cache -- if possible, that is.
4831 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4832 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4833 
4834 		// To not needlessly kill concurrency we unlock all caches but the top
4835 		// one while copying the page. Lacking another mechanism to ensure that
4836 		// the source page doesn't disappear, we mark it busy.
4837 		sourcePage->busy = true;
4838 		context.cacheChainLocker.UnlockKeepRefs(true);
4839 
4840 		// copy the page
4841 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4842 			sourcePage->physical_page_number * B_PAGE_SIZE);
4843 
4844 		context.cacheChainLocker.RelockCaches(true);
4845 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4846 
4847 		// insert the new page into our cache
4848 		context.topCache->InsertPage(page, context.cacheOffset);
4849 		context.pageAllocated = true;
4850 	} else
4851 		DEBUG_PAGE_ACCESS_START(page);
4852 
4853 	context.page = page;
4854 	return B_OK;
4855 }
4856 
4857 
4858 /*!	Makes sure the address in the given address space is mapped.
4859 
4860 	\param addressSpace The address space.
4861 	\param originalAddress The address. Doesn't need to be page aligned.
4862 	\param isWrite If \c true the address shall be write-accessible.
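	\param isExecute If \c true the address shall be executable.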
4863 	\param isUser If \c true the access is requested by a userland team.
4864 	\param wirePage On success, if non \c NULL, the wired count of the page
4865 		mapped at the given address is incremented and the page is returned
4866 		via this parameter.
4867 	\return \c B_OK on success, another error code otherwise.
4868 */
4869 static status_t
4870 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4871 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4872 {
4873 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4874 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4875 		originalAddress, isWrite, isUser));
4876 
4877 	PageFaultContext context(addressSpace, isWrite);
4878 
4879 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4880 	status_t status = B_OK;
4881 
4882 	addressSpace->IncrementFaultCount();
4883 
4884 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4885 	// the pages upfront makes sure we don't have any cache locked, so that the
4886 	// page daemon/thief can do their job without problems.
4887 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4888 		originalAddress);
4889 	context.addressSpaceLocker.Unlock();
4890 	vm_page_reserve_pages(&context.reservation, reservePages,
4891 		addressSpace == VMAddressSpace::Kernel()
4892 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4893 
4894 	while (true) {
4895 		context.addressSpaceLocker.Lock();
4896 
4897 		// get the area the fault was in
4898 		VMArea* area = addressSpace->LookupArea(address);
4899 		if (area == NULL) {
4900 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4901 				"space\n", originalAddress);
4902 			TPF(PageFaultError(-1,
4903 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4904 			status = B_BAD_ADDRESS;
4905 			break;
4906 		}
4907 
4908 		// check permissions
4909 		uint32 protection = get_area_page_protection(area, address);
4910 		if (isUser && (protection & B_USER_PROTECTION) == 0
4911 				&& (area->protection & B_KERNEL_AREA) != 0) {
4912 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4913 				area->id, (void*)originalAddress);
4914 			TPF(PageFaultError(area->id,
4915 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4916 			status = B_PERMISSION_DENIED;
4917 			break;
4918 		}
4919 		if (isWrite && (protection
4920 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4921 			dprintf("write access attempted on write-protected area 0x%"
4922 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4923 			TPF(PageFaultError(area->id,
4924 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4925 			status = B_PERMISSION_DENIED;
4926 			break;
4927 		} else if (isExecute && (protection
4928 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4929 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4930 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4931 			TPF(PageFaultError(area->id,
4932 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4933 			status = B_PERMISSION_DENIED;
4934 			break;
4935 		} else if (!isWrite && !isExecute && (protection
4936 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4937 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4938 				" at %p\n", area->id, (void*)originalAddress);
4939 			TPF(PageFaultError(area->id,
4940 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4941 			status = B_PERMISSION_DENIED;
4942 			break;
4943 		}
4944 
4945 		// We have the area, it was a valid access, so let's try to resolve the
4946 		// page fault now.
4947 		// At first, the top most cache from the area is investigated.
4948 
4949 		context.Prepare(vm_area_get_locked_cache(area),
4950 			address - area->Base() + area->cache_offset);
4951 
4952 		// See if this cache has a fault handler -- this will do all the work
4953 		// for us.
4954 		{
4955 			// Note, since the page fault is resolved with interrupts enabled,
4956 			// the fault handler could be called more than once for the same
4957 			// reason -- the store must take this into account.
4958 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4959 			if (status != B_BAD_HANDLER)
4960 				break;
4961 		}
4962 
4963 		// The top most cache has no fault handler, so let's see if the cache or
4964 		// its sources already have the page we're searching for (we're going
4965 		// from top to bottom).
4966 		status = fault_get_page(context);
4967 		if (status != B_OK) {
4968 			TPF(PageFaultError(area->id, status));
4969 			break;
4970 		}
4971 
4972 		if (context.restart)
4973 			continue;
4974 
4975 		// All went fine, all there is left to do is to map the page into the
4976 		// address space.
4977 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4978 			context.page));
4979 
4980 		// If the page doesn't reside in the area's cache, we need to make sure
4981 		// it's mapped in read-only, so that we cannot overwrite someone else's
4982 		// data (copy-on-write)
4983 		uint32 newProtection = protection;
4984 		if (context.page->Cache() != context.topCache && !isWrite)
4985 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4986 
4987 		bool unmapPage = false;
4988 		bool mapPage = true;
4989 
4990 		// check whether there's already a page mapped at the address
4991 		context.map->Lock();
4992 
4993 		phys_addr_t physicalAddress;
4994 		uint32 flags;
4995 		vm_page* mappedPage = NULL;
4996 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4997 			&& (flags & PAGE_PRESENT) != 0
4998 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4999 				!= NULL) {
5000 			// Yep there's already a page. If it's ours, we can simply adjust
5001 			// its protection. Otherwise we have to unmap it.
5002 			if (mappedPage == context.page) {
5003 				context.map->ProtectPage(area, address, newProtection);
5004 					// Note: We assume that ProtectPage() is atomic (i.e.
5005 					// the page isn't temporarily unmapped), otherwise we'd have
5006 					// to make sure it isn't wired.
5007 				mapPage = false;
5008 			} else
5009 				unmapPage = true;
5010 		}
5011 
5012 		context.map->Unlock();
5013 
5014 		if (unmapPage) {
5015 			// If the page is wired, we can't unmap it. Wait until it is unwired
5016 			// again and restart. Note that the page cannot be wired for
5017 			// writing, since it isn't in the topmost cache. So we can safely
5018 			// ignore ranges wired for writing (our own and other concurrent
5019 			// wiring attempts in progress) and in fact have to do that to avoid
5020 			// a deadlock.
5021 			VMAreaUnwiredWaiter waiter;
5022 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5023 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5024 				// unlock everything and wait
5025 				if (context.pageAllocated) {
5026 					// ... but since we allocated a page and inserted it into
5027 					// the top cache, remove and free it first. Otherwise we'd
5028 					// have a page from a lower cache mapped while an upper
5029 					// cache has a page that would shadow it.
5030 					context.topCache->RemovePage(context.page);
5031 					vm_page_free_etc(context.topCache, context.page,
5032 						&context.reservation);
5033 				} else
5034 					DEBUG_PAGE_ACCESS_END(context.page);
5035 
5036 				context.UnlockAll();
5037 				waiter.waitEntry.Wait();
5038 				continue;
5039 			}
5040 
5041 			// Note: The mapped page is a page of a lower cache. We are
5042 			// guaranteed to have that cached locked, our new page is a copy of
5043 			// guaranteed to have that cache locked, our new page is a copy of
5044 			// is as follows: Since the page is mapped, it must live in the top
5045 			// cache (ruled out above) or any of its lower caches, and there is
5046 			// (was before the new page was inserted) no other page in any
5047 			// cache between the top cache and the page's cache (otherwise that
5048 			// would be mapped instead). That in turn means that our algorithm
5049 			// must have found it and therefore it cannot be busy either.
5050 			DEBUG_PAGE_ACCESS_START(mappedPage);
5051 			unmap_page(area, address);
5052 			DEBUG_PAGE_ACCESS_END(mappedPage);
5053 		}
5054 
5055 		if (mapPage) {
5056 			if (map_page(area, context.page, address, newProtection,
5057 					&context.reservation) != B_OK) {
5058 				// Mapping can only fail when the page mapping object couldn't
5059 				// be allocated. Save for the missing mapping everything is
5060 				// fine, though. If this was a regular page fault, we'll simply
5061 				// leave and probably fault again. To make sure we'll have more
5062 				// luck then, we ensure that the minimum object reserve is
5063 				// available.
5064 				DEBUG_PAGE_ACCESS_END(context.page);
5065 
5066 				context.UnlockAll();
5067 
5068 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
5069 						!= B_OK) {
5070 					// Apparently the situation is serious. Let's get ourselves
5071 					// killed.
5072 					status = B_NO_MEMORY;
5073 				} else if (wirePage != NULL) {
5074 					// The caller expects us to wire the page. Since
5075 					// object_cache_reserve() succeeded, we should now be able
5076 					// to allocate a mapping structure. Restart.
5077 					continue;
5078 				}
5079 
5080 				break;
5081 			}
5082 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5083 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5084 
5085 		// also wire the page, if requested
5086 		if (wirePage != NULL && status == B_OK) {
5087 			increment_page_wired_count(context.page);
5088 			*wirePage = context.page;
5089 		}
5090 
5091 		DEBUG_PAGE_ACCESS_END(context.page);
5092 
5093 		break;
5094 	}
5095 
5096 	return status;
5097 }
5098 
5099 
5100 status_t
5101 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5102 {
5103 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5104 }
5105 
5106 status_t
5107 vm_put_physical_page(addr_t vaddr, void* handle)
5108 {
5109 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5110 }
5111 
5112 
5113 status_t
5114 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5115 	void** _handle)
5116 {
5117 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5118 }
5119 
5120 status_t
5121 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5122 {
5123 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5124 }
5125 
5126 
5127 status_t
5128 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5129 {
5130 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5131 }
5132 
5133 status_t
5134 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5135 {
5136 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5137 }
5138 
5139 
5140 void
5141 vm_get_info(system_info* info)
5142 {
5143 	swap_get_info(info);
5144 
5145 	MutexLocker locker(sAvailableMemoryLock);
5146 	info->needed_memory = sNeededMemory;
5147 	info->free_memory = sAvailableMemory;
5148 }
5149 
5150 
5151 uint32
5152 vm_num_page_faults(void)
5153 {
5154 	return sPageFaults;
5155 }
5156 
5157 
5158 off_t
5159 vm_available_memory(void)
5160 {
5161 	MutexLocker locker(sAvailableMemoryLock);
5162 	return sAvailableMemory;
5163 }
5164 
5165 
5166 off_t
5167 vm_available_not_needed_memory(void)
5168 {
5169 	MutexLocker locker(sAvailableMemoryLock);
5170 	return sAvailableMemory - sNeededMemory;
5171 }
5172 
5173 
5174 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5175 	debugger.
5176 */
5177 off_t
5178 vm_available_not_needed_memory_debug(void)
5179 {
5180 	return sAvailableMemory - sNeededMemory;
5181 }
5182 
5183 
5184 size_t
5185 vm_kernel_address_space_left(void)
5186 {
5187 	return VMAddressSpace::Kernel()->FreeSpace();
5188 }
5189 
5190 
5191 void
5192 vm_unreserve_memory(size_t amount)
5193 {
5194 	mutex_lock(&sAvailableMemoryLock);
5195 
5196 	sAvailableMemory += amount;
5197 
5198 	mutex_unlock(&sAvailableMemoryLock);
5199 }
5200 
5201 
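/*!	Tries to reserve \a amount bytes of available memory, leaving the
	priority-dependent memory reserve untouched. If not enough memory is
	available, the low resource manager is notified and the function waits up
	to \a timeout for memory to become available. Memory reserved successfully
	should later be returned via vm_unreserve_memory().
*/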
5202 status_t
5203 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5204 {
5205 	size_t reserve = kMemoryReserveForPriority[priority];
5206 
5207 	MutexLocker locker(sAvailableMemoryLock);
5208 
5209 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5210 
5211 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5212 		sAvailableMemory -= amount;
5213 		return B_OK;
5214 	}
5215 
5216 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
5217 		// Do not wait for something that will never happen.
5218 		return B_NO_MEMORY;
5219 	}
5220 
5221 	if (timeout <= 0)
5222 		return B_NO_MEMORY;
5223 
5224 	// turn timeout into an absolute timeout
5225 	timeout += system_time();
5226 
5227 	// loop until we've got the memory or the timeout occurs
5228 	do {
5229 		sNeededMemory += amount;
5230 
5231 		// call the low resource manager
5232 		locker.Unlock();
5233 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5234 			B_ABSOLUTE_TIMEOUT, timeout);
5235 		locker.Lock();
5236 
5237 		sNeededMemory -= amount;
5238 
5239 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5240 			sAvailableMemory -= amount;
5241 			return B_OK;
5242 		}
5243 	} while (timeout > system_time());
5244 
5245 	return B_NO_MEMORY;
5246 }
5247 
5248 
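/*!	Sets the memory type of the given area: the area's mappings are updated
	via the translation map, and the physical range's memory type is set via
	arch_vm_set_memory_type(). On failure the previous memory type is
	restored.
*/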
5249 status_t
5250 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5251 {
5252 	// NOTE: The caller is responsible for synchronizing calls to this function!
5253 
5254 	AddressSpaceReadLocker locker;
5255 	VMArea* area;
5256 	status_t status = locker.SetFromArea(id, area);
5257 	if (status != B_OK)
5258 		return status;
5259 
5260 	// nothing to do if the type doesn't change
5261 	uint32 oldType = area->MemoryType();
5262 	if (type == oldType)
5263 		return B_OK;
5264 
5265 	// set the memory type of the area and the mapped pages
5266 	VMTranslationMap* map = area->address_space->TranslationMap();
5267 	map->Lock();
5268 	area->SetMemoryType(type);
5269 	map->ProtectArea(area, area->protection);
5270 	map->Unlock();
5271 
5272 	// set the physical memory type
5273 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5274 	if (error != B_OK) {
5275 		// reset the memory type of the area and the mapped pages
5276 		map->Lock();
5277 		area->SetMemoryType(oldType);
5278 		map->ProtectArea(area, area->protection);
5279 		map->Unlock();
5280 		return error;
5281 	}
5282 
5283 	return B_OK;
5285 }
5286 
5287 
5288 /*!	This function enforces some protection properties:
5289 	 - kernel areas must be W^X (after kernel startup)
5290 	 - if no kernel protection is specified, B_KERNEL_WRITE_AREA is set when
5291 	   B_WRITE_AREA is set, and B_KERNEL_READ_AREA when B_READ_AREA is set
5292 */
5293 static void
5294 fix_protection(uint32* protection)
5295 {
5296 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5297 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5298 			|| (*protection & B_WRITE_AREA) != 0)
5299 		&& !gKernelStartup)
5300 		panic("kernel areas cannot be both writable and executable!");
5301 
5302 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5303 		if ((*protection & B_WRITE_AREA) != 0)
5304 			*protection |= B_KERNEL_WRITE_AREA;
5305 		if ((*protection & B_READ_AREA) != 0)
5306 			*protection |= B_KERNEL_READ_AREA;
5307 	}
5308 }
5309 
5310 
5311 static void
5312 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5313 {
5314 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5315 	info->area = area->id;
5316 	info->address = (void*)area->Base();
5317 	info->size = area->Size();
5318 	info->protection = area->protection;
5319 	info->lock = area->wiring;
5320 	info->team = area->address_space->ID();
5321 	info->copy_count = 0;
5322 	info->in_count = 0;
5323 	info->out_count = 0;
5324 		// TODO: retrieve real values here!
5325 
5326 	VMCache* cache = vm_area_get_locked_cache(area);
5327 
5328 	// Note, this is a simplification; the cache could be larger than this area
5329 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5330 
5331 	vm_area_put_locked_cache(cache);
5332 }
5333 
5334 
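/*!	Resizes the area with the given ID to \a newSize. Since all areas attached
	to the underlying cache share its size, every one of them is resized; the
	cache itself is grown before (or shrunk after) the areas. On failure the
	original sizes are restored as far as possible.
*/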
5335 static status_t
5336 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5337 {
5338 	// is newSize a multiple of B_PAGE_SIZE?
5339 	if (newSize & (B_PAGE_SIZE - 1))
5340 		return B_BAD_VALUE;
5341 
5342 	// lock all affected address spaces and the cache
5343 	VMArea* area;
5344 	VMCache* cache;
5345 
5346 	MultiAddressSpaceLocker locker;
5347 	AreaCacheLocker cacheLocker;
5348 
5349 	status_t status;
5350 	size_t oldSize;
5351 	bool anyKernelArea;
5352 	bool restart;
5353 
5354 	do {
5355 		anyKernelArea = false;
5356 		restart = false;
5357 
5358 		locker.Unset();
5359 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5360 		if (status != B_OK)
5361 			return status;
5362 		cacheLocker.SetTo(cache, true);	// already locked
5363 
5364 		// enforce restrictions
5365 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5366 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5367 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5368 				"resize kernel area %" B_PRId32 " (%s)\n",
5369 				team_get_current_team_id(), areaID, area->name);
5370 			return B_NOT_ALLOWED;
5371 		}
5372 		// TODO: Enforce all restrictions (team, etc.)!
5373 
5374 		oldSize = area->Size();
5375 		if (newSize == oldSize)
5376 			return B_OK;
5377 
5378 		if (cache->type != CACHE_TYPE_RAM)
5379 			return B_NOT_ALLOWED;
5380 
5381 		if (oldSize < newSize) {
5382 			// We need to check if all areas of this cache can be resized.
5383 			for (VMArea* current = cache->areas; current != NULL;
5384 					current = current->cache_next) {
5385 				if (!current->address_space->CanResizeArea(current, newSize))
5386 					return B_ERROR;
5387 				anyKernelArea
5388 					|= current->address_space == VMAddressSpace::Kernel();
5389 			}
5390 		} else {
5391 			// We're shrinking the areas, so we must make sure the affected
5392 			// ranges are not wired.
5393 			for (VMArea* current = cache->areas; current != NULL;
5394 					current = current->cache_next) {
5395 				anyKernelArea
5396 					|= current->address_space == VMAddressSpace::Kernel();
5397 
5398 				if (wait_if_area_range_is_wired(current,
5399 						current->Base() + newSize, oldSize - newSize, &locker,
5400 						&cacheLocker)) {
5401 					restart = true;
5402 					break;
5403 				}
5404 			}
5405 		}
5406 	} while (restart);
5407 
5408 	// Okay, looks good so far, so let's do it
5409 
5410 	int priority = kernel && anyKernelArea
5411 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5412 	uint32 allocationFlags = kernel && anyKernelArea
5413 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5414 
5415 	if (oldSize < newSize) {
5416 		// Growing the cache can fail, so we do it first.
5417 		status = cache->Resize(cache->virtual_base + newSize, priority);
5418 		if (status != B_OK)
5419 			return status;
5420 	}
5421 
5422 	for (VMArea* current = cache->areas; current != NULL;
5423 			current = current->cache_next) {
5424 		status = current->address_space->ResizeArea(current, newSize,
5425 			allocationFlags);
5426 		if (status != B_OK)
5427 			break;
5428 
5429 		// We also need to unmap all pages beyond the new size, if the area has
5430 		// shrunk
5431 		if (newSize < oldSize) {
5432 			VMCacheChainLocker cacheChainLocker(cache);
5433 			cacheChainLocker.LockAllSourceCaches();
5434 
5435 			unmap_pages(current, current->Base() + newSize,
5436 				oldSize - newSize);
5437 
5438 			cacheChainLocker.Unlock(cache);
5439 		}
5440 	}
5441 
5442 	if (status == B_OK) {
5443 		// Shrink or grow individual page protections if in use.
5444 		if (area->page_protections != NULL) {
5445 			size_t bytes = area_page_protections_size(newSize);
5446 			uint8* newProtections
5447 				= (uint8*)realloc(area->page_protections, bytes);
5448 			if (newProtections == NULL)
5449 				status = B_NO_MEMORY;
5450 			else {
5451 				area->page_protections = newProtections;
5452 
5453 				if (oldSize < newSize) {
5454 					// init the additional page protections to that of the area
5455 					uint32 offset = area_page_protections_size(oldSize);
5456 					uint32 areaProtection = area->protection
5457 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5458 					memset(area->page_protections + offset,
5459 						areaProtection | (areaProtection << 4), bytes - offset);
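					// Page protections are stored as 4 bits per page (two
					// pages per byte). If the old area ended on an odd page,
					// the first added page shares a byte with the last old
					// page, so only the upper nibble of that byte is updated.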
5460 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5461 						uint8& entry = area->page_protections[offset - 1];
5462 						entry = (entry & 0x0f) | (areaProtection << 4);
5463 					}
5464 				}
5465 			}
5466 		}
5467 	}
5468 
5469 	// shrinking the cache can't fail, so we do it now
5470 	if (status == B_OK && newSize < oldSize)
5471 		status = cache->Resize(cache->virtual_base + newSize, priority);
5472 
5473 	if (status != B_OK) {
5474 		// Something failed -- resize the areas back to their original size.
5475 		// This can fail, too, in which case we're seriously screwed.
5476 		for (VMArea* current = cache->areas; current != NULL;
5477 				current = current->cache_next) {
5478 			if (current->address_space->ResizeArea(current, oldSize,
5479 					allocationFlags) != B_OK) {
5480 				panic("vm_resize_area(): Failed and unable to restore "
5481 					"original state.");
5482 			}
5483 		}
5484 
5485 		cache->Resize(cache->virtual_base + oldSize, priority);
5486 	}
5487 
5488 	// TODO: we must honour the lock restrictions of this area
5489 	return status;
5490 }
5491 
5492 
5493 status_t
5494 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5495 {
5496 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5497 }
5498 
5499 
5500 status_t
5501 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5502 {
5503 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5504 }
5505 
5506 
5507 status_t
5508 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5509 	bool user)
5510 {
5511 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5512 }
5513 
5514 
5515 void
5516 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5517 {
5518 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5519 }
5520 
5521 
5522 /*!	Copies a range of memory directly from/to a page that might not be mapped
5523 	at the moment.
5524 
5525 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5526 	walks through the respective area's cache chain to find the physical page
5527 	and copies from/to it directly.
5528 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5529 	must not cross a page boundary.
5530 
5531 	\param teamID The team ID identifying the address space \a unsafeMemory is
5532 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5533 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5534 		is passed, the address space of the thread returned by
5535 		debug_get_debugged_thread() is used.
5536 	\param unsafeMemory The start of the unsafe memory range to be copied
5537 		from/to.
5538 	\param buffer A safely accessible kernel buffer to be copied from/to.
5539 	\param size The number of bytes to be copied.
5540 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5541 		\a unsafeMemory, the other way around otherwise.
5542 */
5543 status_t
5544 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5545 	size_t size, bool copyToUnsafe)
5546 {
5547 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5548 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5549 		return B_BAD_VALUE;
5550 	}
5551 
5552 	// get the address space for the debugged thread
5553 	VMAddressSpace* addressSpace;
5554 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5555 		addressSpace = VMAddressSpace::Kernel();
5556 	} else if (teamID == B_CURRENT_TEAM) {
5557 		Thread* thread = debug_get_debugged_thread();
5558 		if (thread == NULL || thread->team == NULL)
5559 			return B_BAD_ADDRESS;
5560 
5561 		addressSpace = thread->team->address_space;
5562 	} else
5563 		addressSpace = VMAddressSpace::DebugGet(teamID);
5564 
5565 	if (addressSpace == NULL)
5566 		return B_BAD_ADDRESS;
5567 
5568 	// get the area
5569 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5570 	if (area == NULL)
5571 		return B_BAD_ADDRESS;
5572 
5573 	// search the page
5574 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5575 		+ area->cache_offset;
5576 	VMCache* cache = area->cache;
5577 	vm_page* page = NULL;
5578 	while (cache != NULL) {
5579 		page = cache->DebugLookupPage(cacheOffset);
5580 		if (page != NULL)
5581 			break;
5582 
5583 		// Page not found in this cache -- if it is paged out, we must not try
5584 		// to get it from lower caches.
5585 		if (cache->DebugHasPage(cacheOffset))
5586 			break;
5587 
5588 		cache = cache->source;
5589 	}
5590 
5591 	if (page == NULL)
5592 		return B_UNSUPPORTED;
5593 
5594 	// copy from/to physical memory
5595 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5596 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5597 
5598 	if (copyToUnsafe) {
5599 		if (page->Cache() != area->cache)
5600 			return B_UNSUPPORTED;
5601 
5602 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5603 	}
5604 
5605 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5606 }
5607 
5608 
5609 /*!	Validate that a memory range is either fully in kernel space, or fully
5610 	in userspace. */
5611 static inline bool
5612 validate_memory_range(const void* addr, size_t size)
5613 {
5614 	addr_t address = (addr_t)addr;
5615 
5616 	// Check for overflows on all addresses.
5617 	if ((address + size) < address)
5618 		return false;
5619 
5620 	// Validate that the address range does not cross the kernel/user boundary.
5621 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5622 }
5623 
5624 
5625 //	#pragma mark - kernel public API
5626 
5627 
5628 status_t
5629 user_memcpy(void* to, const void* from, size_t size)
5630 {
5631 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5632 		return B_BAD_ADDRESS;
5633 
5634 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5635 		return B_BAD_ADDRESS;
5636 
5637 	return B_OK;
5638 }
5639 
5640 
5641 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5642 	the string in \a to, NULL-terminating the result.
5643 
5644 	\param to Pointer to the destination C-string.
5645 	\param from Pointer to the source C-string.
5646 	\param size Size in bytes of the string buffer pointed to by \a to.
5647 
5648 	\return strlen(\a from).
5649 */
5650 ssize_t
5651 user_strlcpy(char* to, const char* from, size_t size)
5652 {
5653 	if (to == NULL && size != 0)
5654 		return B_BAD_VALUE;
5655 	if (from == NULL)
5656 		return B_BAD_ADDRESS;
5657 
5658 	// Protect the source address from overflows.
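	// If "from + size" would wrap around the address space, clamp maxSize so
	// that the range ends at the wrap point; if it would cross from userland
	// into kernel space, clamp it at USER_TOP.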
5659 	size_t maxSize = size;
5660 	if ((addr_t)from + maxSize < (addr_t)from)
5661 		maxSize -= (addr_t)from + maxSize;
5662 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5663 		maxSize = USER_TOP - (addr_t)from;
5664 
5665 	if (!validate_memory_range(to, maxSize))
5666 		return B_BAD_ADDRESS;
5667 
5668 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5669 	if (result < 0)
5670 		return result;
5671 
5672 	// If we hit the address overflow boundary, fail.
5673 	if ((size_t)result >= maxSize && maxSize < size)
5674 		return B_BAD_ADDRESS;
5675 
5676 	return result;
5677 }
5678 
5679 
5680 status_t
5681 user_memset(void* s, char c, size_t count)
5682 {
5683 	if (!validate_memory_range(s, count))
5684 		return B_BAD_ADDRESS;
5685 
5686 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5687 		return B_BAD_ADDRESS;
5688 
5689 	return B_OK;
5690 }
5691 
5692 
5693 /*!	Wires a single page at the given address.
5694 
5695 	\param team The team whose address space the address belongs to. Supports
5696 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5697 		parameter is ignored.
5698 	\param address The virtual address to wire down. Does not need to
5699 		be page aligned.
5700 	\param writable If \c true the page shall be writable.
5701 	\param info On success the info is filled in, among other things
5702 		containing the physical address the given virtual one translates to.
5703 	\return \c B_OK, when the page could be wired, another error code otherwise.
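
	A minimal usage sketch (illustrative only; \c address is the caller's
	virtual address):
	\code
	VMPageWiringInfo info;
	if (vm_wire_page(B_CURRENT_TEAM, address, true, &info) == B_OK) {
		// ... use info.physicalAddress while the page stays wired ...
		vm_unwire_page(&info);
	}
	\endcode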
5704 */
5705 status_t
5706 vm_wire_page(team_id team, addr_t address, bool writable,
5707 	VMPageWiringInfo* info)
5708 {
5709 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5710 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5711 
5712 	// compute the page protection that is required
5713 	bool isUser = IS_USER_ADDRESS(address);
5714 	uint32 requiredProtection = PAGE_PRESENT
5715 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5716 	if (writable)
5717 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5718 
5719 	// get and read lock the address space
5720 	VMAddressSpace* addressSpace = NULL;
5721 	if (isUser) {
5722 		if (team == B_CURRENT_TEAM)
5723 			addressSpace = VMAddressSpace::GetCurrent();
5724 		else
5725 			addressSpace = VMAddressSpace::Get(team);
5726 	} else
5727 		addressSpace = VMAddressSpace::GetKernel();
5728 	if (addressSpace == NULL)
5729 		return B_ERROR;
5730 
5731 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5732 
5733 	VMTranslationMap* map = addressSpace->TranslationMap();
5734 	status_t error = B_OK;
5735 
5736 	// get the area
5737 	VMArea* area = addressSpace->LookupArea(pageAddress);
5738 	if (area == NULL) {
5739 		addressSpace->Put();
5740 		return B_BAD_ADDRESS;
5741 	}
5742 
5743 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5744 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5745 
5746 	// mark the area range wired
5747 	area->Wire(&info->range);
5748 
5749 	// Lock the area's cache chain and the translation map. Needed to look
5750 	// up the page and play with its wired count.
5751 	cacheChainLocker.LockAllSourceCaches();
5752 	map->Lock();
5753 
5754 	phys_addr_t physicalAddress;
5755 	uint32 flags;
5756 	vm_page* page;
5757 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5758 		&& (flags & requiredProtection) == requiredProtection
5759 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5760 			!= NULL) {
5761 		// Already mapped with the correct permissions -- just increment
5762 		// the page's wired count.
5763 		increment_page_wired_count(page);
5764 
5765 		map->Unlock();
5766 		cacheChainLocker.Unlock();
5767 		addressSpaceLocker.Unlock();
5768 	} else {
5769 		// Let vm_soft_fault() map the page for us, if possible. We need
5770 		// to fully unlock to avoid deadlocks. Since we have already
5771 		// wired the area itself, nothing disturbing will happen with it
5772 		// in the meantime.
5773 		map->Unlock();
5774 		cacheChainLocker.Unlock();
5775 		addressSpaceLocker.Unlock();
5776 
5777 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5778 			isUser, &page);
5779 
5780 		if (error != B_OK) {
5781 			// The page could not be mapped -- clean up.
5782 			VMCache* cache = vm_area_get_locked_cache(area);
5783 			area->Unwire(&info->range);
5784 			cache->ReleaseRefAndUnlock();
5785 			addressSpace->Put();
5786 			return error;
5787 		}
5788 	}
5789 
5790 	info->physicalAddress
5791 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5792 			+ address % B_PAGE_SIZE;
5793 	info->page = page;
5794 
5795 	return B_OK;
5796 }
5797 
5798 
5799 /*!	Unwires a single page previously wired via vm_wire_page().
5800 
5801 	\param info The same object passed to vm_wire_page() before.
5802 */
5803 void
5804 vm_unwire_page(VMPageWiringInfo* info)
5805 {
5806 	// lock the address space
5807 	VMArea* area = info->range.area;
5808 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5809 		// takes over our reference
5810 
5811 	// lock the top cache
5812 	VMCache* cache = vm_area_get_locked_cache(area);
5813 	VMCacheChainLocker cacheChainLocker(cache);
5814 
5815 	if (info->page->Cache() != cache) {
5816 		// The page is not in the top cache, so we lock the whole cache chain
5817 		// before touching the page's wired count.
5818 		cacheChainLocker.LockAllSourceCaches();
5819 	}
5820 
5821 	decrement_page_wired_count(info->page);
5822 
5823 	// remove the wired range from the range
5824 	area->Unwire(&info->range);
5825 
5826 	cacheChainLocker.Unlock();
5827 }
5828 
5829 
5830 /*!	Wires down the given address range in the specified team's address space.
5831 
5832 	If successful the function
5833 	- acquires a reference to the specified team's address space,
5834 	- adds respective wired ranges to all areas that intersect with the given
5835 	  address range,
5836 	- makes sure all pages in the given address range are mapped with the
5837 	  requested access permissions and increments their wired count.
5838 
5839 	It fails, when \a team doesn't specify a valid address space, when any part
5840 	of the specified address range is not covered by areas, when the concerned
5841 	areas don't allow mapping with the requested permissions, or when mapping
5842 	failed for another reason.
5843 
5844 	When successful the call must be balanced by an unlock_memory_etc() call with
5845 	the exact same parameters.
5846 
5847 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5848 		supported.
5849 	\param address The start of the address range to be wired.
5850 	\param numBytes The size of the address range to be wired.
5851 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5852 		requests that the range must be wired writable ("read from device
5853 		into memory").
5854 	\return \c B_OK on success, another error code otherwise.
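
	A minimal usage sketch (illustrative only; \c buffer and \c size are
	placeholders):
	\code
	if (lock_memory_etc(team, buffer, size, 0) == B_OK) {
		// pages in [buffer, buffer + size) stay wired here
		unlock_memory_etc(team, buffer, size, 0);
	}
	\endcode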
5855 */
5856 status_t
5857 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5858 {
5859 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5860 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5861 
5862 	// compute the page protection that is required
5863 	bool isUser = IS_USER_ADDRESS(address);
5864 	bool writable = (flags & B_READ_DEVICE) == 0;
5865 	uint32 requiredProtection = PAGE_PRESENT
5866 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5867 	if (writable)
5868 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5869 
5870 	uint32 mallocFlags = isUser
5871 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5872 
5873 	// get and read lock the address space
5874 	VMAddressSpace* addressSpace = NULL;
5875 	if (isUser) {
5876 		if (team == B_CURRENT_TEAM)
5877 			addressSpace = VMAddressSpace::GetCurrent();
5878 		else
5879 			addressSpace = VMAddressSpace::Get(team);
5880 	} else
5881 		addressSpace = VMAddressSpace::GetKernel();
5882 	if (addressSpace == NULL)
5883 		return B_ERROR;
5884 
5885 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5886 		// We get a new address space reference here. The one we got above will
5887 		// be freed by unlock_memory_etc().
5888 
5889 	VMTranslationMap* map = addressSpace->TranslationMap();
5890 	status_t error = B_OK;
5891 
5892 	// iterate through all concerned areas
5893 	addr_t nextAddress = lockBaseAddress;
5894 	while (nextAddress != lockEndAddress) {
5895 		// get the next area
5896 		VMArea* area = addressSpace->LookupArea(nextAddress);
5897 		if (area == NULL) {
5898 			error = B_BAD_ADDRESS;
5899 			break;
5900 		}
5901 
5902 		addr_t areaStart = nextAddress;
5903 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5904 
5905 		// allocate the wired range (do that before locking the cache to avoid
5906 		// deadlocks)
5907 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5908 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5909 		if (range == NULL) {
5910 			error = B_NO_MEMORY;
5911 			break;
5912 		}
5913 
5914 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5915 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5916 
5917 		// mark the area range wired
5918 		area->Wire(range);
5919 
5920 		// Depending on the area cache type and the wiring, we may not need to
5921 		// look at the individual pages.
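		// (B_FULL_LOCK and B_CONTIGUOUS areas have all their pages permanently
		// wired anyway, and NULL/device caches have no pages to wire.)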
5922 		if (area->cache_type == CACHE_TYPE_NULL
5923 			|| area->cache_type == CACHE_TYPE_DEVICE
5924 			|| area->wiring == B_FULL_LOCK
5925 			|| area->wiring == B_CONTIGUOUS) {
5926 			nextAddress = areaEnd;
5927 			continue;
5928 		}
5929 
5930 		// Lock the area's cache chain and the translation map. Needed to look
5931 		// up pages and play with their wired count.
5932 		cacheChainLocker.LockAllSourceCaches();
5933 		map->Lock();
5934 
5935 		// iterate through the pages and wire them
5936 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5937 			phys_addr_t physicalAddress;
5938 			uint32 flags;
5939 
5940 			vm_page* page;
5941 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5942 				&& (flags & requiredProtection) == requiredProtection
5943 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5944 					!= NULL) {
5945 				// Already mapped with the correct permissions -- just increment
5946 				// the page's wired count.
5947 				increment_page_wired_count(page);
5948 			} else {
5949 				// Let vm_soft_fault() map the page for us, if possible. We need
5950 				// to fully unlock to avoid deadlocks. Since we have already
5951 				// wired the area itself, nothing disturbing will happen with it
5952 				// in the meantime.
5953 				map->Unlock();
5954 				cacheChainLocker.Unlock();
5955 				addressSpaceLocker.Unlock();
5956 
5957 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5958 					false, isUser, &page);
5959 
5960 				addressSpaceLocker.Lock();
5961 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5962 				cacheChainLocker.LockAllSourceCaches();
5963 				map->Lock();
5964 			}
5965 
5966 			if (error != B_OK)
5967 				break;
5968 		}
5969 
5970 		map->Unlock();
5971 
5972 		if (error == B_OK) {
5973 			cacheChainLocker.Unlock();
5974 		} else {
5975 			// An error occurred, so abort right here. If the current address
5976 			// is the first in this area, unwire the area, since we won't get
5977 			// to it when reverting what we've done so far.
5978 			if (nextAddress == areaStart) {
5979 				area->Unwire(range);
5980 				cacheChainLocker.Unlock();
5981 				range->~VMAreaWiredRange();
5982 				free_etc(range, mallocFlags);
5983 			} else
5984 				cacheChainLocker.Unlock();
5985 
5986 			break;
5987 		}
5988 	}
5989 
5990 	if (error != B_OK) {
5991 		// An error occurred, so unwire all that we've already wired. Note that
5992 		// even if not a single page was wired, unlock_memory_etc() is called
5993 		// to put the address space reference.
5994 		addressSpaceLocker.Unlock();
5995 		unlock_memory_etc(team, (void*)lockBaseAddress,
5996 			nextAddress - lockBaseAddress, flags);
5997 	}
5998 
5999 	return error;
6000 }
6001 
6002 
6003 status_t
6004 lock_memory(void* address, size_t numBytes, uint32 flags)
6005 {
6006 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6007 }
6008 
6009 
6010 /*!	Unwires an address range previously wired with lock_memory_etc().
6011 
6012 	Note that a call to this function must balance a previous lock_memory_etc()
6013 	call with exactly the same parameters.
6014 */
6015 status_t
6016 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
6017 {
6018 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
6019 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6020 
6021 	// compute the page protection that is required
6022 	bool isUser = IS_USER_ADDRESS(address);
6023 	bool writable = (flags & B_READ_DEVICE) == 0;
6024 	uint32 requiredProtection = PAGE_PRESENT
6025 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6026 	if (writable)
6027 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6028 
6029 	uint32 mallocFlags = isUser
6030 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6031 
6032 	// get and read lock the address space
6033 	VMAddressSpace* addressSpace = NULL;
6034 	if (isUser) {
6035 		if (team == B_CURRENT_TEAM)
6036 			addressSpace = VMAddressSpace::GetCurrent();
6037 		else
6038 			addressSpace = VMAddressSpace::Get(team);
6039 	} else
6040 		addressSpace = VMAddressSpace::GetKernel();
6041 	if (addressSpace == NULL)
6042 		return B_ERROR;
6043 
6044 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6045 		// Take over the address space reference. We don't unlock until we're
6046 		// done.
6047 
6048 	VMTranslationMap* map = addressSpace->TranslationMap();
6049 	status_t error = B_OK;
6050 
6051 	// iterate through all concerned areas
6052 	addr_t nextAddress = lockBaseAddress;
6053 	while (nextAddress != lockEndAddress) {
6054 		// get the next area
6055 		VMArea* area = addressSpace->LookupArea(nextAddress);
6056 		if (area == NULL) {
6057 			error = B_BAD_ADDRESS;
6058 			break;
6059 		}
6060 
6061 		addr_t areaStart = nextAddress;
6062 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6063 
6064 		// Lock the area's top cache. This is a requirement for
6065 		// VMArea::Unwire().
6066 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6067 
6068 		// Depending on the area cache type and the wiring, we may not need to
6069 		// look at the individual pages.
6070 		if (area->cache_type == CACHE_TYPE_NULL
6071 			|| area->cache_type == CACHE_TYPE_DEVICE
6072 			|| area->wiring == B_FULL_LOCK
6073 			|| area->wiring == B_CONTIGUOUS) {
6074 			// unwire the range (to avoid deadlocks we delete the range after
6075 			// unlocking the cache)
6076 			nextAddress = areaEnd;
6077 			VMAreaWiredRange* range = area->Unwire(areaStart,
6078 				areaEnd - areaStart, writable);
6079 			cacheChainLocker.Unlock();
6080 			if (range != NULL) {
6081 				range->~VMAreaWiredRange();
6082 				free_etc(range, mallocFlags);
6083 			}
6084 			continue;
6085 		}
6086 
6087 		// Lock the area's cache chain and the translation map. Needed to look
6088 		// up pages and play with their wired count.
6089 		cacheChainLocker.LockAllSourceCaches();
6090 		map->Lock();
6091 
6092 		// iterate through the pages and unwire them
6093 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6094 			phys_addr_t physicalAddress;
6095 			uint32 flags;
6096 
6097 			vm_page* page;
6098 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6099 				&& (flags & PAGE_PRESENT) != 0
6100 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6101 					!= NULL) {
6102 				// The page is still mapped -- just decrement its wired count.
6104 				decrement_page_wired_count(page);
6105 			} else {
6106 				panic("unlock_memory_etc(): Failed to unwire page: address "
6107 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6108 					nextAddress);
6109 				error = B_BAD_VALUE;
6110 				break;
6111 			}
6112 		}
6113 
6114 		map->Unlock();
6115 
6116 		// All pages are unwired. Remove the area's wired range as well (to
6117 		// avoid deadlocks we delete the range after unlocking the cache).
6118 		VMAreaWiredRange* range = area->Unwire(areaStart,
6119 			areaEnd - areaStart, writable);
6120 
6121 		cacheChainLocker.Unlock();
6122 
6123 		if (range != NULL) {
6124 			range->~VMAreaWiredRange();
6125 			free_etc(range, mallocFlags);
6126 		}
6127 
6128 		if (error != B_OK)
6129 			break;
6130 	}
6131 
6132 	// get rid of the address space reference lock_memory_etc() acquired
6133 	addressSpace->Put();
6134 
6135 	return error;
6136 }
6137 
6138 
6139 status_t
6140 unlock_memory(void* address, size_t numBytes, uint32 flags)
6141 {
6142 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6143 }
6144 
6145 
6146 /*!	Similar to get_memory_map(), but also allows specifying the address space
6147 	for the memory in question and has saner semantics.
6148 	Returns \c B_OK when the complete range could be translated or
6149 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6150 	case the actual number of entries is written to \c *_numEntries. Any other
6151 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6152 	in this case.
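
	A minimal usage sketch (illustrative only; \c buffer and \c bufferSize are
	placeholders):
	\code
	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
		entries, &count);
	// on B_OK or B_BUFFER_OVERFLOW, "count" holds the number of valid entries
	\endcode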
6153 */
6154 status_t
6155 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6156 	physical_entry* table, uint32* _numEntries)
6157 {
6158 	uint32 numEntries = *_numEntries;
6159 	*_numEntries = 0;
6160 
6161 	VMAddressSpace* addressSpace;
6162 	addr_t virtualAddress = (addr_t)address;
6163 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6164 	phys_addr_t physicalAddress;
6165 	status_t status = B_OK;
6166 	int32 index = -1;
6167 	addr_t offset = 0;
6168 	bool interrupts = are_interrupts_enabled();
6169 
6170 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6171 		"entries)\n", team, address, numBytes, numEntries));
6172 
6173 	if (numEntries == 0 || numBytes == 0)
6174 		return B_BAD_VALUE;
6175 
6176 	// in which address space is the address to be found?
6177 	if (IS_USER_ADDRESS(virtualAddress)) {
6178 		if (team == B_CURRENT_TEAM)
6179 			addressSpace = VMAddressSpace::GetCurrent();
6180 		else
6181 			addressSpace = VMAddressSpace::Get(team);
6182 	} else
6183 		addressSpace = VMAddressSpace::GetKernel();
6184 
6185 	if (addressSpace == NULL)
6186 		return B_ERROR;
6187 
6188 	VMTranslationMap* map = addressSpace->TranslationMap();
6189 
6190 	if (interrupts)
6191 		map->Lock();
6192 
6193 	while (offset < numBytes) {
6194 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6195 		uint32 flags;
6196 
6197 		if (interrupts) {
6198 			status = map->Query((addr_t)address + offset, &physicalAddress,
6199 				&flags);
6200 		} else {
6201 			status = map->QueryInterrupt((addr_t)address + offset,
6202 				&physicalAddress, &flags);
6203 		}
6204 		if (status < B_OK)
6205 			break;
6206 		if ((flags & PAGE_PRESENT) == 0) {
6207 			panic("get_memory_map() called on unmapped memory!");
6208 			return B_BAD_ADDRESS;
6209 		}
6210 
6211 		if (index < 0 && pageOffset > 0) {
6212 			physicalAddress += pageOffset;
6213 			if (bytes > B_PAGE_SIZE - pageOffset)
6214 				bytes = B_PAGE_SIZE - pageOffset;
6215 		}
6216 
6217 		// need to switch to the next physical_entry?
6218 		if (index < 0 || table[index].address
6219 				!= physicalAddress - table[index].size) {
6220 			if ((uint32)++index + 1 > numEntries) {
6221 				// table too small
6222 				break;
6223 			}
6224 			table[index].address = physicalAddress;
6225 			table[index].size = bytes;
6226 		} else {
6227 			// page fits in the current entry
6228 			table[index].size += bytes;
6229 		}
6230 
6231 		offset += bytes;
6232 	}
6233 
6234 	if (interrupts)
6235 		map->Unlock();
6236 
6237 	if (status != B_OK)
6238 		return status;
6239 
6240 	if ((uint32)index + 1 > numEntries) {
6241 		*_numEntries = index;
6242 		return B_BUFFER_OVERFLOW;
6243 	}
6244 
6245 	*_numEntries = index + 1;
6246 	return B_OK;
6247 }
6248 
6249 
6250 /*!	According to the BeBook, this function should always succeed.
6251 	This is no longer the case.
6252 */
6253 extern "C" int32
6254 __get_memory_map_haiku(const void* address, size_t numBytes,
6255 	physical_entry* table, int32 numEntries)
6256 {
6257 	uint32 entriesRead = numEntries;
6258 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6259 		table, &entriesRead);
6260 	if (error != B_OK)
6261 		return error;
6262 
6263 	// close the entry list
6264 
6265 	// if it's only one entry, we will silently accept the missing ending
6266 	if (numEntries == 1)
6267 		return B_OK;
6268 
6269 	if (entriesRead + 1 > (uint32)numEntries)
6270 		return B_BUFFER_OVERFLOW;
6271 
6272 	table[entriesRead].address = 0;
6273 	table[entriesRead].size = 0;
6274 
6275 	return B_OK;
6276 }
6277 
6278 
6279 area_id
6280 area_for(void* address)
6281 {
6282 	return vm_area_for((addr_t)address, true);
6283 }
6284 
6285 
6286 area_id
6287 find_area(const char* name)
6288 {
6289 	return VMAreas::Find(name);
6290 }
6291 
6292 
6293 status_t
6294 _get_area_info(area_id id, area_info* info, size_t size)
6295 {
6296 	if (size != sizeof(area_info) || info == NULL)
6297 		return B_BAD_VALUE;
6298 
6299 	AddressSpaceReadLocker locker;
6300 	VMArea* area;
6301 	status_t status = locker.SetFromArea(id, area);
6302 	if (status != B_OK)
6303 		return status;
6304 
6305 	fill_area_info(area, info, size);
6306 	return B_OK;
6307 }
6308 
6309 
6310 status_t
6311 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6312 {
6313 	addr_t nextBase = *(addr_t*)cookie;
6314 
6315 	// we're already through the list
6316 	if (nextBase == (addr_t)-1)
6317 		return B_ENTRY_NOT_FOUND;
6318 
6319 	if (team == B_CURRENT_TEAM)
6320 		team = team_get_current_team_id();
6321 
6322 	AddressSpaceReadLocker locker(team);
6323 	if (!locker.IsLocked())
6324 		return B_BAD_TEAM_ID;
6325 
6326 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6327 	if (area == NULL) {
6328 		nextBase = (addr_t)-1;
6329 		return B_ENTRY_NOT_FOUND;
6330 	}
6331 
6332 	fill_area_info(area, info, size);
6333 	*cookie = (ssize_t)(area->Base() + 1);
6334 
6335 	return B_OK;
6336 }
6337 
6338 
6339 status_t
6340 set_area_protection(area_id area, uint32 newProtection)
6341 {
6342 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6343 		newProtection, true);
6344 }
6345 
6346 
6347 status_t
6348 resize_area(area_id areaID, size_t newSize)
6349 {
6350 	return vm_resize_area(areaID, newSize, true);
6351 }
6352 
6353 
6354 /*!	Transfers the specified area to a new team. The caller must be the owner
6355 	of the area.
6356 */
6357 area_id
6358 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6359 	bool kernel)
6360 {
6361 	area_info info;
6362 	status_t status = get_area_info(id, &info);
6363 	if (status != B_OK)
6364 		return status;
6365 
6366 	if (info.team != thread_get_current_thread()->team->id)
6367 		return B_PERMISSION_DENIED;
6368 
6369 	// We need to mark the area cloneable so the following operations work.
6370 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6371 	if (status != B_OK)
6372 		return status;
6373 
6374 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6375 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6376 	if (clonedArea < 0)
6377 		return clonedArea;
6378 
6379 	status = vm_delete_area(info.team, id, kernel);
6380 	if (status != B_OK) {
6381 		vm_delete_area(target, clonedArea, kernel);
6382 		return status;
6383 	}
6384 
6385 	// Now we can reset the protection to whatever it was before.
6386 	set_area_protection(clonedArea, info.protection);
6387 
6388 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6389 
6390 	return clonedArea;
6391 }
6392 
6393 
6394 extern "C" area_id
6395 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6396 	size_t numBytes, uint32 addressSpec, uint32 protection,
6397 	void** _virtualAddress)
6398 {
6399 	if (!arch_vm_supports_protection(protection))
6400 		return B_NOT_SUPPORTED;
6401 
6402 	fix_protection(&protection);
6403 
6404 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6405 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6406 		false);
6407 }
6408 
6409 
6410 area_id
6411 clone_area(const char* name, void** _address, uint32 addressSpec,
6412 	uint32 protection, area_id source)
6413 {
6414 	if ((protection & B_KERNEL_PROTECTION) == 0)
6415 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6416 
6417 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6418 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6419 }
6420 
6421 
6422 area_id
6423 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6424 	uint32 protection, uint32 flags, uint32 guardSize,
6425 	const virtual_address_restrictions* virtualAddressRestrictions,
6426 	const physical_address_restrictions* physicalAddressRestrictions,
6427 	void** _address)
6428 {
6429 	fix_protection(&protection);
6430 
6431 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6432 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6433 		true, _address);
6434 }
6435 
6436 
6437 extern "C" area_id
6438 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6439 	size_t size, uint32 lock, uint32 protection)
6440 {
6441 	fix_protection(&protection);
6442 
6443 	virtual_address_restrictions virtualRestrictions = {};
6444 	virtualRestrictions.address = *_address;
6445 	virtualRestrictions.address_specification = addressSpec;
6446 	physical_address_restrictions physicalRestrictions = {};
6447 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6448 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6449 		true, _address);
6450 }
6451 
6452 
6453 status_t
6454 delete_area(area_id area)
6455 {
6456 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6457 }
6458 
6459 
6460 //	#pragma mark - Userland syscalls
6461 
6462 
6463 status_t
6464 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6465 	addr_t size)
6466 {
6467 	// filter out some unavailable values (for userland)
6468 	switch (addressSpec) {
6469 		case B_ANY_KERNEL_ADDRESS:
6470 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6471 			return B_BAD_VALUE;
6472 	}
6473 
6474 	addr_t address;
6475 
6476 	if (!IS_USER_ADDRESS(userAddress)
6477 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6478 		return B_BAD_ADDRESS;
6479 
6480 	status_t status = vm_reserve_address_range(
6481 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6482 		RESERVED_AVOID_BASE);
6483 	if (status != B_OK)
6484 		return status;
6485 
6486 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6487 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6488 			(void*)address, size);
6489 		return B_BAD_ADDRESS;
6490 	}
6491 
6492 	return B_OK;
6493 }
6494 
6495 
6496 status_t
6497 _user_unreserve_address_range(addr_t address, addr_t size)
6498 {
6499 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6500 		(void*)address, size);
6501 }
6502 
6503 
6504 area_id
6505 _user_area_for(void* address)
6506 {
6507 	return vm_area_for((addr_t)address, false);
6508 }
6509 
6510 
6511 area_id
6512 _user_find_area(const char* userName)
6513 {
6514 	char name[B_OS_NAME_LENGTH];
6515 
6516 	if (!IS_USER_ADDRESS(userName)
6517 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6518 		return B_BAD_ADDRESS;
6519 
6520 	return find_area(name);
6521 }
6522 
6523 
6524 status_t
6525 _user_get_area_info(area_id area, area_info* userInfo)
6526 {
6527 	if (!IS_USER_ADDRESS(userInfo))
6528 		return B_BAD_ADDRESS;
6529 
6530 	area_info info;
6531 	status_t status = get_area_info(area, &info);
6532 	if (status < B_OK)
6533 		return status;
6534 
6535 	// TODO: do we want to prevent userland from seeing kernel protections?
6536 	//info.protection &= B_USER_PROTECTION;
6537 
6538 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6539 		return B_BAD_ADDRESS;
6540 
6541 	return status;
6542 }
6543 
6544 
6545 status_t
6546 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6547 {
6548 	ssize_t cookie;
6549 
6550 	if (!IS_USER_ADDRESS(userCookie)
6551 		|| !IS_USER_ADDRESS(userInfo)
6552 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6553 		return B_BAD_ADDRESS;
6554 
6555 	area_info info;
6556 	status_t status = _get_next_area_info(team, &cookie, &info,
6557 		sizeof(area_info));
6558 	if (status != B_OK)
6559 		return status;
6560 
6561 	//info.protection &= B_USER_PROTECTION;
6562 
6563 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6564 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6565 		return B_BAD_ADDRESS;
6566 
6567 	return status;
6568 }
6569 
6570 
6571 status_t
6572 _user_set_area_protection(area_id area, uint32 newProtection)
6573 {
6574 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6575 		return B_BAD_VALUE;
6576 
6577 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6578 		newProtection, false);
6579 }
6580 
6581 
6582 status_t
6583 _user_resize_area(area_id area, size_t newSize)
6584 {
6585 	// TODO: Since we restrict deleting of areas to those owned by the team,
6586 	// we should also do that for resizing (check other functions, too).
6587 	return vm_resize_area(area, newSize, false);
6588 }
6589 
6590 
6591 area_id
6592 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6593 	team_id target)
6594 {
6595 	// filter out some unavailable values (for userland)
6596 	switch (addressSpec) {
6597 		case B_ANY_KERNEL_ADDRESS:
6598 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6599 			return B_BAD_VALUE;
6600 	}
6601 
6602 	void* address;
6603 	if (!IS_USER_ADDRESS(userAddress)
6604 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6605 		return B_BAD_ADDRESS;
6606 
6607 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6608 	if (newArea < B_OK)
6609 		return newArea;
6610 
6611 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6612 		return B_BAD_ADDRESS;
6613 
6614 	return newArea;
6615 }
6616 
6617 
6618 area_id
6619 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6620 	uint32 protection, area_id sourceArea)
6621 {
6622 	char name[B_OS_NAME_LENGTH];
6623 	void* address;
6624 
6625 	// filter out some unavailable values (for userland)
6626 	switch (addressSpec) {
6627 		case B_ANY_KERNEL_ADDRESS:
6628 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6629 			return B_BAD_VALUE;
6630 	}
6631 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6632 		return B_BAD_VALUE;
6633 
6634 	if (!IS_USER_ADDRESS(userName)
6635 		|| !IS_USER_ADDRESS(userAddress)
6636 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6637 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6638 		return B_BAD_ADDRESS;
6639 
6640 	fix_protection(&protection);
6641 
6642 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6643 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6644 		false);
6645 	if (clonedArea < B_OK)
6646 		return clonedArea;
6647 
6648 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6649 		delete_area(clonedArea);
6650 		return B_BAD_ADDRESS;
6651 	}
6652 
6653 	return clonedArea;
6654 }
6655 
6656 
6657 area_id
6658 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6659 	size_t size, uint32 lock, uint32 protection)
6660 {
6661 	char name[B_OS_NAME_LENGTH];
6662 	void* address;
6663 
6664 	// filter out some unavailable values (for userland)
6665 	switch (addressSpec) {
6666 		case B_ANY_KERNEL_ADDRESS:
6667 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6668 			return B_BAD_VALUE;
6669 	}
6670 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6671 		return B_BAD_VALUE;
6672 
6673 	if (!IS_USER_ADDRESS(userName)
6674 		|| !IS_USER_ADDRESS(userAddress)
6675 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6676 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6677 		return B_BAD_ADDRESS;
6678 
6679 	if (addressSpec == B_EXACT_ADDRESS
6680 		&& IS_KERNEL_ADDRESS(address))
6681 		return B_BAD_VALUE;
6682 
6683 	if (addressSpec == B_ANY_ADDRESS)
6684 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6685 	if (addressSpec == B_BASE_ADDRESS)
6686 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6687 
6688 	fix_protection(&protection);
6689 
6690 	virtual_address_restrictions virtualRestrictions = {};
6691 	virtualRestrictions.address = address;
6692 	virtualRestrictions.address_specification = addressSpec;
6693 	physical_address_restrictions physicalRestrictions = {};
6694 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6695 		size, lock, protection, 0, 0, &virtualRestrictions,
6696 		&physicalRestrictions, false, &address);
6697 
6698 	if (area >= B_OK
6699 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6700 		delete_area(area);
6701 		return B_BAD_ADDRESS;
6702 	}
6703 
6704 	return area;
6705 }
6706 
6707 
6708 status_t
6709 _user_delete_area(area_id area)
6710 {
6711 	// Unlike the BeOS implementation, you can now only delete areas
6712 	// that you have created yourself from userland.
6713 	// The documentation to delete_area() explicitly states that this
6714 	// will be restricted in the future, and so it will.
6715 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6716 }
6717 
6718 
6719 // TODO: create a BeOS style call for this!
6720 
6721 area_id
6722 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6723 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6724 	int fd, off_t offset)
6725 {
6726 	char name[B_OS_NAME_LENGTH];
6727 	void* address;
6728 	area_id area;
6729 
6730 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6731 		return B_BAD_VALUE;
6732 
6733 	fix_protection(&protection);
6734 
6735 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6736 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6737 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6738 		return B_BAD_ADDRESS;
6739 
6740 	if (addressSpec == B_EXACT_ADDRESS) {
6741 		if ((addr_t)address + size < (addr_t)address
6742 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6743 			return B_BAD_VALUE;
6744 		}
6745 		if (!IS_USER_ADDRESS(address)
6746 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6747 			return B_BAD_ADDRESS;
6748 		}
6749 	}
6750 
6751 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6752 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6753 		false);
6754 	if (area < B_OK)
6755 		return area;
6756 
6757 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6758 		return B_BAD_ADDRESS;
6759 
6760 	return area;
6761 }
6762 
6763 
6764 status_t
6765 _user_unmap_memory(void* _address, size_t size)
6766 {
6767 	addr_t address = (addr_t)_address;
6768 
6769 	// check params
6770 	if (size == 0 || (addr_t)address + size < (addr_t)address
6771 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6772 		return B_BAD_VALUE;
6773 	}
6774 
6775 	if (!IS_USER_ADDRESS(address)
6776 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6777 		return B_BAD_ADDRESS;
6778 	}
6779 
6780 	// Write lock the address space and ensure the address range is not wired.
6781 	AddressSpaceWriteLocker locker;
6782 	do {
6783 		status_t status = locker.SetTo(team_get_current_team_id());
6784 		if (status != B_OK)
6785 			return status;
6786 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6787 			size, &locker));
6788 
6789 	// unmap
6790 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6791 }
6792 
6793 
6794 status_t
6795 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6796 {
6797 	// check address range
6798 	addr_t address = (addr_t)_address;
6799 	size = PAGE_ALIGN(size);
6800 
6801 	if ((address % B_PAGE_SIZE) != 0)
6802 		return B_BAD_VALUE;
6803 	if (!is_user_address_range(_address, size)) {
6804 		// weird error code required by POSIX
6805 		return ENOMEM;
6806 	}
6807 
6808 	// extend and check protection
6809 	if ((protection & ~B_USER_PROTECTION) != 0)
6810 		return B_BAD_VALUE;
6811 
6812 	fix_protection(&protection);
6813 
6814 	// We need to write lock the address space, since we're going to play with
6815 	// the areas. Also make sure that none of the areas is wired and that we're
6816 	// actually allowed to change the protection.
6817 	AddressSpaceWriteLocker locker;
6818 
6819 	bool restart;
6820 	do {
6821 		restart = false;
6822 
6823 		status_t status = locker.SetTo(team_get_current_team_id());
6824 		if (status != B_OK)
6825 			return status;
6826 
6827 		// First round: Check whether the whole range is covered by areas and we
6828 		// are allowed to modify them.
6829 		addr_t currentAddress = address;
6830 		size_t sizeLeft = size;
6831 		while (sizeLeft > 0) {
6832 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6833 			if (area == NULL)
6834 				return B_NO_MEMORY;
6835 
6836 			if ((area->protection & B_KERNEL_AREA) != 0)
6837 				return B_NOT_ALLOWED;
6838 			if (area->protection_max != 0
6839 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6840 				return B_NOT_ALLOWED;
6841 			}
6842 
6843 			addr_t offset = currentAddress - area->Base();
6844 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6845 
6846 			AreaCacheLocker cacheLocker(area);
6847 
6848 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6849 					&locker, &cacheLocker)) {
6850 				restart = true;
6851 				break;
6852 			}
6853 
6854 			cacheLocker.Unlock();
6855 
6856 			currentAddress += rangeSize;
6857 			sizeLeft -= rangeSize;
6858 		}
6859 	} while (restart);
6860 
6861 	// Second round: If the protections differ from that of the area, create a
6862 	// page protection array and re-map mapped pages.
6863 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6864 	addr_t currentAddress = address;
6865 	size_t sizeLeft = size;
6866 	while (sizeLeft > 0) {
6867 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6868 		if (area == NULL)
6869 			return B_NO_MEMORY;
6870 
6871 		addr_t offset = currentAddress - area->Base();
6872 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6873 
6874 		currentAddress += rangeSize;
6875 		sizeLeft -= rangeSize;
6876 
6877 		if (area->page_protections == NULL) {
6878 			if (area->protection == protection)
6879 				continue;
6880 			if (offset == 0 && rangeSize == area->Size()) {
6881 				// The whole area is covered: let set_area_protection handle it.
6882 				status_t status = vm_set_area_protection(area->address_space->ID(),
6883 					area->id, protection, false);
6884 				if (status != B_OK)
6885 					return status;
6886 				continue;
6887 			}
6888 
6889 			status_t status = allocate_area_page_protections(area);
6890 			if (status != B_OK)
6891 				return status;
6892 		}
6893 
6894 		// We need to lock the complete cache chain, since we potentially unmap
6895 		// pages of lower caches.
6896 		VMCache* topCache = vm_area_get_locked_cache(area);
6897 		VMCacheChainLocker cacheChainLocker(topCache);
6898 		cacheChainLocker.LockAllSourceCaches();
6899 
6900 		// Adjust the committed size, if necessary.
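		// (Presumably: copy-on-write pages that become writable will need
		// their own copy in this temporary cache, so the commitment has to
		// cover them; pages that become read-only no longer do.)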
6901 		if (topCache->source != NULL && topCache->temporary) {
6902 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6903 			ssize_t commitmentChange = 0;
6904 			for (addr_t pageAddress = area->Base() + offset;
6905 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6906 				if (topCache->LookupPage(pageAddress) != NULL) {
6907 					// This page should already be accounted for in the commitment.
6908 					continue;
6909 				}
6910 
6911 				const bool isWritable
6912 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6913 
6914 				if (becomesWritable && !isWritable)
6915 					commitmentChange += B_PAGE_SIZE;
6916 				else if (!becomesWritable && isWritable)
6917 					commitmentChange -= B_PAGE_SIZE;
6918 			}
6919 
6920 			if (commitmentChange != 0) {
6921 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6922 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6923 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6924 				if (status != B_OK)
6925 					return status;
6926 			}
6927 		}
6928 
6929 		for (addr_t pageAddress = area->Base() + offset;
6930 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6931 			map->Lock();
6932 
6933 			set_area_page_protection(area, pageAddress, protection);
6934 
6935 			phys_addr_t physicalAddress;
6936 			uint32 flags;
6937 
6938 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6939 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6940 				map->Unlock();
6941 				continue;
6942 			}
6943 
6944 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6945 			if (page == NULL) {
6946 				panic("area %p: page lookup failed for pa %#" B_PRIxPHYSADDR
6947 					"\n", area, physicalAddress);
6948 				map->Unlock();
6949 				return B_ERROR;
6950 			}
6951 
6952 			// If the page is not in the topmost cache and write access is
6953 			// requested, we have to unmap it. Otherwise we can re-map it with
6954 			// the new protection.
6955 			bool unmapPage = page->Cache() != topCache
6956 				&& (protection & B_WRITE_AREA) != 0;
6957 
6958 			if (!unmapPage)
6959 				map->ProtectPage(area, pageAddress, protection);
6960 
6961 			map->Unlock();
6962 
6963 			if (unmapPage) {
6964 				DEBUG_PAGE_ACCESS_START(page);
6965 				unmap_page(area, pageAddress);
6966 				DEBUG_PAGE_ACCESS_END(page);
6967 			}
6968 		}
6969 	}
6970 
6971 	return B_OK;
6972 }
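
/*	Usage sketch (illustration only; not part of this file's build): userland is
	assumed to reach the syscall above through the POSIX mprotect() wrapper in
	libroot. The snippet maps an anonymous page read/write, then drops write
	access again:

		#include <stdlib.h>
		#include <sys/mman.h>
		#include <unistd.h>

		int
		main()
		{
			size_t pageSize = (size_t)sysconf(_SC_PAGESIZE);

			// Map one anonymous page read/write.
			void* page = mmap(NULL, pageSize, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
			if (page == MAP_FAILED)
				return EXIT_FAILURE;

			((char*)page)[0] = 1;

			// Drop write access again; a later store to the page would fault.
			if (mprotect(page, pageSize, PROT_READ) != 0)
				return EXIT_FAILURE;

			munmap(page, pageSize);
			return EXIT_SUCCESS;
		}
*/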
6973 
6974 
6975 status_t
6976 _user_sync_memory(void* _address, size_t size, uint32 flags)
6977 {
6978 	addr_t address = (addr_t)_address;
6979 	size = PAGE_ALIGN(size);
6980 
6981 	// check params
6982 	if ((address % B_PAGE_SIZE) != 0)
6983 		return B_BAD_VALUE;
6984 	if (!is_user_address_range(_address, size)) {
6985 		// weird error code required by POSIX
6986 		return ENOMEM;
6987 	}
6988 
6989 	bool writeSync = (flags & MS_SYNC) != 0;
6990 	bool writeAsync = (flags & MS_ASYNC) != 0;
6991 	if (writeSync && writeAsync)
6992 		return B_BAD_VALUE;
6993 
6994 	if (size == 0 || (!writeSync && !writeAsync))
6995 		return B_OK;
6996 
6997 	// iterate through the range and sync all concerned areas
6998 	while (size > 0) {
6999 		// read lock the address space
7000 		AddressSpaceReadLocker locker;
7001 		status_t error = locker.SetTo(team_get_current_team_id());
7002 		if (error != B_OK)
7003 			return error;
7004 
7005 		// get the first area
7006 		VMArea* area = locker.AddressSpace()->LookupArea(address);
7007 		if (area == NULL)
7008 			return B_NO_MEMORY;
7009 
7010 		uint32 offset = address - area->Base();
7011 		size_t rangeSize = min_c(area->Size() - offset, size);
7012 		offset += area->cache_offset;
7013 
7014 		// lock the cache
7015 		AreaCacheLocker cacheLocker(area);
7016 		if (!cacheLocker)
7017 			return B_BAD_VALUE;
7018 		VMCache* cache = area->cache;
7019 
7020 		locker.Unlock();
7021 
7022 		uint32 firstPage = offset >> PAGE_SHIFT;
7023 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
7024 
7025 		// write the pages
7026 		if (cache->type == CACHE_TYPE_VNODE) {
7027 			if (writeSync) {
7028 				// synchronous
7029 				error = vm_page_write_modified_page_range(cache, firstPage,
7030 					endPage);
7031 				if (error != B_OK)
7032 					return error;
7033 			} else {
7034 				// asynchronous
7035 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
7036 				// TODO: This is probably not quite what is supposed to happen.
7037 				// Especially when a lot has to be written, it might take ages
7038 				// until it really hits the disk.
7039 			}
7040 		}
7041 
7042 		address += rangeSize;
7043 		size -= rangeSize;
7044 	}
7045 
7046 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
7047 	// synchronize multiple mappings of the same file. In our VM they never get
7048 	// out of sync, though, so we don't have to do anything.
7049 
7050 	return B_OK;
7051 }
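
/*	Usage sketch (illustration only; not part of this file's build): msync() in
	libroot is assumed to forward to the syscall above. Only vnode-backed caches
	are written back, matching the CACHE_TYPE_VNODE check in the loop:

		#include <fcntl.h>
		#include <stdlib.h>
		#include <string.h>
		#include <sys/mman.h>
		#include <unistd.h>

		int
		main()
		{
			int fd = open("/tmp/msync-example", O_CREAT | O_RDWR, 0644);
			if (fd < 0 || ftruncate(fd, 4096) != 0)
				return EXIT_FAILURE;

			char* data = (char*)mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
			if (data == MAP_FAILED)
				return EXIT_FAILURE;

			memcpy(data, "hello", 5);

			// Block until the modified page has been written to the file.
			if (msync(data, 4096, MS_SYNC) != 0)
				return EXIT_FAILURE;

			munmap(data, 4096);
			close(fd);
			return EXIT_SUCCESS;
		}
*/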
7052 
7053 
7054 status_t
7055 _user_memory_advice(void* _address, size_t size, uint32 advice)
7056 {
7057 	addr_t address = (addr_t)_address;
7058 	if ((address % B_PAGE_SIZE) != 0)
7059 		return B_BAD_VALUE;
7060 
7061 	size = PAGE_ALIGN(size);
7062 	if (!is_user_address_range(_address, size)) {
7063 		// weird error code required by POSIX
7064 		return B_NO_MEMORY;
7065 	}
7066 
7067 	switch (advice) {
7068 		case MADV_NORMAL:
7069 		case MADV_SEQUENTIAL:
7070 		case MADV_RANDOM:
7071 		case MADV_WILLNEED:
7072 		case MADV_DONTNEED:
7073 			// TODO: Implement!
7074 			break;
7075 
7076 		case MADV_FREE:
7077 		{
7078 			AddressSpaceWriteLocker locker;
7079 			do {
7080 				status_t status = locker.SetTo(team_get_current_team_id());
7081 				if (status != B_OK)
7082 					return status;
7083 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7084 					address, size, &locker));
7085 
7086 			discard_address_range(locker.AddressSpace(), address, size, false);
7087 			break;
7088 		}
7089 
7090 		default:
7091 			return B_BAD_VALUE;
7092 	}
7093 
7094 	return B_OK;
7095 }
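
/*	Usage sketch (illustration only; not part of this file's build): madvise()
	in libroot is assumed to forward to the syscall above. Of the advice values,
	only MADV_FREE currently has an effect (it discards the range's pages):

		#include <stdlib.h>
		#include <string.h>
		#include <sys/mman.h>

		int
		main()
		{
			const size_t size = 16 * 4096;
			char* buffer = (char*)mmap(NULL, size, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
			if (buffer == MAP_FAILED)
				return EXIT_FAILURE;

			memset(buffer, 0xaa, size);

			// The contents are no longer needed: the kernel may discard the
			// pages; touching them again yields zero-filled memory.
			if (madvise(buffer, size, MADV_FREE) != 0)
				return EXIT_FAILURE;

			munmap(buffer, size);
			return EXIT_SUCCESS;
		}
*/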
7096 
7097 
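/*!	Returns the effective protection of the page containing \a address (taking
	any per-page protections of the area into account) and the wiring type of
	the area, for the team given by \a teamID.
*/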
7098 status_t
7099 _user_get_memory_properties(team_id teamID, const void* address,
7100 	uint32* _protected, uint32* _lock)
7101 {
7102 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7103 		return B_BAD_ADDRESS;
7104 
7105 	AddressSpaceReadLocker locker;
7106 	status_t error = locker.SetTo(teamID);
7107 	if (error != B_OK)
7108 		return error;
7109 
7110 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7111 	if (area == NULL)
7112 		return B_NO_MEMORY;
7113 
7114 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7115 	uint32 wiring = area->wiring;
7116 
7117 	locker.Unlock();
7118 
7119 	error = user_memcpy(_protected, &protection, sizeof(protection));
7120 	if (error != B_OK)
7121 		return error;
7122 
7123 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7124 
7125 	return error;
7126 }
7127 
7128 
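/*!	Common backend for _user_mlock() and _user_munlock(): wires each area range
	via lock_memory_etc(), toggles whether the pages of the backing anonymous
	cache may be swapped out, and unwires the range again. Ranges backed by a
	VMAnonymousNoSwapCache are left alone (they can never be swapped); any other
	cache type yields EINVAL.
*/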
7129 static status_t
7130 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7131 {
7132 #if ENABLE_SWAP_SUPPORT
7133 	// check address range
7134 	addr_t address = (addr_t)_address;
7135 	size = PAGE_ALIGN(size);
7136 
7137 	if ((address % B_PAGE_SIZE) != 0)
7138 		return EINVAL;
7139 	if (!is_user_address_range(_address, size))
7140 		return EINVAL;
7141 
7142 	const addr_t endAddress = address + size;
7143 
7144 	AddressSpaceReadLocker addressSpaceLocker;
7145 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7146 	if (error != B_OK)
7147 		return error;
7148 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7149 
7150 	// iterate through all concerned areas
7151 	addr_t nextAddress = address;
7152 	while (nextAddress != endAddress) {
7153 		// get the next area
7154 		VMArea* area = addressSpace->LookupArea(nextAddress);
7155 		if (area == NULL) {
7156 			error = B_BAD_ADDRESS;
7157 			break;
7158 		}
7159 
7160 		const addr_t areaStart = nextAddress;
7161 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7162 		nextAddress = areaEnd;
7163 
7164 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7165 		if (error != B_OK) {
7166 			// We don't need to unset or reset things on failure.
7167 			break;
7168 		}
7169 
7170 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7171 		VMAnonymousCache* anonCache = NULL;
7172 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7173 			// This memory can never be swapped anyway. Nothing to do.
7174 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7175 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7176 				areaEnd - areaStart, swappable);
7177 		} else {
7178 			// Some other cache type? We cannot affect anything here.
7179 			error = EINVAL;
7180 		}
7181 
7182 		cacheChainLocker.Unlock();
7183 
7184 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7185 		if (error != B_OK)
7186 			break;
7187 	}
7188 
7189 	return error;
7190 #else
7191 	// No swap support? Nothing to do.
7192 	return B_OK;
7193 #endif
7194 }
7195 
7196 
7197 status_t
7198 _user_mlock(const void* _address, size_t size)
7199 {
7200 	return user_set_memory_swappable(_address, size, false);
7201 }
7202 
7203 
7204 status_t
7205 _user_munlock(const void* _address, size_t size)
7206 {
7207 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7208 	// if multiple clones of an area had mlock() called on them,
7209 	// munlock() must also be called on all of them to actually unlock.
7210 	// (At present, the first munlock() will unlock all.)
7211 	// TODO: fork() should automatically unlock memory in the child.
7212 	return user_set_memory_swappable(_address, size, true);
7213 }
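
/*	Usage sketch (illustration only; not part of this file's build): mlock() and
	munlock() in libroot are assumed to forward to the two syscalls above. Note
	that on this implementation they toggle per-page swappability rather than
	maintaining a lock count (see the TODO above):

		#include <stdlib.h>
		#include <string.h>
		#include <sys/mman.h>

		int
		main()
		{
			const size_t size = 4 * 4096;
			char* secret = (char*)mmap(NULL, size, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
			if (secret == MAP_FAILED)
				return EXIT_FAILURE;

			// Keep the pages resident (non-swappable) while they hold
			// sensitive data.
			if (mlock(secret, size) != 0)
				return EXIT_FAILURE;

			memset(secret, 0, size);
			// ... use the buffer ...

			munlock(secret, size);
			munmap(secret, size);
			return EXIT_SUCCESS;
		}
*/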
7214 
7215 
7216 // #pragma mark -- compatibility
7217 
7218 
7219 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7220 
7221 
7222 struct physical_entry_beos {
7223 	uint32	address;
7224 	uint32	size;
7225 };
7226 
7227 
7228 /*!	The physical_entry structure has changed. We need to translate it to the
7229 	old one.
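	The BeOS structure (physical_entry_beos above) only provides 32-bit address
	and size fields, so physical addresses at or above 4 GB cannot be
	represented and are treated as fatal below.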
7230 */
7231 extern "C" int32
7232 __get_memory_map_beos(const void* _address, size_t numBytes,
7233 	physical_entry_beos* table, int32 numEntries)
7234 {
7235 	if (numEntries <= 0)
7236 		return B_BAD_VALUE;
7237 
7238 	const uint8* address = (const uint8*)_address;
7239 
7240 	int32 count = 0;
7241 	while (numBytes > 0 && count < numEntries) {
7242 		physical_entry entry;
7243 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7244 		// B_BUFFER_OVERFLOW only means the range needs more than the single
7245 		// entry we passed in; that entry has still been filled in.
7246 		if (result < 0 && result != B_BUFFER_OVERFLOW)
7247 			return result;
7248 
7249 		if (entry.address >= (phys_addr_t)1 << 32) {
7250 			panic("get_memory_map(): Address is greater than 4 GB!");
7251 			return B_ERROR;
7252 		}
7253 
7254 		table[count].address = entry.address;
7255 		table[count++].size = entry.size;
7256 
7257 		address += entry.size;
7258 		numBytes -= entry.size;
7259 	}
7260 
7261 	// null-terminate the table, if possible
7262 	if (count < numEntries) {
7263 		table[count].address = 0;
7264 		table[count].size = 0;
7265 	}
7266 
7267 	return B_OK;
7268 }
7269 
7270 
7271 /*!	The type of the \a physicalAddress parameter has changed from void* to
7272 	phys_addr_t.
7273 */
7274 extern "C" area_id
7275 __map_physical_memory_beos(const char* name, void* physicalAddress,
7276 	size_t numBytes, uint32 addressSpec, uint32 protection,
7277 	void** _virtualAddress)
7278 {
7279 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7280 		addressSpec, protection, _virtualAddress);
7281 }
7282 
7283 
7284 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7285 	we meddle with the \a lock parameter to force physical allocation below 4 GB.
7286 */
7287 extern "C" area_id
7288 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7289 	size_t size, uint32 lock, uint32 protection)
7290 {
7291 	switch (lock) {
7292 		case B_NO_LOCK:
7293 			break;
7294 		case B_FULL_LOCK:
7295 		case B_LAZY_LOCK:
7296 			lock = B_32_BIT_FULL_LOCK;
7297 			break;
7298 		case B_CONTIGUOUS:
7299 			lock = B_32_BIT_CONTIGUOUS;
7300 			break;
7301 	}
7302 
7303 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7304 		protection);
7305 }
7306 
7307 
7308 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7309 	"BASE");
7310 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7311 	"map_physical_memory@", "BASE");
7312 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7313 	"BASE");
7314 
7315 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7316 	"get_memory_map@@", "1_ALPHA3");
7317 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7318 	"map_physical_memory@@", "1_ALPHA3");
7319 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7320 	"1_ALPHA3");
7321 
7322 
7323 #else
7324 
7325 
7326 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7327 	"get_memory_map@@", "BASE");
7328 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7329 	"map_physical_memory@@", "BASE");
7330 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7331 	"BASE");
7332 
7333 
7334 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7335