xref: /haiku/src/system/kernel/vm/vm.cpp (revision 909af08f4328301fbdef1ffb41f566c3b5bec0c7)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
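// AreaCacheLocking/AreaCacheLocker pair an area's cache with AutoLocker: the
// cache is always obtained already locked via vm_area_get_locked_cache()
// (hence Lock() just returns false), and Unlock() hands it back through
// vm_area_put_locked_cache().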
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
117 
118 
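// Locks a chain of caches from a consumer (top) cache down through its source
// caches. The top cache is expected to be locked and referenced by the caller;
// LockSourceCache()/LockAllSourceCaches() lock and reference the caches below
// it, and Unlock() (also called by the destructor) releases the whole chain.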
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
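		// Link the cache back to its consumer via UserData, so that Unlock()
		// can later walk the chain from fBottomCache up to fTopCache.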
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
235 
236 } // namespace
237 
238 
239 // The memory reserve an allocation of a certain priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 static ObjectCache** sPageMappingsObjectCaches;
248 static uint32 sPageMappingsMask;
249 
250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
251 
252 static off_t sAvailableMemory;
253 static off_t sNeededMemory;
254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
255 static uint32 sPageFaults;
256 
257 static VMPhysicalPageMapper* sPhysicalPageMapper;
258 
259 #if DEBUG_CACHE_LIST
260 
261 struct cache_info {
262 	VMCache*	cache;
263 	addr_t		page_count;
264 	addr_t		committed;
265 };
266 
267 static const int kCacheInfoTableCount = 100 * 1024;
268 static cache_info* sCacheInfoTable;
269 
270 #endif	// DEBUG_CACHE_LIST
271 
272 
273 // function declarations
274 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
275 	bool addressSpaceCleanup);
276 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
277 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
278 static status_t map_backing_store(VMAddressSpace* addressSpace,
279 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
280 	int protection, int protectionMax, int mapping, uint32 flags,
281 	const virtual_address_restrictions* addressRestrictions, bool kernel,
282 	VMArea** _area, void** _virtualAddress);
283 static void fix_protection(uint32* protection);
284 
285 
286 //	#pragma mark -
287 
288 
289 #if VM_PAGE_FAULT_TRACING
290 
291 namespace VMPageFaultTracing {
292 
293 class PageFaultStart : public AbstractTraceEntry {
294 public:
295 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
296 		:
297 		fAddress(address),
298 		fPC(pc),
299 		fWrite(write),
300 		fUser(user)
301 	{
302 		Initialized();
303 	}
304 
305 	virtual void AddDump(TraceOutput& out)
306 	{
307 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
308 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
309 	}
310 
311 private:
312 	addr_t	fAddress;
313 	addr_t	fPC;
314 	bool	fWrite;
315 	bool	fUser;
316 };
317 
318 
319 // page fault errors
320 enum {
321 	PAGE_FAULT_ERROR_NO_AREA		= 0,
322 	PAGE_FAULT_ERROR_KERNEL_ONLY,
323 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
324 	PAGE_FAULT_ERROR_READ_PROTECTED,
325 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
326 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
327 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
328 };
329 
330 
331 class PageFaultError : public AbstractTraceEntry {
332 public:
333 	PageFaultError(area_id area, status_t error)
334 		:
335 		fArea(area),
336 		fError(error)
337 	{
338 		Initialized();
339 	}
340 
341 	virtual void AddDump(TraceOutput& out)
342 	{
343 		switch (fError) {
344 			case PAGE_FAULT_ERROR_NO_AREA:
345 				out.Print("page fault error: no area");
346 				break;
347 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
348 				out.Print("page fault error: area: %ld, kernel only", fArea);
349 				break;
350 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
351 				out.Print("page fault error: area: %ld, write protected",
352 					fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_READ_PROTECTED:
355 				out.Print("page fault error: area: %ld, read protected", fArea);
356 				break;
357 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
358 				out.Print("page fault error: area: %ld, execute protected",
359 					fArea);
360 				break;
361 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
362 				out.Print("page fault error: kernel touching bad user memory");
363 				break;
364 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
365 				out.Print("page fault error: no address space");
366 				break;
367 			default:
368 				out.Print("page fault error: area: %ld, error: %s", fArea,
369 					strerror(fError));
370 				break;
371 		}
372 	}
373 
374 private:
375 	area_id		fArea;
376 	status_t	fError;
377 };
378 
379 
380 class PageFaultDone : public AbstractTraceEntry {
381 public:
382 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
383 			vm_page* page)
384 		:
385 		fArea(area),
386 		fTopCache(topCache),
387 		fCache(cache),
388 		fPage(page)
389 	{
390 		Initialized();
391 	}
392 
393 	virtual void AddDump(TraceOutput& out)
394 	{
395 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
396 			"page: %p", fArea, fTopCache, fCache, fPage);
397 	}
398 
399 private:
400 	area_id		fArea;
401 	VMCache*	fTopCache;
402 	VMCache*	fCache;
403 	vm_page*	fPage;
404 };
405 
406 }	// namespace VMPageFaultTracing
407 
408 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
409 #else
410 #	define TPF(x) ;
411 #endif	// VM_PAGE_FAULT_TRACING
412 
413 
414 //	#pragma mark - page mappings allocation
415 
416 
417 static void
418 create_page_mappings_object_caches()
419 {
420 	// We want a power of 2 no larger than the number of CPUs.
421 	const int32 numCPUs = smp_get_num_cpus();
422 	int32 count = next_power_of_2(numCPUs);
423 	if (count > numCPUs)
424 		count >>= 1;
425 	sPageMappingsMask = count - 1;
426 
427 	sPageMappingsObjectCaches = new object_cache*[count];
428 	if (sPageMappingsObjectCaches == NULL)
429 		panic("failed to allocate page mappings object_cache array");
430 
431 	for (int32 i = 0; i < count; i++) {
432 		char name[32];
433 		snprintf(name, sizeof(name), "page mappings %" B_PRId32, i);
434 
435 		object_cache* cache = create_object_cache_etc(name,
436 			sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
437 			NULL, NULL);
438 		if (cache == NULL)
439 			panic("failed to create page mappings object_cache");
440 
441 		object_cache_set_minimum_reserve(cache, 1024);
442 		sPageMappingsObjectCaches[i] = cache;
443 	}
444 }
445 
446 
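// Page mappings are spread over several object caches, selected by the low
// bits of the physical page number. The mask works because the cache count is
// a power of two; presumably the split reduces contention on any single cache.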
447 static object_cache*
448 page_mapping_object_cache_for(page_num_t page)
449 {
450 	return sPageMappingsObjectCaches[page & sPageMappingsMask];
451 }
452 
453 
454 static vm_page_mapping*
455 allocate_page_mapping(page_num_t page, uint32 flags = 0)
456 {
457 	return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page),
458 		flags);
459 }
460 
461 
462 void
463 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags)
464 {
465 	object_cache_free(page_mapping_object_cache_for(page), mapping, flags);
466 }
467 
468 
469 //	#pragma mark -
470 
471 
472 /*!	The page's cache must be locked.
473 */
474 static inline void
475 increment_page_wired_count(vm_page* page)
476 {
477 	if (!page->IsMapped())
478 		atomic_add(&gMappedPagesCount, 1);
479 	page->IncrementWiredCount();
480 }
481 
482 
483 /*!	The page's cache must be locked.
484 */
485 static inline void
486 decrement_page_wired_count(vm_page* page)
487 {
488 	page->DecrementWiredCount();
489 	if (!page->IsMapped())
490 		atomic_add(&gMappedPagesCount, -1);
491 }
492 
493 
494 static inline addr_t
495 virtual_page_address(VMArea* area, vm_page* page)
496 {
497 	return area->Base()
498 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
499 }
500 
501 
502 static inline bool
503 is_page_in_area(VMArea* area, vm_page* page)
504 {
505 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
506 	return pageCacheOffsetBytes >= area->cache_offset
507 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
508 }
509 
510 
511 //! You need to have the address space locked when calling this function
512 static VMArea*
513 lookup_area(VMAddressSpace* addressSpace, area_id id)
514 {
515 	VMAreas::ReadLock();
516 
517 	VMArea* area = VMAreas::LookupLocked(id);
518 	if (area != NULL && area->address_space != addressSpace)
519 		area = NULL;
520 
521 	VMAreas::ReadUnlock();
522 
523 	return area;
524 }
525 
526 
527 static inline size_t
528 area_page_protections_size(size_t areaSize)
529 {
530 	// In the page protections we store only the three user protections,
531 	// so we use 4 bits per page.
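	// E.g. an area of 5 pages needs (5 + 1) / 2 = 3 bytes; the "+ 1" rounds up
	// for odd page counts.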
532 	return (areaSize / B_PAGE_SIZE + 1) / 2;
533 }
534 
535 
536 static status_t
537 allocate_area_page_protections(VMArea* area)
538 {
539 	size_t bytes = area_page_protections_size(area->Size());
540 	area->page_protections = (uint8*)malloc_etc(bytes,
541 		area->address_space == VMAddressSpace::Kernel()
542 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
543 	if (area->page_protections == NULL)
544 		return B_NO_MEMORY;
545 
546 	// init the page protections for all pages to that of the area
547 	uint32 areaProtection = area->protection
548 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
549 	memset(area->page_protections, areaProtection | (areaProtection << 4),
550 		bytes);
551 	return B_OK;
552 }
553 
554 
555 static inline void
556 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
557 {
558 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
559 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
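	// Each byte holds two entries: even page indices use the low nibble, odd
	// ones the high nibble.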
560 	uint8& entry = area->page_protections[pageIndex / 2];
561 	if (pageIndex % 2 == 0)
562 		entry = (entry & 0xf0) | protection;
563 	else
564 		entry = (entry & 0x0f) | (protection << 4);
565 }
566 
567 
568 static inline uint32
569 get_area_page_protection(VMArea* area, addr_t pageAddress)
570 {
571 	if (area->page_protections == NULL)
572 		return area->protection;
573 
574 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
575 	uint32 protection = area->page_protections[pageIndex / 2];
576 	if (pageIndex % 2 == 0)
577 		protection &= 0x0f;
578 	else
579 		protection >>= 4;
580 
581 	uint32 kernelProtection = 0;
582 	if ((protection & B_READ_AREA) != 0)
583 		kernelProtection |= B_KERNEL_READ_AREA;
584 	if ((protection & B_WRITE_AREA) != 0)
585 		kernelProtection |= B_KERNEL_WRITE_AREA;
586 
587 	// If this is a kernel area we return only the kernel flags.
588 	if (area->address_space == VMAddressSpace::Kernel())
589 		return kernelProtection;
590 
591 	return protection | kernelProtection;
592 }
593 
594 
595 static inline uint8*
596 realloc_page_protections(uint8* pageProtections, size_t areaSize,
597 	uint32 allocationFlags)
598 {
599 	size_t bytes = area_page_protections_size(areaSize);
600 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
601 }
602 
603 
604 /*!	The caller must have reserved as many pages as the translation map
605 	implementation might need to map this page.
606 	The page's cache must be locked.
607 */
608 static status_t
609 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
610 	vm_page_reservation* reservation)
611 {
612 	VMTranslationMap* map = area->address_space->TranslationMap();
613 
614 	bool wasMapped = page->IsMapped();
615 
616 	if (area->wiring == B_NO_LOCK) {
617 		DEBUG_PAGE_ACCESS_CHECK(page);
618 
619 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
620 		vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number,
621 			CACHE_DONT_WAIT_FOR_MEMORY
622 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
623 		if (mapping == NULL)
624 			return B_NO_MEMORY;
625 
626 		mapping->page = page;
627 		mapping->area = area;
628 
629 		map->Lock();
630 
631 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
632 			area->MemoryType(), reservation);
633 
634 		// insert mapping into lists
635 		if (!page->IsMapped())
636 			atomic_add(&gMappedPagesCount, 1);
637 
638 		page->mappings.Add(mapping);
639 		area->mappings.Add(mapping);
640 
641 		map->Unlock();
642 	} else {
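		// Wired mappings are not tracked with vm_page_mapping objects; only
		// the page's wired count is incremented below.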
643 		DEBUG_PAGE_ACCESS_CHECK(page);
644 
645 		map->Lock();
646 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
647 			area->MemoryType(), reservation);
648 		map->Unlock();
649 
650 		increment_page_wired_count(page);
651 	}
652 
653 	if (!wasMapped) {
654 		// The page is mapped now, so we must not remain in the cached queue.
655 		// It also makes sense to move it from the inactive to the active, since
656 		// otherwise the page daemon wouldn't come to keep track of it (in idle
657 		// mode) -- if the page isn't touched, it will be deactivated after a
658 		// full iteration through the queue at the latest.
659 		if (page->State() == PAGE_STATE_CACHED
660 				|| page->State() == PAGE_STATE_INACTIVE) {
661 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
662 		}
663 	}
664 
665 	return B_OK;
666 }
667 
668 
669 /*!	The caller must hold the lock of the page's cache.
671 */
672 static inline bool
673 unmap_page(VMArea* area, addr_t virtualAddress)
674 {
675 	return area->address_space->TranslationMap()->UnmapPage(area,
676 		virtualAddress, true);
677 }
678 
679 
680 /*!	The caller must hold the locks of all mapped pages' caches.
682 */
683 static inline void
684 unmap_pages(VMArea* area, addr_t base, size_t size)
685 {
686 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
687 }
688 
689 
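/*!	Clips the given range (\a address, \a size) to the area. On success,
	\a address and \a size describe the intersection and \a offset is its
	offset from the area's base; returns \c false if the range does not
	intersect the area at all.
*/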
690 static inline bool
691 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
692 {
693 	if (address < area->Base()) {
694 		offset = area->Base() - address;
695 		if (offset >= size)
696 			return false;
697 
698 		address = area->Base();
699 		size -= offset;
700 		offset = 0;
701 		if (size > area->Size())
702 			size = area->Size();
703 
704 		return true;
705 	}
706 
707 	offset = address - area->Base();
708 	if (offset >= area->Size())
709 		return false;
710 
711 	if (size >= area->Size() - offset)
712 		size = area->Size() - offset;
713 
714 	return true;
715 }
716 
717 
718 /*!	Cuts a piece out of an area. If the given cut range covers the complete
719 	area, it is deleted. If it covers the beginning or the end, the area is
720 	resized accordingly. If the range covers some part in the middle of the
721 	area, it is split in two; in this case the second area is returned via
722 	\a _secondArea (the variable is left untouched in the other cases).
723 	The address space must be write locked.
724 	The caller must ensure that no part of the given range is wired.
725 */
726 static status_t
727 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
728 	addr_t size, VMArea** _secondArea, bool kernel)
729 {
730 	addr_t offset;
731 	if (!intersect_area(area, address, size, offset))
732 		return B_OK;
733 
734 	// Is the area fully covered?
735 	if (address == area->Base() && size == area->Size()) {
736 		delete_area(addressSpace, area, false);
737 		return B_OK;
738 	}
739 
740 	int priority;
741 	uint32 allocationFlags;
742 	if (addressSpace == VMAddressSpace::Kernel()) {
743 		priority = VM_PRIORITY_SYSTEM;
744 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
745 			| HEAP_DONT_LOCK_KERNEL_SPACE;
746 	} else {
747 		priority = VM_PRIORITY_USER;
748 		allocationFlags = 0;
749 	}
750 
751 	VMCache* cache = vm_area_get_locked_cache(area);
752 	VMCacheChainLocker cacheChainLocker(cache);
753 	cacheChainLocker.LockAllSourceCaches();
754 
755 	// If no one else uses the area's cache and it's an anonymous cache, we can
756 	// resize or split it, too.
757 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
758 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
759 
760 	const addr_t oldSize = area->Size();
761 
762 	// Cut the end only?
763 	if (offset > 0 && size == area->Size() - offset) {
764 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
765 			allocationFlags);
766 		if (error != B_OK)
767 			return error;
768 
769 		if (area->page_protections != NULL) {
770 			uint8* newProtections = realloc_page_protections(
771 				area->page_protections, area->Size(), allocationFlags);
772 
773 			if (newProtections == NULL) {
774 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
775 				return B_NO_MEMORY;
776 			}
777 
778 			area->page_protections = newProtections;
779 		}
780 
781 		// unmap pages
782 		unmap_pages(area, address, size);
783 
784 		if (onlyCacheUser) {
785 			// Since VMCache::Resize() can temporarily drop the lock, we must
786 			// unlock all lower caches to prevent locking order inversion.
787 			cacheChainLocker.Unlock(cache);
788 			cache->Resize(cache->virtual_base + offset, priority);
789 			cache->ReleaseRefAndUnlock();
790 		}
791 
792 		return B_OK;
793 	}
794 
795 	// Cut the beginning only?
796 	if (area->Base() == address) {
797 		uint8* newProtections = NULL;
798 		if (area->page_protections != NULL) {
799 			// Allocate all memory before shifting as the shift might lose some
800 			// bits.
801 			newProtections = realloc_page_protections(NULL, area->Size(),
802 				allocationFlags);
803 
804 			if (newProtections == NULL)
805 				return B_NO_MEMORY;
806 		}
807 
808 		// resize the area
809 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
810 			allocationFlags);
811 		if (error != B_OK) {
812 			if (newProtections != NULL)
813 				free_etc(newProtections, allocationFlags);
814 			return error;
815 		}
816 
817 		if (area->page_protections != NULL) {
818 			size_t oldBytes = area_page_protections_size(oldSize);
819 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
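			// Each page uses 4 protection bits, so shifting by pagesShifted * 4
			// bits drops the entries belonging to the pages cut off the front.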
820 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
821 
822 			size_t bytes = area_page_protections_size(area->Size());
823 			memcpy(newProtections, area->page_protections, bytes);
824 			free_etc(area->page_protections, allocationFlags);
825 			area->page_protections = newProtections;
826 		}
827 
828 		// unmap pages
829 		unmap_pages(area, address, size);
830 
831 		if (onlyCacheUser) {
832 			// Since VMCache::Rebase() can temporarily drop the lock, we must
833 			// unlock all lower caches to prevent locking order inversion.
834 			cacheChainLocker.Unlock(cache);
835 			cache->Rebase(cache->virtual_base + size, priority);
836 			cache->ReleaseRefAndUnlock();
837 		}
838 		area->cache_offset += size;
839 
840 		return B_OK;
841 	}
842 
843 	// The tough part -- cut a piece out of the middle of the area.
844 	// We do that by shrinking the area to the beginning section and creating a
845 	// new area for the end section.
846 	addr_t firstNewSize = offset;
847 	addr_t secondBase = address + size;
848 	addr_t secondSize = area->Size() - offset - size;
849 
850 	// unmap pages
851 	unmap_pages(area, address, area->Size() - firstNewSize);
852 
853 	// resize the area
854 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
855 		allocationFlags);
856 	if (error != B_OK)
857 		return error;
858 
859 	uint8* areaNewProtections = NULL;
860 	uint8* secondAreaNewProtections = NULL;
861 
862 	// Try to allocate the new memory before making some hard to reverse
863 	// changes.
864 	if (area->page_protections != NULL) {
865 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
866 			allocationFlags);
867 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
868 			allocationFlags);
869 
870 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
871 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
872 			free_etc(areaNewProtections, allocationFlags);
873 			free_etc(secondAreaNewProtections, allocationFlags);
874 			return B_NO_MEMORY;
875 		}
876 	}
877 
878 	virtual_address_restrictions addressRestrictions = {};
879 	addressRestrictions.address = (void*)secondBase;
880 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
881 	VMArea* secondArea;
882 
883 	if (onlyCacheUser) {
884 		// Create a new cache for the second area.
885 		VMCache* secondCache;
886 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
887 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
888 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
889 		if (error != B_OK) {
890 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
891 			free_etc(areaNewProtections, allocationFlags);
892 			free_etc(secondAreaNewProtections, allocationFlags);
893 			return error;
894 		}
895 
896 		secondCache->Lock();
897 		secondCache->temporary = cache->temporary;
898 		secondCache->virtual_base = area->cache_offset;
899 		secondCache->virtual_end = area->cache_offset + secondSize;
900 
901 		// Transfer the concerned pages from the first cache.
902 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
903 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
904 			area->cache_offset);
905 
906 		if (error == B_OK) {
907 			// Since VMCache::Resize() can temporarily drop the lock, we must
908 			// unlock all lower caches to prevent locking order inversion.
909 			cacheChainLocker.Unlock(cache);
910 			cache->Resize(cache->virtual_base + firstNewSize, priority);
911 			// Don't unlock the cache yet because we might have to resize it
912 			// back.
913 
914 			// Map the second area.
915 			error = map_backing_store(addressSpace, secondCache,
916 				area->cache_offset, area->name, secondSize, area->wiring,
917 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
918 				&addressRestrictions, kernel, &secondArea, NULL);
919 		}
920 
921 		if (error != B_OK) {
922 			// Restore the original cache.
923 			cache->Resize(cache->virtual_base + oldSize, priority);
924 
925 			// Move the pages back.
926 			status_t readoptStatus = cache->Adopt(secondCache,
927 				area->cache_offset, secondSize, adoptOffset);
928 			if (readoptStatus != B_OK) {
929 				// Some (swap) pages have not been moved back and will be lost
930 				// once the second cache is deleted.
931 				panic("failed to restore cache range: %s",
932 					strerror(readoptStatus));
933 
934 				// TODO: Handle out of memory cases by freeing memory and
935 				// retrying.
936 			}
937 
938 			cache->ReleaseRefAndUnlock();
939 			secondCache->ReleaseRefAndUnlock();
940 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
941 			free_etc(areaNewProtections, allocationFlags);
942 			free_etc(secondAreaNewProtections, allocationFlags);
943 			return error;
944 		}
945 
946 		// Now we can unlock it.
947 		cache->ReleaseRefAndUnlock();
948 		secondCache->Unlock();
949 	} else {
950 		error = map_backing_store(addressSpace, cache, area->cache_offset
951 			+ (secondBase - area->Base()),
952 			area->name, secondSize, area->wiring, area->protection,
953 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
954 			&addressRestrictions, kernel, &secondArea, NULL);
955 		if (error != B_OK) {
956 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
957 			free_etc(areaNewProtections, allocationFlags);
958 			free_etc(secondAreaNewProtections, allocationFlags);
959 			return error;
960 		}
961 		// We need a cache reference for the new area.
962 		cache->AcquireRefLocked();
963 	}
964 
965 	if (area->page_protections != NULL) {
966 		// Copy the protection bits of the first area.
967 		size_t areaBytes = area_page_protections_size(area->Size());
968 		memcpy(areaNewProtections, area->page_protections, areaBytes);
969 		uint8* areaOldProtections = area->page_protections;
970 		area->page_protections = areaNewProtections;
971 
972 		// Shift the protection bits of the second area to the start of
973 		// the old array.
974 		size_t oldBytes = area_page_protections_size(oldSize);
975 		addr_t secondAreaOffset = secondBase - area->Base();
976 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
977 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
978 
979 		// Copy the protection bits of the second area.
980 		size_t secondAreaBytes = area_page_protections_size(secondSize);
981 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
982 		secondArea->page_protections = secondAreaNewProtections;
983 
984 		// We don't need this anymore.
985 		free_etc(areaOldProtections, allocationFlags);
986 
987 		// Set the correct page protections for the second area.
988 		VMTranslationMap* map = addressSpace->TranslationMap();
989 		map->Lock();
990 		for (VMCachePagesTree::Iterator it
991 				= secondArea->cache->pages.GetIterator();
992 				vm_page* page = it.Next();) {
993 			if (is_page_in_area(secondArea, page)) {
994 				addr_t address = virtual_page_address(secondArea, page);
995 				uint32 pageProtection
996 					= get_area_page_protection(secondArea, address);
997 				map->ProtectPage(secondArea, address, pageProtection);
998 			}
999 		}
1000 		map->Unlock();
1001 	}
1002 
1003 	if (_secondArea != NULL)
1004 		*_secondArea = secondArea;
1005 
1006 	return B_OK;
1007 }
1008 
1009 
1010 /*!	Deletes or cuts all areas in the given address range.
1011 	The address space must be write-locked.
1012 	The caller must ensure that no part of the given range is wired.
1013 */
1014 static status_t
1015 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1016 	bool kernel)
1017 {
1018 	size = PAGE_ALIGN(size);
1019 
1020 	// Check whether the caller is allowed to modify the concerned areas.
1021 	if (!kernel) {
1022 		for (VMAddressSpace::AreaRangeIterator it
1023 				= addressSpace->GetAreaRangeIterator(address, size);
1024 			VMArea* area = it.Next();) {
1025 
1026 			if ((area->protection & B_KERNEL_AREA) != 0) {
1027 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
1028 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
1029 					team_get_current_team_id(), area->id, area->name);
1030 				return B_NOT_ALLOWED;
1031 			}
1032 		}
1033 	}
1034 
1035 	for (VMAddressSpace::AreaRangeIterator it
1036 			= addressSpace->GetAreaRangeIterator(address, size);
1037 		VMArea* area = it.Next();) {
1038 
1039 		status_t error = cut_area(addressSpace, area, address, size, NULL,
1040 			kernel);
1041 		if (error != B_OK)
1042 			return error;
1043 			// Failing after already messing with areas is ugly, but we
1044 			// can't do anything about it.
1045 	}
1046 
1047 	return B_OK;
1048 }
1049 
1050 
1051 static status_t
1052 discard_area_range(VMArea* area, addr_t address, addr_t size)
1053 {
1054 	addr_t offset;
1055 	if (!intersect_area(area, address, size, offset))
1056 		return B_OK;
1057 
1058 	// If someone else uses the area's cache or it's not an anonymous cache, we
1059 	// can't discard.
1060 	VMCache* cache = vm_area_get_locked_cache(area);
1061 	if (cache->areas != area || area->cache_next != NULL
1062 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1063 		return B_OK;
1064 	}
1065 
1066 	VMCacheChainLocker cacheChainLocker(cache);
1067 	cacheChainLocker.LockAllSourceCaches();
1068 
1069 	unmap_pages(area, address, size);
1070 
1071 	// Since VMCache::Discard() can temporarily drop the lock, we must
1072 	// unlock all lower caches to prevent locking order inversion.
1073 	cacheChainLocker.Unlock(cache);
1074 	cache->Discard(cache->virtual_base + offset, size);
1075 	cache->ReleaseRefAndUnlock();
1076 
1077 	return B_OK;
1078 }
1079 
1080 
1081 static status_t
1082 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1083 	bool kernel)
1084 {
1085 	for (VMAddressSpace::AreaRangeIterator it
1086 		= addressSpace->GetAreaRangeIterator(address, size);
1087 			VMArea* area = it.Next();) {
1088 		status_t error = discard_area_range(area, address, size);
1089 		if (error != B_OK)
1090 			return error;
1091 	}
1092 
1093 	return B_OK;
1094 }
1095 
1096 
1097 /*! You need to hold the lock of the cache and the write lock of the address
1098 	space when calling this function.
1099 	Note that in case of error your cache will be temporarily unlocked.
1100 	If the address specification is \c B_EXACT_ADDRESS and the
1101 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1102 	that no part of the specified address range (base \c *_virtualAddress, size
1103 	\a size) is wired. The cache will also be temporarily unlocked.
1104 */
1105 static status_t
1106 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1107 	const char* areaName, addr_t size, int wiring, int protection,
1108 	int protectionMax, int mapping,
1109 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1110 	bool kernel, VMArea** _area, void** _virtualAddress)
1111 {
1112 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1113 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1114 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1115 		addressSpace, cache, addressRestrictions->address, offset, size,
1116 		addressRestrictions->address_specification, wiring, protection,
1117 		protectionMax, _area, areaName));
1118 	cache->AssertLocked();
1119 
1120 	if (size == 0) {
1121 #if KDEBUG
1122 		panic("map_backing_store(): called with size=0 for area '%s'!",
1123 			areaName);
1124 #endif
1125 		return B_BAD_VALUE;
1126 	}
1127 	if (offset < 0)
1128 		return B_BAD_VALUE;
1129 
1130 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1131 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1132 	int priority;
1133 	if (addressSpace != VMAddressSpace::Kernel()) {
1134 		priority = VM_PRIORITY_USER;
1135 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1136 		priority = VM_PRIORITY_VIP;
1137 		allocationFlags |= HEAP_PRIORITY_VIP;
1138 	} else
1139 		priority = VM_PRIORITY_SYSTEM;
1140 
1141 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1142 		allocationFlags);
1143 	if (area == NULL)
1144 		return B_NO_MEMORY;
1145 	if (mapping != REGION_PRIVATE_MAP)
1146 		area->protection_max = protectionMax & B_USER_PROTECTION;
1147 
1148 	status_t status;
1149 
1150 	// if this is a private map, we need to create a new cache
1151 	// to handle the private copies of pages as they are written to
1152 	VMCache* sourceCache = cache;
1153 	if (mapping == REGION_PRIVATE_MAP) {
1154 		VMCache* newCache;
1155 
1156 		// create an anonymous cache
1157 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1158 			(protection & B_STACK_AREA) != 0
1159 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1160 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1161 		if (status != B_OK)
1162 			goto err1;
1163 
1164 		newCache->Lock();
1165 		newCache->temporary = 1;
1166 		newCache->virtual_base = offset;
1167 		newCache->virtual_end = offset + size;
1168 
1169 		cache->AddConsumer(newCache);
1170 
1171 		cache = newCache;
1172 	}
1173 
1174 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1175 		status = cache->SetMinimalCommitment(size, priority);
1176 		if (status != B_OK)
1177 			goto err2;
1178 	}
1179 
1180 	// check to see if this address space has entered DELETE state
1181 	if (addressSpace->IsBeingDeleted()) {
1182 		// okay, someone is trying to delete this address space now, so we can't
1183 		// insert the area, so back out
1184 		status = B_BAD_TEAM_ID;
1185 		goto err2;
1186 	}
1187 
1188 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1189 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1190 		// temporarily unlock the current cache since it might be mapped to
1191 		// some existing area, and unmap_address_range also needs to lock that
1192 		// cache to delete the area.
1193 		cache->Unlock();
1194 		status = unmap_address_range(addressSpace,
1195 			(addr_t)addressRestrictions->address, size, kernel);
1196 		cache->Lock();
1197 		if (status != B_OK)
1198 			goto err2;
1199 	}
1200 
1201 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1202 		allocationFlags, _virtualAddress);
1203 	if (status == B_NO_MEMORY
1204 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1205 		// Due to how many locks are held, we cannot wait here for space to be
1206 		// freed up, but we can at least notify the low_resource handler.
1207 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1208 	}
1209 	if (status != B_OK)
1210 		goto err2;
1211 
1212 	// attach the cache to the area
1213 	area->cache = cache;
1214 	area->cache_offset = offset;
1215 
1216 	// point the cache back to the area
1217 	cache->InsertAreaLocked(area);
1218 	if (mapping == REGION_PRIVATE_MAP)
1219 		cache->Unlock();
1220 
1221 	// insert the area in the global areas map
1222 	VMAreas::Insert(area);
1223 
1224 	// grab a ref to the address space (the area holds this)
1225 	addressSpace->Get();
1226 
1227 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1228 //		cache, sourceCache, areaName, area);
1229 
1230 	*_area = area;
1231 	return B_OK;
1232 
1233 err2:
1234 	if (mapping == REGION_PRIVATE_MAP) {
1235 		// We created this cache, so we must delete it again. Note that we
1236 		// need to temporarily unlock the source cache or we'll otherwise
1237 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1238 		sourceCache->Unlock();
1239 		cache->ReleaseRefAndUnlock();
1240 		sourceCache->Lock();
1241 	}
1242 err1:
1243 	addressSpace->DeleteArea(area, allocationFlags);
1244 	return status;
1245 }
1246 
1247 
1248 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1249 	  locker1, locker2).
1250 */
1251 template<typename LockerType1, typename LockerType2>
1252 static inline bool
1253 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1254 {
1255 	area->cache->AssertLocked();
1256 
1257 	VMAreaUnwiredWaiter waiter;
1258 	if (!area->AddWaiterIfWired(&waiter))
1259 		return false;
1260 
1261 	// unlock everything and wait
1262 	if (locker1 != NULL)
1263 		locker1->Unlock();
1264 	if (locker2 != NULL)
1265 		locker2->Unlock();
1266 
1267 	waiter.waitEntry.Wait();
1268 
1269 	return true;
1270 }
1271 
1272 
1273 /*!	Checks whether the given area has any wired ranges intersecting with the
1274 	specified range and waits, if so.
1275 
1276 	When it has to wait, the function calls \c Unlock() on both \a locker1
1277 	and \a locker2, if given.
1278 	The area's top cache must be locked and must be unlocked as a side effect
1279 	of calling \c Unlock() on either \a locker1 or \a locker2.
1280 
1281 	If the function does not have to wait it does not modify or unlock any
1282 	object.
1283 
1284 	\param area The area to be checked.
1285 	\param base The base address of the range to check.
1286 	\param size The size of the address range to check.
1287 	\param locker1 An object to be unlocked before starting to wait (may
1288 		be \c NULL).
1289 	\param locker2 An object to be unlocked before starting to wait (may
1290 		be \c NULL).
1291 	\return \c true, if the function had to wait, \c false otherwise.
1292 */
1293 template<typename LockerType1, typename LockerType2>
1294 static inline bool
1295 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1296 	LockerType1* locker1, LockerType2* locker2)
1297 {
1298 	area->cache->AssertLocked();
1299 
1300 	VMAreaUnwiredWaiter waiter;
1301 	if (!area->AddWaiterIfWired(&waiter, base, size))
1302 		return false;
1303 
1304 	// unlock everything and wait
1305 	if (locker1 != NULL)
1306 		locker1->Unlock();
1307 	if (locker2 != NULL)
1308 		locker2->Unlock();
1309 
1310 	waiter.waitEntry.Wait();
1311 
1312 	return true;
1313 }
1314 
1315 
1316 /*!	Checks whether the given address space has any wired ranges intersecting
1317 	with the specified range and waits, if so.
1318 
1319 	Similar to wait_if_area_range_is_wired(), with the following differences:
1320 	- All areas intersecting with the range are checked (respectively all until
1321 	  one is found that contains a wired range intersecting with the given
1322 	  range).
1323 	- The given address space must at least be read-locked and must be unlocked
1324 	  when \c Unlock() is called on \a locker.
1325 	- None of the areas' caches are allowed to be locked.
1326 */
1327 template<typename LockerType>
1328 static inline bool
1329 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1330 	size_t size, LockerType* locker)
1331 {
1332 	for (VMAddressSpace::AreaRangeIterator it
1333 		= addressSpace->GetAreaRangeIterator(base, size);
1334 			VMArea* area = it.Next();) {
1335 
1336 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1337 
1338 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1339 			return true;
1340 	}
1341 
1342 	return false;
1343 }
1344 
1345 
1346 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1347 	It must be called in a situation where the kernel address space may be
1348 	locked.
1349 */
1350 status_t
1351 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1352 {
1353 	AddressSpaceReadLocker locker;
1354 	VMArea* area;
1355 	status_t status = locker.SetFromArea(id, area);
1356 	if (status != B_OK)
1357 		return status;
1358 
1359 	if (area->page_protections == NULL) {
1360 		status = allocate_area_page_protections(area);
1361 		if (status != B_OK)
1362 			return status;
1363 	}
1364 
1365 	*cookie = (void*)area;
1366 	return B_OK;
1367 }
1368 
1369 
1370 /*!	This is a debug helper function that can only be used with very specific
1371 	use cases.
1372 	Sets protection for the given address range to the protection specified.
1373 	If \a protection is 0 then the involved pages will be marked non-present
1374 	in the translation map to cause a fault on access. The pages aren't
1375 	actually unmapped however so that they can be marked present again with
1376 	additional calls to this function. For this to work the area must be
1377 	fully locked in memory so that the pages aren't otherwise touched.
1378 	This function does not lock the kernel address space and needs to be
1379 	supplied with a \a cookie retrieved from a successful call to
1380 	vm_prepare_kernel_area_debug_protection().
1381 */
1382 status_t
1383 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1384 	uint32 protection)
1385 {
1386 	// check address range
1387 	addr_t address = (addr_t)_address;
1388 	size = PAGE_ALIGN(size);
1389 
1390 	if ((address % B_PAGE_SIZE) != 0
1391 		|| (addr_t)address + size < (addr_t)address
1392 		|| !IS_KERNEL_ADDRESS(address)
1393 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1394 		return B_BAD_VALUE;
1395 	}
1396 
1397 	// Translate the kernel protection to user protection as we only store that.
1398 	if ((protection & B_KERNEL_READ_AREA) != 0)
1399 		protection |= B_READ_AREA;
1400 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1401 		protection |= B_WRITE_AREA;
1402 
1403 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1404 	VMTranslationMap* map = addressSpace->TranslationMap();
1405 	VMArea* area = (VMArea*)cookie;
1406 
1407 	addr_t offset = address - area->Base();
1408 	if (area->Size() - offset < size) {
1409 		panic("protect range not fully within supplied area");
1410 		return B_BAD_VALUE;
1411 	}
1412 
1413 	if (area->page_protections == NULL) {
1414 		panic("area has no page protections");
1415 		return B_BAD_VALUE;
1416 	}
1417 
1418 	// Invalidate the mapping entries so any access to them will fault or
1419 	// restore the mapping entries unchanged so that lookup will succeed again.
1420 	map->Lock();
1421 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1422 	map->Unlock();
1423 
1424 	// And set the proper page protections so that the fault case will actually
1425 	// fail and not simply try to map a new page.
1426 	for (addr_t pageAddress = address; pageAddress < address + size;
1427 			pageAddress += B_PAGE_SIZE) {
1428 		set_area_page_protection(area, pageAddress, protection);
1429 	}
1430 
1431 	return B_OK;
1432 }
1433 
1434 
1435 status_t
1436 vm_block_address_range(const char* name, void* address, addr_t size)
1437 {
1438 	if (!arch_vm_supports_protection(0))
1439 		return B_NOT_SUPPORTED;
1440 
1441 	AddressSpaceWriteLocker locker;
1442 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1443 	if (status != B_OK)
1444 		return status;
1445 
1446 	VMAddressSpace* addressSpace = locker.AddressSpace();
1447 
1448 	// create an anonymous cache
1449 	VMCache* cache;
1450 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1451 		VM_PRIORITY_SYSTEM);
1452 	if (status != B_OK)
1453 		return status;
1454 
1455 	cache->temporary = 1;
1456 	cache->virtual_end = size;
1457 	cache->Lock();
1458 
1459 	VMArea* area;
1460 	virtual_address_restrictions addressRestrictions = {};
1461 	addressRestrictions.address = address;
1462 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1463 	status = map_backing_store(addressSpace, cache, 0, name, size,
1464 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1465 		true, &area, NULL);
1466 	if (status != B_OK) {
1467 		cache->ReleaseRefAndUnlock();
1468 		return status;
1469 	}
1470 
1471 	cache->Unlock();
1472 	area->cache_type = CACHE_TYPE_RAM;
1473 	return area->id;
1474 }
1475 
1476 
1477 status_t
1478 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1479 {
1480 	AddressSpaceWriteLocker locker(team);
1481 	if (!locker.IsLocked())
1482 		return B_BAD_TEAM_ID;
1483 
1484 	VMAddressSpace* addressSpace = locker.AddressSpace();
1485 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1486 		addressSpace == VMAddressSpace::Kernel()
1487 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1488 }
1489 
1490 
1491 status_t
1492 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1493 	addr_t size, uint32 flags)
1494 {
1495 	if (size == 0)
1496 		return B_BAD_VALUE;
1497 
1498 	AddressSpaceWriteLocker locker(team);
1499 	if (!locker.IsLocked())
1500 		return B_BAD_TEAM_ID;
1501 
1502 	virtual_address_restrictions addressRestrictions = {};
1503 	addressRestrictions.address = *_address;
1504 	addressRestrictions.address_specification = addressSpec;
1505 	VMAddressSpace* addressSpace = locker.AddressSpace();
1506 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1507 		addressSpace == VMAddressSpace::Kernel()
1508 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1509 		_address);
1510 }
1511 
1512 
1513 area_id
1514 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1515 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1516 	const virtual_address_restrictions* virtualAddressRestrictions,
1517 	const physical_address_restrictions* physicalAddressRestrictions,
1518 	bool kernel, void** _address)
1519 {
1520 	VMArea* area;
1521 	VMCache* cache;
1522 	vm_page* page = NULL;
1523 	bool isStack = (protection & B_STACK_AREA) != 0;
1524 	page_num_t guardPages;
1525 	bool canOvercommit = false;
1526 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1527 		? VM_PAGE_ALLOC_CLEAR : 0;
1528 
1529 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1530 		team, name, size));
1531 
1532 	size = PAGE_ALIGN(size);
1533 	guardSize = PAGE_ALIGN(guardSize);
1534 	guardPages = guardSize / B_PAGE_SIZE;
1535 
1536 	if (size == 0 || size < guardSize)
1537 		return B_BAD_VALUE;
1538 	if (!arch_vm_supports_protection(protection))
1539 		return B_NOT_SUPPORTED;
1540 
1541 	if (team == B_CURRENT_TEAM)
1542 		team = VMAddressSpace::CurrentID();
1543 	if (team < 0)
1544 		return B_BAD_TEAM_ID;
1545 
1546 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1547 		canOvercommit = true;
1548 
1549 #ifdef DEBUG_KERNEL_STACKS
1550 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1551 		isStack = true;
1552 #endif
1553 
1554 	// check parameters
1555 	switch (virtualAddressRestrictions->address_specification) {
1556 		case B_ANY_ADDRESS:
1557 		case B_EXACT_ADDRESS:
1558 		case B_BASE_ADDRESS:
1559 		case B_ANY_KERNEL_ADDRESS:
1560 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1561 		case B_RANDOMIZED_ANY_ADDRESS:
1562 		case B_RANDOMIZED_BASE_ADDRESS:
1563 			break;
1564 
1565 		default:
1566 			return B_BAD_VALUE;
1567 	}
1568 
1569 	// If low or high physical address restrictions are given, we force
1570 	// B_CONTIGUOUS wiring, since only then we'll use
1571 	// vm_page_allocate_page_run() which deals with those restrictions.
1572 	if (physicalAddressRestrictions->low_address != 0
1573 		|| physicalAddressRestrictions->high_address != 0) {
1574 		wiring = B_CONTIGUOUS;
1575 	}
1576 
1577 	physical_address_restrictions stackPhysicalRestrictions;
1578 	bool doReserveMemory = false;
1579 	switch (wiring) {
1580 		case B_NO_LOCK:
1581 			break;
1582 		case B_FULL_LOCK:
1583 		case B_LAZY_LOCK:
1584 		case B_CONTIGUOUS:
1585 			doReserveMemory = true;
1586 			break;
1587 		case B_ALREADY_WIRED:
1588 			break;
1589 		case B_LOMEM:
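			// Restrict the allocation to the first 16 MB of physical memory.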
1590 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1591 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1592 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1593 			wiring = B_CONTIGUOUS;
1594 			doReserveMemory = true;
1595 			break;
1596 		case B_32_BIT_FULL_LOCK:
1597 			if (B_HAIKU_PHYSICAL_BITS <= 32
1598 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1599 				wiring = B_FULL_LOCK;
1600 				doReserveMemory = true;
1601 				break;
1602 			}
1603 			// TODO: We don't really support this mode efficiently. Just fall
1604 			// through for now ...
1605 		case B_32_BIT_CONTIGUOUS:
1606 			#if B_HAIKU_PHYSICAL_BITS > 32
1607 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1608 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1609 					stackPhysicalRestrictions.high_address
1610 						= (phys_addr_t)1 << 32;
1611 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1612 				}
1613 			#endif
1614 			wiring = B_CONTIGUOUS;
1615 			doReserveMemory = true;
1616 			break;
1617 		default:
1618 			return B_BAD_VALUE;
1619 	}
1620 
1621 	// Optimization: For a single-page contiguous allocation without low/high
1622 	// memory restriction B_FULL_LOCK wiring suffices.
1623 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1624 		&& physicalAddressRestrictions->low_address == 0
1625 		&& physicalAddressRestrictions->high_address == 0) {
1626 		wiring = B_FULL_LOCK;
1627 	}
1628 
1629 	// For full lock or contiguous areas we're also going to map the pages and
1630 	// thus need to reserve pages for the mapping backend upfront.
1631 	addr_t reservedMapPages = 0;
1632 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1633 		AddressSpaceWriteLocker locker;
1634 		status_t status = locker.SetTo(team);
1635 		if (status != B_OK)
1636 			return status;
1637 
1638 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1639 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1640 	}
1641 
1642 	int priority;
1643 	if (team != VMAddressSpace::KernelID())
1644 		priority = VM_PRIORITY_USER;
1645 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1646 		priority = VM_PRIORITY_VIP;
1647 	else
1648 		priority = VM_PRIORITY_SYSTEM;
1649 
1650 	// Reserve memory before acquiring the address space lock. This reduces the
1651 	// chances of failure, since while holding the write lock to the address
1652 	// space (if it is the kernel address space that is), the low memory handler
1653 	// won't be able to free anything for us.
1654 	addr_t reservedMemory = 0;
1655 	if (doReserveMemory) {
1656 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1657 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1658 			return B_NO_MEMORY;
1659 		reservedMemory = size;
1660 		// TODO: We don't reserve the memory for the pages for the page
1661 		// directories/tables. We actually need to, since we currently don't
1662 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1663 		// there are actually fewer physical pages than there should be, which
1664 		// can get the VM into trouble in low memory situations.
1665 	}
1666 
1667 	AddressSpaceWriteLocker locker;
1668 	VMAddressSpace* addressSpace;
1669 	status_t status;
1670 
1671 	// For full lock areas reserve the pages before locking the address
1672 	// space. E.g. block caches can't release their memory while we hold the
1673 	// address space lock.
1674 	page_num_t reservedPages = reservedMapPages;
1675 	if (wiring == B_FULL_LOCK)
1676 		reservedPages += size / B_PAGE_SIZE;
1677 
1678 	vm_page_reservation reservation;
1679 	if (reservedPages > 0) {
1680 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1681 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1682 					priority)) {
1683 				reservedPages = 0;
1684 				status = B_WOULD_BLOCK;
1685 				goto err0;
1686 			}
1687 		} else
1688 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1689 	}
1690 
1691 	if (wiring == B_CONTIGUOUS) {
1692 		// we try to allocate the page run here upfront as this may easily
1693 		// fail for obvious reasons
1694 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1695 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1696 		if (page == NULL) {
1697 			status = B_NO_MEMORY;
1698 			goto err0;
1699 		}
1700 	}
1701 
1702 	// Lock the address space and, if B_EXACT_ADDRESS and
1703 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1704 	// is not wired.
1705 	do {
1706 		status = locker.SetTo(team);
1707 		if (status != B_OK)
1708 			goto err1;
1709 
1710 		addressSpace = locker.AddressSpace();
1711 	} while (virtualAddressRestrictions->address_specification
1712 			== B_EXACT_ADDRESS
1713 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1714 		&& wait_if_address_range_is_wired(addressSpace,
1715 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1716 
1717 	// create an anonymous cache
1718 	// if it's a stack, make sure that two pages are available at least
1719 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1720 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1721 		wiring == B_NO_LOCK, priority);
1722 	if (status != B_OK)
1723 		goto err1;
1724 
1725 	cache->temporary = 1;
1726 	cache->virtual_end = size;
1727 	cache->committed_size = reservedMemory;
1728 		// TODO: This should be done via a method.
1729 	reservedMemory = 0;
1730 
1731 	cache->Lock();
1732 
1733 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1734 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1735 		virtualAddressRestrictions, kernel, &area, _address);
1736 
1737 	if (status != B_OK) {
1738 		cache->ReleaseRefAndUnlock();
1739 		goto err1;
1740 	}
1741 
1742 	locker.DegradeToReadLock();
1743 
1744 	switch (wiring) {
1745 		case B_NO_LOCK:
1746 		case B_LAZY_LOCK:
1747 			// do nothing - the pages are mapped in as needed
1748 			break;
1749 
1750 		case B_FULL_LOCK:
1751 		{
1752 			// Allocate and map all pages for this area
1753 
1754 			off_t offset = 0;
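			// The loop bound is the address of the area's last byte
			// (Size() - 1), so the end computation cannot wrap to 0 even if
			// the area ends at the very top of the address space.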
1755 			for (addr_t address = area->Base();
1756 					address < area->Base() + (area->Size() - 1);
1757 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1758 #ifdef DEBUG_KERNEL_STACKS
1759 #	ifdef STACK_GROWS_DOWNWARDS
1760 				if (isStack && address < area->Base()
1761 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1762 #	else
1763 				if (isStack && address >= area->Base() + area->Size()
1764 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1765 #	endif
1766 					continue;
1767 #endif
1768 				vm_page* page = vm_page_allocate_page(&reservation,
1769 					PAGE_STATE_WIRED | pageAllocFlags);
1770 				cache->InsertPage(page, offset);
1771 				map_page(area, page, address, protection, &reservation);
1772 
1773 				DEBUG_PAGE_ACCESS_END(page);
1774 			}
1775 
1776 			break;
1777 		}
1778 
1779 		case B_ALREADY_WIRED:
1780 		{
1781 			// The pages should already be mapped. This is only really useful
1782 			// during boot time. Find the appropriate vm_page objects and stick
1783 			// them in the cache object.
1784 			VMTranslationMap* map = addressSpace->TranslationMap();
1785 			off_t offset = 0;
1786 
1787 			if (!gKernelStartup)
1788 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1789 
1790 			map->Lock();
1791 
1792 			for (addr_t virtualAddress = area->Base();
1793 					virtualAddress < area->Base() + (area->Size() - 1);
1794 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1795 				phys_addr_t physicalAddress;
1796 				uint32 flags;
1797 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1798 				if (status < B_OK) {
1799 					panic("looking up mapping failed for va 0x%lx\n",
1800 						virtualAddress);
1801 				}
1802 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1803 				if (page == NULL) {
1804 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1805 						"\n", physicalAddress);
1806 				}
1807 
1808 				DEBUG_PAGE_ACCESS_START(page);
1809 
1810 				cache->InsertPage(page, offset);
1811 				increment_page_wired_count(page);
1812 				vm_page_set_state(page, PAGE_STATE_WIRED);
1813 				page->busy = false;
1814 
1815 				DEBUG_PAGE_ACCESS_END(page);
1816 			}
1817 
1818 			map->Unlock();
1819 			break;
1820 		}
1821 
1822 		case B_CONTIGUOUS:
1823 		{
1824 			// We have already allocated our contiguous page run, so we can now
1825 			// just map it into the address space
1826 			VMTranslationMap* map = addressSpace->TranslationMap();
1827 			phys_addr_t physicalAddress
1828 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1829 			addr_t virtualAddress = area->Base();
1830 			off_t offset = 0;
1831 
1832 			map->Lock();
1833 
1834 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1835 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1836 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1837 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1838 				if (page == NULL)
1839 					panic("couldn't lookup physical page just allocated\n");
1840 
1841 				status = map->Map(virtualAddress, physicalAddress, protection,
1842 					area->MemoryType(), &reservation);
1843 				if (status < B_OK)
1844 					panic("couldn't map physical page in page run\n");
1845 
1846 				cache->InsertPage(page, offset);
1847 				increment_page_wired_count(page);
1848 
1849 				DEBUG_PAGE_ACCESS_END(page);
1850 			}
1851 
1852 			map->Unlock();
1853 			break;
1854 		}
1855 
1856 		default:
1857 			break;
1858 	}
1859 
1860 	cache->Unlock();
1861 
1862 	if (reservedPages > 0)
1863 		vm_page_unreserve_pages(&reservation);
1864 
1865 	TRACE(("vm_create_anonymous_area: done\n"));
1866 
1867 	area->cache_type = CACHE_TYPE_RAM;
1868 	return area->id;
1869 
1870 err1:
1871 	if (wiring == B_CONTIGUOUS) {
1872 		// we had allocated the contiguous page run upfront; free its pages again
1873 		phys_addr_t pageNumber = page->physical_page_number;
1874 		int32 i;
1875 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1876 			page = vm_lookup_page(pageNumber);
1877 			if (page == NULL)
1878 				panic("couldn't lookup physical page just allocated\n");
1879 
1880 			vm_page_set_state(page, PAGE_STATE_FREE);
1881 		}
1882 	}
1883 
1884 err0:
1885 	if (reservedPages > 0)
1886 		vm_page_unreserve_pages(&reservation);
1887 	if (reservedMemory > 0)
1888 		vm_unreserve_memory(reservedMemory);
1889 
1890 	return status;
1891 }
1892 
1893 
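/*!	Maps the given physical address range into the address space of \a team
	as an area backed by a device cache. If \a physicalAddress is not page
	aligned, the range is extended accordingly and the returned \a _address
	is offset into the first page. The memory type can be passed OR'd into
	\a addressSpec (one of the B_MTR_* constants); it defaults to uncached.

	A minimal usage sketch (illustrative only -- the area name and the
	"fbPhysicalBase"/"fbSize" values are hypothetical), mapping a framebuffer
	write-combined into the kernel address space:

		void* fbBase = NULL;
		area_id fbArea = vm_map_physical_memory(VMAddressSpace::KernelID(),
			"framebuffer", &fbBase, B_ANY_KERNEL_ADDRESS | B_MTR_WC, fbSize,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, fbPhysicalBase, false);

	Drivers normally reach this function through the public
	map_physical_memory() API rather than calling it directly.
*/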
1894 area_id
1895 vm_map_physical_memory(team_id team, const char* name, void** _address,
1896 	uint32 addressSpec, addr_t size, uint32 protection,
1897 	phys_addr_t physicalAddress, bool alreadyWired)
1898 {
1899 	VMArea* area;
1900 	VMCache* cache;
1901 	addr_t mapOffset;
1902 
1903 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1904 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1905 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1906 		addressSpec, size, protection, physicalAddress));
1907 
1908 	if (!arch_vm_supports_protection(protection))
1909 		return B_NOT_SUPPORTED;
1910 
1911 	AddressSpaceWriteLocker locker(team);
1912 	if (!locker.IsLocked())
1913 		return B_BAD_TEAM_ID;
1914 
1915 	// if the physical address is not page aligned, move the area down so
1916 	// that it starts on a page boundary (the offset is added back below)
1917 	mapOffset = physicalAddress % B_PAGE_SIZE;
1918 	size += mapOffset;
1919 	physicalAddress -= mapOffset;
1920 
1921 	size = PAGE_ALIGN(size);
1922 
1923 	// create a device cache
1924 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1925 	if (status != B_OK)
1926 		return status;
1927 
1928 	cache->virtual_end = size;
1929 
1930 	cache->Lock();
1931 
1932 	virtual_address_restrictions addressRestrictions = {};
1933 	addressRestrictions.address = *_address;
1934 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1935 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1936 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1937 		true, &area, _address);
1938 
1939 	if (status < B_OK)
1940 		cache->ReleaseRefLocked();
1941 
1942 	cache->Unlock();
1943 
1944 	if (status == B_OK) {
1945 		// set requested memory type -- use uncached, if not given
1946 		uint32 memoryType = addressSpec & B_MTR_MASK;
1947 		if (memoryType == 0)
1948 			memoryType = B_MTR_UC;
1949 
1950 		area->SetMemoryType(memoryType);
1951 
1952 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1953 		if (status != B_OK)
1954 			delete_area(locker.AddressSpace(), area, false);
1955 	}
1956 
1957 	if (status != B_OK)
1958 		return status;
1959 
1960 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1961 
1962 	if (alreadyWired) {
1963 		// The area is already mapped, but possibly not with the right
1964 		// memory type.
1965 		map->Lock();
1966 		map->ProtectArea(area, area->protection);
1967 		map->Unlock();
1968 	} else {
1969 		// Map the area completely.
1970 
1971 		// reserve pages needed for the mapping
1972 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1973 			area->Base() + (size - 1));
1974 		vm_page_reservation reservation;
1975 		vm_page_reserve_pages(&reservation, reservePages,
1976 			team == VMAddressSpace::KernelID()
1977 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1978 
1979 		map->Lock();
1980 
1981 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1982 			map->Map(area->Base() + offset, physicalAddress + offset,
1983 				protection, area->MemoryType(), &reservation);
1984 		}
1985 
1986 		map->Unlock();
1987 
1988 		vm_page_unreserve_pages(&reservation);
1989 	}
1990 
1991 	// modify the pointer returned to be offset back into the new area
1992 	// the same way the physical address in was offset
1993 	*_address = (void*)((addr_t)*_address + mapOffset);
1994 
1995 	area->cache_type = CACHE_TYPE_DEVICE;
1996 	return area->id;
1997 }
1998 
1999 
2000 /*!	Don't use!
2001 	TODO: This function was introduced to map physical page vecs to
2002 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
2003 	use a device cache and does not track vm_page::wired_count!
2004 */
2005 area_id
2006 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2007 	uint32 addressSpec, addr_t* _size, uint32 protection,
2008 	struct generic_io_vec* vecs, uint32 vecCount)
2009 {
2010 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
2011 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
2012 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
2013 		addressSpec, _size, protection, vecs, vecCount));
2014 
2015 	if (!arch_vm_supports_protection(protection)
2016 		|| (addressSpec & B_MTR_MASK) != 0) {
2017 		return B_NOT_SUPPORTED;
2018 	}
2019 
2020 	AddressSpaceWriteLocker locker(team);
2021 	if (!locker.IsLocked())
2022 		return B_BAD_TEAM_ID;
2023 
2024 	if (vecCount == 0)
2025 		return B_BAD_VALUE;
2026 
2027 	addr_t size = 0;
2028 	for (uint32 i = 0; i < vecCount; i++) {
2029 		if (vecs[i].base % B_PAGE_SIZE != 0
2030 			|| vecs[i].length % B_PAGE_SIZE != 0) {
2031 			return B_BAD_VALUE;
2032 		}
2033 
2034 		size += vecs[i].length;
2035 	}
2036 
2037 	// create a device cache
2038 	VMCache* cache;
2039 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
2040 	if (result != B_OK)
2041 		return result;
2042 
2043 	cache->virtual_end = size;
2044 
2045 	cache->Lock();
2046 
2047 	VMArea* area;
2048 	virtual_address_restrictions addressRestrictions = {};
2049 	addressRestrictions.address = *_address;
2050 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
2051 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
2052 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
2053 		&addressRestrictions, true, &area, _address);
2054 
2055 	if (result != B_OK)
2056 		cache->ReleaseRefLocked();
2057 
2058 	cache->Unlock();
2059 
2060 	if (result != B_OK)
2061 		return result;
2062 
2063 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2064 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2065 		area->Base() + (size - 1));
2066 
2067 	vm_page_reservation reservation;
2068 	vm_page_reserve_pages(&reservation, reservePages,
2069 		team == VMAddressSpace::KernelID()
2070 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2071 	map->Lock();
2072 
2073 	uint32 vecIndex = 0;
2074 	size_t vecOffset = 0;
2075 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2076 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2077 			vecOffset = 0;
2078 			vecIndex++;
2079 		}
2080 
2081 		if (vecIndex >= vecCount)
2082 			break;
2083 
2084 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2085 			protection, area->MemoryType(), &reservation);
2086 
2087 		vecOffset += B_PAGE_SIZE;
2088 	}
2089 
2090 	map->Unlock();
2091 	vm_page_unreserve_pages(&reservation);
2092 
2093 	if (_size != NULL)
2094 		*_size = size;
2095 
2096 	area->cache_type = CACHE_TYPE_DEVICE;
2097 	return area->id;
2098 }
2099 
2100 
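/*!	Creates an area backed by a null cache. Such an area merely reserves a
	range of address space; there is no usable memory behind it, so any page
	fault within it will fail.
*/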
2101 area_id
2102 vm_create_null_area(team_id team, const char* name, void** address,
2103 	uint32 addressSpec, addr_t size, uint32 flags)
2104 {
2105 	size = PAGE_ALIGN(size);
2106 
2107 	// Lock the address space and, if B_EXACT_ADDRESS and
2108 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2109 	// is not wired.
2110 	AddressSpaceWriteLocker locker;
2111 	do {
2112 		if (locker.SetTo(team) != B_OK)
2113 			return B_BAD_TEAM_ID;
2114 	} while (addressSpec == B_EXACT_ADDRESS
2115 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2116 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2117 			(addr_t)*address, size, &locker));
2118 
2119 	// create a null cache
2120 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2121 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2122 	VMCache* cache;
2123 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2124 	if (status != B_OK)
2125 		return status;
2126 
2127 	cache->temporary = 1;
2128 	cache->virtual_end = size;
2129 
2130 	cache->Lock();
2131 
2132 	VMArea* area;
2133 	virtual_address_restrictions addressRestrictions = {};
2134 	addressRestrictions.address = *address;
2135 	addressRestrictions.address_specification = addressSpec;
2136 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2137 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2138 		REGION_NO_PRIVATE_MAP, flags,
2139 		&addressRestrictions, true, &area, address);
2140 
2141 	if (status < B_OK) {
2142 		cache->ReleaseRefAndUnlock();
2143 		return status;
2144 	}
2145 
2146 	cache->Unlock();
2147 
2148 	area->cache_type = CACHE_TYPE_NULL;
2149 	return area->id;
2150 }
2151 
2152 
2153 /*!	Creates the vnode cache for the specified \a vnode.
2154 	The vnode has to be marked busy when calling this function.
2155 */
2156 status_t
2157 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2158 {
2159 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2160 }
2161 
2162 
2163 /*!	\a cache must be locked. The area's address space must be read-locked.
2164 */
2165 static void
2166 pre_map_area_pages(VMArea* area, VMCache* cache,
2167 	vm_page_reservation* reservation, int32 maxCount)
2168 {
2169 	addr_t baseAddress = area->Base();
2170 	addr_t cacheOffset = area->cache_offset;
2171 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2172 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2173 
2174 	VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true);
2175 	vm_page* page;
2176 	while ((page = it.Next()) != NULL && maxCount > 0) {
2177 		if (page->cache_offset >= endPage)
2178 			break;
2179 
2180 		// skip busy and inactive pages
2181 		if (page->busy || (page->usage_count == 0 && !page->accessed))
2182 			continue;
2183 
2184 		DEBUG_PAGE_ACCESS_START(page);
2185 		map_page(area, page,
2186 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2187 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2188 		maxCount--;
2189 		DEBUG_PAGE_ACCESS_END(page);
2190 	}
2191 }
2192 
2193 
2194 /*!	Will map the file specified by \a fd to an area in memory.
2195 	The file will be mirrored beginning at the specified \a offset. The
2196 	\a offset and \a size arguments have to be page aligned.
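	With \a mapping \c REGION_PRIVATE_MAP the file is mapped copy-on-write,
	i.e. changes are private to the mapping and are not written back to the
	file; with \c REGION_NO_PRIVATE_MAP the mapping is shared and backed
	directly by the vnode cache.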
2197 */
2198 static area_id
2199 _vm_map_file(team_id team, const char* name, void** _address,
2200 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2201 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2202 {
2203 	// TODO: for binary files, we want to make sure that they get a
2204 	//	snapshot of the file at mapping time, i.e. later changes to the
2205 	//	file should not show up in the mapped copy -- doing this in a
2206 	//	clean way will require quite a few changes
2207 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2208 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2209 
2210 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2211 	size = PAGE_ALIGN(size);
2212 
2213 	if (mapping == REGION_NO_PRIVATE_MAP)
2214 		protection |= B_SHARED_AREA;
2215 	if (addressSpec != B_EXACT_ADDRESS)
2216 		unmapAddressRange = false;
2217 
2218 	uint32 mappingFlags = 0;
2219 	if (unmapAddressRange)
2220 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2221 
2222 	if (fd < 0) {
2223 		virtual_address_restrictions virtualRestrictions = {};
2224 		virtualRestrictions.address = *_address;
2225 		virtualRestrictions.address_specification = addressSpec;
2226 		physical_address_restrictions physicalRestrictions = {};
2227 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2228 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2229 			_address);
2230 	}
2231 
2232 	// get the open flags of the FD
2233 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2234 	if (descriptor == NULL)
2235 		return EBADF;
2236 	int32 openMode = descriptor->open_mode;
2237 	put_fd(descriptor);
2238 
2239 	// The FD must be open for reading in any case. For a shared mapping with
2240 	// write access, the FD must additionally be open for writing.
2241 	if ((openMode & O_ACCMODE) == O_WRONLY
2242 		|| (mapping == REGION_NO_PRIVATE_MAP
2243 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2244 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2245 		return EACCES;
2246 	}
2247 
2248 	uint32 protectionMax = 0;
2249 	if (mapping == REGION_NO_PRIVATE_MAP) {
2250 		if ((openMode & O_ACCMODE) == O_RDWR)
2251 			protectionMax = protection | B_USER_PROTECTION;
2252 		else
2253 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2254 	} else if (mapping == REGION_PRIVATE_MAP) {
2255 		// For privately mapped read-only regions, skip committing memory.
2256 		// (If protections are changed later on, memory will be committed then.)
2257 		if ((protection & B_WRITE_AREA) == 0)
2258 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2259 	}
2260 
2261 	// get the vnode for the object, this also grabs a ref to it
2262 	struct vnode* vnode = NULL;
2263 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2264 	if (status < B_OK)
2265 		return status;
2266 	VnodePutter vnodePutter(vnode);
2267 
2268 	// If we're going to pre-map pages, we need to reserve the pages needed by
2269 	// the mapping backend upfront.
2270 	page_num_t reservedPreMapPages = 0;
2271 	vm_page_reservation reservation;
2272 	if ((protection & B_READ_AREA) != 0) {
2273 		AddressSpaceWriteLocker locker;
2274 		status = locker.SetTo(team);
2275 		if (status != B_OK)
2276 			return status;
2277 
2278 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2279 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2280 
2281 		locker.Unlock();
2282 
2283 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2284 			team == VMAddressSpace::KernelID()
2285 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2286 	}
2287 
2288 	struct PageUnreserver {
2289 		PageUnreserver(vm_page_reservation* reservation)
2290 			:
2291 			fReservation(reservation)
2292 		{
2293 		}
2294 
2295 		~PageUnreserver()
2296 		{
2297 			if (fReservation != NULL)
2298 				vm_page_unreserve_pages(fReservation);
2299 		}
2300 
2301 		vm_page_reservation* fReservation;
2302 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2303 
2304 	// Lock the address space and, if the specified address range shall be
2305 	// unmapped, ensure it is not wired.
2306 	AddressSpaceWriteLocker locker;
2307 	do {
2308 		if (locker.SetTo(team) != B_OK)
2309 			return B_BAD_TEAM_ID;
2310 	} while (unmapAddressRange
2311 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2312 			(addr_t)*_address, size, &locker));
2313 
2314 	// TODO: this only works for file systems that use the file cache
2315 	VMCache* cache;
2316 	status = vfs_get_vnode_cache(vnode, &cache, false);
2317 	if (status < B_OK)
2318 		return status;
2319 
2320 	cache->Lock();
2321 
2322 	VMArea* area;
2323 	virtual_address_restrictions addressRestrictions = {};
2324 	addressRestrictions.address = *_address;
2325 	addressRestrictions.address_specification = addressSpec;
2326 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2327 		0, protection, protectionMax, mapping, mappingFlags,
2328 		&addressRestrictions, kernel, &area, _address);
2329 
2330 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2331 		// map_backing_store() cannot know we no longer need the ref
2332 		cache->ReleaseRefLocked();
2333 	}
2334 
2335 	if (status == B_OK && (protection & B_READ_AREA) != 0) {
2336 		// Pre-map at most 10MB worth of pages.
2337 		pre_map_area_pages(area, cache, &reservation,
2338 			(10LL * 1024 * 1024) / B_PAGE_SIZE);
2339 	}
2340 
2341 	cache->Unlock();
2342 
2343 	if (status == B_OK) {
2344 		// TODO: this probably deserves a smarter solution, e.g. probably
2345 		// trigger prefetch somewhere else.
2346 
2347 		// Prefetch at most 10MB starting from "offset", but only if the cache
2348 		// doesn't already contain more pages than the prefetch size.
2349 		const size_t prefetch = min_c(size, 10LL * 1024 * 1024);
2350 		if (cache->page_count < (prefetch / B_PAGE_SIZE))
2351 			cache_prefetch_vnode(vnode, offset, prefetch);
2352 	}
2353 
2354 	if (status != B_OK)
2355 		return status;
2356 
2357 	area->cache_type = CACHE_TYPE_VNODE;
2358 	return area->id;
2359 }
2360 
2361 
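/*!	Kernel-side wrapper around _vm_map_file(). A hypothetical private,
	read-only mapping of an already opened file \c fd could look like this
	("fileSize" is assumed to be a page aligned size obtained elsewhere):

		void* address = NULL;
		area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
			&address, B_ANY_KERNEL_ADDRESS, fileSize, B_KERNEL_READ_AREA,
			REGION_PRIVATE_MAP, false, fd, 0);
*/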
2362 area_id
2363 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2364 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2365 	int fd, off_t offset)
2366 {
2367 	if (!arch_vm_supports_protection(protection))
2368 		return B_NOT_SUPPORTED;
2369 
2370 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2371 		mapping, unmapAddressRange, fd, offset, true);
2372 }
2373 
2374 
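/*!	Returns the locked cache of the given \a area, with a reference acquired
	for the caller. Since the area's cache may be replaced while we wait for
	the cache lock, the function re-checks \c area->cache after locking and
	retries until it has locked the cache that is actually attached to the
	area. Release with vm_area_put_locked_cache().
*/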
2375 VMCache*
2376 vm_area_get_locked_cache(VMArea* area)
2377 {
2378 	rw_lock_read_lock(&sAreaCacheLock);
2379 
2380 	while (true) {
2381 		VMCache* cache = area->cache;
2382 
2383 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2384 			// cache has been deleted
2385 			rw_lock_read_lock(&sAreaCacheLock);
2386 			continue;
2387 		}
2388 
2389 		rw_lock_read_lock(&sAreaCacheLock);
2390 
2391 		if (cache == area->cache) {
2392 			cache->AcquireRefLocked();
2393 			rw_lock_read_unlock(&sAreaCacheLock);
2394 			return cache;
2395 		}
2396 
2397 		// the cache changed in the meantime
2398 		cache->Unlock();
2399 	}
2400 }
2401 
2402 
2403 void
2404 vm_area_put_locked_cache(VMCache* cache)
2405 {
2406 	cache->ReleaseRefAndUnlock();
2407 }
2408 
2409 
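/*!	Clones the area with ID \a sourceID into the address space of \a team,
	creating a new area that maps the source area's memory at a (possibly)
	different address. The source area is marked B_SHARED_AREA so that
	copy-on-write does not get in the way.

	A minimal sketch of how this is typically reached from the public API
	(the clone name and "sourceArea" are hypothetical):

		void* address = NULL;
		area_id clone = clone_area("shared buffer clone", &address,
			B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceArea);
*/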
2410 area_id
2411 vm_clone_area(team_id team, const char* name, void** address,
2412 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2413 	bool kernel)
2414 {
2415 	VMArea* newArea = NULL;
2416 	VMArea* sourceArea;
2417 
2418 	// Check whether the source area exists and is cloneable. If so, mark it
2419 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2420 	{
2421 		AddressSpaceWriteLocker locker;
2422 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2423 		if (status != B_OK)
2424 			return status;
2425 
2426 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2427 			return B_NOT_ALLOWED;
2428 
2429 		sourceArea->protection |= B_SHARED_AREA;
2430 		protection |= B_SHARED_AREA;
2431 	}
2432 
2433 	// Now lock both address spaces and actually do the cloning.
2434 
2435 	MultiAddressSpaceLocker locker;
2436 	VMAddressSpace* sourceAddressSpace;
2437 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2438 	if (status != B_OK)
2439 		return status;
2440 
2441 	VMAddressSpace* targetAddressSpace;
2442 	status = locker.AddTeam(team, true, &targetAddressSpace);
2443 	if (status != B_OK)
2444 		return status;
2445 
2446 	status = locker.Lock();
2447 	if (status != B_OK)
2448 		return status;
2449 
2450 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2451 	if (sourceArea == NULL)
2452 		return B_BAD_VALUE;
2453 
2454 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2455 		return B_NOT_ALLOWED;
2456 
2457 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2458 
2459 	if (!kernel && sourceAddressSpace != targetAddressSpace
2460 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2461 #if KDEBUG
2462 		Team* team = thread_get_current_thread()->team;
2463 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2464 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2465 #endif
2466 		status = B_NOT_ALLOWED;
2467 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2468 		status = B_NOT_ALLOWED;
2469 	} else {
2470 		virtual_address_restrictions addressRestrictions = {};
2471 		addressRestrictions.address = *address;
2472 		addressRestrictions.address_specification = addressSpec;
2473 		status = map_backing_store(targetAddressSpace, cache,
2474 			sourceArea->cache_offset, name, sourceArea->Size(),
2475 			sourceArea->wiring, protection, sourceArea->protection_max,
2476 			mapping, 0, &addressRestrictions,
2477 			kernel, &newArea, address);
2478 	}
2479 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2480 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2481 		// to create a new cache, and has therefore already acquired a reference
2482 		// to the source cache - but otherwise it has no idea that we need
2483 		// one.
2484 		cache->AcquireRefLocked();
2485 	}
2486 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2487 		// we need to map in everything at this point
2488 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2489 			// we don't have actual pages to map but a physical area
2490 			VMTranslationMap* map
2491 				= sourceArea->address_space->TranslationMap();
2492 			map->Lock();
2493 
2494 			phys_addr_t physicalAddress;
2495 			uint32 oldProtection;
2496 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2497 
2498 			map->Unlock();
2499 
2500 			map = targetAddressSpace->TranslationMap();
2501 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2502 				newArea->Base() + (newArea->Size() - 1));
2503 
2504 			vm_page_reservation reservation;
2505 			vm_page_reserve_pages(&reservation, reservePages,
2506 				targetAddressSpace == VMAddressSpace::Kernel()
2507 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2508 			map->Lock();
2509 
2510 			for (addr_t offset = 0; offset < newArea->Size();
2511 					offset += B_PAGE_SIZE) {
2512 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2513 					protection, newArea->MemoryType(), &reservation);
2514 			}
2515 
2516 			map->Unlock();
2517 			vm_page_unreserve_pages(&reservation);
2518 		} else {
2519 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2520 			size_t reservePages = map->MaxPagesNeededToMap(
2521 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2522 			vm_page_reservation reservation;
2523 			vm_page_reserve_pages(&reservation, reservePages,
2524 				targetAddressSpace == VMAddressSpace::Kernel()
2525 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2526 
2527 			// map in all pages from source
2528 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2529 					vm_page* page = it.Next();) {
2530 				if (!page->busy) {
2531 					DEBUG_PAGE_ACCESS_START(page);
2532 					map_page(newArea, page,
2533 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2534 							- newArea->cache_offset),
2535 						protection, &reservation);
2536 					DEBUG_PAGE_ACCESS_END(page);
2537 				}
2538 			}
2539 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2540 			// ensuring that!
2541 
2542 			vm_page_unreserve_pages(&reservation);
2543 		}
2544 	}
2545 	if (status == B_OK)
2546 		newArea->cache_type = sourceArea->cache_type;
2547 
2548 	vm_area_put_locked_cache(cache);
2549 
2550 	if (status < B_OK)
2551 		return status;
2552 
2553 	return newArea->id;
2554 }
2555 
2556 
2557 /*!	Deletes the specified area of the given address space.
2558 
2559 	The address space must be write-locked.
2560 	The caller must ensure that the area does not have any wired ranges.
2561 
2562 	\param addressSpace The address space containing the area.
2563 	\param area The area to be deleted.
2564 	\param deletingAddressSpace \c true, if the address space is in the process
2565 		of being deleted.
2566 */
2567 static void
2568 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2569 	bool deletingAddressSpace)
2570 {
2571 	ASSERT(!area->IsWired());
2572 
2573 	if (area->id >= 0)
2574 		VMAreas::Remove(area);
2575 
2576 	// At this point the area is removed from the global hash table, but
2577 	// still exists in the area list.
2578 
2579 	// Unmap the virtual address space the area occupied.
2580 	{
2581 		// We need to lock the complete cache chain.
2582 		VMCache* topCache = vm_area_get_locked_cache(area);
2583 		VMCacheChainLocker cacheChainLocker(topCache);
2584 		cacheChainLocker.LockAllSourceCaches();
2585 
2586 		// If the area's top cache is a temporary cache and the area is the only
2587 		// one referencing it (besides us currently holding a second reference),
2588 		// the unmapping code doesn't need to care about preserving the accessed
2589 		// and dirty flags of the top cache page mappings.
2590 		bool ignoreTopCachePageFlags
2591 			= topCache->temporary && topCache->RefCount() == 2;
2592 
2593 		area->address_space->TranslationMap()->UnmapArea(area,
2594 			deletingAddressSpace, ignoreTopCachePageFlags);
2595 	}
2596 
2597 	if (!area->cache->temporary)
2598 		area->cache->WriteModified();
2599 
2600 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2601 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2602 
2603 	arch_vm_unset_memory_type(area);
2604 	addressSpace->RemoveArea(area, allocationFlags);
2605 	addressSpace->Put();
2606 
2607 	area->cache->RemoveArea(area);
2608 	area->cache->ReleaseRef();
2609 
2610 	addressSpace->DeleteArea(area, allocationFlags);
2611 }
2612 
2613 
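/*!	Deletes the area with ID \a id from the given team's address space,
	waiting for any wired ranges in it to disappear first. Unless \a kernel
	is \c true, areas protected with B_KERNEL_AREA cannot be deleted.
*/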
2614 status_t
2615 vm_delete_area(team_id team, area_id id, bool kernel)
2616 {
2617 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2618 		team, id));
2619 
2620 	// lock the address space and make sure the area isn't wired
2621 	AddressSpaceWriteLocker locker;
2622 	VMArea* area;
2623 	AreaCacheLocker cacheLocker;
2624 
2625 	do {
2626 		status_t status = locker.SetFromArea(team, id, area);
2627 		if (status != B_OK)
2628 			return status;
2629 
2630 		cacheLocker.SetTo(area);
2631 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2632 
2633 	cacheLocker.Unlock();
2634 
2635 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2636 		return B_NOT_ALLOWED;
2637 
2638 	delete_area(locker.AddressSpace(), area, false);
2639 	return B_OK;
2640 }
2641 
2642 
2643 /*!	Creates a new cache on top of given cache, moves all areas from
2644 	the old cache to the new one, and changes the protection of all affected
2645 	areas' pages to read-only. If requested, wired pages are moved up to the
2646 	new cache and copies are added to the old cache in their place.
2647 	Preconditions:
2648 	- The given cache must be locked.
2649 	- All of the cache's areas' address spaces must be read locked.
2650 	- Either the cache must not have any wired ranges or a page reservation for
2651 	  all wired pages must be provided, so they can be copied.
2652 
2653 	\param lowerCache The cache on top of which a new cache shall be created.
2654 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2655 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2656 		has wired pages. The wired pages are copied in this case.
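
	The resulting cache chain, schematically (the newly inserted cache is
	marked with an asterisk):

		before:  area(s) -> lowerCache -> ...
		after:   area(s) -> upperCache* -> lowerCache -> ...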
2657 */
2658 static status_t
2659 vm_copy_on_write_area(VMCache* lowerCache,
2660 	vm_page_reservation* wiredPagesReservation)
2661 {
2662 	VMCache* upperCache;
2663 
2664 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2665 
2666 	// We need to separate the cache from its areas. The cache goes one level
2667 	// deeper and we create a new cache in between.
2668 
2669 	// create an anonymous cache
2670 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2671 		lowerCache->GuardSize() / B_PAGE_SIZE,
2672 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2673 		VM_PRIORITY_USER);
2674 	if (status != B_OK)
2675 		return status;
2676 
2677 	upperCache->Lock();
2678 
2679 	upperCache->temporary = 1;
2680 	upperCache->virtual_base = lowerCache->virtual_base;
2681 	upperCache->virtual_end = lowerCache->virtual_end;
2682 
2683 	// transfer the lower cache areas to the upper cache
2684 	rw_lock_write_lock(&sAreaCacheLock);
2685 	upperCache->TransferAreas(lowerCache);
2686 	rw_lock_write_unlock(&sAreaCacheLock);
2687 
2688 	lowerCache->AddConsumer(upperCache);
2689 
2690 	// We now need to remap all pages from all of the cache's areas read-only,
2691 	// so that a copy will be created on next write access. If there are wired
2692 	// pages, we keep their protection, move them to the upper cache and create
2693 	// copies for the lower cache.
2694 	if (wiredPagesReservation != NULL) {
2695 		// We need to handle wired pages -- iterate through the cache's pages.
2696 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2697 				vm_page* page = it.Next();) {
2698 			if (page->WiredCount() > 0) {
2699 				// allocate a new page and copy the wired one
2700 				vm_page* copiedPage = vm_page_allocate_page(
2701 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2702 
2703 				vm_memcpy_physical_page(
2704 					copiedPage->physical_page_number * B_PAGE_SIZE,
2705 					page->physical_page_number * B_PAGE_SIZE);
2706 
2707 				// move the wired page to the upper cache (note: removing is OK
2708 				// with the SplayTree iterator) and insert the copy
2709 				upperCache->MovePage(page);
2710 				lowerCache->InsertPage(copiedPage,
2711 					page->cache_offset * B_PAGE_SIZE);
2712 
2713 				DEBUG_PAGE_ACCESS_END(copiedPage);
2714 			} else {
2715 				// Change the protection of this page in all areas.
2716 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2717 						tempArea = tempArea->cache_next) {
2718 					if (!is_page_in_area(tempArea, page))
2719 						continue;
2720 
2721 					// The area must be readable in the same way it was
2722 					// previously writable.
2723 					addr_t address = virtual_page_address(tempArea, page);
2724 					uint32 protection = 0;
2725 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2726 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2727 						protection |= B_KERNEL_READ_AREA;
2728 					if ((pageProtection & B_READ_AREA) != 0)
2729 						protection |= B_READ_AREA;
2730 
2731 					VMTranslationMap* map
2732 						= tempArea->address_space->TranslationMap();
2733 					map->Lock();
2734 					map->ProtectPage(tempArea, address, protection);
2735 					map->Unlock();
2736 				}
2737 			}
2738 		}
2739 	} else {
2740 		ASSERT(lowerCache->WiredPagesCount() == 0);
2741 
2742 		// just change the protection of all areas
2743 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2744 				tempArea = tempArea->cache_next) {
2745 			if (tempArea->page_protections != NULL) {
2746 				// Change the protection of all pages in this area.
2747 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2748 				map->Lock();
2749 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2750 					vm_page* page = it.Next();) {
2751 					if (!is_page_in_area(tempArea, page))
2752 						continue;
2753 
2754 					// The area must be readable in the same way it was
2755 					// previously writable.
2756 					addr_t address = virtual_page_address(tempArea, page);
2757 					uint32 protection = 0;
2758 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2759 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2760 						protection |= B_KERNEL_READ_AREA;
2761 					if ((pageProtection & B_READ_AREA) != 0)
2762 						protection |= B_READ_AREA;
2763 
2764 					map->ProtectPage(tempArea, address, protection);
2765 				}
2766 				map->Unlock();
2767 				continue;
2768 			}
2769 			// The area must be readable in the same way it was previously
2770 			// writable.
2771 			uint32 protection = 0;
2772 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2773 				protection |= B_KERNEL_READ_AREA;
2774 			if ((tempArea->protection & B_READ_AREA) != 0)
2775 				protection |= B_READ_AREA;
2776 
2777 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2778 			map->Lock();
2779 			map->ProtectArea(tempArea, protection);
2780 			map->Unlock();
2781 		}
2782 	}
2783 
2784 	vm_area_put_locked_cache(upperCache);
2785 
2786 	return B_OK;
2787 }
2788 
2789 
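/*!	Copies the area with ID \a sourceID into the address space of \a team.
	A shared source area simply gets another mapping of its existing cache;
	a private one is copied copy-on-write, so that both areas see the current
	contents while later writes stay separate. Wired pages of a private
	source are copied eagerly, since they cannot be remapped read-only.
*/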
2790 area_id
2791 vm_copy_area(team_id team, const char* name, void** _address,
2792 	uint32 addressSpec, area_id sourceID)
2793 {
2794 	// Do the locking: target address space, all address spaces associated with
2795 	// the source cache, and the cache itself.
2796 	MultiAddressSpaceLocker locker;
2797 	VMAddressSpace* targetAddressSpace;
2798 	VMCache* cache;
2799 	VMArea* source;
2800 	AreaCacheLocker cacheLocker;
2801 	status_t status;
2802 	bool sharedArea;
2803 
2804 	page_num_t wiredPages = 0;
2805 	vm_page_reservation wiredPagesReservation;
2806 
2807 	bool restart;
2808 	do {
2809 		restart = false;
2810 
2811 		locker.Unset();
2812 		status = locker.AddTeam(team, true, &targetAddressSpace);
2813 		if (status == B_OK) {
2814 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2815 				&cache);
2816 		}
2817 		if (status != B_OK)
2818 			return status;
2819 
2820 		cacheLocker.SetTo(cache, true);	// already locked
2821 
2822 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2823 
2824 		page_num_t oldWiredPages = wiredPages;
2825 		wiredPages = 0;
2826 
2827 		// If the source area isn't shared, count the number of wired pages in
2828 		// the cache and reserve as many pages.
2829 		if (!sharedArea) {
2830 			wiredPages = cache->WiredPagesCount();
2831 
2832 			if (wiredPages > oldWiredPages) {
2833 				cacheLocker.Unlock();
2834 				locker.Unlock();
2835 
2836 				if (oldWiredPages > 0)
2837 					vm_page_unreserve_pages(&wiredPagesReservation);
2838 
2839 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2840 					VM_PRIORITY_USER);
2841 
2842 				restart = true;
2843 			}
2844 		} else if (oldWiredPages > 0)
2845 			vm_page_unreserve_pages(&wiredPagesReservation);
2846 	} while (restart);
2847 
2848 	// unreserve pages later
2849 	struct PagesUnreserver {
2850 		PagesUnreserver(vm_page_reservation* reservation)
2851 			:
2852 			fReservation(reservation)
2853 		{
2854 		}
2855 
2856 		~PagesUnreserver()
2857 		{
2858 			if (fReservation != NULL)
2859 				vm_page_unreserve_pages(fReservation);
2860 		}
2861 
2862 	private:
2863 		vm_page_reservation*	fReservation;
2864 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2865 
2866 	bool writableCopy
2867 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2868 	uint8* targetPageProtections = NULL;
2869 
2870 	if (source->page_protections != NULL) {
2871 		size_t bytes = area_page_protections_size(source->Size());
2872 		targetPageProtections = (uint8*)malloc_etc(bytes,
2873 			(source->address_space == VMAddressSpace::Kernel()
2874 					|| targetAddressSpace == VMAddressSpace::Kernel())
2875 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2876 		if (targetPageProtections == NULL)
2877 			return B_NO_MEMORY;
2878 
2879 		memcpy(targetPageProtections, source->page_protections, bytes);
2880 
2881 		if (!writableCopy) {
2882 			for (size_t i = 0; i < bytes; i++) {
2883 				if ((targetPageProtections[i]
2884 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2885 					writableCopy = true;
2886 					break;
2887 				}
2888 			}
2889 		}
2890 	}
2891 
2892 	if (addressSpec == B_CLONE_ADDRESS) {
2893 		addressSpec = B_EXACT_ADDRESS;
2894 		*_address = (void*)source->Base();
2895 	}
2896 
2897 	// First, create a new cache on top of the source area's cache, or, if
2898 	// this is a shared area, simply reuse its existing cache.
2899 
2900 	VMArea* target;
2901 	virtual_address_restrictions addressRestrictions = {};
2902 	addressRestrictions.address = *_address;
2903 	addressRestrictions.address_specification = addressSpec;
2904 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2905 		name, source->Size(), source->wiring, source->protection,
2906 		source->protection_max,
2907 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2908 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2909 		&addressRestrictions, true, &target, _address);
2910 	if (status < B_OK) {
2911 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2912 		return status;
2913 	}
2914 
2915 	if (targetPageProtections != NULL)
2916 		target->page_protections = targetPageProtections;
2917 
2918 	if (sharedArea) {
2919 		// The new area uses the old area's cache, but map_backing_store()
2920 		// hasn't acquired a ref. So we have to do that now.
2921 		cache->AcquireRefLocked();
2922 	}
2923 
2924 	// If the source area is writable, we need to move it one layer up as well
2925 
2926 	if (!sharedArea) {
2927 		if (writableCopy) {
2928 			// TODO: do something more useful if this fails!
2929 			if (vm_copy_on_write_area(cache,
2930 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2931 				panic("vm_copy_on_write_area() failed!\n");
2932 			}
2933 		}
2934 	}
2935 
2936 	// we return the ID of the newly created area
2937 	return target->id;
2938 }
2939 
2940 
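/*!	Changes the protection of the area with ID \a areaID. Making a writable
	area read-only may allow shrinking its cache's commitment; making a
	read-only area writable may require inserting a copy-on-write cache (via
	vm_copy_on_write_area()) or re-committing the full cache size. Unless
	called by the kernel, only areas of the calling team may be changed,
	kernel areas are off-limits, and the area's protection_max is enforced.
*/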
2941 status_t
2942 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2943 	bool kernel)
2944 {
2945 	fix_protection(&newProtection);
2946 
2947 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2948 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2949 
2950 	if (!arch_vm_supports_protection(newProtection))
2951 		return B_NOT_SUPPORTED;
2952 
2953 	bool becomesWritable
2954 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2955 
2956 	// lock address spaces and cache
2957 	MultiAddressSpaceLocker locker;
2958 	VMCache* cache;
2959 	VMArea* area;
2960 	status_t status;
2961 	AreaCacheLocker cacheLocker;
2962 	bool isWritable;
2963 
2964 	bool restart;
2965 	do {
2966 		restart = false;
2967 
2968 		locker.Unset();
2969 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2970 		if (status != B_OK)
2971 			return status;
2972 
2973 		cacheLocker.SetTo(cache, true);	// already locked
2974 
2975 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2976 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2977 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2978 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2979 				" (%s)\n", team, newProtection, areaID, area->name);
2980 			return B_NOT_ALLOWED;
2981 		}
2982 		if (!kernel && area->protection_max != 0
2983 			&& (newProtection & area->protection_max)
2984 				!= (newProtection & B_USER_PROTECTION)) {
2985 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2986 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2987 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2988 				area->protection_max, areaID, area->name);
2989 			return B_NOT_ALLOWED;
2990 		}
2991 
2992 		if (team != VMAddressSpace::KernelID()
2993 			&& area->address_space->ID() != team) {
2994 			// unless you're the kernel, you are only allowed to set
2995 			// the protection of your own areas
2996 			return B_NOT_ALLOWED;
2997 		}
2998 
2999 		if (area->protection == newProtection)
3000 			return B_OK;
3001 
3002 		isWritable
3003 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3004 
3005 		// Make sure the area (respectively, if we're going to call
3006 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
3007 		// wired ranges.
3008 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
3009 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
3010 					otherArea = otherArea->cache_next) {
3011 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
3012 					restart = true;
3013 					break;
3014 				}
3015 			}
3016 		} else {
3017 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
3018 				restart = true;
3019 		}
3020 	} while (restart);
3021 
3022 	bool changePageProtection = true;
3023 	bool changeTopCachePagesOnly = false;
3024 
3025 	if (isWritable && !becomesWritable) {
3026 		// writable -> !writable
3027 
3028 		if (cache->source != NULL && cache->temporary) {
3029 			if (cache->CountWritableAreas(area) == 0) {
3030 				// Since this cache now lives from the pages in its source cache,
3031 				// we can change the cache's commitment to take only those pages
3032 				// into account that really are in this cache.
3033 
3034 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
3035 					team == VMAddressSpace::KernelID()
3036 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3037 
3038 				// TODO: we may be able to join with our source cache, if
3039 				// count == 0
3040 			}
3041 		}
3042 
3043 		// If only the writability changes, we can just remap the pages of the
3044 		// top cache, since the pages of lower caches are mapped read-only
3045 		// anyway. That is only advantageous if the number of pages in the
3046 		// cache is significantly smaller than the number of pages in the
3047 		// area, though.
3048 		if (newProtection
3049 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
3050 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
3051 			changeTopCachePagesOnly = true;
3052 		}
3053 	} else if (!isWritable && becomesWritable) {
3054 		// !writable -> writable
3055 
3056 		if (!cache->consumers.IsEmpty()) {
3057 			// There are consumers -- we have to insert a new cache. Fortunately
3058 			// vm_copy_on_write_area() does everything that's needed.
3059 			changePageProtection = false;
3060 			status = vm_copy_on_write_area(cache, NULL);
3061 		} else {
3062 			// No consumers, so we don't need to insert a new one.
3063 			if (cache->source != NULL && cache->temporary) {
3064 				// the cache's commitment must contain all possible pages
3065 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3066 					team == VMAddressSpace::KernelID()
3067 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3068 			}
3069 
3070 			if (status == B_OK && cache->source != NULL) {
3071 				// There's a source cache, hence we can't just change all pages'
3072 				// protection or we might allow writing into pages belonging to
3073 				// a lower cache.
3074 				changeTopCachePagesOnly = true;
3075 			}
3076 		}
3077 	} else {
3078 		// we don't have anything special to do in all other cases
3079 	}
3080 
3081 	if (status == B_OK) {
3082 		// remap existing pages in this cache
3083 		if (changePageProtection) {
3084 			VMTranslationMap* map = area->address_space->TranslationMap();
3085 			map->Lock();
3086 
3087 			if (changeTopCachePagesOnly) {
3088 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3089 				page_num_t lastPageOffset
3090 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3091 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3092 						vm_page* page = it.Next();) {
3093 					if (page->cache_offset >= firstPageOffset
3094 						&& page->cache_offset < lastPageOffset) {
3095 						addr_t address = virtual_page_address(area, page);
3096 						map->ProtectPage(area, address, newProtection);
3097 					}
3098 				}
3099 			} else
3100 				map->ProtectArea(area, newProtection);
3101 
3102 			map->Unlock();
3103 		}
3104 
3105 		area->protection = newProtection;
3106 	}
3107 
3108 	return status;
3109 }
3110 
3111 
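/*!	Looks up the physical address currently mapped at \a vaddr in the given
	team's address space and stores it in \a paddr.
*/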
3112 status_t
3113 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3114 {
3115 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3116 	if (addressSpace == NULL)
3117 		return B_BAD_TEAM_ID;
3118 
3119 	VMTranslationMap* map = addressSpace->TranslationMap();
3120 
3121 	map->Lock();
3122 	uint32 dummyFlags;
3123 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3124 	map->Unlock();
3125 
3126 	addressSpace->Put();
3127 	return status;
3128 }
3129 
3130 
3131 /*!	The page's cache must be locked.
3132 */
3133 bool
3134 vm_test_map_modification(vm_page* page)
3135 {
3136 	if (page->modified)
3137 		return true;
3138 
3139 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3140 	vm_page_mapping* mapping;
3141 	while ((mapping = iterator.Next()) != NULL) {
3142 		VMArea* area = mapping->area;
3143 		VMTranslationMap* map = area->address_space->TranslationMap();
3144 
3145 		phys_addr_t physicalAddress;
3146 		uint32 flags;
3147 		map->Lock();
3148 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3149 		map->Unlock();
3150 
3151 		if ((flags & PAGE_MODIFIED) != 0)
3152 			return true;
3153 	}
3154 
3155 	return false;
3156 }
3157 
3158 
3159 /*!	The page's cache must be locked.
3160 */
3161 void
3162 vm_clear_map_flags(vm_page* page, uint32 flags)
3163 {
3164 	if ((flags & PAGE_ACCESSED) != 0)
3165 		page->accessed = false;
3166 	if ((flags & PAGE_MODIFIED) != 0)
3167 		page->modified = false;
3168 
3169 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3170 	vm_page_mapping* mapping;
3171 	while ((mapping = iterator.Next()) != NULL) {
3172 		VMArea* area = mapping->area;
3173 		VMTranslationMap* map = area->address_space->TranslationMap();
3174 
3175 		map->Lock();
3176 		map->ClearFlags(virtual_page_address(area, page), flags);
3177 		map->Unlock();
3178 	}
3179 }
3180 
3181 
3182 /*!	Removes all mappings from a page.
3183 	After you've called this function, the page is unmapped from memory and
3184 	the page's \c accessed and \c modified flags have been updated according
3185 	to the state of the mappings.
3186 	The page's cache must be locked.
3187 */
3188 void
3189 vm_remove_all_page_mappings(vm_page* page)
3190 {
3191 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3192 		VMArea* area = mapping->area;
3193 		VMTranslationMap* map = area->address_space->TranslationMap();
3194 		addr_t address = virtual_page_address(area, page);
3195 		map->UnmapPage(area, address, false);
3196 	}
3197 }
3198 
3199 
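/*!	Clears the accessed flag of the page and of all of its mappings, folding
	the mappings' modified flags into the page's \c modified flag.
	The page's cache must be locked.
	\return The number of accessed flags that were set, including the page's
		own.
*/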
3200 int32
3201 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3202 {
3203 	int32 count = 0;
3204 
3205 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3206 	vm_page_mapping* mapping;
3207 	while ((mapping = iterator.Next()) != NULL) {
3208 		VMArea* area = mapping->area;
3209 		VMTranslationMap* map = area->address_space->TranslationMap();
3210 
3211 		bool modified;
3212 		if (map->ClearAccessedAndModified(area,
3213 				virtual_page_address(area, page), false, modified)) {
3214 			count++;
3215 		}
3216 
3217 		page->modified |= modified;
3218 	}
3219 
3220 
3221 	if (page->accessed) {
3222 		count++;
3223 		page->accessed = false;
3224 	}
3225 
3226 	return count;
3227 }
3228 
3229 
3230 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3231 	mappings.
3232 	The function iterates through the page mappings and removes them until
3233 	encountering one that has been accessed. From then on it will continue to
3234 	iterate, but only clear the accessed flag of the mapping. The page's
3235 	\c modified bit will be updated accordingly, the \c accessed bit will be
3236 	cleared.
3237 	\return The number of mapping accessed bits encountered, including the
3238 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3239 		of the page have been removed.
3240 */
3241 int32
3242 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3243 {
3244 	ASSERT(page->WiredCount() == 0);
3245 
3246 	if (page->accessed)
3247 		return vm_clear_page_mapping_accessed_flags(page);
3248 
3249 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3250 		VMArea* area = mapping->area;
3251 		VMTranslationMap* map = area->address_space->TranslationMap();
3252 		addr_t address = virtual_page_address(area, page);
3253 		bool modified = false;
3254 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3255 			page->accessed = true;
3256 			page->modified |= modified;
3257 			return vm_clear_page_mapping_accessed_flags(page);
3258 		}
3259 		page->modified |= modified;
3260 	}
3261 
3262 	return 0;
3263 }
3264 
3265 
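// KDL command backing "dl"/"dw"/"ds"/"db"/"string": dumps memory at the given
// virtual address, or, with -p/--physical, at a physical address (limited to
// a single page in that case).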
3266 static int
3267 display_mem(int argc, char** argv)
3268 {
3269 	bool physical = false;
3270 	addr_t copyAddress;
3271 	int32 displayWidth;
3272 	int32 itemSize;
3273 	int32 num = -1;
3274 	addr_t address;
3275 	int i = 1, j;
3276 
3277 	if (argc > 1 && argv[1][0] == '-') {
3278 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3279 			physical = true;
3280 			i++;
3281 		} else
3282 			i = 99;
3283 	}
3284 
3285 	if (argc < i + 1 || argc > i + 2) {
3286 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3287 			"\tdl - 8 bytes\n"
3288 			"\tdw - 4 bytes\n"
3289 			"\tds - 2 bytes\n"
3290 			"\tdb - 1 byte\n"
3291 			"\tstring - a whole string\n"
3292 			"  -p or --physical only allows memory from a single page to be "
3293 			"displayed.\n");
3294 		return 0;
3295 	}
3296 
3297 	address = parse_expression(argv[i]);
3298 
3299 	if (argc > i + 1)
3300 		num = parse_expression(argv[i + 1]);
3301 
3302 	// build the format string
3303 	if (strcmp(argv[0], "db") == 0) {
3304 		itemSize = 1;
3305 		displayWidth = 16;
3306 	} else if (strcmp(argv[0], "ds") == 0) {
3307 		itemSize = 2;
3308 		displayWidth = 8;
3309 	} else if (strcmp(argv[0], "dw") == 0) {
3310 		itemSize = 4;
3311 		displayWidth = 4;
3312 	} else if (strcmp(argv[0], "dl") == 0) {
3313 		itemSize = 8;
3314 		displayWidth = 2;
3315 	} else if (strcmp(argv[0], "string") == 0) {
3316 		itemSize = 1;
3317 		displayWidth = -1;
3318 	} else {
3319 		kprintf("display_mem called in an invalid way!\n");
3320 		return 0;
3321 	}
3322 
3323 	if (num <= 0)
3324 		num = displayWidth;
3325 
3326 	void* physicalPageHandle = NULL;
3327 
3328 	if (physical) {
3329 		int32 offset = address & (B_PAGE_SIZE - 1);
3330 		if (num * itemSize + offset > B_PAGE_SIZE) {
3331 			num = (B_PAGE_SIZE - offset) / itemSize;
3332 			kprintf("NOTE: number of bytes has been cut to page size\n");
3333 		}
3334 
3335 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3336 
3337 		if (vm_get_physical_page_debug(address, &copyAddress,
3338 				&physicalPageHandle) != B_OK) {
3339 			kprintf("getting the hardware page failed.");
3340 			return 0;
3341 		}
3342 
3343 		address += offset;
3344 		copyAddress += offset;
3345 	} else
3346 		copyAddress = address;
3347 
3348 	if (!strcmp(argv[0], "string")) {
3349 		kprintf("%p \"", (char*)copyAddress);
3350 
3351 		// string mode
3352 		for (i = 0; true; i++) {
3353 			char c;
3354 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3355 					!= B_OK
3356 				|| c == '\0') {
3357 				break;
3358 			}
3359 
3360 			if (c == '\n')
3361 				kprintf("\\n");
3362 			else if (c == '\t')
3363 				kprintf("\\t");
3364 			else {
3365 				if (!isprint(c))
3366 					c = '.';
3367 
3368 				kprintf("%c", c);
3369 			}
3370 		}
3371 
3372 		kprintf("\"\n");
3373 	} else {
3374 		// number mode
3375 		for (i = 0; i < num; i++) {
3376 			uint64 value;
3377 
3378 			if ((i % displayWidth) == 0) {
3379 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3380 				if (i != 0)
3381 					kprintf("\n");
3382 
3383 				kprintf("[0x%lx]  ", address + i * itemSize);
3384 
3385 				for (j = 0; j < displayed; j++) {
3386 					char c;
3387 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3388 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3389 						displayed = j;
3390 						break;
3391 					}
3392 					if (!isprint(c))
3393 						c = '.';
3394 
3395 					kprintf("%c", c);
3396 				}
3397 				if (num > displayWidth) {
3398 					// make sure the spacing in the last line is correct
3399 					for (j = displayed; j < displayWidth * itemSize; j++)
3400 						kprintf(" ");
3401 				}
3402 				kprintf("  ");
3403 			}
3404 
3405 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3406 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3407 				kprintf("read fault");
3408 				break;
3409 			}
3410 
3411 			switch (itemSize) {
3412 				case 1:
3413 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3414 					break;
3415 				case 2:
3416 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3417 					break;
3418 				case 4:
3419 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3420 					break;
3421 				case 8:
3422 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3423 					break;
3424 			}
3425 		}
3426 
3427 		kprintf("\n");
3428 	}
3429 
3430 	if (physical) {
3431 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3432 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3433 	}
3434 	return 0;
3435 }
3436 
3437 
3438 static void
3439 dump_cache_tree_recursively(VMCache* cache, int level,
3440 	VMCache* highlightCache)
3441 {
3442 	// print this cache
3443 	for (int i = 0; i < level; i++)
3444 		kprintf("  ");
3445 	if (cache == highlightCache)
3446 		kprintf("%p <--\n", cache);
3447 	else
3448 		kprintf("%p\n", cache);
3449 
3450 	// recursively print its consumers
3451 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3452 			VMCache* consumer = it.Next();) {
3453 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3454 	}
3455 }
3456 
3457 
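// KDL command: prints the consumer tree of the cache given by <address>,
// starting from its root (the transitive source) and highlighting the given
// cache.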
3458 static int
3459 dump_cache_tree(int argc, char** argv)
3460 {
3461 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3462 		kprintf("usage: %s <address>\n", argv[0]);
3463 		return 0;
3464 	}
3465 
3466 	addr_t address = parse_expression(argv[1]);
3467 	if (address == 0)
3468 		return 0;
3469 
3470 	VMCache* cache = (VMCache*)address;
3471 	VMCache* root = cache;
3472 
3473 	// find the root cache (the transitive source)
3474 	while (root->source != NULL)
3475 		root = root->source;
3476 
3477 	dump_cache_tree_recursively(root, 0, cache);
3478 
3479 	return 0;
3480 }
3481 
3482 
3483 const char*
3484 vm_cache_type_to_string(int32 type)
3485 {
3486 	switch (type) {
3487 		case CACHE_TYPE_RAM:
3488 			return "RAM";
3489 		case CACHE_TYPE_DEVICE:
3490 			return "device";
3491 		case CACHE_TYPE_VNODE:
3492 			return "vnode";
3493 		case CACHE_TYPE_NULL:
3494 			return "null";
3495 
3496 		default:
3497 			return "unknown";
3498 	}
3499 }
3500 
3501 
3502 #if DEBUG_CACHE_LIST
3503 
3504 static void
3505 update_cache_info_recursively(VMCache* cache, cache_info& info)
3506 {
3507 	info.page_count += cache->page_count;
3508 	if (cache->type == CACHE_TYPE_RAM)
3509 		info.committed += cache->committed_size;
3510 
3511 	// recurse
3512 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3513 			VMCache* consumer = it.Next();) {
3514 		update_cache_info_recursively(consumer, info);
3515 	}
3516 }
3517 
3518 
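/*!	qsort() comparator: sorts cache_info entries by page count, descending. */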
3519 static int
3520 cache_info_compare_page_count(const void* _a, const void* _b)
3521 {
3522 	const cache_info* a = (const cache_info*)_a;
3523 	const cache_info* b = (const cache_info*)_b;
3524 	if (a->page_count == b->page_count)
3525 		return 0;
3526 	return a->page_count < b->page_count ? 1 : -1;
3527 }
3528 
3529 
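/*!	qsort() comparator: sorts cache_info entries by committed size,
	descending.
*/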
3530 static int
3531 cache_info_compare_committed(const void* _a, const void* _b)
3532 {
3533 	const cache_info* a = (const cache_info*)_a;
3534 	const cache_info* b = (const cache_info*)_b;
3535 	if (a->committed == b->committed)
3536 		return 0;
3537 	return a->committed < b->committed ? 1 : -1;
3538 }
3539 
3540 
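/*!	Prints one line of information (type, base, size, page count, committed
	size, and the areas mapping the cache) for the given cache -- plus the
	accumulated totals for the root cache -- and then recursively does the
	same for all of its consumers.
*/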
3541 static void
3542 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3543 {
3544 	for (int i = 0; i < level; i++)
3545 		kprintf("  ");
3546 
3547 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3548 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3549 		cache->virtual_base, cache->virtual_end, cache->page_count);
3550 
3551 	if (level == 0)
3552 		kprintf("/%lu", info.page_count);
3553 
3554 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3555 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3556 
3557 		if (level == 0)
3558 			kprintf("/%lu", info.committed);
3559 	}
3560 
3561 	// areas
3562 	if (cache->areas != NULL) {
3563 		VMArea* area = cache->areas;
3564 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3565 			area->name, area->address_space->ID());
3566 
3567 		while (area->cache_next != NULL) {
3568 			area = area->cache_next;
3569 			kprintf(", %" B_PRId32, area->id);
3570 		}
3571 	}
3572 
3573 	kputs("\n");
3574 
3575 	// recurse
3576 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3577 			VMCache* consumer = it.Next();) {
3578 		dump_caches_recursively(consumer, info, level + 1);
3579 	}
3580 }
3581 
3582 
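/*!	KDL command "caches": Iterates over gDebugCacheList, collects per-tree
	statistics for every root cache into sCacheInfoTable, sorts the table by
	page count (or by committed size if "-c" is given), and dumps each cache
	tree.
*/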
3583 static int
3584 dump_caches(int argc, char** argv)
3585 {
3586 	if (sCacheInfoTable == NULL) {
3587 		kprintf("No cache info table!\n");
3588 		return 0;
3589 	}
3590 
3591 	bool sortByPageCount = true;
3592 
3593 	for (int32 i = 1; i < argc; i++) {
3594 		if (strcmp(argv[i], "-c") == 0) {
3595 			sortByPageCount = false;
3596 		} else {
3597 			print_debugger_command_usage(argv[0]);
3598 			return 0;
3599 		}
3600 	}
3601 
3602 	uint32 totalCount = 0;
3603 	uint32 rootCount = 0;
3604 	off_t totalCommitted = 0;
3605 	page_num_t totalPages = 0;
3606 
3607 	VMCache* cache = gDebugCacheList;
3608 	while (cache) {
3609 		totalCount++;
3610 		if (cache->source == NULL) {
3611 			cache_info stackInfo;
3612 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3613 				? sCacheInfoTable[rootCount] : stackInfo;
3614 			rootCount++;
3615 			info.cache = cache;
3616 			info.page_count = 0;
3617 			info.committed = 0;
3618 			update_cache_info_recursively(cache, info);
3619 			totalCommitted += info.committed;
3620 			totalPages += info.page_count;
3621 		}
3622 
3623 		cache = cache->debug_next;
3624 	}
3625 
3626 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3627 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3628 			sortByPageCount
3629 				? &cache_info_compare_page_count
3630 				: &cache_info_compare_committed);
3631 	}
3632 
3633 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3634 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3635 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3636 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3637 			"page count" : "committed size");
3638 
3639 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3640 		for (uint32 i = 0; i < rootCount; i++) {
3641 			cache_info& info = sCacheInfoTable[i];
3642 			dump_caches_recursively(info.cache, info, 0);
3643 		}
3644 	} else
3645 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3646 
3647 	return 0;
3648 }
3649 
3650 #endif	// DEBUG_CACHE_LIST
3651 
3652 
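/*!	KDL command "cache": Dumps the VMCache at the given address (including all
	of its pages, if "-p" is given) and sets the "_sourceCache" debug variable
	to its source cache.
*/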
3653 static int
3654 dump_cache(int argc, char** argv)
3655 {
3656 	VMCache* cache;
3657 	bool showPages = false;
3658 	int i = 1;
3659 
3660 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3661 		kprintf("usage: %s [-ps] <address>\n"
3662 			"  if -p is specified, all pages are shown; if -s is used,\n"
3663 			"  only the cache info is shown.\n", argv[0]);
3664 		return 0;
3665 	}
3666 	while (argv[i][0] == '-') {
3667 		char* arg = argv[i] + 1;
3668 		while (arg[0]) {
3669 			if (arg[0] == 'p')
3670 				showPages = true;
3671 			arg++;
3672 		}
3673 		i++;
3674 	}
3675 	if (argv[i] == NULL) {
3676 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3677 		return 0;
3678 	}
3679 
3680 	addr_t address = parse_expression(argv[i]);
3681 	if (address == 0)
3682 		return 0;
3683 
3684 	cache = (VMCache*)address;
3685 
3686 	cache->Dump(showPages);
3687 
3688 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3689 
3690 	return 0;
3691 }
3692 
3693 
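/*!	Prints all fields of the given VMArea. If \a mappings is \c true, every
	page mapping is listed as well; otherwise only their count is printed.
*/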
3694 static void
3695 dump_area_struct(VMArea* area, bool mappings)
3696 {
3697 	kprintf("AREA: %p\n", area);
3698 	kprintf("name:\t\t'%s'\n", area->name);
3699 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3700 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3701 	kprintf("base:\t\t0x%lx\n", area->Base());
3702 	kprintf("size:\t\t0x%lx\n", area->Size());
3703 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3704 	kprintf("page_protection:%p\n", area->page_protections);
3705 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3706 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3707 	kprintf("cache:\t\t%p\n", area->cache);
3708 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3709 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3710 	kprintf("cache_next:\t%p\n", area->cache_next);
3711 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3712 
3713 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3714 	if (mappings) {
3715 		kprintf("page mappings:\n");
3716 		while (iterator.HasNext()) {
3717 			vm_page_mapping* mapping = iterator.Next();
3718 			kprintf("  %p", mapping->page);
3719 		}
3720 		kprintf("\n");
3721 	} else {
3722 		uint32 count = 0;
3723 		while (iterator.Next() != NULL) {
3724 			count++;
3725 		}
3726 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3727 	}
3728 }
3729 
3730 
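/*!	KDL command "area": Looks up areas by ID, by an address they contain, or
	by name, and dumps every match via dump_area_struct().
*/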
3731 static int
3732 dump_area(int argc, char** argv)
3733 {
3734 	bool mappings = false;
3735 	bool found = false;
3736 	int32 index = 1;
3737 	VMArea* area;
3738 	addr_t num;
3739 
3740 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3741 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3742 			"All areas matching either id/address/name are listed. You can\n"
3743 			"restrict the check to a specific criterion by prefixing the\n"
3744 			"specifier with one of the id/contains/address/name keywords.\n"
3745 			"-m shows the area's mappings as well.\n");
3746 		return 0;
3747 	}
3748 
3749 	if (!strcmp(argv[1], "-m")) {
3750 		mappings = true;
3751 		index++;
3752 	}
3753 
3754 	int32 mode = 0xf;
3755 	if (!strcmp(argv[index], "id"))
3756 		mode = 1;
3757 	else if (!strcmp(argv[index], "contains"))
3758 		mode = 2;
3759 	else if (!strcmp(argv[index], "name"))
3760 		mode = 4;
3761 	else if (!strcmp(argv[index], "address"))
3762 		mode = 0;
3763 	if (mode != 0xf)
3764 		index++;
3765 
3766 	if (index >= argc) {
3767 		kprintf("No area specifier given.\n");
3768 		return 0;
3769 	}
3770 
3771 	num = parse_expression(argv[index]);
3772 
3773 	if (mode == 0) {
3774 		dump_area_struct((struct VMArea*)num, mappings);
3775 	} else {
3776 		// walk through the area list, looking for the arguments as a name
3777 
3778 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3779 		while ((area = it.Next()) != NULL) {
3780 			if (((mode & 4) != 0
3781 					&& !strcmp(argv[index], area->name))
3782 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3783 					|| (((mode & 2) != 0 && area->Base() <= num
3784 						&& area->Base() + area->Size() > num))))) {
3785 				dump_area_struct(area, mappings);
3786 				found = true;
3787 			}
3788 		}
3789 
3790 		if (!found)
3791 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3792 	}
3793 
3794 	return 0;
3795 }
3796 
3797 
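/*!	KDL command "areas": Lists all areas, optionally filtered by the owning
	team's ID or by a substring of the area name.
*/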
3798 static int
3799 dump_area_list(int argc, char** argv)
3800 {
3801 	VMArea* area;
3802 	const char* name = NULL;
3803 	int32 id = 0;
3804 
3805 	if (argc > 1) {
3806 		id = parse_expression(argv[1]);
3807 		if (id == 0)
3808 			name = argv[1];
3809 	}
3810 
3811 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3812 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3813 		B_PRINTF_POINTER_WIDTH, "size");
3814 
3815 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3816 	while ((area = it.Next()) != NULL) {
3817 		if ((id != 0 && area->address_space->ID() != id)
3818 			|| (name != NULL && strstr(area->name, name) == NULL))
3819 			continue;
3820 
3821 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3822 			area->id, (void*)area->Base(), (void*)area->Size(),
3823 			area->protection, area->wiring, area->name);
3824 	}
3825 	return 0;
3826 }
3827 
3828 
3829 static int
3830 dump_available_memory(int argc, char** argv)
3831 {
3832 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3833 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3834 	return 0;
3835 }
3836 
3837 
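/*!	KDL command "mapping": Without options, prints the low-level mapping
	information for a virtual address; with "-r" or "-p" it performs a reverse
	lookup, listing all virtual mappings of a physical address or of the
	physical page behind a vm_page structure.
*/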
3838 static int
3839 dump_mapping_info(int argc, char** argv)
3840 {
3841 	bool reverseLookup = false;
3842 	bool pageLookup = false;
3843 
3844 	int argi = 1;
3845 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3846 		const char* arg = argv[argi];
3847 		if (strcmp(arg, "-r") == 0) {
3848 			reverseLookup = true;
3849 		} else if (strcmp(arg, "-p") == 0) {
3850 			reverseLookup = true;
3851 			pageLookup = true;
3852 		} else {
3853 			print_debugger_command_usage(argv[0]);
3854 			return 0;
3855 		}
3856 	}
3857 
3858 	// We need at least one argument, the address. Optionally a thread ID can be
3859 	// specified.
3860 	if (argi >= argc || argi + 2 < argc) {
3861 		print_debugger_command_usage(argv[0]);
3862 		return 0;
3863 	}
3864 
3865 	uint64 addressValue;
3866 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3867 		return 0;
3868 
3869 	Team* team = NULL;
3870 	if (argi < argc) {
3871 		uint64 threadID;
3872 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3873 			return 0;
3874 
3875 		Thread* thread = Thread::GetDebug(threadID);
3876 		if (thread == NULL) {
3877 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3878 			return 0;
3879 		}
3880 
3881 		team = thread->team;
3882 	}
3883 
3884 	if (reverseLookup) {
3885 		phys_addr_t physicalAddress;
3886 		if (pageLookup) {
3887 			vm_page* page = (vm_page*)(addr_t)addressValue;
3888 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3889 		} else {
3890 			physicalAddress = (phys_addr_t)addressValue;
3891 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3892 		}
3893 
3894 		kprintf("    Team     Virtual Address      Area\n");
3895 		kprintf("--------------------------------------\n");
3896 
3897 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3898 			Callback()
3899 				:
3900 				fAddressSpace(NULL)
3901 			{
3902 			}
3903 
3904 			void SetAddressSpace(VMAddressSpace* addressSpace)
3905 			{
3906 				fAddressSpace = addressSpace;
3907 			}
3908 
3909 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3910 			{
3911 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3912 					virtualAddress);
3913 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3914 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3915 				else
3916 					kprintf("\n");
3917 				return false;
3918 			}
3919 
3920 		private:
3921 			VMAddressSpace*	fAddressSpace;
3922 		} callback;
3923 
3924 		if (team != NULL) {
3925 			// team specified -- get its address space
3926 			VMAddressSpace* addressSpace = team->address_space;
3927 			if (addressSpace == NULL) {
3928 				kprintf("Failed to get address space!\n");
3929 				return 0;
3930 			}
3931 
3932 			callback.SetAddressSpace(addressSpace);
3933 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3934 				physicalAddress, callback);
3935 		} else {
3936 			// no team specified -- iterate through all address spaces
3937 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3938 				addressSpace != NULL;
3939 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3940 				callback.SetAddressSpace(addressSpace);
3941 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3942 					physicalAddress, callback);
3943 			}
3944 		}
3945 	} else {
3946 		// get the address space
3947 		addr_t virtualAddress = (addr_t)addressValue;
3948 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3949 		VMAddressSpace* addressSpace;
3950 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3951 			addressSpace = VMAddressSpace::Kernel();
3952 		} else if (team != NULL) {
3953 			addressSpace = team->address_space;
3954 		} else {
3955 			Thread* thread = debug_get_debugged_thread();
3956 			if (thread == NULL || thread->team == NULL) {
3957 				kprintf("Failed to get team!\n");
3958 				return 0;
3959 			}
3960 
3961 			addressSpace = thread->team->address_space;
3962 		}
3963 
3964 		if (addressSpace == NULL) {
3965 			kprintf("Failed to get address space!\n");
3966 			return 0;
3967 		}
3968 
3969 		// let the translation map implementation do the job
3970 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3971 	}
3972 
3973 	return 0;
3974 }
3975 
3976 
3977 /*!	Deletes all areas and reserved regions in the given address space.
3978 
3979 	The caller must ensure that none of the areas has any wired ranges.
3980 
3981 	\param addressSpace The address space.
3982 	\param deletingAddressSpace \c true, if the address space is in the process
3983 		of being deleted.
3984 */
3985 void
3986 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3987 {
3988 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3989 		addressSpace->ID()));
3990 
3991 	addressSpace->WriteLock();
3992 
3993 	// remove all reserved areas in this address space
3994 	addressSpace->UnreserveAllAddressRanges(0);
3995 
3996 	// remove all areas from the areas map at once (to avoid lock contention)
3997 	VMAreas::WriteLock();
3998 	{
3999 		VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
4000 		while (VMArea* area = it.Next()) {
4001 			VMAreas::Remove(area);
4002 			area->id = INT32_MIN;
4003 		}
4004 	}
4005 	VMAreas::WriteUnlock();
4006 
4007 	// delete all the areas in this address space
4008 	while (VMArea* area = addressSpace->FirstArea()) {
4009 		ASSERT(!area->IsWired());
4010 		delete_area(addressSpace, area, deletingAddressSpace);
4011 	}
4012 
4013 	addressSpace->WriteUnlock();
4014 }
4015 
4016 
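/*!	Returns the ID of the area containing \a address, looked up in the current
	team's address space for userland addresses or in the kernel address space
	otherwise. Areas with \c B_KERNEL_AREA protection and no user access are
	only returned if \a kernel is \c true.
*/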
4017 static area_id
4018 vm_area_for(addr_t address, bool kernel)
4019 {
4020 	team_id team;
4021 	if (IS_USER_ADDRESS(address)) {
4022 		// we try the user team address space, if any
4023 		team = VMAddressSpace::CurrentID();
4024 		if (team < 0)
4025 			return team;
4026 	} else
4027 		team = VMAddressSpace::KernelID();
4028 
4029 	AddressSpaceReadLocker locker(team);
4030 	if (!locker.IsLocked())
4031 		return B_BAD_TEAM_ID;
4032 
4033 	VMArea* area = locker.AddressSpace()->LookupArea(address);
4034 	if (area != NULL) {
4035 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
4036 				&& (area->protection & B_KERNEL_AREA) != 0)
4037 			return B_ERROR;
4038 
4039 		return area->id;
4040 	}
4041 
4042 	return B_ERROR;
4043 }
4044 
4045 
4046 /*!	Frees physical pages that were used during the boot process.
4047 	\a end is inclusive.
4048 */
4049 static void
4050 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
4051 {
4052 	// free all physical pages in the specified range
4053 
4054 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
4055 		phys_addr_t physicalAddress;
4056 		uint32 flags;
4057 
4058 		if (map->Query(current, &physicalAddress, &flags) == B_OK
4059 			&& (flags & PAGE_PRESENT) != 0) {
4060 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4061 			if (page != NULL && page->State() != PAGE_STATE_FREE
4062 					&& page->State() != PAGE_STATE_CLEAR
4063 					&& page->State() != PAGE_STATE_UNUSED) {
4064 				DEBUG_PAGE_ACCESS_START(page);
4065 				vm_page_set_state(page, PAGE_STATE_FREE);
4066 			}
4067 		}
4068 	}
4069 
4070 	// unmap the memory
4071 	map->Unmap(start, end);
4072 }
4073 
4074 
4075 void
4076 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4077 {
4078 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
4079 	addr_t end = start + (size - 1);
4080 	addr_t lastEnd = start;
4081 
4082 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4083 		(void*)start, (void*)end));
4084 
4085 	// The areas are sorted in virtual address space order, so
4086 	// we just have to find the holes between them that fall
4087 	// into the range we should dispose of
4088 
4089 	map->Lock();
4090 
4091 	for (VMAddressSpace::AreaIterator it
4092 				= VMAddressSpace::Kernel()->GetAreaIterator();
4093 			VMArea* area = it.Next();) {
4094 		addr_t areaStart = area->Base();
4095 		addr_t areaEnd = areaStart + (area->Size() - 1);
4096 
4097 		if (areaEnd < start)
4098 			continue;
4099 
4100 		if (areaStart > end) {
4101 			// we are done, the area is already beyond what we have to free
4102 			break;
4103 		}
4104 
4105 		if (areaStart > lastEnd) {
4106 			// this is something we can free
4107 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4108 				(void*)areaStart));
4109 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4110 		}
4111 
4112 		if (areaEnd >= end) {
4113 			lastEnd = areaEnd;
4114 				// no +1 to prevent potential overflow
4115 			break;
4116 		}
4117 
4118 		lastEnd = areaEnd + 1;
4119 	}
4120 
4121 	if (lastEnd < end) {
4122 		// we can also get rid of some space at the end of the area
4123 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4124 			(void*)end));
4125 		unmap_and_free_physical_pages(map, lastEnd, end);
4126 	}
4127 
4128 	map->Unlock();
4129 }
4130 
4131 
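/*!	Creates the "<file>_text" and "<file>_data" areas wrapping the text and
	data regions the boot loader has already mapped for the given preloaded
	image.
*/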
4132 static void
4133 create_preloaded_image_areas(struct preloaded_image* _image)
4134 {
4135 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4136 	char name[B_OS_NAME_LENGTH];
4137 	void* address;
4138 	int32 length;
4139 
4140 	// use file name to create a good area name
4141 	char* fileName = strrchr(image->name, '/');
4142 	if (fileName == NULL)
4143 		fileName = image->name;
4144 	else
4145 		fileName++;
4146 
4147 	length = strlen(fileName);
4148 	// make sure there is enough space for the suffix
4149 	if (length > 25)
4150 		length = 25;
4151 
4152 	memcpy(name, fileName, length);
4153 	strcpy(name + length, "_text");
4154 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4155 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4156 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4157 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4158 		// this will later be remapped read-only/executable by the
4159 		// ELF initialization code
4160 
4161 	strcpy(name + length, "_data");
4162 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4163 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4164 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4165 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4166 }
4167 
4168 
4169 /*!	Frees all kernel arguments areas previously created from the kernel_args
4170 	structure. Any boot loader resources contained in those arguments must not
4171 	be accessed anymore past this point.
4172 */
4173 void
4174 vm_free_kernel_args(kernel_args* args)
4175 {
4176 	uint32 i;
4177 
4178 	TRACE(("vm_free_kernel_args()\n"));
4179 
4180 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4181 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4182 		if (area >= B_OK)
4183 			delete_area(area);
4184 	}
4185 }
4186 
4187 
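/*!	Creates "_kernel args_" areas covering the kernel_args ranges, so that
	this memory stays reserved until vm_free_kernel_args() deletes them.
*/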
4188 static void
4189 allocate_kernel_args(kernel_args* args)
4190 {
4191 	TRACE(("allocate_kernel_args()\n"));
4192 
4193 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4194 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4195 
4196 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4197 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4198 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4199 	}
4200 }
4201 
4202 
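/*!	Unreserves the kernel address ranges that reserve_boot_loader_ranges()
	reserved for the boot loader allocations.
*/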
4203 static void
4204 unreserve_boot_loader_ranges(kernel_args* args)
4205 {
4206 	TRACE(("unreserve_boot_loader_ranges()\n"));
4207 
4208 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4209 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4210 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4211 			args->virtual_allocated_range[i].size);
4212 	}
4213 }
4214 
4215 
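/*!	Reserves the kernel virtual address ranges the boot loader has already
	allocated, so that no areas get created there prematurely. Ranges outside
	the kernel address space are skipped and left to the architecture-specific
	code.
*/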
4216 static void
4217 reserve_boot_loader_ranges(kernel_args* args)
4218 {
4219 	TRACE(("reserve_boot_loader_ranges()\n"));
4220 
4221 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4222 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4223 
4224 		// If the address is not a kernel address, we just skip it. The
4225 		// architecture-specific code has to deal with it.
4226 		if (!IS_KERNEL_ADDRESS(address)) {
4227 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4228 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4229 			continue;
4230 		}
4231 
4232 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4233 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4234 		if (status < B_OK)
4235 			panic("could not reserve boot loader ranges\n");
4236 	}
4237 }
4238 
4239 
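/*!	Finds a gap of at least \a size bytes (respecting \a alignment) between,
	after, or before the kernel_args' virtual allocated ranges, extends the
	adjoining range to cover it, and returns its base address. Returns 0 on
	failure.
*/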
4240 static addr_t
4241 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4242 {
4243 	size = PAGE_ALIGN(size);
4244 
4245 	// find a slot in the virtual allocation addr range
4246 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4247 		// check to see if the space between this one and the last is big enough
4248 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4249 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4250 			+ args->virtual_allocated_range[i - 1].size;
4251 
4252 		addr_t base = alignment > 0
4253 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4254 
4255 		if (base >= KERNEL_BASE && base < rangeStart
4256 				&& rangeStart - base >= size) {
4257 			args->virtual_allocated_range[i - 1].size
4258 				+= base + size - previousRangeEnd;
4259 			return base;
4260 		}
4261 	}
4262 
4263 	// We didn't find a gap between the allocated ranges. This is OK;
4264 	// see if there's a gap after the last one.
4265 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4266 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4267 		+ args->virtual_allocated_range[lastEntryIndex].size;
4268 	addr_t base = alignment > 0
4269 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4270 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4271 		args->virtual_allocated_range[lastEntryIndex].size
4272 			+= base + size - lastRangeEnd;
4273 		return base;
4274 	}
4275 
4276 	// see if there's a gap before the first one
4277 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4278 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4279 		base = rangeStart - size;
4280 		if (alignment > 0)
4281 			base = ROUNDDOWN(base, alignment);
4282 
4283 		if (base >= KERNEL_BASE) {
4284 			args->virtual_allocated_range[0].start = base;
4285 			args->virtual_allocated_range[0].size += rangeStart - base;
4286 			return base;
4287 		}
4288 	}
4289 
4290 	return 0;
4291 }
4292 
4293 
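/*!	Returns whether the given physical page address lies within one of the
	physical memory ranges known from the boot loader.
*/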
4294 static bool
4295 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4296 {
4297 	// TODO: horrible brute-force method of determining if the page can be
4298 	// allocated
4299 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4300 		if (address >= args->physical_memory_range[i].start
4301 			&& address < args->physical_memory_range[i].start
4302 				+ args->physical_memory_range[i].size)
4303 			return true;
4304 	}
4305 	return false;
4306 }
4307 
4308 
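/*!	Allocates one physical page while the VM is not yet fully up, by growing
	one of the kernel_args' physical allocated ranges upwards or, failing
	that, downwards into adjacent free physical memory. Returns the page
	number, or 0 if no page could be allocated.
*/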
4309 page_num_t
4310 vm_allocate_early_physical_page(kernel_args* args)
4311 {
4312 	if (args->num_physical_allocated_ranges == 0) {
4313 		panic("early physical page allocations no longer possible!");
4314 		return 0;
4315 	}
4316 
4317 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4318 		phys_addr_t nextPage;
4319 
4320 		nextPage = args->physical_allocated_range[i].start
4321 			+ args->physical_allocated_range[i].size;
4322 		// see if the page after this allocated paddr run can be allocated
4323 		if (i + 1 < args->num_physical_allocated_ranges
4324 			&& args->physical_allocated_range[i + 1].size != 0) {
4325 			// see if the next page will collide with the next allocated range
4326 			if (nextPage >= args->physical_allocated_range[i+1].start)
4327 				continue;
4328 		}
4329 		// see if the next physical page fits in the memory block
4330 		if (is_page_in_physical_memory_range(args, nextPage)) {
4331 			// we got one!
4332 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4333 			return nextPage / B_PAGE_SIZE;
4334 		}
4335 	}
4336 
4337 	// Expanding upwards didn't work, try going downwards.
4338 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4339 		phys_addr_t nextPage;
4340 
4341 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4342 		// see if the page before this allocated paddr run can be allocated
4343 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4344 			// see if that page would collide with the previous allocated range
4345 			if (nextPage < args->physical_allocated_range[i-1].start
4346 				+ args->physical_allocated_range[i-1].size)
4347 				continue;
4348 		}
4349 		// see if the next physical page fits in the memory block
4350 		if (is_page_in_physical_memory_range(args, nextPage)) {
4351 			// we got one!
4352 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4353 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4354 			return nextPage / B_PAGE_SIZE;
4355 		}
4356 	}
4357 
4358 	return 0;
4359 		// could not allocate a block
4360 }
4361 
4362 
4363 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4364 	allocate some pages before the VM is completely up.
4365 */
4366 addr_t
4367 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4368 	uint32 attributes, addr_t alignment)
4369 {
4370 	if (physicalSize > virtualSize)
4371 		physicalSize = virtualSize;
4372 
4373 	// find the vaddr to allocate at
4374 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4375 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4376 	if (virtualBase == 0) {
4377 		panic("vm_allocate_early: could not allocate virtual address\n");
4378 		return 0;
4379 	}
4380 
4381 	// map the pages
4382 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4383 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4384 		if (physicalAddress == 0)
4385 			panic("error allocating early page!\n");
4386 
4387 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4388 
4389 		status_t status = arch_vm_translation_map_early_map(args,
4390 			virtualBase + i * B_PAGE_SIZE,
4391 			physicalAddress * B_PAGE_SIZE, attributes,
4392 			&vm_allocate_early_physical_page);
4393 		if (status != B_OK)
4394 			panic("error mapping early page!");
4395 	}
4396 
4397 	return virtualBase;
4398 }
4399 
4400 
4401 /*!	The main entry point to initialize the VM. */
4402 status_t
4403 vm_init(kernel_args* args)
4404 {
4405 	struct preloaded_image* image;
4406 	void* address;
4407 	status_t err = 0;
4408 	uint32 i;
4409 
4410 	TRACE(("vm_init: entry\n"));
4411 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4412 	err = arch_vm_init(args);
4413 
4414 	// initialize some globals
4415 	vm_page_init_num_pages(args);
4416 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4417 
4418 	slab_init(args);
4419 
4420 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4421 	off_t heapSize = INITIAL_HEAP_SIZE;
4422 	// try to accommodate low-memory systems
4423 	while (heapSize > sAvailableMemory / 8)
4424 		heapSize /= 2;
4425 	if (heapSize < 1024 * 1024)
4426 		panic("vm_init: go buy some RAM please.");
4427 
4428 	// map in the new heap and initialize it
4429 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4430 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4431 	TRACE(("heap at 0x%lx\n", heapBase));
4432 	heap_init(heapBase, heapSize);
4433 #endif
4434 
4435 	// initialize the free page list and physical page mapper
4436 	vm_page_init(args);
4437 
4438 	// initialize the cache allocators
4439 	vm_cache_init(args);
4440 
4441 	{
4442 		status_t error = VMAreas::Init();
4443 		if (error != B_OK)
4444 			panic("vm_init: error initializing areas map\n");
4445 	}
4446 
4447 	VMAddressSpace::Init();
4448 	reserve_boot_loader_ranges(args);
4449 
4450 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4451 	heap_init_post_area();
4452 #endif
4453 
4454 	// Do any further initialization that the architecture-dependent layers may
4455 	// need now
4456 	arch_vm_translation_map_init_post_area(args);
4457 	arch_vm_init_post_area(args);
4458 	vm_page_init_post_area(args);
4459 	slab_init_post_area();
4460 
4461 	// allocate areas to represent stuff that already exists
4462 
4463 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4464 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4465 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4466 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4467 #endif
4468 
4469 	allocate_kernel_args(args);
4470 
4471 	create_preloaded_image_areas(args->kernel_image);
4472 
4473 	// allocate areas for preloaded images
4474 	for (image = args->preloaded_images; image != NULL; image = image->next)
4475 		create_preloaded_image_areas(image);
4476 
4477 	// allocate kernel stacks
4478 	for (i = 0; i < args->num_cpus; i++) {
4479 		char name[64];
4480 
4481 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4482 		address = (void*)args->cpu_kstack[i].start;
4483 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4484 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4485 	}
4486 
4487 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4488 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4489 
4490 #if PARANOID_KERNEL_MALLOC
4491 	vm_block_address_range("uninitialized heap memory",
4492 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4493 #endif
4494 #if PARANOID_KERNEL_FREE
4495 	vm_block_address_range("freed heap memory",
4496 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4497 #endif
4498 
4499 	create_page_mappings_object_caches();
4500 
4501 #if DEBUG_CACHE_LIST
4502 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4503 		virtual_address_restrictions virtualRestrictions = {};
4504 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4505 		physical_address_restrictions physicalRestrictions = {};
4506 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4507 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4508 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4509 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4510 			&physicalRestrictions, (void**)&sCacheInfoTable);
4511 	}
4512 #endif	// DEBUG_CACHE_LIST
4513 
4514 	// add some debugger commands
4515 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4516 	add_debugger_command("area", &dump_area,
4517 		"Dump info about a particular area");
4518 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4519 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4520 #if DEBUG_CACHE_LIST
4521 	if (sCacheInfoTable != NULL) {
4522 		add_debugger_command_etc("caches", &dump_caches,
4523 			"List all VMCache trees",
4524 			"[ \"-c\" ]\n"
4525 			"All cache trees are listed sorted in decreasing order by number "
4526 				"of\n"
4527 			"used pages or, if \"-c\" is specified, by size of committed "
4528 				"memory.\n",
4529 			0);
4530 	}
4531 #endif
4532 	add_debugger_command("avail", &dump_available_memory,
4533 		"Dump available memory");
4534 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4535 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4536 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4537 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4538 	add_debugger_command("string", &display_mem, "dump strings");
4539 
4540 	add_debugger_command_etc("mapping", &dump_mapping_info,
4541 		"Print address mapping information",
4542 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4543 		"Prints low-level page mapping information for a given address. If\n"
4544 		"neither \"-r\" nor \"-p\" is specified, <address> is a virtual\n"
4545 		"address that is looked up in the translation map of the current\n"
4546 		"team, or of the team specified by thread ID <thread ID>. If \"-r\"\n"
4547 		"is specified, <address> is a physical address that is searched for\n"
4548 		"in the translation maps of all teams, or of the team specified by\n"
4549 		"thread ID <thread ID>. If \"-p\" is specified,\n"
4550 		"<address> is the address of a vm_page structure. The behavior is\n"
4551 		"equivalent to specifying \"-r\" with the physical address of that\n"
4552 		"page.\n",
4553 		0);
4554 
4555 	TRACE(("vm_init: exit\n"));
4556 
4557 	vm_cache_init_post_heap();
4558 
4559 	return err;
4560 }
4561 
4562 
4563 status_t
4564 vm_init_post_sem(kernel_args* args)
4565 {
4566 	// This frees all unused boot loader resources and makes their space
4567 	// available again
4568 	arch_vm_init_end(args);
4569 	unreserve_boot_loader_ranges(args);
4570 
4571 	// Fill in all of the semaphores that were not allocated before.
4572 	// Since we're still single-threaded and only the kernel address space
4573 	// exists, it isn't that hard to find all of the ones we need to create.
4574 
4575 	arch_vm_translation_map_init_post_sem(args);
4576 
4577 	slab_init_post_sem();
4578 
4579 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4580 	heap_init_post_sem();
4581 #endif
4582 
4583 	return B_OK;
4584 }
4585 
4586 
4587 status_t
4588 vm_init_post_thread(kernel_args* args)
4589 {
4590 	vm_page_init_post_thread(args);
4591 	slab_init_post_thread();
4592 	return heap_init_post_thread();
4593 }
4594 
4595 
4596 status_t
4597 vm_init_post_modules(kernel_args* args)
4598 {
4599 	return arch_vm_init_post_modules(args);
4600 }
4601 
4602 
4603 void
4604 permit_page_faults(void)
4605 {
4606 	Thread* thread = thread_get_current_thread();
4607 	if (thread != NULL)
4608 		atomic_add(&thread->page_faults_allowed, 1);
4609 }
4610 
4611 
4612 void
4613 forbid_page_faults(void)
4614 {
4615 	Thread* thread = thread_get_current_thread();
4616 	if (thread != NULL)
4617 		atomic_add(&thread->page_faults_allowed, -1);
4618 }
4619 
4620 
4621 status_t
4622 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4623 	bool isUser, addr_t* newIP)
4624 {
4625 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4626 		faultAddress));
4627 
4628 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4629 
4630 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4631 	VMAddressSpace* addressSpace = NULL;
4632 
4633 	status_t status = B_OK;
4634 	*newIP = 0;
4635 	atomic_add((int32*)&sPageFaults, 1);
4636 
4637 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4638 		addressSpace = VMAddressSpace::GetKernel();
4639 	} else if (IS_USER_ADDRESS(pageAddress)) {
4640 		addressSpace = VMAddressSpace::GetCurrent();
4641 		if (addressSpace == NULL) {
4642 			if (!isUser) {
4643 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4644 					"memory!\n");
4645 				status = B_BAD_ADDRESS;
4646 				TPF(PageFaultError(-1,
4647 					VMPageFaultTracing
4648 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4649 			} else {
4650 				// XXX weird state.
4651 				panic("vm_page_fault: non kernel thread accessing user memory "
4652 					"that doesn't exist!\n");
4653 				status = B_BAD_ADDRESS;
4654 			}
4655 		}
4656 	} else {
4657 		// the hit was probably in the 64k DMZ between kernel and user space
4658 		// this keeps a user space thread from passing a buffer that crosses
4659 		// into kernel space
4660 		status = B_BAD_ADDRESS;
4661 		TPF(PageFaultError(-1,
4662 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4663 	}
4664 
4665 	if (status == B_OK) {
4666 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4667 			isUser, NULL);
4668 	}
4669 
4670 	if (status < B_OK) {
4671 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4672 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4673 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4674 			thread_get_current_thread_id());
4675 		if (!isUser) {
4676 			Thread* thread = thread_get_current_thread();
4677 			if (thread != NULL && thread->fault_handler != 0) {
4678 				// this will cause the arch dependant page fault handler to
4679 				// modify the IP on the interrupt frame or whatever to return
4680 				// to this address
4681 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4682 			} else {
4683 				// unhandled page fault in the kernel
4684 				panic("vm_page_fault: unhandled page fault in kernel space at "
4685 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4686 			}
4687 		} else {
4688 			Thread* thread = thread_get_current_thread();
4689 
4690 #ifdef TRACE_FAULTS
4691 			VMArea* area = NULL;
4692 			if (addressSpace != NULL) {
4693 				addressSpace->ReadLock();
4694 				area = addressSpace->LookupArea(faultAddress);
4695 			}
4696 
4697 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4698 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4699 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4700 				thread->team->Name(), thread->team->id,
4701 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4702 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4703 					area->Base() : 0x0));
4704 
4705 			if (addressSpace != NULL)
4706 				addressSpace->ReadUnlock();
4707 #endif
4708 
4709 			// If the thread has a signal handler for SIGSEGV, we simply
4710 			// send it the signal. Otherwise we notify the user debugger
4711 			// first.
4712 			struct sigaction action;
4713 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4714 					&& action.sa_handler != SIG_DFL
4715 					&& action.sa_handler != SIG_IGN)
4716 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4717 					SIGSEGV)) {
4718 				Signal signal(SIGSEGV,
4719 					status == B_PERMISSION_DENIED
4720 						? SEGV_ACCERR : SEGV_MAPERR,
4721 					EFAULT, thread->team->id);
4722 				signal.SetAddress((void*)address);
4723 				send_signal_to_thread(thread, signal, 0);
4724 			}
4725 		}
4726 	}
4727 
4728 	if (addressSpace != NULL)
4729 		addressSpace->Put();
4730 
4731 	return B_HANDLED_INTERRUPT;
4732 }
4733 
4734 
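/*!	Bundles all state needed while resolving a single page fault: the locked
	address space and cache chain, the reserved pages, and the resulting page.
*/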
4735 struct PageFaultContext {
4736 	AddressSpaceReadLocker	addressSpaceLocker;
4737 	VMCacheChainLocker		cacheChainLocker;
4738 
4739 	VMTranslationMap*		map;
4740 	VMCache*				topCache;
4741 	off_t					cacheOffset;
4742 	vm_page_reservation		reservation;
4743 	bool					isWrite;
4744 
4745 	// return values
4746 	vm_page*				page;
4747 	bool					restart;
4748 	bool					pageAllocated;
4749 
4750 
4751 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4752 		:
4753 		addressSpaceLocker(addressSpace, true),
4754 		map(addressSpace->TranslationMap()),
4755 		isWrite(isWrite)
4756 	{
4757 	}
4758 
4759 	~PageFaultContext()
4760 	{
4761 		UnlockAll();
4762 		vm_page_unreserve_pages(&reservation);
4763 	}
4764 
4765 	void Prepare(VMCache* topCache, off_t cacheOffset)
4766 	{
4767 		this->topCache = topCache;
4768 		this->cacheOffset = cacheOffset;
4769 		page = NULL;
4770 		restart = false;
4771 		pageAllocated = false;
4772 
4773 		cacheChainLocker.SetTo(topCache);
4774 	}
4775 
4776 	void UnlockAll(VMCache* exceptCache = NULL)
4777 	{
4778 		topCache = NULL;
4779 		addressSpaceLocker.Unlock();
4780 		cacheChainLocker.Unlock(exceptCache);
4781 	}
4782 };
4783 
4784 
4785 /*!	Gets the page that should be mapped into the area.
4786 	Returns an error code other than \c B_OK, if the page couldn't be found or
4787 	paged in. The locking state of the address space and the caches is undefined
4788 	in that case.
4789 	Returns \c B_OK with \c context.restart set to \c true, if the function
4790 	had to unlock the address space and all caches and is supposed to be called
4791 	again.
4792 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4793 	found. It is returned in \c context.page. The address space will still be
4794 	locked as well as all caches starting from the top cache to at least the
4795 	cache the page lives in.
4796 */
4797 static status_t
4798 fault_get_page(PageFaultContext& context)
4799 {
4800 	VMCache* cache = context.topCache;
4801 	VMCache* lastCache = NULL;
4802 	vm_page* page = NULL;
4803 
4804 	while (cache != NULL) {
4805 		// We already hold the lock of the cache at this point.
4806 
4807 		lastCache = cache;
4808 
4809 		page = cache->LookupPage(context.cacheOffset);
4810 		if (page != NULL && page->busy) {
4811 			// the page is busy -- wait for it to become unbusy
4812 			context.UnlockAll(cache);
4813 			cache->ReleaseRefLocked();
4814 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4815 
4816 			// restart the whole process
4817 			context.restart = true;
4818 			return B_OK;
4819 		}
4820 
4821 		if (page != NULL)
4822 			break;
4823 
4824 		// The current cache does not contain the page we're looking for.
4825 
4826 		// see if the backing store has it
4827 		if (cache->HasPage(context.cacheOffset)) {
4828 			// insert a fresh page and mark it busy -- we're going to read it in
4829 			page = vm_page_allocate_page(&context.reservation,
4830 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4831 			cache->InsertPage(page, context.cacheOffset);
4832 
4833 			// We need to unlock all caches and the address space while reading
4834 			// the page in. Keep a reference to the cache around.
4835 			cache->AcquireRefLocked();
4836 			context.UnlockAll();
4837 
4838 			// read the page in
4839 			generic_io_vec vec;
4840 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4841 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4842 
4843 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4844 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4845 
4846 			cache->Lock();
4847 
4848 			if (status < B_OK) {
4849 				// on error remove and free the page
4850 				dprintf("reading page from cache %p returned: %s!\n",
4851 					cache, strerror(status));
4852 
4853 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4854 				cache->RemovePage(page);
4855 				vm_page_set_state(page, PAGE_STATE_FREE);
4856 
4857 				cache->ReleaseRefAndUnlock();
4858 				return status;
4859 			}
4860 
4861 			// mark the page unbusy again
4862 			cache->MarkPageUnbusy(page);
4863 
4864 			DEBUG_PAGE_ACCESS_END(page);
4865 
4866 			// Since we needed to unlock everything temporarily, the area
4867 			// situation might have changed. So we need to restart the whole
4868 			// process.
4869 			cache->ReleaseRefAndUnlock();
4870 			context.restart = true;
4871 			return B_OK;
4872 		}
4873 
4874 		cache = context.cacheChainLocker.LockSourceCache();
4875 	}
4876 
4877 	if (page == NULL) {
4878 		// There was no adequate page, determine the cache for a clean one.
4879 		// Read-only pages come in the deepest cache, only the top most cache
4880 		// Read-only pages go into the deepest cache; only the topmost cache
4881 		// may have direct write access.
4882 
4883 		// allocate a clean page
4884 		page = vm_page_allocate_page(&context.reservation,
4885 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4886 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4887 			page->physical_page_number));
4888 
4889 		// insert the new page into our cache
4890 		cache->InsertPage(page, context.cacheOffset);
4891 		context.pageAllocated = true;
4892 	} else if (page->Cache() != context.topCache && context.isWrite) {
4893 		// We have a page that has the data we want, but in the wrong cache
4894 		// object so we need to copy it and stick it into the top cache.
4895 		vm_page* sourcePage = page;
4896 
4897 		// TODO: If memory is low, it might be a good idea to steal the page
4898 		// from our source cache -- if possible, that is.
4899 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4900 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4901 
4902 		// To not needlessly kill concurrency we unlock all caches but the top
4903 		// one while copying the page. Lacking another mechanism to ensure that
4904 		// the source page doesn't disappear, we mark it busy.
4905 		sourcePage->busy = true;
4906 		context.cacheChainLocker.UnlockKeepRefs(true);
4907 
4908 		// copy the page
4909 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4910 			sourcePage->physical_page_number * B_PAGE_SIZE);
4911 
4912 		context.cacheChainLocker.RelockCaches(true);
4913 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4914 
4915 		// insert the new page into our cache
4916 		context.topCache->InsertPage(page, context.cacheOffset);
4917 		context.pageAllocated = true;
4918 	} else
4919 		DEBUG_PAGE_ACCESS_START(page);
4920 
4921 	context.page = page;
4922 	return B_OK;
4923 }
4924 
4925 
4926 /*!	Makes sure the address in the given address space is mapped.
4927 
4928 	\param addressSpace The address space.
4929 	\param originalAddress The address. Doesn't need to be page aligned.
4930 	\param isWrite If \c true the address shall be write-accessible.
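	\param isExecute If \c true the address shall be executable.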
4931 	\param isUser If \c true the access is requested by a userland team.
4932 	\param wirePage On success, if non \c NULL, the wired count of the page
4933 		mapped at the given address is incremented and the page is returned
4934 		via this parameter.
4935 	\return \c B_OK on success, another error code otherwise.
4936 */
4937 static status_t
4938 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4939 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4940 {
4941 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4942 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4943 		originalAddress, isWrite, isUser));
4944 
4945 	PageFaultContext context(addressSpace, isWrite);
4946 
4947 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4948 	status_t status = B_OK;
4949 
4950 	addressSpace->IncrementFaultCount();
4951 
4952 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4953 	// the pages upfront makes sure we don't have any cache locked, so that the
4954 	// page daemon/thief can do their job without problems.
4955 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4956 		originalAddress);
4957 	context.addressSpaceLocker.Unlock();
4958 	vm_page_reserve_pages(&context.reservation, reservePages,
4959 		addressSpace == VMAddressSpace::Kernel()
4960 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4961 
4962 	while (true) {
4963 		context.addressSpaceLocker.Lock();
4964 
4965 		// get the area the fault was in
4966 		VMArea* area = addressSpace->LookupArea(address);
4967 		if (area == NULL) {
4968 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4969 				"space\n", originalAddress);
4970 			TPF(PageFaultError(-1,
4971 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4972 			status = B_BAD_ADDRESS;
4973 			break;
4974 		}
4975 
4976 		// check permissions
4977 		uint32 protection = get_area_page_protection(area, address);
4978 		if (isUser && (protection & B_USER_PROTECTION) == 0
4979 				&& (area->protection & B_KERNEL_AREA) != 0) {
4980 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4981 				area->id, (void*)originalAddress);
4982 			TPF(PageFaultError(area->id,
4983 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4984 			status = B_PERMISSION_DENIED;
4985 			break;
4986 		}
4987 		if (isWrite && (protection
4988 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4989 			dprintf("write access attempted on write-protected area 0x%"
4990 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4991 			TPF(PageFaultError(area->id,
4992 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4993 			status = B_PERMISSION_DENIED;
4994 			break;
4995 		} else if (isExecute && (protection
4996 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4997 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4998 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4999 			TPF(PageFaultError(area->id,
5000 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
5001 			status = B_PERMISSION_DENIED;
5002 			break;
5003 		} else if (!isWrite && !isExecute && (protection
5004 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
5005 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
5006 				" at %p\n", area->id, (void*)originalAddress);
5007 			TPF(PageFaultError(area->id,
5008 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
5009 			status = B_PERMISSION_DENIED;
5010 			break;
5011 		}
5012 
5013 		// We have the area, it was a valid access, so let's try to resolve the
5014 		// page fault now.
5015 		// At first, the top most cache from the area is investigated.
5016 
5017 		context.Prepare(vm_area_get_locked_cache(area),
5018 			address - area->Base() + area->cache_offset);
5019 
5020 		// See if this cache has a fault handler -- this will do all the work
5021 		// for us.
5022 		{
5023 			// Note, since the page fault is resolved with interrupts enabled,
5024 			// the fault handler could be called more than once for the same
5025 			// reason -- the store must take this into account.
5026 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
5027 			if (status != B_BAD_HANDLER)
5028 				break;
5029 		}
5030 
5031 		// The top most cache has no fault handler, so let's see if the cache or
5032 		// its sources already have the page we're searching for (we're going
5033 		// from top to bottom).
5034 		status = fault_get_page(context);
5035 		if (status != B_OK) {
5036 			TPF(PageFaultError(area->id, status));
5037 			break;
5038 		}
5039 
5040 		if (context.restart)
5041 			continue;
5042 
5043 		// All went fine, all there is left to do is to map the page into the
5044 		// address space.
5045 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
5046 			context.page));
5047 
5048 		// If the page doesn't reside in the area's cache, we need to make sure
5049 		// it's mapped in read-only, so that we cannot overwrite someone else's
5050 		// data (copy-on-write)
5051 		uint32 newProtection = protection;
5052 		if (context.page->Cache() != context.topCache && !isWrite)
5053 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
5054 
5055 		bool unmapPage = false;
5056 		bool mapPage = true;
5057 
5058 		// check whether there's already a page mapped at the address
5059 		context.map->Lock();
5060 
5061 		phys_addr_t physicalAddress;
5062 		uint32 flags;
5063 		vm_page* mappedPage = NULL;
5064 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
5065 			&& (flags & PAGE_PRESENT) != 0
5066 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5067 				!= NULL) {
5068 			// Yep there's already a page. If it's ours, we can simply adjust
5069 			// its protection. Otherwise we have to unmap it.
5070 			if (mappedPage == context.page) {
5071 				context.map->ProtectPage(area, address, newProtection);
5072 					// Note: We assume that ProtectPage() is atomic (i.e.
5073 					// the page isn't temporarily unmapped), otherwise we'd have
5074 					// to make sure it isn't wired.
5075 				mapPage = false;
5076 			} else
5077 				unmapPage = true;
5078 		}
5079 
5080 		context.map->Unlock();
5081 
5082 		if (unmapPage) {
5083 			// If the page is wired, we can't unmap it. Wait until it is unwired
5084 			// again and restart. Note that the page cannot be wired for
5085 			// writing, since it it isn't in the topmost cache. So we can safely
5086 			// writing, since it isn't in the topmost cache. So we can safely
5087 			// wiring attempts in progress) and in fact have to do that to avoid
5088 			// a deadlock.
5089 			VMAreaUnwiredWaiter waiter;
5090 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5091 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5092 				// unlock everything and wait
5093 				if (context.pageAllocated) {
5094 					// ... but since we allocated a page and inserted it into
5095 					// the top cache, remove and free it first. Otherwise we'd
5096 					// have a page from a lower cache mapped while an upper
5097 					// cache has a page that would shadow it.
5098 					context.topCache->RemovePage(context.page);
5099 					vm_page_free_etc(context.topCache, context.page,
5100 						&context.reservation);
5101 				} else
5102 					DEBUG_PAGE_ACCESS_END(context.page);
5103 
5104 				context.UnlockAll();
5105 				waiter.waitEntry.Wait();
5106 				continue;
5107 			}
5108 
5109 			// Note: The mapped page is a page of a lower cache. We are
5110 			// guaranteed to have that cached locked, our new page is a copy of
5111 			// guaranteed to have that cache locked, our new page is a copy of
5112 			// is as follows: Since the page is mapped, it must live in the top
5113 			// cache (ruled out above) or any of its lower caches, and there is
5114 			// (was before the new page was inserted) no other page in any
5115 			// cache between the top cache and the page's cache (otherwise that
5116 			// would be mapped instead). That in turn means that our algorithm
5117 			// must have found it and therefore it cannot be busy either.
5118 			DEBUG_PAGE_ACCESS_START(mappedPage);
5119 			unmap_page(area, address);
5120 			DEBUG_PAGE_ACCESS_END(mappedPage);
5121 		}
5122 
5123 		if (mapPage) {
5124 			if (map_page(area, context.page, address, newProtection,
5125 					&context.reservation) != B_OK) {
5126 				// Mapping can only fail, when the page mapping object couldn't
5127 				// be allocated. Save for the missing mapping everything is
5128 				// fine, though. If this was a regular page fault, we'll simply
5129 				// leave and probably fault again. To make sure we'll have more
5130 				// luck then, we ensure that the minimum object reserve is
5131 				// available.
5132 				DEBUG_PAGE_ACCESS_END(context.page);
5133 
5134 				context.UnlockAll();
5135 
5136 				if (object_cache_reserve(page_mapping_object_cache_for(
5137 							context.page->physical_page_number), 1, 0)
5138 						!= B_OK) {
5139 					// Apparently the situation is serious. Let's get ourselves
5140 					// killed.
5141 					status = B_NO_MEMORY;
5142 				} else if (wirePage != NULL) {
5143 					// The caller expects us to wire the page. Since
5144 					// object_cache_reserve() succeeded, we should now be able
5145 					// to allocate a mapping structure. Restart.
5146 					continue;
5147 				}
5148 
5149 				break;
5150 			}
5151 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5152 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5153 
5154 		// also wire the page, if requested
5155 		if (wirePage != NULL && status == B_OK) {
5156 			increment_page_wired_count(context.page);
5157 			*wirePage = context.page;
5158 		}
5159 
5160 		DEBUG_PAGE_ACCESS_END(context.page);
5161 
5162 		break;
5163 	}
5164 
5165 	return status;
5166 }
5167 
5168 
5169 status_t
5170 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5171 {
5172 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5173 }
5174 
5175 status_t
5176 vm_put_physical_page(addr_t vaddr, void* handle)
5177 {
5178 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5179 }
5180 
5181 
5182 status_t
5183 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5184 	void** _handle)
5185 {
5186 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5187 }
5188 
5189 status_t
5190 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5191 {
5192 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5193 }
5194 
5195 
5196 status_t
5197 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5198 {
5199 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5200 }
5201 
5202 status_t
5203 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5204 {
5205 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5206 }
5207 
5208 
5209 void
5210 vm_get_info(system_info* info)
5211 {
5212 	swap_get_info(info);
5213 
5214 	MutexLocker locker(sAvailableMemoryLock);
5215 	info->needed_memory = sNeededMemory;
5216 	info->free_memory = sAvailableMemory;
5217 }
5218 
5219 
5220 uint32
5221 vm_num_page_faults(void)
5222 {
5223 	return sPageFaults;
5224 }
5225 
5226 
5227 off_t
5228 vm_available_memory(void)
5229 {
5230 	MutexLocker locker(sAvailableMemoryLock);
5231 	return sAvailableMemory;
5232 }
5233 
5234 
5235 off_t
5236 vm_available_not_needed_memory(void)
5237 {
5238 	MutexLocker locker(sAvailableMemoryLock);
5239 	return sAvailableMemory - sNeededMemory;
5240 }
5241 
5242 
5243 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5244 	debugger.
5245 */
5246 off_t
5247 vm_available_not_needed_memory_debug(void)
5248 {
5249 	return sAvailableMemory - sNeededMemory;
5250 }
5251 
5252 
5253 size_t
5254 vm_kernel_address_space_left(void)
5255 {
5256 	return VMAddressSpace::Kernel()->FreeSpace();
5257 }
5258 
5259 
5260 void
5261 vm_unreserve_memory(size_t amount)
5262 {
5263 	mutex_lock(&sAvailableMemoryLock);
5264 
5265 	sAvailableMemory += amount;
5266 
5267 	mutex_unlock(&sAvailableMemoryLock);
5268 }
5269 
5270 
5271 status_t
5272 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5273 {
5274 	size_t reserve = kMemoryReserveForPriority[priority];
5275 
5276 	MutexLocker locker(sAvailableMemoryLock);
5277 
5278 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5279 
5280 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5281 		sAvailableMemory -= amount;
5282 		return B_OK;
5283 	}
5284 
5285 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
5286 		// Do not wait for something that will never happen.
5287 		return B_NO_MEMORY;
5288 	}
5289 
5290 	if (timeout <= 0)
5291 		return B_NO_MEMORY;
5292 
5293 	// turn timeout into an absolute timeout
5294 	timeout += system_time();
5295 
5296 	// loop until we've got the memory or the timeout occurs
5297 	do {
5298 		sNeededMemory += amount;
5299 
5300 		// call the low resource manager
5301 		locker.Unlock();
5302 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5303 			B_ABSOLUTE_TIMEOUT, timeout);
5304 		locker.Lock();
5305 
5306 		sNeededMemory -= amount;
5307 
5308 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5309 			sAvailableMemory -= amount;
5310 			return B_OK;
5311 		}
5312 	} while (timeout > system_time());
5313 
5314 	return B_NO_MEMORY;
5315 }
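
// Illustrative pairing (editor's sketch, not part of the original source):
// a successful vm_try_reserve_memory() is later balanced by
// vm_unreserve_memory() with the same amount. "size" and the one-second
// timeout below are placeholders.
//
//     if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) == B_OK) {
//         // ... commit/use the reserved amount ...
//         vm_unreserve_memory(size);
//     }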
5316 
5317 
5318 status_t
5319 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5320 {
5321 	// NOTE: The caller is responsible for synchronizing calls to this function!
5322 
5323 	AddressSpaceReadLocker locker;
5324 	VMArea* area;
5325 	status_t status = locker.SetFromArea(id, area);
5326 	if (status != B_OK)
5327 		return status;
5328 
5329 	// nothing to do, if the type doesn't change
5330 	uint32 oldType = area->MemoryType();
5331 	if (type == oldType)
5332 		return B_OK;
5333 
5334 	// set the memory type of the area and the mapped pages
5335 	VMTranslationMap* map = area->address_space->TranslationMap();
5336 	map->Lock();
5337 	area->SetMemoryType(type);
5338 	map->ProtectArea(area, area->protection);
5339 	map->Unlock();
5340 
5341 	// set the physical memory type
5342 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5343 	if (error != B_OK) {
5344 		// reset the memory type of the area and the mapped pages
5345 		map->Lock();
5346 		area->SetMemoryType(oldType);
5347 		map->ProtectArea(area, area->protection);
5348 		map->Unlock();
5349 		return error;
5350 	}
5351 
5352 	return B_OK;
5353 
5354 }
5355 
5356 
5357 /*!	This function enforces some protection properties:
5358 	 - kernel areas must be W^X (after kernel startup)
5359 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5360 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5361 */
5362 static void
5363 fix_protection(uint32* protection)
5364 {
5365 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5366 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5367 			|| (*protection & B_WRITE_AREA) != 0)
5368 		&& !gKernelStartup)
5369 		panic("kernel areas cannot be both writable and executable!");
5370 
5371 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5372 		if ((*protection & B_WRITE_AREA) != 0)
5373 			*protection |= B_KERNEL_WRITE_AREA;
5374 		if ((*protection & B_READ_AREA) != 0)
5375 			*protection |= B_KERNEL_READ_AREA;
5376 	}
5377 }
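
// Worked example (editor's note): for a userland request of
// B_READ_AREA | B_WRITE_AREA no kernel protection bits are set yet, so
// fix_protection() adds B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA. Combining
// B_KERNEL_EXECUTE_AREA with any write bit panics once kernel startup is
// over (W^X).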
5378 
5379 
5380 static void
5381 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5382 {
5383 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5384 	info->area = area->id;
5385 	info->address = (void*)area->Base();
5386 	info->size = area->Size();
5387 	info->protection = area->protection;
5388 	info->lock = area->wiring;
5389 	info->team = area->address_space->ID();
5390 	info->copy_count = 0;
5391 	info->in_count = 0;
5392 	info->out_count = 0;
5393 		// TODO: retrieve real values here!
5394 
5395 	VMCache* cache = vm_area_get_locked_cache(area);
5396 
5397 	// Note, this is a simplification; the cache could be larger than this area
5398 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5399 
5400 	vm_area_put_locked_cache(cache);
5401 }
5402 
5403 
5404 static status_t
5405 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5406 {
5407 	// is newSize a multiple of B_PAGE_SIZE?
5408 	if (newSize & (B_PAGE_SIZE - 1))
5409 		return B_BAD_VALUE;
5410 
5411 	// lock all affected address spaces and the cache
5412 	VMArea* area;
5413 	VMCache* cache;
5414 
5415 	MultiAddressSpaceLocker locker;
5416 	AreaCacheLocker cacheLocker;
5417 
5418 	status_t status;
5419 	size_t oldSize;
5420 	bool anyKernelArea;
5421 	bool restart;
5422 
5423 	do {
5424 		anyKernelArea = false;
5425 		restart = false;
5426 
5427 		locker.Unset();
5428 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5429 		if (status != B_OK)
5430 			return status;
5431 		cacheLocker.SetTo(cache, true);	// already locked
5432 
5433 		// enforce restrictions
5434 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5435 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5436 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5437 				"resize kernel area %" B_PRId32 " (%s)\n",
5438 				team_get_current_team_id(), areaID, area->name);
5439 			return B_NOT_ALLOWED;
5440 		}
5441 		// TODO: Enforce all restrictions (team, etc.)!
5442 
5443 		oldSize = area->Size();
5444 		if (newSize == oldSize)
5445 			return B_OK;
5446 
5447 		if (cache->type != CACHE_TYPE_RAM)
5448 			return B_NOT_ALLOWED;
5449 
5450 		if (oldSize < newSize) {
5451 			// We need to check if all areas of this cache can be resized.
5452 			for (VMArea* current = cache->areas; current != NULL;
5453 					current = current->cache_next) {
5454 				if (!current->address_space->CanResizeArea(current, newSize))
5455 					return B_ERROR;
5456 				anyKernelArea
5457 					|= current->address_space == VMAddressSpace::Kernel();
5458 			}
5459 		} else {
5460 			// We're shrinking the areas, so we must make sure the affected
5461 			// ranges are not wired.
5462 			for (VMArea* current = cache->areas; current != NULL;
5463 					current = current->cache_next) {
5464 				anyKernelArea
5465 					|= current->address_space == VMAddressSpace::Kernel();
5466 
5467 				if (wait_if_area_range_is_wired(current,
5468 						current->Base() + newSize, oldSize - newSize, &locker,
5469 						&cacheLocker)) {
5470 					restart = true;
5471 					break;
5472 				}
5473 			}
5474 		}
5475 	} while (restart);
5476 
5477 	// Okay, looks good so far, so let's do it
5478 
5479 	int priority = kernel && anyKernelArea
5480 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5481 	uint32 allocationFlags = kernel && anyKernelArea
5482 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5483 
5484 	if (oldSize < newSize) {
5485 		// Growing the cache can fail, so we do it first.
5486 		status = cache->Resize(cache->virtual_base + newSize, priority);
5487 		if (status != B_OK)
5488 			return status;
5489 	}
5490 
5491 	for (VMArea* current = cache->areas; current != NULL;
5492 			current = current->cache_next) {
5493 		status = current->address_space->ResizeArea(current, newSize,
5494 			allocationFlags);
5495 		if (status != B_OK)
5496 			break;
5497 
5498 		// We also need to unmap all pages beyond the new size, if the area has
5499 		// shrunk
5500 		if (newSize < oldSize) {
5501 			VMCacheChainLocker cacheChainLocker(cache);
5502 			cacheChainLocker.LockAllSourceCaches();
5503 
5504 			unmap_pages(current, current->Base() + newSize,
5505 				oldSize - newSize);
5506 
5507 			cacheChainLocker.Unlock(cache);
5508 		}
5509 	}
5510 
5511 	if (status == B_OK) {
5512 		// Shrink or grow individual page protections if in use.
5513 		if (area->page_protections != NULL) {
5514 			size_t bytes = area_page_protections_size(newSize);
5515 			uint8* newProtections
5516 				= (uint8*)realloc(area->page_protections, bytes);
5517 			if (newProtections == NULL)
5518 				status = B_NO_MEMORY;
5519 			else {
5520 				area->page_protections = newProtections;
5521 
5522 				if (oldSize < newSize) {
5523 					// init the additional page protections to that of the area
5524 					uint32 offset = area_page_protections_size(oldSize);
5525 					uint32 areaProtection = area->protection
5526 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5527 					memset(area->page_protections + offset,
5528 						areaProtection | (areaProtection << 4), bytes - offset);
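					// Protections are stored as 4 bits per page (two pages
					// per byte). If the old page count was odd, the last old
					// byte also holds the first new page in its high nibble,
					// which the fixup below initializes as well.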
5529 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5530 						uint8& entry = area->page_protections[offset - 1];
5531 						entry = (entry & 0x0f) | (areaProtection << 4);
5532 					}
5533 				}
5534 			}
5535 		}
5536 	}
5537 
5538 	// shrinking the cache can't fail, so we do it now
5539 	if (status == B_OK && newSize < oldSize)
5540 		status = cache->Resize(cache->virtual_base + newSize, priority);
5541 
5542 	if (status != B_OK) {
5543 		// Something failed -- resize the areas back to their original size.
5544 		// This can fail, too, in which case we're seriously screwed.
5545 		for (VMArea* current = cache->areas; current != NULL;
5546 				current = current->cache_next) {
5547 			if (current->address_space->ResizeArea(current, oldSize,
5548 					allocationFlags) != B_OK) {
5549 				panic("vm_resize_area(): Failed and unable to restore the "
5550 					"original state.");
5551 			}
5552 		}
5553 
5554 		cache->Resize(cache->virtual_base + oldSize, priority);
5555 	}
5556 
5557 	// TODO: we must honour the lock restrictions of this area
5558 	return status;
5559 }
5560 
5561 
5562 status_t
5563 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5564 {
5565 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5566 }
5567 
5568 
5569 status_t
5570 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5571 {
5572 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5573 }
5574 
5575 
5576 status_t
5577 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5578 	bool user)
5579 {
5580 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5581 }
5582 
5583 
5584 void
5585 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5586 {
5587 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5588 }
5589 
5590 
5591 /*!	Copies a range of memory directly from/to a page that might not be mapped
5592 	at the moment.
5593 
5594 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5595 	walks through the respective area's cache chain to find the physical page
5596 	and copies from/to it directly.
5597 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5598 	must not cross a page boundary.
5599 
5600 	\param teamID The team ID identifying the address space \a unsafeMemory is
5601 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5602 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5603 		is passed, the address space of the thread returned by
5604 		debug_get_debugged_thread() is used.
5605 	\param unsafeMemory The start of the unsafe memory range to be copied
5606 		from/to.
5607 	\param buffer A safely accessible kernel buffer to be copied from/to.
5608 	\param size The number of bytes to be copied.
5609 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5610 		\a unsafeMemory, the other way around otherwise.
5611 */
5612 status_t
5613 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5614 	size_t size, bool copyToUnsafe)
5615 {
5616 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5617 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5618 		return B_BAD_VALUE;
5619 	}
5620 
5621 	// get the address space for the debugged thread
5622 	VMAddressSpace* addressSpace;
5623 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5624 		addressSpace = VMAddressSpace::Kernel();
5625 	} else if (teamID == B_CURRENT_TEAM) {
5626 		Thread* thread = debug_get_debugged_thread();
5627 		if (thread == NULL || thread->team == NULL)
5628 			return B_BAD_ADDRESS;
5629 
5630 		addressSpace = thread->team->address_space;
5631 	} else
5632 		addressSpace = VMAddressSpace::DebugGet(teamID);
5633 
5634 	if (addressSpace == NULL)
5635 		return B_BAD_ADDRESS;
5636 
5637 	// get the area
5638 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5639 	if (area == NULL)
5640 		return B_BAD_ADDRESS;
5641 
5642 	// search the page
5643 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5644 		+ area->cache_offset;
5645 	VMCache* cache = area->cache;
5646 	vm_page* page = NULL;
5647 	while (cache != NULL) {
5648 		page = cache->DebugLookupPage(cacheOffset);
5649 		if (page != NULL)
5650 			break;
5651 
5652 		// Page not found in this cache -- if it is paged out, we must not try
5653 		// to get it from lower caches.
5654 		if (cache->DebugHasPage(cacheOffset))
5655 			break;
5656 
5657 		cache = cache->source;
5658 	}
5659 
5660 	if (page == NULL)
5661 		return B_UNSUPPORTED;
5662 
5663 	// copy from/to physical memory
5664 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5665 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5666 
5667 	if (copyToUnsafe) {
5668 		if (page->Cache() != area->cache)
5669 			return B_UNSUPPORTED;
5670 
5671 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5672 	}
5673 
5674 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5675 }
5676 
5677 
5678 /*!	Validate that a memory range is either fully in kernel space, or fully in
5679 	userspace. */
5680 static inline bool
5681 validate_memory_range(const void* addr, size_t size)
5682 {
5683 	addr_t address = (addr_t)addr;
5684 
5685 	// Check for overflows on all addresses.
5686 	if ((address + size) < address)
5687 		return false;
5688 
5689 	// Validate that the address range does not cross the kernel/user boundary.
5690 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5691 }
5692 
5693 
5694 //	#pragma mark - kernel public API
5695 
5696 
5697 status_t
5698 user_memcpy(void* to, const void* from, size_t size)
5699 {
5700 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5701 		return B_BAD_ADDRESS;
5702 
5703 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5704 		return B_BAD_ADDRESS;
5705 
5706 	return B_OK;
5707 }
5708 
5709 
5710 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5711 	the string in \a to, NULL-terminating the result.
5712 
5713 	\param to Pointer to the destination C-string.
5714 	\param from Pointer to the source C-string.
5715 	\param size Size in bytes of the string buffer pointed to by \a to.
5716 
5717 	\return strlen(\a from).
5718 */
5719 ssize_t
5720 user_strlcpy(char* to, const char* from, size_t size)
5721 {
5722 	if (to == NULL && size != 0)
5723 		return B_BAD_VALUE;
5724 	if (from == NULL)
5725 		return B_BAD_ADDRESS;
5726 
5727 	// Protect the source address from overflows.
5728 	size_t maxSize = size;
5729 	if ((addr_t)from + maxSize < (addr_t)from)
5730 		maxSize -= (addr_t)from + maxSize;
5731 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5732 		maxSize = USER_TOP - (addr_t)from;
5733 
5734 	if (!validate_memory_range(to, maxSize))
5735 		return B_BAD_ADDRESS;
5736 
5737 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5738 	if (result < 0)
5739 		return result;
5740 
5741 	// If we hit the address overflow boundary, fail.
5742 	if ((size_t)result >= maxSize && maxSize < size)
5743 		return B_BAD_ADDRESS;
5744 
5745 	return result;
5746 }
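
// Illustrative usage of user_strlcpy() (editor's sketch): copy a userland
// string into a fixed-size kernel buffer and treat a negative result as a
// bad address. "userName" is a placeholder.
//
//     char name[B_OS_NAME_LENGTH];
//     if (user_strlcpy(name, userName, sizeof(name)) < B_OK)
//         return B_BAD_ADDRESS;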
5747 
5748 
5749 status_t
5750 user_memset(void* s, char c, size_t count)
5751 {
5752 	if (!validate_memory_range(s, count))
5753 		return B_BAD_ADDRESS;
5754 
5755 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5756 		return B_BAD_ADDRESS;
5757 
5758 	return B_OK;
5759 }
5760 
5761 
5762 /*!	Wires a single page at the given address.
5763 
5764 	\param team The team whose address space the address belongs to. Supports
5765 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5766 		parameter is ignored.
5767 	\param address The virtual address to wire down. Does not need to
5768 		be page aligned.
5769 	\param writable If \c true the page shall be writable.
5770 	\param info On success the info is filled in, among other things
5771 		containing the physical address the given virtual one translates to.
5772 	\return \c B_OK if the page could be wired, another error code otherwise.
5773 */
5774 status_t
5775 vm_wire_page(team_id team, addr_t address, bool writable,
5776 	VMPageWiringInfo* info)
5777 {
5778 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5779 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5780 
5781 	// compute the page protection that is required
5782 	bool isUser = IS_USER_ADDRESS(address);
5783 	uint32 requiredProtection = PAGE_PRESENT
5784 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5785 	if (writable)
5786 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5787 
5788 	// get and read lock the address space
5789 	VMAddressSpace* addressSpace = NULL;
5790 	if (isUser) {
5791 		if (team == B_CURRENT_TEAM)
5792 			addressSpace = VMAddressSpace::GetCurrent();
5793 		else
5794 			addressSpace = VMAddressSpace::Get(team);
5795 	} else
5796 		addressSpace = VMAddressSpace::GetKernel();
5797 	if (addressSpace == NULL)
5798 		return B_ERROR;
5799 
5800 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5801 
5802 	VMTranslationMap* map = addressSpace->TranslationMap();
5803 	status_t error = B_OK;
5804 
5805 	// get the area
5806 	VMArea* area = addressSpace->LookupArea(pageAddress);
5807 	if (area == NULL) {
5808 		addressSpace->Put();
5809 		return B_BAD_ADDRESS;
5810 	}
5811 
5812 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5813 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5814 
5815 	// mark the area range wired
5816 	area->Wire(&info->range);
5817 
5818 	// Lock the area's cache chain and the translation map. Needed to look
5819 	// up the page and play with its wired count.
5820 	cacheChainLocker.LockAllSourceCaches();
5821 	map->Lock();
5822 
5823 	phys_addr_t physicalAddress;
5824 	uint32 flags;
5825 	vm_page* page;
5826 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5827 		&& (flags & requiredProtection) == requiredProtection
5828 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5829 			!= NULL) {
5830 		// Already mapped with the correct permissions -- just increment
5831 		// the page's wired count.
5832 		increment_page_wired_count(page);
5833 
5834 		map->Unlock();
5835 		cacheChainLocker.Unlock();
5836 		addressSpaceLocker.Unlock();
5837 	} else {
5838 		// Let vm_soft_fault() map the page for us, if possible. We need
5839 		// to fully unlock to avoid deadlocks. Since we have already
5840 		// wired the area itself, nothing disturbing will happen with it
5841 		// in the meantime.
5842 		map->Unlock();
5843 		cacheChainLocker.Unlock();
5844 		addressSpaceLocker.Unlock();
5845 
5846 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5847 			isUser, &page);
5848 
5849 		if (error != B_OK) {
5850 			// The page could not be mapped -- clean up.
5851 			VMCache* cache = vm_area_get_locked_cache(area);
5852 			area->Unwire(&info->range);
5853 			cache->ReleaseRefAndUnlock();
5854 			addressSpace->Put();
5855 			return error;
5856 		}
5857 	}
5858 
5859 	info->physicalAddress
5860 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5861 			+ address % B_PAGE_SIZE;
5862 	info->page = page;
5863 
5864 	return B_OK;
5865 }
5866 
5867 
5868 /*!	Unwires a single page previously wired via vm_wire_page().
5869 
5870 	\param info The same object passed to vm_wire_page() before.
5871 */
5872 void
5873 vm_unwire_page(VMPageWiringInfo* info)
5874 {
5875 	// lock the address space
5876 	VMArea* area = info->range.area;
5877 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5878 		// takes over our reference
5879 
5880 	// lock the top cache
5881 	VMCache* cache = vm_area_get_locked_cache(area);
5882 	VMCacheChainLocker cacheChainLocker(cache);
5883 
5884 	if (info->page->Cache() != cache) {
5885 		// The page is not in the top cache, so we lock the whole cache chain
5886 		// before touching the page's wired count.
5887 		cacheChainLocker.LockAllSourceCaches();
5888 	}
5889 
5890 	decrement_page_wired_count(info->page);
5891 
5892 	// remove the wired range from the area
5893 	area->Unwire(&info->range);
5894 
5895 	cacheChainLocker.Unlock();
5896 }
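
// Illustrative pairing (editor's sketch): a page wired via vm_wire_page()
// stays mapped and resident until the matching vm_unwire_page() call.
// "address" is a placeholder.
//
//     VMPageWiringInfo info;
//     if (vm_wire_page(B_CURRENT_TEAM, address, true, &info) == B_OK) {
//         // ... use info.physicalAddress while the page is wired ...
//         vm_unwire_page(&info);
//     }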
5897 
5898 
5899 /*!	Wires down the given address range in the specified team's address space.
5900 
5901 	If successful the function
5902 	- acquires a reference to the specified team's address space,
5903 	- adds respective wired ranges to all areas that intersect with the given
5904 	  address range,
5905 	- makes sure all pages in the given address range are mapped with the
5906 	  requested access permissions and increments their wired count.
5907 
5908 	It fails when \a team doesn't specify a valid address space, when any part
5909 	of the specified address range is not covered by areas, when the concerned
5910 	areas don't allow mapping with the requested permissions, or when mapping
5911 	failed for another reason.
5912 
5913 	When successful, the call must be balanced by an unlock_memory_etc() call
5914 	with the exact same parameters.
5915 
5916 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5917 		supported.
5918 	\param address The start of the address range to be wired.
5919 	\param numBytes The size of the address range to be wired.
5920 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5921 		requests that the range must be wired writable ("read from device
5922 		into memory").
5923 	\return \c B_OK on success, another error code otherwise.
5924 */
5925 status_t
5926 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5927 {
5928 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5929 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5930 
5931 	// compute the page protection that is required
5932 	bool isUser = IS_USER_ADDRESS(address);
5933 	bool writable = (flags & B_READ_DEVICE) == 0;
5934 	uint32 requiredProtection = PAGE_PRESENT
5935 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5936 	if (writable)
5937 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5938 
5939 	uint32 mallocFlags = isUser
5940 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5941 
5942 	// get and read lock the address space
5943 	VMAddressSpace* addressSpace = NULL;
5944 	if (isUser) {
5945 		if (team == B_CURRENT_TEAM)
5946 			addressSpace = VMAddressSpace::GetCurrent();
5947 		else
5948 			addressSpace = VMAddressSpace::Get(team);
5949 	} else
5950 		addressSpace = VMAddressSpace::GetKernel();
5951 	if (addressSpace == NULL)
5952 		return B_ERROR;
5953 
5954 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5955 		// We get a new address space reference here. The one we got above will
5956 		// be freed by unlock_memory_etc().
5957 
5958 	VMTranslationMap* map = addressSpace->TranslationMap();
5959 	status_t error = B_OK;
5960 
5961 	// iterate through all concerned areas
5962 	addr_t nextAddress = lockBaseAddress;
5963 	while (nextAddress != lockEndAddress) {
5964 		// get the next area
5965 		VMArea* area = addressSpace->LookupArea(nextAddress);
5966 		if (area == NULL) {
5967 			error = B_BAD_ADDRESS;
5968 			break;
5969 		}
5970 
5971 		addr_t areaStart = nextAddress;
5972 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5973 
5974 		// allocate the wired range (do that before locking the cache to avoid
5975 		// deadlocks)
5976 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5977 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5978 		if (range == NULL) {
5979 			error = B_NO_MEMORY;
5980 			break;
5981 		}
5982 
5983 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5984 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5985 
5986 		// mark the area range wired
5987 		area->Wire(range);
5988 
5989 		// Depending on the area cache type and the wiring, we may not need to
5990 		// look at the individual pages.
5991 		if (area->cache_type == CACHE_TYPE_NULL
5992 			|| area->cache_type == CACHE_TYPE_DEVICE
5993 			|| area->wiring == B_FULL_LOCK
5994 			|| area->wiring == B_CONTIGUOUS) {
5995 			nextAddress = areaEnd;
5996 			continue;
5997 		}
5998 
5999 		// Lock the area's cache chain and the translation map. Needed to look
6000 		// up pages and play with their wired count.
6001 		cacheChainLocker.LockAllSourceCaches();
6002 		map->Lock();
6003 
6004 		// iterate through the pages and wire them
6005 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6006 			phys_addr_t physicalAddress;
6007 			uint32 flags;
6008 
6009 			vm_page* page;
6010 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6011 				&& (flags & requiredProtection) == requiredProtection
6012 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6013 					!= NULL) {
6014 				// Already mapped with the correct permissions -- just increment
6015 				// the page's wired count.
6016 				increment_page_wired_count(page);
6017 			} else {
6018 				// Let vm_soft_fault() map the page for us, if possible. We need
6019 				// to fully unlock to avoid deadlocks. Since we have already
6020 				// wired the area itself, nothing disturbing will happen with it
6021 				// in the meantime.
6022 				map->Unlock();
6023 				cacheChainLocker.Unlock();
6024 				addressSpaceLocker.Unlock();
6025 
6026 				error = vm_soft_fault(addressSpace, nextAddress, writable,
6027 					false, isUser, &page);
6028 
6029 				addressSpaceLocker.Lock();
6030 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
6031 				cacheChainLocker.LockAllSourceCaches();
6032 				map->Lock();
6033 			}
6034 
6035 			if (error != B_OK)
6036 				break;
6037 		}
6038 
6039 		map->Unlock();
6040 
6041 		if (error == B_OK) {
6042 			cacheChainLocker.Unlock();
6043 		} else {
6044 			// An error occurred, so abort right here. If the current address
6045 			// is the first in this area, unwire the area, since we won't get
6046 			// to it when reverting what we've done so far.
6047 			if (nextAddress == areaStart) {
6048 				area->Unwire(range);
6049 				cacheChainLocker.Unlock();
6050 				range->~VMAreaWiredRange();
6051 				free_etc(range, mallocFlags);
6052 			} else
6053 				cacheChainLocker.Unlock();
6054 
6055 			break;
6056 		}
6057 	}
6058 
6059 	if (error != B_OK) {
6060 		// An error occurred, so unwire all that we've already wired. Note that
6061 		// even if not a single page was wired, unlock_memory_etc() is called
6062 		// to put the address space reference.
6063 		addressSpaceLocker.Unlock();
6064 		unlock_memory_etc(team, (void*)lockBaseAddress,
6065 			nextAddress - lockBaseAddress, flags);
6066 	}
6067 
6068 	return error;
6069 }
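
// Illustrative pairing (editor's sketch): a successful lock_memory_etc()
// must be balanced by unlock_memory_etc() with exactly the same parameters.
// "team", "buffer" and "length" are placeholders.
//
//     if (lock_memory_etc(team, buffer, length, 0) == B_OK) {
//         // ... perform the I/O on the now-wired range ...
//         unlock_memory_etc(team, buffer, length, 0);
//     }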
6070 
6071 
6072 status_t
6073 lock_memory(void* address, size_t numBytes, uint32 flags)
6074 {
6075 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6076 }
6077 
6078 
6079 /*!	Unwires an address range previously wired with lock_memory_etc().
6080 
6081 	Note that a call to this function must balance a previous lock_memory_etc()
6082 	call with exactly the same parameters.
6083 */
6084 status_t
6085 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
6086 {
6087 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
6088 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6089 
6090 	// compute the page protection that is required
6091 	bool isUser = IS_USER_ADDRESS(address);
6092 	bool writable = (flags & B_READ_DEVICE) == 0;
6093 	uint32 requiredProtection = PAGE_PRESENT
6094 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6095 	if (writable)
6096 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6097 
6098 	uint32 mallocFlags = isUser
6099 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6100 
6101 	// get and read lock the address space
6102 	VMAddressSpace* addressSpace = NULL;
6103 	if (isUser) {
6104 		if (team == B_CURRENT_TEAM)
6105 			addressSpace = VMAddressSpace::GetCurrent();
6106 		else
6107 			addressSpace = VMAddressSpace::Get(team);
6108 	} else
6109 		addressSpace = VMAddressSpace::GetKernel();
6110 	if (addressSpace == NULL)
6111 		return B_ERROR;
6112 
6113 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6114 		// Take over the address space reference. We don't unlock until we're
6115 		// done.
6116 
6117 	VMTranslationMap* map = addressSpace->TranslationMap();
6118 	status_t error = B_OK;
6119 
6120 	// iterate through all concerned areas
6121 	addr_t nextAddress = lockBaseAddress;
6122 	while (nextAddress != lockEndAddress) {
6123 		// get the next area
6124 		VMArea* area = addressSpace->LookupArea(nextAddress);
6125 		if (area == NULL) {
6126 			error = B_BAD_ADDRESS;
6127 			break;
6128 		}
6129 
6130 		addr_t areaStart = nextAddress;
6131 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6132 
6133 		// Lock the area's top cache. This is a requirement for
6134 		// VMArea::Unwire().
6135 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6136 
6137 		// Depending on the area cache type and the wiring, we may not need to
6138 		// look at the individual pages.
6139 		if (area->cache_type == CACHE_TYPE_NULL
6140 			|| area->cache_type == CACHE_TYPE_DEVICE
6141 			|| area->wiring == B_FULL_LOCK
6142 			|| area->wiring == B_CONTIGUOUS) {
6143 			// unwire the range (to avoid deadlocks we delete the range after
6144 			// unlocking the cache)
6145 			nextAddress = areaEnd;
6146 			VMAreaWiredRange* range = area->Unwire(areaStart,
6147 				areaEnd - areaStart, writable);
6148 			cacheChainLocker.Unlock();
6149 			if (range != NULL) {
6150 				range->~VMAreaWiredRange();
6151 				free_etc(range, mallocFlags);
6152 			}
6153 			continue;
6154 		}
6155 
6156 		// Lock the area's cache chain and the translation map. Needed to look
6157 		// up pages and play with their wired count.
6158 		cacheChainLocker.LockAllSourceCaches();
6159 		map->Lock();
6160 
6161 		// iterate through the pages and unwire them
6162 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6163 			phys_addr_t physicalAddress;
6164 			uint32 flags;
6165 
6166 			vm_page* page;
6167 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6168 				&& (flags & PAGE_PRESENT) != 0
6169 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6170 					!= NULL) {
6171 				// The page is still mapped -- just decrement the page's
6172 				// wired count.
6173 				decrement_page_wired_count(page);
6174 			} else {
6175 				panic("unlock_memory_etc(): Failed to unwire page: address "
6176 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6177 					nextAddress);
6178 				error = B_BAD_VALUE;
6179 				break;
6180 			}
6181 		}
6182 
6183 		map->Unlock();
6184 
6185 		// All pages are unwired. Remove the area's wired range as well (to
6186 		// avoid deadlocks we delete the range after unlocking the cache).
6187 		VMAreaWiredRange* range = area->Unwire(areaStart,
6188 			areaEnd - areaStart, writable);
6189 
6190 		cacheChainLocker.Unlock();
6191 
6192 		if (range != NULL) {
6193 			range->~VMAreaWiredRange();
6194 			free_etc(range, mallocFlags);
6195 		}
6196 
6197 		if (error != B_OK)
6198 			break;
6199 	}
6200 
6201 	// get rid of the address space reference lock_memory_etc() acquired
6202 	addressSpace->Put();
6203 
6204 	return error;
6205 }
6206 
6207 
6208 status_t
6209 unlock_memory(void* address, size_t numBytes, uint32 flags)
6210 {
6211 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6212 }
6213 
6214 
6215 /*!	Similar to get_memory_map(), but also allows specifying the address space
6216 	for the memory in question and has saner semantics.
6217 	Returns \c B_OK when the complete range could be translated or
6218 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6219 	case the actual number of entries is written to \c *_numEntries. Any other
6220 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6221 	in this case.
6222 */
6223 status_t
6224 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6225 	physical_entry* table, uint32* _numEntries)
6226 {
6227 	uint32 numEntries = *_numEntries;
6228 	*_numEntries = 0;
6229 
6230 	VMAddressSpace* addressSpace;
6231 	addr_t virtualAddress = (addr_t)address;
6232 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6233 	phys_addr_t physicalAddress;
6234 	status_t status = B_OK;
6235 	int32 index = -1;
6236 	addr_t offset = 0;
6237 	bool interrupts = are_interrupts_enabled();
6238 
6239 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6240 		"entries)\n", team, address, numBytes, numEntries));
6241 
6242 	if (numEntries == 0 || numBytes == 0)
6243 		return B_BAD_VALUE;
6244 
6245 	// in which address space is the address to be found?
6246 	if (IS_USER_ADDRESS(virtualAddress)) {
6247 		if (team == B_CURRENT_TEAM)
6248 			addressSpace = VMAddressSpace::GetCurrent();
6249 		else
6250 			addressSpace = VMAddressSpace::Get(team);
6251 	} else
6252 		addressSpace = VMAddressSpace::GetKernel();
6253 
6254 	if (addressSpace == NULL)
6255 		return B_ERROR;
6256 
6257 	VMTranslationMap* map = addressSpace->TranslationMap();
6258 
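	// When interrupts are disabled we must not block on the translation map
	// lock; the interrupt-safe query variant is used below instead.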
6259 	if (interrupts)
6260 		map->Lock();
6261 
6262 	while (offset < numBytes) {
6263 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6264 		uint32 flags;
6265 
6266 		if (interrupts) {
6267 			status = map->Query((addr_t)address + offset, &physicalAddress,
6268 				&flags);
6269 		} else {
6270 			status = map->QueryInterrupt((addr_t)address + offset,
6271 				&physicalAddress, &flags);
6272 		}
6273 		if (status < B_OK)
6274 			break;
6275 		if ((flags & PAGE_PRESENT) == 0) {
6276 			panic("get_memory_map() called on unmapped memory!");
6277 			return B_BAD_ADDRESS;
6278 		}
6279 
6280 		if (index < 0 && pageOffset > 0) {
6281 			physicalAddress += pageOffset;
6282 			if (bytes > B_PAGE_SIZE - pageOffset)
6283 				bytes = B_PAGE_SIZE - pageOffset;
6284 		}
6285 
6286 		// need to switch to the next physical_entry?
6287 		if (index < 0 || table[index].address
6288 				!= physicalAddress - table[index].size) {
6289 			if ((uint32)++index + 1 > numEntries) {
6290 				// table too small
6291 				break;
6292 			}
6293 			table[index].address = physicalAddress;
6294 			table[index].size = bytes;
6295 		} else {
6296 			// the page fits into the current entry
6297 			table[index].size += bytes;
6298 		}
6299 
6300 		offset += bytes;
6301 	}
6302 
6303 	if (interrupts)
6304 		map->Unlock();
6305 
6306 	if (status != B_OK)
6307 		return status;
6308 
6309 	if ((uint32)index + 1 > numEntries) {
6310 		*_numEntries = index;
6311 		return B_BUFFER_OVERFLOW;
6312 	}
6313 
6314 	*_numEntries = index + 1;
6315 	return B_OK;
6316 }
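
// Illustrative usage of get_memory_map_etc() (editor's sketch): translate a
// buffer into physical runs; B_BUFFER_OVERFLOW merely means the table was
// too small. "buffer" and "length" are placeholders.
//
//     physical_entry table[8];
//     uint32 entries = 8;
//     status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
//         table, &entries);
//     // on return "entries" holds the number of table slots actually used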
6317 
6318 
6319 /*!	According to the BeBook, this function should always succeed.
6320 	This is no longer the case.
6321 */
6322 extern "C" int32
6323 __get_memory_map_haiku(const void* address, size_t numBytes,
6324 	physical_entry* table, int32 numEntries)
6325 {
6326 	uint32 entriesRead = numEntries;
6327 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6328 		table, &entriesRead);
6329 	if (error != B_OK)
6330 		return error;
6331 
6332 	// close the entry list
6333 
6334 	// if it's only one entry, we will silently accept the missing ending
6335 	if (numEntries == 1)
6336 		return B_OK;
6337 
6338 	if (entriesRead + 1 > (uint32)numEntries)
6339 		return B_BUFFER_OVERFLOW;
6340 
6341 	table[entriesRead].address = 0;
6342 	table[entriesRead].size = 0;
6343 
6344 	return B_OK;
6345 }
6346 
6347 
6348 area_id
6349 area_for(void* address)
6350 {
6351 	return vm_area_for((addr_t)address, true);
6352 }
6353 
6354 
6355 area_id
6356 find_area(const char* name)
6357 {
6358 	return VMAreas::Find(name);
6359 }
6360 
6361 
6362 status_t
6363 _get_area_info(area_id id, area_info* info, size_t size)
6364 {
6365 	if (size != sizeof(area_info) || info == NULL)
6366 		return B_BAD_VALUE;
6367 
6368 	AddressSpaceReadLocker locker;
6369 	VMArea* area;
6370 	status_t status = locker.SetFromArea(id, area);
6371 	if (status != B_OK)
6372 		return status;
6373 
6374 	fill_area_info(area, info, size);
6375 	return B_OK;
6376 }
6377 
6378 
6379 status_t
6380 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6381 {
6382 	addr_t nextBase = *(addr_t*)cookie;
6383 
6384 	// we're already through the list
6385 	if (nextBase == (addr_t)-1)
6386 		return B_ENTRY_NOT_FOUND;
6387 
6388 	if (team == B_CURRENT_TEAM)
6389 		team = team_get_current_team_id();
6390 
6391 	AddressSpaceReadLocker locker(team);
6392 	if (!locker.IsLocked())
6393 		return B_BAD_TEAM_ID;
6394 
6395 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6396 	if (area == NULL) {
6397 		nextBase = (addr_t)-1;
6398 		return B_ENTRY_NOT_FOUND;
6399 	}
6400 
6401 	fill_area_info(area, info, size);
6402 	*cookie = (ssize_t)(area->Base() + 1);
6403 
6404 	return B_OK;
6405 }
6406 
6407 
6408 status_t
6409 set_area_protection(area_id area, uint32 newProtection)
6410 {
6411 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6412 		newProtection, true);
6413 }
6414 
6415 
6416 status_t
6417 resize_area(area_id areaID, size_t newSize)
6418 {
6419 	return vm_resize_area(areaID, newSize, true);
6420 }
6421 
6422 
6423 /*!	Transfers the specified area to a new team. The caller must be the owner
6424 	of the area.
6425 */
6426 area_id
6427 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6428 	bool kernel)
6429 {
6430 	area_info info;
6431 	status_t status = get_area_info(id, &info);
6432 	if (status != B_OK)
6433 		return status;
6434 
6435 	if (!kernel && info.team != thread_get_current_thread()->team->id)
6436 		return B_PERMISSION_DENIED;
6437 
6438 	// We need to mark the area cloneable so the following operations work.
6439 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6440 	if (status != B_OK)
6441 		return status;
6442 
6443 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6444 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6445 	if (clonedArea < 0)
6446 		return clonedArea;
6447 
6448 	status = vm_delete_area(info.team, id, kernel);
6449 	if (status != B_OK) {
6450 		vm_delete_area(target, clonedArea, kernel);
6451 		return status;
6452 	}
6453 
6454 	// Now we can reset the protection to whatever it was before.
6455 	set_area_protection(clonedArea, info.protection);
6456 
6457 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6458 
6459 	return clonedArea;
6460 }
6461 
6462 
6463 extern "C" area_id
6464 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6465 	size_t numBytes, uint32 addressSpec, uint32 protection,
6466 	void** _virtualAddress)
6467 {
6468 	if (!arch_vm_supports_protection(protection))
6469 		return B_NOT_SUPPORTED;
6470 
6471 	fix_protection(&protection);
6472 
6473 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6474 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6475 		false);
6476 }
6477 
6478 
6479 area_id
6480 clone_area(const char* name, void** _address, uint32 addressSpec,
6481 	uint32 protection, area_id source)
6482 {
6483 	if ((protection & B_KERNEL_PROTECTION) == 0)
6484 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6485 
6486 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6487 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6488 }
6489 
6490 
6491 area_id
6492 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6493 	uint32 protection, uint32 flags, uint32 guardSize,
6494 	const virtual_address_restrictions* virtualAddressRestrictions,
6495 	const physical_address_restrictions* physicalAddressRestrictions,
6496 	void** _address)
6497 {
6498 	fix_protection(&protection);
6499 
6500 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6501 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6502 		true, _address);
6503 }
6504 
6505 
6506 extern "C" area_id
6507 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6508 	size_t size, uint32 lock, uint32 protection)
6509 {
6510 	fix_protection(&protection);
6511 
6512 	virtual_address_restrictions virtualRestrictions = {};
6513 	virtualRestrictions.address = *_address;
6514 	virtualRestrictions.address_specification = addressSpec;
6515 	physical_address_restrictions physicalRestrictions = {};
6516 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6517 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6518 		true, _address);
6519 }
6520 
6521 
6522 status_t
6523 delete_area(area_id area)
6524 {
6525 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6526 }
6527 
6528 
6529 //	#pragma mark - Userland syscalls
6530 
6531 
6532 status_t
6533 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6534 	addr_t size)
6535 {
6536 	// filter out some unavailable values (for userland)
6537 	switch (addressSpec) {
6538 		case B_ANY_KERNEL_ADDRESS:
6539 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6540 			return B_BAD_VALUE;
6541 	}
6542 
6543 	addr_t address;
6544 
6545 	if (!IS_USER_ADDRESS(userAddress)
6546 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6547 		return B_BAD_ADDRESS;
6548 
6549 	status_t status = vm_reserve_address_range(
6550 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6551 		RESERVED_AVOID_BASE);
6552 	if (status != B_OK)
6553 		return status;
6554 
6555 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6556 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6557 			(void*)address, size);
6558 		return B_BAD_ADDRESS;
6559 	}
6560 
6561 	return B_OK;
6562 }
6563 
6564 
6565 status_t
6566 _user_unreserve_address_range(addr_t address, addr_t size)
6567 {
6568 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6569 		(void*)address, size);
6570 }
6571 
6572 
6573 area_id
6574 _user_area_for(void* address)
6575 {
6576 	return vm_area_for((addr_t)address, false);
6577 }
6578 
6579 
6580 area_id
6581 _user_find_area(const char* userName)
6582 {
6583 	char name[B_OS_NAME_LENGTH];
6584 
6585 	if (!IS_USER_ADDRESS(userName)
6586 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6587 		return B_BAD_ADDRESS;
6588 
6589 	return find_area(name);
6590 }
6591 
6592 
6593 status_t
6594 _user_get_area_info(area_id area, area_info* userInfo)
6595 {
6596 	if (!IS_USER_ADDRESS(userInfo))
6597 		return B_BAD_ADDRESS;
6598 
6599 	area_info info;
6600 	status_t status = get_area_info(area, &info);
6601 	if (status < B_OK)
6602 		return status;
6603 
6604 	// TODO: do we want to prevent userland from seeing kernel protections?
6605 	//info.protection &= B_USER_PROTECTION;
6606 
6607 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6608 		return B_BAD_ADDRESS;
6609 
6610 	return status;
6611 }
6612 
6613 
6614 status_t
6615 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6616 {
6617 	ssize_t cookie;
6618 
6619 	if (!IS_USER_ADDRESS(userCookie)
6620 		|| !IS_USER_ADDRESS(userInfo)
6621 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6622 		return B_BAD_ADDRESS;
6623 
6624 	area_info info;
6625 	status_t status = _get_next_area_info(team, &cookie, &info,
6626 		sizeof(area_info));
6627 	if (status != B_OK)
6628 		return status;
6629 
6630 	//info.protection &= B_USER_PROTECTION;
6631 
6632 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6633 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6634 		return B_BAD_ADDRESS;
6635 
6636 	return status;
6637 }
6638 
6639 
6640 status_t
6641 _user_set_area_protection(area_id area, uint32 newProtection)
6642 {
6643 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6644 		return B_BAD_VALUE;
6645 
6646 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6647 		newProtection, false);
6648 }
6649 
6650 
6651 status_t
6652 _user_resize_area(area_id area, size_t newSize)
6653 {
6654 	// TODO: Since we restrict deleting of areas to those owned by the team,
6655 	// we should also do that for resizing (check other functions, too).
6656 	return vm_resize_area(area, newSize, false);
6657 }
6658 
6659 
6660 area_id
6661 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6662 	team_id target)
6663 {
6664 	// filter out some unavailable values (for userland)
6665 	switch (addressSpec) {
6666 		case B_ANY_KERNEL_ADDRESS:
6667 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6668 			return B_BAD_VALUE;
6669 	}
6670 
6671 	void* address;
6672 	if (!IS_USER_ADDRESS(userAddress)
6673 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6674 		return B_BAD_ADDRESS;
6675 
6676 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6677 	if (newArea < B_OK)
6678 		return newArea;
6679 
6680 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6681 		return B_BAD_ADDRESS;
6682 
6683 	return newArea;
6684 }
6685 
6686 
6687 area_id
6688 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6689 	uint32 protection, area_id sourceArea)
6690 {
6691 	char name[B_OS_NAME_LENGTH];
6692 	void* address;
6693 
6694 	// filter out some unavailable values (for userland)
6695 	switch (addressSpec) {
6696 		case B_ANY_KERNEL_ADDRESS:
6697 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6698 			return B_BAD_VALUE;
6699 	}
6700 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6701 		return B_BAD_VALUE;
6702 
6703 	if (!IS_USER_ADDRESS(userName)
6704 		|| !IS_USER_ADDRESS(userAddress)
6705 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6706 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6707 		return B_BAD_ADDRESS;
6708 
6709 	fix_protection(&protection);
6710 
6711 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6712 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6713 		false);
6714 	if (clonedArea < B_OK)
6715 		return clonedArea;
6716 
6717 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6718 		delete_area(clonedArea);
6719 		return B_BAD_ADDRESS;
6720 	}
6721 
6722 	return clonedArea;
6723 }
6724 
6725 
6726 area_id
6727 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6728 	size_t size, uint32 lock, uint32 protection)
6729 {
6730 	char name[B_OS_NAME_LENGTH];
6731 	void* address;
6732 
6733 	// filter out some unavailable values (for userland)
6734 	switch (addressSpec) {
6735 		case B_ANY_KERNEL_ADDRESS:
6736 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6737 			return B_BAD_VALUE;
6738 	}
6739 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6740 		return B_BAD_VALUE;
6741 
6742 	if (!IS_USER_ADDRESS(userName)
6743 		|| !IS_USER_ADDRESS(userAddress)
6744 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6745 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6746 		return B_BAD_ADDRESS;
6747 
6748 	if (addressSpec == B_EXACT_ADDRESS
6749 		&& IS_KERNEL_ADDRESS(address))
6750 		return B_BAD_VALUE;
6751 
6752 	if (addressSpec == B_ANY_ADDRESS)
6753 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6754 	if (addressSpec == B_BASE_ADDRESS)
6755 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6756 
6757 	fix_protection(&protection);
6758 
6759 	virtual_address_restrictions virtualRestrictions = {};
6760 	virtualRestrictions.address = address;
6761 	virtualRestrictions.address_specification = addressSpec;
6762 	physical_address_restrictions physicalRestrictions = {};
6763 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6764 		size, lock, protection, 0, 0, &virtualRestrictions,
6765 		&physicalRestrictions, false, &address);
6766 
6767 	if (area >= B_OK
6768 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6769 		delete_area(area);
6770 		return B_BAD_ADDRESS;
6771 	}
6772 
6773 	return area;
6774 }
6775 
6776 
6777 status_t
6778 _user_delete_area(area_id area)
6779 {
6780 	// Unlike the BeOS implementation, you can now only delete areas
6781 	// that you have created yourself from userland.
6782 	// The documentation to delete_area() explicitly states that this
6783 	// will be restricted in the future, and so it will.
6784 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6785 }
6786 
6787 
6788 // TODO: create a BeOS style call for this!
6789 
6790 area_id
6791 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6792 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6793 	int fd, off_t offset)
6794 {
6795 	char name[B_OS_NAME_LENGTH];
6796 	void* address;
6797 	area_id area;
6798 
6799 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6800 		return B_BAD_VALUE;
6801 
6802 	fix_protection(&protection);
6803 
6804 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6805 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6806 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6807 		return B_BAD_ADDRESS;
6808 
6809 	if (addressSpec == B_EXACT_ADDRESS) {
6810 		if ((addr_t)address + size < (addr_t)address
6811 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6812 			return B_BAD_VALUE;
6813 		}
6814 		if (!IS_USER_ADDRESS(address)
6815 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6816 			return B_BAD_ADDRESS;
6817 		}
6818 	}
6819 
6820 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6821 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6822 		false);
6823 	if (area < B_OK)
6824 		return area;
6825 
6826 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6827 		return B_BAD_ADDRESS;
6828 
6829 	return area;
6830 }
6831 
6832 
6833 status_t
6834 _user_unmap_memory(void* _address, size_t size)
6835 {
6836 	addr_t address = (addr_t)_address;
6837 
6838 	// check params
6839 	if (size == 0 || (addr_t)address + size < (addr_t)address
6840 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6841 		return B_BAD_VALUE;
6842 	}
6843 
6844 	if (!IS_USER_ADDRESS(address)
6845 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6846 		return B_BAD_ADDRESS;
6847 	}
6848 
6849 	// Write lock the address space and ensure the address range is not wired.
6850 	AddressSpaceWriteLocker locker;
6851 	do {
6852 		status_t status = locker.SetTo(team_get_current_team_id());
6853 		if (status != B_OK)
6854 			return status;
6855 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6856 			size, &locker));
6857 
6858 	// unmap
6859 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6860 }
6861 
6862 
6863 status_t
6864 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6865 {
6866 	// check address range
6867 	addr_t address = (addr_t)_address;
6868 	size = PAGE_ALIGN(size);
6869 
6870 	if ((address % B_PAGE_SIZE) != 0)
6871 		return B_BAD_VALUE;
6872 	if (!is_user_address_range(_address, size)) {
6873 		// weird error code required by POSIX
6874 		return ENOMEM;
6875 	}
6876 
6877 	// extend and check protection
6878 	if ((protection & ~B_USER_PROTECTION) != 0)
6879 		return B_BAD_VALUE;
6880 
6881 	fix_protection(&protection);
6882 
6883 	// We need to write lock the address space, since we're going to play with
6884 	// the areas. Also make sure that none of the areas is wired and that we're
6885 	// actually allowed to change the protection.
6886 	AddressSpaceWriteLocker locker;
6887 
6888 	bool restart;
6889 	do {
6890 		restart = false;
6891 
6892 		status_t status = locker.SetTo(team_get_current_team_id());
6893 		if (status != B_OK)
6894 			return status;
6895 
6896 		// First round: Check whether the whole range is covered by areas and we
6897 		// are allowed to modify them.
6898 		addr_t currentAddress = address;
6899 		size_t sizeLeft = size;
6900 		while (sizeLeft > 0) {
6901 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6902 			if (area == NULL)
6903 				return B_NO_MEMORY;
6904 
6905 			if ((area->protection & B_KERNEL_AREA) != 0)
6906 				return B_NOT_ALLOWED;
6907 			if (area->protection_max != 0
6908 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6909 				return B_NOT_ALLOWED;
6910 			}
6911 
6912 			addr_t offset = currentAddress - area->Base();
6913 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6914 
6915 			AreaCacheLocker cacheLocker(area);
6916 
6917 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6918 					&locker, &cacheLocker)) {
6919 				restart = true;
6920 				break;
6921 			}
6922 
6923 			cacheLocker.Unlock();
6924 
6925 			currentAddress += rangeSize;
6926 			sizeLeft -= rangeSize;
6927 		}
6928 	} while (restart);
6929 
6930 	// Second round: If the protection differs from that of an area, create a
6931 	// page protection array and re-map the mapped pages.
6932 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6933 	addr_t currentAddress = address;
6934 	size_t sizeLeft = size;
6935 	while (sizeLeft > 0) {
6936 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6937 		if (area == NULL)
6938 			return B_NO_MEMORY;
6939 
6940 		addr_t offset = currentAddress - area->Base();
6941 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6942 
6943 		currentAddress += rangeSize;
6944 		sizeLeft -= rangeSize;
6945 
6946 		if (area->page_protections == NULL) {
6947 			if (area->protection == protection)
6948 				continue;
6949 			if (offset == 0 && rangeSize == area->Size()) {
6950 				// The whole area is covered: let vm_set_area_protection handle it.
6951 				status_t status = vm_set_area_protection(area->address_space->ID(),
6952 					area->id, protection, false);
6953 				if (status != B_OK)
6954 					return status;
6955 				continue;
6956 			}
6957 
6958 			status_t status = allocate_area_page_protections(area);
6959 			if (status != B_OK)
6960 				return status;
6961 		}
6962 
6963 		// We need to lock the complete cache chain, since we potentially unmap
6964 		// pages of lower caches.
6965 		VMCache* topCache = vm_area_get_locked_cache(area);
6966 		VMCacheChainLocker cacheChainLocker(topCache);
6967 		cacheChainLocker.LockAllSourceCaches();
6968 
6969 		// Adjust the committed size, if necessary.
6970 		if (topCache->source != NULL && topCache->temporary) {
6971 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6972 			ssize_t commitmentChange = 0;
6973 			for (addr_t pageAddress = area->Base() + offset;
6974 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6975 				if (topCache->LookupPage(pageAddress) != NULL) {
6976 					// This page should already be accounted for in the commitment.
6977 					continue;
6978 				}
6979 
6980 				const bool isWritable
6981 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6982 
6983 				if (becomesWritable && !isWritable)
6984 					commitmentChange += B_PAGE_SIZE;
6985 				else if (!becomesWritable && isWritable)
6986 					commitmentChange -= B_PAGE_SIZE;
6987 			}
6988 
6989 			if (commitmentChange != 0) {
6990 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6991 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6992 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6993 				if (status != B_OK)
6994 					return status;
6995 			}
6996 		}
6997 
6998 		for (addr_t pageAddress = area->Base() + offset;
6999 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
7000 			map->Lock();
7001 
7002 			set_area_page_protection(area, pageAddress, protection);
7003 
7004 			phys_addr_t physicalAddress;
7005 			uint32 flags;
7006 
7007 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
7008 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
7009 				map->Unlock();
7010 				continue;
7011 			}
7012 
7013 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
7014 			if (page == NULL) {
7015 				panic("area %p: looking up page for pa %#" B_PRIxPHYSADDR
7016 					" failed\n", area, physicalAddress);
7017 				map->Unlock();
7018 				return B_ERROR;
7019 			}
7020 
7021 			// If the page is not in the topmost cache and write access is
7022 			// requested, we have to unmap it. Otherwise we can re-map it with
7023 			// the new protection.
7024 			bool unmapPage = page->Cache() != topCache
7025 				&& (protection & B_WRITE_AREA) != 0;
7026 
7027 			if (!unmapPage)
7028 				map->ProtectPage(area, pageAddress, protection);
7029 
7030 			map->Unlock();
7031 
7032 			if (unmapPage) {
7033 				DEBUG_PAGE_ACCESS_START(page);
7034 				unmap_page(area, pageAddress);
7035 				DEBUG_PAGE_ACCESS_END(page);
7036 			}
7037 		}
7038 	}
7039 
7040 	return B_OK;
7041 }
7042 
7043 
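/*!	Backs the msync() call: writes back modified pages of every file (vnode)
	cache mapped within the given range, synchronously for MS_SYNC or by
	scheduling an asynchronous write-back for MS_ASYNC.
*/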
7044 status_t
7045 _user_sync_memory(void* _address, size_t size, uint32 flags)
7046 {
7047 	addr_t address = (addr_t)_address;
7048 	size = PAGE_ALIGN(size);
7049 
7050 	// check params
7051 	if ((address % B_PAGE_SIZE) != 0)
7052 		return B_BAD_VALUE;
7053 	if (!is_user_address_range(_address, size)) {
7054 		// weird error code required by POSIX
7055 		return ENOMEM;
7056 	}
7057 
7058 	bool writeSync = (flags & MS_SYNC) != 0;
7059 	bool writeAsync = (flags & MS_ASYNC) != 0;
7060 	if (writeSync && writeAsync)
7061 		return B_BAD_VALUE;
7062 
7063 	if (size == 0 || (!writeSync && !writeAsync))
7064 		return B_OK;
7065 
7066 	// iterate through the range and sync all concerned areas
7067 	while (size > 0) {
7068 		// read lock the address space
7069 		AddressSpaceReadLocker locker;
7070 		status_t error = locker.SetTo(team_get_current_team_id());
7071 		if (error != B_OK)
7072 			return error;
7073 
7074 		// get the first area
7075 		VMArea* area = locker.AddressSpace()->LookupArea(address);
7076 		if (area == NULL)
7077 			return B_NO_MEMORY;
7078 
7079 		uint32 offset = address - area->Base();
7080 		size_t rangeSize = min_c(area->Size() - offset, size);
7081 		offset += area->cache_offset;
7082 
7083 		// lock the cache
7084 		AreaCacheLocker cacheLocker(area);
7085 		if (!cacheLocker)
7086 			return B_BAD_VALUE;
7087 		VMCache* cache = area->cache;
7088 
7089 		locker.Unlock();
7090 
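		// compute the page index range within the cache (offset already
		// includes the area's cache_offset)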
7091 		uint32 firstPage = offset >> PAGE_SHIFT;
7092 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
7093 
7094 		// write the pages
7095 		if (cache->type == CACHE_TYPE_VNODE) {
7096 			if (writeSync) {
7097 				// synchronous
7098 				error = vm_page_write_modified_page_range(cache, firstPage,
7099 					endPage);
7100 				if (error != B_OK)
7101 					return error;
7102 			} else {
7103 				// asynchronous
7104 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
7105 				// TODO: This is probably not quite what is supposed to happen.
7106 				// Especially when a lot has to be written, it might take ages
7107 				// until it really hits the disk.
7108 			}
7109 		}
7110 
7111 		address += rangeSize;
7112 		size -= rangeSize;
7113 	}
7114 
7115 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
7116 	// synchronize multiple mappings of the same file. In our VM they never get
7117 	// out of sync, though, so we don't have to do anything.
7118 
7119 	return B_OK;
7120 }
7121 
7122 
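/*!	Backs the madvise()/posix_madvise() calls. Currently only MADV_FREE has an
	effect (the range's contents are discarded); the remaining advice values
	are accepted but not acted upon yet.
*/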
7123 status_t
7124 _user_memory_advice(void* _address, size_t size, uint32 advice)
7125 {
7126 	addr_t address = (addr_t)_address;
7127 	if ((address % B_PAGE_SIZE) != 0)
7128 		return B_BAD_VALUE;
7129 
7130 	size = PAGE_ALIGN(size);
7131 	if (!is_user_address_range(_address, size)) {
7132 		// weird error code required by POSIX
7133 		return ENOMEM;
7134 	}
7135 
7136 	switch (advice) {
7137 		case MADV_NORMAL:
7138 		case MADV_SEQUENTIAL:
7139 		case MADV_RANDOM:
7140 		case MADV_WILLNEED:
7141 		case MADV_DONTNEED:
7142 			// TODO: Implement!
7143 			break;
7144 
7145 		case MADV_FREE:
7146 		{
7147 			AddressSpaceWriteLocker locker;
7148 			do {
7149 				status_t status = locker.SetTo(team_get_current_team_id());
7150 				if (status != B_OK)
7151 					return status;
7152 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7153 					address, size, &locker));
7154 
7155 			discard_address_range(locker.AddressSpace(), address, size, false);
7156 			break;
7157 		}
7158 
7159 		default:
7160 			return B_BAD_VALUE;
7161 	}
7162 
7163 	return B_OK;
7164 }
7165 
7166 
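/*!	Returns the page protection in effect at \a address and the wiring type
	of the area containing it, in team \a teamID's address space.
*/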
7167 status_t
7168 _user_get_memory_properties(team_id teamID, const void* address,
7169 	uint32* _protected, uint32* _lock)
7170 {
7171 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7172 		return B_BAD_ADDRESS;
7173 
7174 	AddressSpaceReadLocker locker;
7175 	status_t error = locker.SetTo(teamID);
7176 	if (error != B_OK)
7177 		return error;
7178 
7179 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7180 	if (area == NULL)
7181 		return B_NO_MEMORY;
7182 
7183 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7184 	uint32 wiring = area->wiring;
7185 
7186 	locker.Unlock();
7187 
7188 	error = user_memcpy(_protected, &protection, sizeof(protection));
7189 	if (error != B_OK)
7190 		return error;
7191 
7192 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7193 
7194 	return error;
7195 }
7196 
7197 
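/*!	Common backend of _user_mlock() and _user_munlock(): temporarily wires
	each area intersecting the given range (which also faults its pages in)
	and toggles whether the underlying anonymous cache may swap those pages
	out. Cache types other than the anonymous ones yield EINVAL.

	A rough userland sketch (assuming the usual POSIX wrappers):
		mlock(buffer, length);		// keep the pages resident
		// ... work on the locked buffer ...
		munlock(buffer, length);	// allow swapping them out again
*/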
7198 static status_t
7199 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7200 {
7201 #if ENABLE_SWAP_SUPPORT
7202 	// check address range
7203 	addr_t address = (addr_t)_address;
7204 	size = PAGE_ALIGN(size);
7205 
7206 	if ((address % B_PAGE_SIZE) != 0)
7207 		return EINVAL;
7208 	if (!is_user_address_range(_address, size))
7209 		return EINVAL;
7210 
7211 	const addr_t endAddress = address + size;
7212 
7213 	AddressSpaceReadLocker addressSpaceLocker;
7214 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7215 	if (error != B_OK)
7216 		return error;
7217 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7218 
7219 	// iterate through all concerned areas
7220 	addr_t nextAddress = address;
7221 	while (nextAddress != endAddress) {
7222 		// get the next area
7223 		VMArea* area = addressSpace->LookupArea(nextAddress);
7224 		if (area == NULL) {
7225 			error = B_BAD_ADDRESS;
7226 			break;
7227 		}
7228 
7229 		const addr_t areaStart = nextAddress;
7230 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7231 		nextAddress = areaEnd;
7232 
7233 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7234 		if (error != B_OK) {
7235 			// We don't need to unset or reset things on failure.
7236 			break;
7237 		}
7238 
7239 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7240 		VMAnonymousCache* anonCache = NULL;
7241 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7242 			// This memory already can never be swapped. Nothing to do.
7243 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7244 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7245 				areaEnd - areaStart, swappable);
7246 		} else {
7247 			// Some other cache type? We cannot affect anything here.
7248 			error = EINVAL;
7249 		}
7250 
7251 		cacheChainLocker.Unlock();
7252 
7253 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7254 		if (error != B_OK)
7255 			break;
7256 	}
7257 
7258 	return error;
7259 #else
7260 	// No swap support? Nothing to do.
7261 	return B_OK;
7262 #endif
7263 }
7264 
7265 
7266 status_t
7267 _user_mlock(const void* _address, size_t size)
7268 {
7269 	return user_set_memory_swappable(_address, size, false);
7270 }
7271 
7272 
7273 status_t
7274 _user_munlock(const void* _address, size_t size)
7275 {
7276 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7277 	// if multiple clones of an area had mlock() called on them,
7278 	// munlock() must also be called on all of them to actually unlock.
7279 	// (At present, the first munlock() will unlock all.)
7280 	// TODO: fork() should automatically unlock memory in the child.
7281 	return user_set_memory_swappable(_address, size, true);
7282 }
7283 
7284 
7285 // #pragma mark -- compatibility
7286 
7287 
7288 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7289 
7290 
7291 struct physical_entry_beos {
7292 	uint32	address;
7293 	uint32	size;
7294 };
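// Note: the current physical_entry (in OS.h) uses phys_addr_t/phys_size_t
// fields, which is why addresses >= 4 GB can occur at all but cannot be
// represented in this BeOS-era layout.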
7295 
7296 
7297 /*!	The physical_entry structure has changed. We need to translate it to the
7298 	old one.
7299 */
7300 extern "C" int32
7301 __get_memory_map_beos(const void* _address, size_t numBytes,
7302 	physical_entry_beos* table, int32 numEntries)
7303 {
7304 	if (numEntries <= 0)
7305 		return B_BAD_VALUE;
7306 
7307 	const uint8* address = (const uint8*)_address;
7308 
7309 	int32 count = 0;
7310 	while (numBytes > 0 && count < numEntries) {
7311 		physical_entry entry;
7312 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
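		// B_BUFFER_OVERFLOW only indicates that the range needs more than the
		// single entry requested here; since we translate one entry at a time,
		// that is expected rather than an error.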
7313 		if (result < 0) {
7314 			if (result != B_BUFFER_OVERFLOW)
7315 				return result;
7316 		}
7317 
7318 		if (entry.address >= (phys_addr_t)1 << 32) {
7319 			panic("get_memory_map(): Address is greater than 4 GB!");
7320 			return B_ERROR;
7321 		}
7322 
7323 		table[count].address = entry.address;
7324 		table[count++].size = entry.size;
7325 
7326 		address += entry.size;
7327 		numBytes -= entry.size;
7328 	}
7329 
7330 	// null-terminate the table, if possible
7331 	if (count < numEntries) {
7332 		table[count].address = 0;
7333 		table[count].size = 0;
7334 	}
7335 
7336 	return B_OK;
7337 }
7338 
7339 
7340 /*!	The type of the \a physicalAddress parameter has changed from void* to
7341 	phys_addr_t.
7342 */
7343 extern "C" area_id
7344 __map_physical_memory_beos(const char* name, void* physicalAddress,
7345 	size_t numBytes, uint32 addressSpec, uint32 protection,
7346 	void** _virtualAddress)
7347 {
7348 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7349 		addressSpec, protection, _virtualAddress);
7350 }
7351 
7352 
7353 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7354 	we meddle with the \a lock parameter to force 32 bit.
7355 */
7356 extern "C" area_id
7357 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7358 	size_t size, uint32 lock, uint32 protection)
7359 {
7360 	switch (lock) {
7361 		case B_NO_LOCK:
7362 			break;
7363 		case B_FULL_LOCK:
7364 		case B_LAZY_LOCK:
7365 			lock = B_32_BIT_FULL_LOCK;
7366 			break;
7367 		case B_CONTIGUOUS:
7368 			lock = B_32_BIT_CONTIGUOUS;
7369 			break;
7370 	}
7371 
7372 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7373 		protection);
7374 }
7375 
7376 
7377 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7378 	"BASE");
7379 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7380 	"map_physical_memory@", "BASE");
7381 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7382 	"BASE");
7383 
7384 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7385 	"get_memory_map@@", "1_ALPHA3");
7386 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7387 	"map_physical_memory@@", "1_ALPHA3");
7388 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7389 	"1_ALPHA3");
7390 
7391 
7392 #else
7393 
7394 
7395 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7396 	"get_memory_map@@", "BASE");
7397 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7398 	"map_physical_memory@@", "BASE");
7399 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7400 	"BASE");
7401 
7402 
7403 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7404