xref: /haiku/src/system/kernel/vm/vm.cpp (revision 6a2d53e7237764eab0c7b6d121772f26d636fb60)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
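
// Editorial note: TRACE() and FTRACE() expand to a plain dprintf call, so
// callers pass a doubly parenthesized argument list, e.g.
//
//	TRACE(("map_backing_store: mapping %p\n", address));
//
// (The format string and "address" above are illustrative placeholders.)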
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
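
// Editorial usage sketch (not part of the original source): an
// AreaCacheLocker is used on the stack, typically while the area's address
// space is already locked. Constructing it from a VMArea grabs the area's
// cache via vm_area_get_locked_cache(); destruction releases it again through
// vm_area_put_locked_cache():
//
//	{
//		AreaCacheLocker cacheLocker(area);
//		// area->cache is now locked and referenced; work with it here.
//	}	// unlocked and the reference released automatically on scope exit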
117 
118 
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
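
// Editorial usage sketch: the pattern used throughout this file is to lock an
// area's top cache, hand it to a VMCacheChainLocker and then lock the whole
// source chain before touching pages of lower caches:
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the cache chain ...
//	// the destructor (or an explicit Unlock()) releases all locked caches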
235 
236 } // namespace
237 
238 
239 // The memory reserve an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 static ObjectCache** sPageMappingsObjectCaches;
248 static uint32 sPageMappingsMask;
249 
250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
251 
252 static off_t sAvailableMemory;
253 static off_t sNeededMemory;
254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
255 static uint32 sPageFaults;
256 
257 static VMPhysicalPageMapper* sPhysicalPageMapper;
258 
259 #if DEBUG_CACHE_LIST
260 
261 struct cache_info {
262 	VMCache*	cache;
263 	addr_t		page_count;
264 	addr_t		committed;
265 };
266 
267 static const int kCacheInfoTableCount = 100 * 1024;
268 static cache_info* sCacheInfoTable;
269 
270 #endif	// DEBUG_CACHE_LIST
271 
272 
273 // function declarations
274 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
275 	bool addressSpaceCleanup);
276 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
277 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
278 static status_t map_backing_store(VMAddressSpace* addressSpace,
279 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
280 	int protection, int protectionMax, int mapping, uint32 flags,
281 	const virtual_address_restrictions* addressRestrictions, bool kernel,
282 	VMArea** _area, void** _virtualAddress);
283 static void fix_protection(uint32* protection);
284 
285 
286 //	#pragma mark -
287 
288 
289 #if VM_PAGE_FAULT_TRACING
290 
291 namespace VMPageFaultTracing {
292 
293 class PageFaultStart : public AbstractTraceEntry {
294 public:
295 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
296 		:
297 		fAddress(address),
298 		fPC(pc),
299 		fWrite(write),
300 		fUser(user)
301 	{
302 		Initialized();
303 	}
304 
305 	virtual void AddDump(TraceOutput& out)
306 	{
307 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
308 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
309 	}
310 
311 private:
312 	addr_t	fAddress;
313 	addr_t	fPC;
314 	bool	fWrite;
315 	bool	fUser;
316 };
317 
318 
319 // page fault errors
320 enum {
321 	PAGE_FAULT_ERROR_NO_AREA		= 0,
322 	PAGE_FAULT_ERROR_KERNEL_ONLY,
323 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
324 	PAGE_FAULT_ERROR_READ_PROTECTED,
325 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
326 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
327 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
328 };
329 
330 
331 class PageFaultError : public AbstractTraceEntry {
332 public:
333 	PageFaultError(area_id area, status_t error)
334 		:
335 		fArea(area),
336 		fError(error)
337 	{
338 		Initialized();
339 	}
340 
341 	virtual void AddDump(TraceOutput& out)
342 	{
343 		switch (fError) {
344 			case PAGE_FAULT_ERROR_NO_AREA:
345 				out.Print("page fault error: no area");
346 				break;
347 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
348 				out.Print("page fault error: area: %ld, kernel only", fArea);
349 				break;
350 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
351 				out.Print("page fault error: area: %ld, write protected",
352 					fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_READ_PROTECTED:
355 				out.Print("page fault error: area: %ld, read protected", fArea);
356 				break;
357 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
358 				out.Print("page fault error: area: %ld, execute protected",
359 					fArea);
360 				break;
361 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
362 				out.Print("page fault error: kernel touching bad user memory");
363 				break;
364 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
365 				out.Print("page fault error: no address space");
366 				break;
367 			default:
368 				out.Print("page fault error: area: %ld, error: %s", fArea,
369 					strerror(fError));
370 				break;
371 		}
372 	}
373 
374 private:
375 	area_id		fArea;
376 	status_t	fError;
377 };
378 
379 
380 class PageFaultDone : public AbstractTraceEntry {
381 public:
382 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
383 			vm_page* page)
384 		:
385 		fArea(area),
386 		fTopCache(topCache),
387 		fCache(cache),
388 		fPage(page)
389 	{
390 		Initialized();
391 	}
392 
393 	virtual void AddDump(TraceOutput& out)
394 	{
395 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
396 			"page: %p", fArea, fTopCache, fCache, fPage);
397 	}
398 
399 private:
400 	area_id		fArea;
401 	VMCache*	fTopCache;
402 	VMCache*	fCache;
403 	vm_page*	fPage;
404 };
405 
406 }	// namespace VMPageFaultTracing
407 
408 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
409 #else
410 #	define TPF(x) ;
411 #endif	// VM_PAGE_FAULT_TRACING
412 
413 
414 //	#pragma mark - page mappings allocation
415 
416 
417 static void
418 create_page_mappings_object_caches()
419 {
420 	// We want the largest power of 2 that does not exceed the number of CPUs.
421 	const int32 numCPUs = smp_get_num_cpus();
422 	int32 count = next_power_of_2(numCPUs);
423 	if (count > numCPUs)
424 		count >>= 1;
425 	sPageMappingsMask = count - 1;
426 
427 	sPageMappingsObjectCaches = new object_cache*[count];
428 	if (sPageMappingsObjectCaches == NULL)
429 		panic("failed to allocate page mappings object_cache array");
430 
431 	for (int32 i = 0; i < count; i++) {
432 		char name[32];
433 		snprintf(name, sizeof(name), "page mappings %" B_PRId32, i);
434 
435 		object_cache* cache = create_object_cache_etc(name,
436 			sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
437 			NULL, NULL);
438 		if (cache == NULL)
439 			panic("failed to create page mappings object_cache");
440 
441 		object_cache_set_minimum_reserve(cache, 1024);
442 		sPageMappingsObjectCaches[i] = cache;
443 	}
444 }
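
// Worked example (editorial): with 6 CPUs, next_power_of_2(6) yields 8, which
// is larger than 6, so the count is halved to 4 and sPageMappingsMask becomes
// 3; physical page N is then served by object cache N & 3 (see
// page_mapping_object_cache_for() below).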
445 
446 
447 static object_cache*
448 page_mapping_object_cache_for(page_num_t page)
449 {
450 	return sPageMappingsObjectCaches[page & sPageMappingsMask];
451 }
452 
453 
454 static vm_page_mapping*
455 allocate_page_mapping(page_num_t page, uint32 flags = 0)
456 {
457 	return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page),
458 		flags);
459 }
460 
461 
462 void
463 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags)
464 {
465 	object_cache_free(page_mapping_object_cache_for(page), mapping, flags);
466 }
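
// Editorial sketch: allocation and free must be keyed by the same physical
// page number so both resolve to the same object cache (compare the
// allocation in map_page() below):
//
//	vm_page_mapping* mapping = allocate_page_mapping(
//		page->physical_page_number, CACHE_DONT_WAIT_FOR_MEMORY);
//	// ... use the mapping ...
//	vm_free_page_mapping(page->physical_page_number, mapping, 0);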
467 
468 
469 //	#pragma mark -
470 
471 
472 /*!	The page's cache must be locked.
473 */
474 static inline void
475 increment_page_wired_count(vm_page* page)
476 {
477 	if (!page->IsMapped())
478 		atomic_add(&gMappedPagesCount, 1);
479 	page->IncrementWiredCount();
480 }
481 
482 
483 /*!	The page's cache must be locked.
484 */
485 static inline void
486 decrement_page_wired_count(vm_page* page)
487 {
488 	page->DecrementWiredCount();
489 	if (!page->IsMapped())
490 		atomic_add(&gMappedPagesCount, -1);
491 }
492 
493 
494 static inline addr_t
495 virtual_page_address(VMArea* area, vm_page* page)
496 {
497 	return area->Base()
498 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
499 }
500 
501 
502 static inline bool
503 is_page_in_area(VMArea* area, vm_page* page)
504 {
505 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
506 	return pageCacheOffsetBytes >= area->cache_offset
507 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
508 }
509 
510 
511 //! You need to have the address space locked when calling this function
512 static VMArea*
513 lookup_area(VMAddressSpace* addressSpace, area_id id)
514 {
515 	VMAreas::ReadLock();
516 
517 	VMArea* area = VMAreas::LookupLocked(id);
518 	if (area != NULL && area->address_space != addressSpace)
519 		area = NULL;
520 
521 	VMAreas::ReadUnlock();
522 
523 	return area;
524 }
525 
526 
527 static inline size_t
528 area_page_protections_size(size_t areaSize)
529 {
530 	// In the page protections we store only the three user protections,
531 	// so we use 4 bits per page.
532 	return (areaSize / B_PAGE_SIZE + 1) / 2;
533 }
534 
535 
536 static status_t
537 allocate_area_page_protections(VMArea* area)
538 {
539 	size_t bytes = area_page_protections_size(area->Size());
540 	area->page_protections = (uint8*)malloc_etc(bytes,
541 		area->address_space == VMAddressSpace::Kernel()
542 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
543 	if (area->page_protections == NULL)
544 		return B_NO_MEMORY;
545 
546 	// init the page protections for all pages to that of the area
547 	uint32 areaProtection = area->protection
548 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
549 	memset(area->page_protections, areaProtection | (areaProtection << 4),
550 		bytes);
551 	return B_OK;
552 }
553 
554 
555 static inline void
556 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
557 {
558 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
559 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
560 	uint8& entry = area->page_protections[pageIndex / 2];
561 	if (pageIndex % 2 == 0)
562 		entry = (entry & 0xf0) | protection;
563 	else
564 		entry = (entry & 0x0f) | (protection << 4);
565 }
566 
567 
568 static inline uint32
569 get_area_page_protection(VMArea* area, addr_t pageAddress)
570 {
571 	if (area->page_protections == NULL)
572 		return area->protection;
573 
574 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
575 	uint32 protection = area->page_protections[pageIndex / 2];
576 	if (pageIndex % 2 == 0)
577 		protection &= 0x0f;
578 	else
579 		protection >>= 4;
580 
581 	uint32 kernelProtection = 0;
582 	if ((protection & B_READ_AREA) != 0)
583 		kernelProtection |= B_KERNEL_READ_AREA;
584 	if ((protection & B_WRITE_AREA) != 0)
585 		kernelProtection |= B_KERNEL_WRITE_AREA;
586 
587 	// If this is a kernel area we return only the kernel flags.
588 	if (area->address_space == VMAddressSpace::Kernel())
589 		return kernelProtection;
590 
591 	return protection | kernelProtection;
592 }
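
// Worked example (editorial): the per-page protections are stored as one
// nibble per page. For a 16-page area, area_page_protections_size() returns
// (16 + 1) / 2 = 8 bytes; page index 5 lives in byte 5 / 2 = 2 and, since 5
// is odd, in that byte's high nibble, so set_area_page_protection()
// effectively does:
//
//	uint8& entry = area->page_protections[2];
//	entry = (entry & 0x0f) | (protection << 4);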
593 
594 
595 static inline uint8*
596 realloc_page_protections(uint8* pageProtections, size_t areaSize,
597 	uint32 allocationFlags)
598 {
599 	size_t bytes = area_page_protections_size(areaSize);
600 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
601 }
602 
603 
604 /*!	The caller must have reserved as many pages as the translation map
605 	implementation might need to map this page.
606 	The page's cache must be locked.
607 */
608 static status_t
609 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
610 	vm_page_reservation* reservation)
611 {
612 	VMTranslationMap* map = area->address_space->TranslationMap();
613 
614 	bool wasMapped = page->IsMapped();
615 
616 	if (area->wiring == B_NO_LOCK) {
617 		DEBUG_PAGE_ACCESS_CHECK(page);
618 
619 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
620 		vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number,
621 			CACHE_DONT_WAIT_FOR_MEMORY
622 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
623 		if (mapping == NULL)
624 			return B_NO_MEMORY;
625 
626 		mapping->page = page;
627 		mapping->area = area;
628 
629 		map->Lock();
630 
631 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
632 			area->MemoryType(), reservation);
633 
634 		// insert mapping into lists
635 		if (!page->IsMapped())
636 			atomic_add(&gMappedPagesCount, 1);
637 
638 		page->mappings.Add(mapping);
639 		area->mappings.Add(mapping);
640 
641 		map->Unlock();
642 	} else {
643 		DEBUG_PAGE_ACCESS_CHECK(page);
644 
645 		map->Lock();
646 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
647 			area->MemoryType(), reservation);
648 		map->Unlock();
649 
650 		increment_page_wired_count(page);
651 	}
652 
653 	if (!wasMapped) {
654 		// The page is mapped now, so we must not remain in the cached queue.
655 		// It also makes sense to move it from the inactive to the active queue,
656 		// since otherwise the page daemon wouldn't come to keep track of it (in
657 		// idle mode) -- if the page isn't touched, it will be deactivated after
658 		// a full iteration through the queue at the latest.
659 		if (page->State() == PAGE_STATE_CACHED
660 				|| page->State() == PAGE_STATE_INACTIVE) {
661 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
662 		}
663 	}
664 
665 	return B_OK;
666 }
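
// Editorial caller sketch: map_page() expects the cache to be locked and a
// page reservation that already covers what the translation map might need,
// mirroring the B_FULL_LOCK path of vm_create_anonymous_area() further below:
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, reservedPages, priority);
//	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
//	cache->InsertPage(page, offset);
//	map_page(area, page, address, protection, &reservation);
//	DEBUG_PAGE_ACCESS_END(page);
//	vm_page_unreserve_pages(&reservation);
//
// (reservedPages, priority, offset, address and protection are placeholders.)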
667 
668 
669 /*!	The caller must hold the lock of the page's cache when calling this
670 	function.
671 */
672 static inline bool
673 unmap_page(VMArea* area, addr_t virtualAddress)
674 {
675 	return area->address_space->TranslationMap()->UnmapPage(area,
676 		virtualAddress, true);
677 }
678 
679 
680 /*!	The caller must hold the locks of all mapped pages' caches when calling
681 	this function.
682 */
683 static inline void
684 unmap_pages(VMArea* area, addr_t base, size_t size)
685 {
686 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
687 }
688 
689 
690 static inline bool
691 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
692 {
693 	if (address < area->Base()) {
694 		offset = area->Base() - address;
695 		if (offset >= size)
696 			return false;
697 
698 		address = area->Base();
699 		size -= offset;
700 		offset = 0;
701 		if (size > area->Size())
702 			size = area->Size();
703 
704 		return true;
705 	}
706 
707 	offset = address - area->Base();
708 	if (offset >= area->Size())
709 		return false;
710 
711 	if (size >= area->Size() - offset)
712 		size = area->Size() - offset;
713 
714 	return true;
715 }
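
// Worked example (editorial): for an area at base 0x100000 with size 0x4000,
// a request with address 0xff000 and size 0x3000 is clipped to address
// 0x100000, size 0x2000 and offset 0 (the 0x1000 bytes below the area are
// dropped); a request that lies entirely outside the area makes the function
// return false, so the caller has nothing to do.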
716 
717 
718 /*!	Cuts a piece out of an area. If the given cut range covers the complete
719 	area, it is deleted. If it covers the beginning or the end, the area is
720 	resized accordingly. If the range covers some part in the middle of the
721 	area, it is split in two; in this case the second area is returned via
722 	\a _secondArea (the variable is left untouched in the other cases).
723 	The address space must be write locked.
724 	The caller must ensure that no part of the given range is wired.
725 */
726 static status_t
727 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
728 	addr_t size, VMArea** _secondArea, bool kernel)
729 {
730 	addr_t offset;
731 	if (!intersect_area(area, address, size, offset))
732 		return B_OK;
733 
734 	// Is the area fully covered?
735 	if (address == area->Base() && size == area->Size()) {
736 		delete_area(addressSpace, area, false);
737 		return B_OK;
738 	}
739 
740 	int priority;
741 	uint32 allocationFlags;
742 	if (addressSpace == VMAddressSpace::Kernel()) {
743 		priority = VM_PRIORITY_SYSTEM;
744 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
745 			| HEAP_DONT_LOCK_KERNEL_SPACE;
746 	} else {
747 		priority = VM_PRIORITY_USER;
748 		allocationFlags = 0;
749 	}
750 
751 	VMCache* cache = vm_area_get_locked_cache(area);
752 	VMCacheChainLocker cacheChainLocker(cache);
753 	cacheChainLocker.LockAllSourceCaches();
754 
755 	// If no one else uses the area's cache and it's an anonymous cache, we can
756 	// resize or split it, too.
757 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
758 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
759 
760 	const addr_t oldSize = area->Size();
761 
762 	// Cut the end only?
763 	if (offset > 0 && size == area->Size() - offset) {
764 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
765 			allocationFlags);
766 		if (error != B_OK)
767 			return error;
768 
769 		if (area->page_protections != NULL) {
770 			uint8* newProtections = realloc_page_protections(
771 				area->page_protections, area->Size(), allocationFlags);
772 
773 			if (newProtections == NULL) {
774 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
775 				return B_NO_MEMORY;
776 			}
777 
778 			area->page_protections = newProtections;
779 		}
780 
781 		// unmap pages
782 		unmap_pages(area, address, size);
783 
784 		if (onlyCacheUser) {
785 			// Since VMCache::Resize() can temporarily drop the lock, we must
786 			// unlock all lower caches to prevent locking order inversion.
787 			cacheChainLocker.Unlock(cache);
788 			cache->Resize(cache->virtual_base + offset, priority);
789 			cache->ReleaseRefAndUnlock();
790 		}
791 
792 		return B_OK;
793 	}
794 
795 	// Cut the beginning only?
796 	if (area->Base() == address) {
797 		uint8* newProtections = NULL;
798 		if (area->page_protections != NULL) {
799 			// Allocate all memory before shifting as the shift might lose some
800 			// bits.
801 			newProtections = realloc_page_protections(NULL, area->Size(),
802 				allocationFlags);
803 
804 			if (newProtections == NULL)
805 				return B_NO_MEMORY;
806 		}
807 
808 		// resize the area
809 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
810 			allocationFlags);
811 		if (error != B_OK) {
812 			if (newProtections != NULL)
813 				free_etc(newProtections, allocationFlags);
814 			return error;
815 		}
816 
817 		if (area->page_protections != NULL) {
818 			size_t oldBytes = area_page_protections_size(oldSize);
819 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
820 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
821 
822 			size_t bytes = area_page_protections_size(area->Size());
823 			memcpy(newProtections, area->page_protections, bytes);
824 			free_etc(area->page_protections, allocationFlags);
825 			area->page_protections = newProtections;
826 		}
827 
828 		// unmap pages
829 		unmap_pages(area, address, size);
830 
831 		if (onlyCacheUser) {
832 			// Since VMCache::Rebase() can temporarily drop the lock, we must
833 			// unlock all lower caches to prevent locking order inversion.
834 			cacheChainLocker.Unlock(cache);
835 			cache->Rebase(cache->virtual_base + size, priority);
836 			cache->ReleaseRefAndUnlock();
837 		}
838 		area->cache_offset += size;
839 
840 		return B_OK;
841 	}
842 
843 	// The tough part -- cut a piece out of the middle of the area.
844 	// We do that by shrinking the area to the begin section and creating a
845 	// new area for the end section.
846 	addr_t firstNewSize = offset;
847 	addr_t secondBase = address + size;
848 	addr_t secondSize = area->Size() - offset - size;
849 
850 	// unmap pages
851 	unmap_pages(area, address, area->Size() - firstNewSize);
852 
853 	// resize the area
854 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
855 		allocationFlags);
856 	if (error != B_OK)
857 		return error;
858 
859 	uint8* areaNewProtections = NULL;
860 	uint8* secondAreaNewProtections = NULL;
861 
862 	// Try to allocate the new memory before making some hard to reverse
863 	// changes.
864 	if (area->page_protections != NULL) {
865 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
866 			allocationFlags);
867 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
868 			allocationFlags);
869 
870 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
871 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
872 			free_etc(areaNewProtections, allocationFlags);
873 			free_etc(secondAreaNewProtections, allocationFlags);
874 			return B_NO_MEMORY;
875 		}
876 	}
877 
878 	virtual_address_restrictions addressRestrictions = {};
879 	addressRestrictions.address = (void*)secondBase;
880 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
881 	VMArea* secondArea;
882 
883 	if (onlyCacheUser) {
884 		// Create a new cache for the second area.
885 		VMCache* secondCache;
886 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
887 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
888 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
889 		if (error != B_OK) {
890 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
891 			free_etc(areaNewProtections, allocationFlags);
892 			free_etc(secondAreaNewProtections, allocationFlags);
893 			return error;
894 		}
895 
896 		secondCache->Lock();
897 		secondCache->temporary = cache->temporary;
898 		secondCache->virtual_base = area->cache_offset;
899 		secondCache->virtual_end = area->cache_offset + secondSize;
900 
901 		// Transfer the concerned pages from the first cache.
902 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
903 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
904 			area->cache_offset);
905 
906 		if (error == B_OK) {
907 			// Since VMCache::Resize() can temporarily drop the lock, we must
908 			// unlock all lower caches to prevent locking order inversion.
909 			cacheChainLocker.Unlock(cache);
910 			cache->Resize(cache->virtual_base + firstNewSize, priority);
911 			// Don't unlock the cache yet because we might have to resize it
912 			// back.
913 
914 			// Map the second area.
915 			error = map_backing_store(addressSpace, secondCache,
916 				area->cache_offset, area->name, secondSize, area->wiring,
917 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
918 				&addressRestrictions, kernel, &secondArea, NULL);
919 		}
920 
921 		if (error != B_OK) {
922 			// Restore the original cache.
923 			cache->Resize(cache->virtual_base + oldSize, priority);
924 
925 			// Move the pages back.
926 			status_t readoptStatus = cache->Adopt(secondCache,
927 				area->cache_offset, secondSize, adoptOffset);
928 			if (readoptStatus != B_OK) {
929 				// Some (swap) pages have not been moved back and will be lost
930 				// once the second cache is deleted.
931 				panic("failed to restore cache range: %s",
932 					strerror(readoptStatus));
933 
934 				// TODO: Handle out of memory cases by freeing memory and
935 				// retrying.
936 			}
937 
938 			cache->ReleaseRefAndUnlock();
939 			secondCache->ReleaseRefAndUnlock();
940 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
941 			free_etc(areaNewProtections, allocationFlags);
942 			free_etc(secondAreaNewProtections, allocationFlags);
943 			return error;
944 		}
945 
946 		// Now we can unlock it.
947 		cache->ReleaseRefAndUnlock();
948 		secondCache->Unlock();
949 	} else {
950 		error = map_backing_store(addressSpace, cache, area->cache_offset
951 			+ (secondBase - area->Base()),
952 			area->name, secondSize, area->wiring, area->protection,
953 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
954 			&addressRestrictions, kernel, &secondArea, NULL);
955 		if (error != B_OK) {
956 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
957 			free_etc(areaNewProtections, allocationFlags);
958 			free_etc(secondAreaNewProtections, allocationFlags);
959 			return error;
960 		}
961 		// We need a cache reference for the new area.
962 		cache->AcquireRefLocked();
963 	}
964 
965 	if (area->page_protections != NULL) {
966 		// Copy the protection bits of the first area.
967 		size_t areaBytes = area_page_protections_size(area->Size());
968 		memcpy(areaNewProtections, area->page_protections, areaBytes);
969 		uint8* areaOldProtections = area->page_protections;
970 		area->page_protections = areaNewProtections;
971 
972 		// Shift the protection bits of the second area to the start of
973 		// the old array.
974 		size_t oldBytes = area_page_protections_size(oldSize);
975 		addr_t secondAreaOffset = secondBase - area->Base();
976 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
977 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
978 
979 		// Copy the protection bits of the second area.
980 		size_t secondAreaBytes = area_page_protections_size(secondSize);
981 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
982 		secondArea->page_protections = secondAreaNewProtections;
983 
984 		// We don't need this anymore.
985 		free_etc(areaOldProtections, allocationFlags);
986 
987 		// Set the correct page protections for the second area.
988 		VMTranslationMap* map = addressSpace->TranslationMap();
989 		map->Lock();
990 		for (VMCachePagesTree::Iterator it
991 				= secondArea->cache->pages.GetIterator();
992 				vm_page* page = it.Next();) {
993 			if (is_page_in_area(secondArea, page)) {
994 				addr_t address = virtual_page_address(secondArea, page);
995 				uint32 pageProtection
996 					= get_area_page_protection(secondArea, address);
997 				map->ProtectPage(secondArea, address, pageProtection);
998 			}
999 		}
1000 		map->Unlock();
1001 	}
1002 
1003 	if (_secondArea != NULL)
1004 		*_secondArea = secondArea;
1005 
1006 	return B_OK;
1007 }
1008 
1009 
1010 /*!	Deletes or cuts all areas in the given address range.
1011 	The address space must be write-locked.
1012 	The caller must ensure that no part of the given range is wired.
1013 */
1014 static status_t
1015 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1016 	bool kernel)
1017 {
1018 	size = PAGE_ALIGN(size);
1019 
1020 	// Check whether the caller is allowed to modify the concerned areas.
1021 	if (!kernel) {
1022 		for (VMAddressSpace::AreaRangeIterator it
1023 				= addressSpace->GetAreaRangeIterator(address, size);
1024 			VMArea* area = it.Next();) {
1025 
1026 			if ((area->protection & B_KERNEL_AREA) != 0) {
1027 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
1028 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
1029 					team_get_current_team_id(), area->id, area->name);
1030 				return B_NOT_ALLOWED;
1031 			}
1032 		}
1033 	}
1034 
1035 	for (VMAddressSpace::AreaRangeIterator it
1036 			= addressSpace->GetAreaRangeIterator(address, size);
1037 		VMArea* area = it.Next();) {
1038 
1039 		status_t error = cut_area(addressSpace, area, address, size, NULL,
1040 			kernel);
1041 		if (error != B_OK)
1042 			return error;
1043 			// Failing after already messing with areas is ugly, but we
1044 			// can't do anything about it.
1045 	}
1046 
1047 	return B_OK;
1048 }
1049 
1050 
1051 static status_t
1052 discard_area_range(VMArea* area, addr_t address, addr_t size)
1053 {
1054 	addr_t offset;
1055 	if (!intersect_area(area, address, size, offset))
1056 		return B_OK;
1057 
1058 	// If someone else uses the area's cache or it's not an anonymous cache, we
1059 	// can't discard.
1060 	VMCache* cache = vm_area_get_locked_cache(area);
1061 	if (cache->areas != area || area->cache_next != NULL
1062 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1063 		return B_OK;
1064 	}
1065 
1066 	VMCacheChainLocker cacheChainLocker(cache);
1067 	cacheChainLocker.LockAllSourceCaches();
1068 
1069 	unmap_pages(area, address, size);
1070 
1071 	// Since VMCache::Discard() can temporarily drop the lock, we must
1072 	// unlock all lower caches to prevent locking order inversion.
1073 	cacheChainLocker.Unlock(cache);
1074 	cache->Discard(cache->virtual_base + offset, size);
1075 	cache->ReleaseRefAndUnlock();
1076 
1077 	return B_OK;
1078 }
1079 
1080 
1081 static status_t
1082 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1083 	bool kernel)
1084 {
1085 	for (VMAddressSpace::AreaRangeIterator it
1086 		= addressSpace->GetAreaRangeIterator(address, size);
1087 			VMArea* area = it.Next();) {
1088 		status_t error = discard_area_range(area, address, size);
1089 		if (error != B_OK)
1090 			return error;
1091 	}
1092 
1093 	return B_OK;
1094 }
1095 
1096 
1097 /*! You need to hold the lock of the cache and the write lock of the address
1098 	space when calling this function.
1099 	Note that in case of error the cache will be temporarily unlocked.
1100 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1101 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1102 	that no part of the specified address range (base \c *_virtualAddress, size
1103 	\a size) is wired. The cache will also be temporarily unlocked.
1104 */
1105 static status_t
1106 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1107 	const char* areaName, addr_t size, int wiring, int protection,
1108 	int protectionMax, int mapping,
1109 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1110 	bool kernel, VMArea** _area, void** _virtualAddress)
1111 {
1112 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1113 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1114 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1115 		addressSpace, cache, addressRestrictions->address, offset, size,
1116 		addressRestrictions->address_specification, wiring, protection,
1117 		protectionMax, _area, areaName));
1118 	cache->AssertLocked();
1119 
1120 	if (size == 0) {
1121 #if KDEBUG
1122 		panic("map_backing_store(): called with size=0 for area '%s'!",
1123 			areaName);
1124 #endif
1125 		return B_BAD_VALUE;
1126 	}
1127 	if (offset < 0)
1128 		return B_BAD_VALUE;
1129 
1130 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1131 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1132 	int priority;
1133 	if (addressSpace != VMAddressSpace::Kernel()) {
1134 		priority = VM_PRIORITY_USER;
1135 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1136 		priority = VM_PRIORITY_VIP;
1137 		allocationFlags |= HEAP_PRIORITY_VIP;
1138 	} else
1139 		priority = VM_PRIORITY_SYSTEM;
1140 
1141 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1142 		allocationFlags);
1143 	if (area == NULL)
1144 		return B_NO_MEMORY;
1145 	if (mapping != REGION_PRIVATE_MAP)
1146 		area->protection_max = protectionMax & B_USER_PROTECTION;
1147 
1148 	status_t status;
1149 
1150 	// if this is a private map, we need to create a new cache
1151 	// to handle the private copies of pages as they are written to
1152 	VMCache* sourceCache = cache;
1153 	if (mapping == REGION_PRIVATE_MAP) {
1154 		VMCache* newCache;
1155 
1156 		// create an anonymous cache
1157 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1158 			(protection & B_STACK_AREA) != 0
1159 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1160 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1161 		if (status != B_OK)
1162 			goto err1;
1163 
1164 		newCache->Lock();
1165 		newCache->temporary = 1;
1166 		newCache->virtual_base = offset;
1167 		newCache->virtual_end = offset + size;
1168 
1169 		cache->AddConsumer(newCache);
1170 
1171 		cache = newCache;
1172 	}
1173 
1174 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1175 		status = cache->SetMinimalCommitment(size, priority);
1176 		if (status != B_OK)
1177 			goto err2;
1178 	}
1179 
1180 	// check to see if this address space has entered DELETE state
1181 	if (addressSpace->IsBeingDeleted()) {
1182 		// okay, someone is trying to delete this address space now, so we can't
1183 		// insert the area, so back out
1184 		status = B_BAD_TEAM_ID;
1185 		goto err2;
1186 	}
1187 
1188 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1189 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1190 		// temporarily unlock the current cache since it might be mapped to
1191 		// some existing area, and unmap_address_range also needs to lock that
1192 		// cache to delete the area.
1193 		cache->Unlock();
1194 		status = unmap_address_range(addressSpace,
1195 			(addr_t)addressRestrictions->address, size, kernel);
1196 		cache->Lock();
1197 		if (status != B_OK)
1198 			goto err2;
1199 	}
1200 
1201 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1202 		allocationFlags, _virtualAddress);
1203 	if (status == B_NO_MEMORY
1204 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1205 		// Due to how many locks are held, we cannot wait here for space to be
1206 		// freed up, but we can at least notify the low_resource handler.
1207 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1208 	}
1209 	if (status != B_OK)
1210 		goto err2;
1211 
1212 	// attach the cache to the area
1213 	area->cache = cache;
1214 	area->cache_offset = offset;
1215 
1216 	// point the cache back to the area
1217 	cache->InsertAreaLocked(area);
1218 	if (mapping == REGION_PRIVATE_MAP)
1219 		cache->Unlock();
1220 
1221 	// insert the area in the global areas map
1222 	status = VMAreas::Insert(area);
1223 	if (status != B_OK)
1224 		goto err3;
1225 
1226 	// grab a ref to the address space (the area holds this)
1227 	addressSpace->Get();
1228 
1229 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1230 //		cache, sourceCache, areaName, area);
1231 
1232 	*_area = area;
1233 	return B_OK;
1234 
1235 err3:
1236 	cache->Lock();
1237 	cache->RemoveArea(area);
1238 	area->cache = NULL;
1239 err2:
1240 	if (mapping == REGION_PRIVATE_MAP) {
1241 		// We created this cache, so we must delete it again. Note, that we
1242 		// need to temporarily unlock the source cache or we'll otherwise
1243 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1244 		sourceCache->Unlock();
1245 		cache->ReleaseRefAndUnlock();
1246 		sourceCache->Lock();
1247 	}
1248 err1:
1249 	addressSpace->DeleteArea(area, allocationFlags);
1250 	return status;
1251 }
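
// Editorial usage sketch (modeled on vm_block_address_range() below): a
// caller creates and locks a cache, maps it, and on failure drops its cache
// reference again:
//
//	cache->Lock();
//	virtual_address_restrictions restrictions = {};
//	restrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	VMArea* area;
//	status_t status = map_backing_store(addressSpace, cache, 0, "example",
//		size, B_NO_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0,
//		REGION_NO_PRIVATE_MAP, 0, &restrictions, true, &area, NULL);
//	if (status != B_OK) {
//		cache->ReleaseRefAndUnlock();
//		return status;
//	}
//	cache->Unlock();
//
// ("example", the protection and the address specification are illustrative
// values only.)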
1252 
1253 
1254 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1255 	  locker1, locker2).
1256 */
1257 template<typename LockerType1, typename LockerType2>
1258 static inline bool
1259 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1260 {
1261 	area->cache->AssertLocked();
1262 
1263 	VMAreaUnwiredWaiter waiter;
1264 	if (!area->AddWaiterIfWired(&waiter))
1265 		return false;
1266 
1267 	// unlock everything and wait
1268 	if (locker1 != NULL)
1269 		locker1->Unlock();
1270 	if (locker2 != NULL)
1271 		locker2->Unlock();
1272 
1273 	waiter.waitEntry.Wait();
1274 
1275 	return true;
1276 }
1277 
1278 
1279 /*!	Checks whether the given area has any wired ranges intersecting with the
1280 	specified range and waits, if so.
1281 
1282 	When it has to wait, the function calls \c Unlock() on both \a locker1
1283 	and \a locker2, if given.
1284 	The area's top cache must be locked and must be unlocked as a side effect
1285 	of calling \c Unlock() on either \a locker1 or \a locker2.
1286 
1287 	If the function does not have to wait it does not modify or unlock any
1288 	object.
1289 
1290 	\param area The area to be checked.
1291 	\param base The base address of the range to check.
1292 	\param size The size of the address range to check.
1293 	\param locker1 An object to be unlocked before starting to wait (may
1294 		be \c NULL).
1295 	\param locker2 An object to be unlocked before starting to wait (may
1296 		be \c NULL).
1297 	\return \c true, if the function had to wait, \c false otherwise.
1298 */
1299 template<typename LockerType1, typename LockerType2>
1300 static inline bool
1301 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1302 	LockerType1* locker1, LockerType2* locker2)
1303 {
1304 	area->cache->AssertLocked();
1305 
1306 	VMAreaUnwiredWaiter waiter;
1307 	if (!area->AddWaiterIfWired(&waiter, base, size))
1308 		return false;
1309 
1310 	// unlock everything and wait
1311 	if (locker1 != NULL)
1312 		locker1->Unlock();
1313 	if (locker2 != NULL)
1314 		locker2->Unlock();
1315 
1316 	waiter.waitEntry.Wait();
1317 
1318 	return true;
1319 }
1320 
1321 
1322 /*!	Checks whether the given address space has any wired ranges intersecting
1323 	with the specified range and waits, if so.
1324 
1325 	Similar to wait_if_area_range_is_wired(), with the following differences:
1326 	- All areas intersecting with the range are checked (respectively all until
1327 	  one is found that contains a wired range intersecting with the given
1328 	  range).
1329 	- The given address space must at least be read-locked and must be unlocked
1330 	  when \c Unlock() is called on \a locker.
1331 	- None of the areas' caches are allowed to be locked.
1332 */
1333 template<typename LockerType>
1334 static inline bool
1335 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1336 	size_t size, LockerType* locker)
1337 {
1338 	for (VMAddressSpace::AreaRangeIterator it
1339 		= addressSpace->GetAreaRangeIterator(base, size);
1340 			VMArea* area = it.Next();) {
1341 
1342 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1343 
1344 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1345 			return true;
1346 	}
1347 
1348 	return false;
1349 }
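
// Editorial caller sketch: since waiting unlocks the given locker, callers
// re-lock and re-check in a loop until no wired range intersects anymore,
// the same pattern vm_create_anonymous_area() uses further below:
//
//	do {
//		status = locker.SetTo(team);
//		if (status != B_OK)
//			return status;
//		addressSpace = locker.AddressSpace();
//	} while (wait_if_address_range_is_wired(addressSpace, address, size,
//		&locker));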
1350 
1351 
1352 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1353 	It must be called in a situation where the kernel address space may be
1354 	locked.
1355 */
1356 status_t
1357 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1358 {
1359 	AddressSpaceReadLocker locker;
1360 	VMArea* area;
1361 	status_t status = locker.SetFromArea(id, area);
1362 	if (status != B_OK)
1363 		return status;
1364 
1365 	if (area->page_protections == NULL) {
1366 		status = allocate_area_page_protections(area);
1367 		if (status != B_OK)
1368 			return status;
1369 	}
1370 
1371 	*cookie = (void*)area;
1372 	return B_OK;
1373 }
1374 
1375 
1376 /*!	This is a debug helper function that can only be used with very specific
1377 	use cases.
1378 	Sets protection for the given address range to the protection specified.
1379 	If \a protection is 0 then the involved pages will be marked non-present
1380 	in the translation map to cause a fault on access. The pages aren't
1381 	actually unmapped however so that they can be marked present again with
1382 	additional calls to this function. For this to work the area must be
1383 	fully locked in memory so that the pages aren't otherwise touched.
1384 	This function does not lock the kernel address space and needs to be
1385 	supplied with a \a cookie retrieved from a successful call to
1386 	vm_prepare_kernel_area_debug_protection().
1387 */
1388 status_t
1389 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1390 	uint32 protection)
1391 {
1392 	// check address range
1393 	addr_t address = (addr_t)_address;
1394 	size = PAGE_ALIGN(size);
1395 
1396 	if ((address % B_PAGE_SIZE) != 0
1397 		|| (addr_t)address + size < (addr_t)address
1398 		|| !IS_KERNEL_ADDRESS(address)
1399 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1400 		return B_BAD_VALUE;
1401 	}
1402 
1403 	// Translate the kernel protection to user protection as we only store that.
1404 	if ((protection & B_KERNEL_READ_AREA) != 0)
1405 		protection |= B_READ_AREA;
1406 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1407 		protection |= B_WRITE_AREA;
1408 
1409 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1410 	VMTranslationMap* map = addressSpace->TranslationMap();
1411 	VMArea* area = (VMArea*)cookie;
1412 
1413 	addr_t offset = address - area->Base();
1414 	if (area->Size() - offset < size) {
1415 		panic("protect range not fully within supplied area");
1416 		return B_BAD_VALUE;
1417 	}
1418 
1419 	if (area->page_protections == NULL) {
1420 		panic("area has no page protections");
1421 		return B_BAD_VALUE;
1422 	}
1423 
1424 	// Invalidate the mapping entries so any access to them will fault, or
1425 	// restore the mapping entries unchanged so that lookup will succeed again.
1426 	map->Lock();
1427 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1428 	map->Unlock();
1429 
1430 	// And set the proper page protections so that the fault case will actually
1431 	// fail and not simply try to map a new page.
1432 	for (addr_t pageAddress = address; pageAddress < address + size;
1433 			pageAddress += B_PAGE_SIZE) {
1434 		set_area_page_protection(area, pageAddress, protection);
1435 	}
1436 
1437 	return B_OK;
1438 }
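
// Editorial usage sketch (based on the documentation of the two functions
// above; areaID and pageAddress are placeholders):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make one page fault on any access ...
//		vm_set_kernel_area_debug_protection(cookie, pageAddress,
//			B_PAGE_SIZE, 0);
//		// ... and later make it fully accessible again
//		vm_set_kernel_area_debug_protection(cookie, pageAddress,
//			B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}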
1439 
1440 
1441 status_t
1442 vm_block_address_range(const char* name, void* address, addr_t size)
1443 {
1444 	if (!arch_vm_supports_protection(0))
1445 		return B_NOT_SUPPORTED;
1446 
1447 	AddressSpaceWriteLocker locker;
1448 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1449 	if (status != B_OK)
1450 		return status;
1451 
1452 	VMAddressSpace* addressSpace = locker.AddressSpace();
1453 
1454 	// create an anonymous cache
1455 	VMCache* cache;
1456 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1457 		VM_PRIORITY_SYSTEM);
1458 	if (status != B_OK)
1459 		return status;
1460 
1461 	cache->temporary = 1;
1462 	cache->virtual_end = size;
1463 	cache->Lock();
1464 
1465 	VMArea* area;
1466 	virtual_address_restrictions addressRestrictions = {};
1467 	addressRestrictions.address = address;
1468 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1469 	status = map_backing_store(addressSpace, cache, 0, name, size,
1470 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1471 		true, &area, NULL);
1472 	if (status != B_OK) {
1473 		cache->ReleaseRefAndUnlock();
1474 		return status;
1475 	}
1476 
1477 	cache->Unlock();
1478 	area->cache_type = CACHE_TYPE_RAM;
1479 	return area->id;
1480 }
1481 
1482 
1483 status_t
1484 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1485 {
1486 	AddressSpaceWriteLocker locker(team);
1487 	if (!locker.IsLocked())
1488 		return B_BAD_TEAM_ID;
1489 
1490 	VMAddressSpace* addressSpace = locker.AddressSpace();
1491 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1492 		addressSpace == VMAddressSpace::Kernel()
1493 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1494 }
1495 
1496 
1497 status_t
1498 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1499 	addr_t size, uint32 flags)
1500 {
1501 	if (size == 0)
1502 		return B_BAD_VALUE;
1503 
1504 	AddressSpaceWriteLocker locker(team);
1505 	if (!locker.IsLocked())
1506 		return B_BAD_TEAM_ID;
1507 
1508 	virtual_address_restrictions addressRestrictions = {};
1509 	addressRestrictions.address = *_address;
1510 	addressRestrictions.address_specification = addressSpec;
1511 	VMAddressSpace* addressSpace = locker.AddressSpace();
1512 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1513 		addressSpace == VMAddressSpace::Kernel()
1514 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1515 		_address);
1516 }
1517 
1518 
1519 area_id
1520 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1521 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1522 	const virtual_address_restrictions* virtualAddressRestrictions,
1523 	const physical_address_restrictions* physicalAddressRestrictions,
1524 	bool kernel, void** _address)
1525 {
1526 	VMArea* area;
1527 	VMCache* cache;
1528 	vm_page* page = NULL;
1529 	bool isStack = (protection & B_STACK_AREA) != 0;
1530 	page_num_t guardPages;
1531 	bool canOvercommit = false;
1532 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1533 		? VM_PAGE_ALLOC_CLEAR : 0;
1534 
1535 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1536 		team, name, size));
1537 
1538 	size = PAGE_ALIGN(size);
1539 	guardSize = PAGE_ALIGN(guardSize);
1540 	guardPages = guardSize / B_PAGE_SIZE;
1541 
1542 	if (size == 0 || size < guardSize)
1543 		return B_BAD_VALUE;
1544 	if (!arch_vm_supports_protection(protection))
1545 		return B_NOT_SUPPORTED;
1546 
1547 	if (team == B_CURRENT_TEAM)
1548 		team = VMAddressSpace::CurrentID();
1549 	if (team < 0)
1550 		return B_BAD_TEAM_ID;
1551 
1552 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1553 		canOvercommit = true;
1554 
1555 #ifdef DEBUG_KERNEL_STACKS
1556 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1557 		isStack = true;
1558 #endif
1559 
1560 	// check parameters
1561 	switch (virtualAddressRestrictions->address_specification) {
1562 		case B_ANY_ADDRESS:
1563 		case B_EXACT_ADDRESS:
1564 		case B_BASE_ADDRESS:
1565 		case B_ANY_KERNEL_ADDRESS:
1566 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1567 		case B_RANDOMIZED_ANY_ADDRESS:
1568 		case B_RANDOMIZED_BASE_ADDRESS:
1569 			break;
1570 
1571 		default:
1572 			return B_BAD_VALUE;
1573 	}
1574 
1575 	// If low or high physical address restrictions are given, we force
1576 	// B_CONTIGUOUS wiring, since only then we'll use
1577 	// vm_page_allocate_page_run() which deals with those restrictions.
1578 	if (physicalAddressRestrictions->low_address != 0
1579 		|| physicalAddressRestrictions->high_address != 0) {
1580 		wiring = B_CONTIGUOUS;
1581 	}
1582 
1583 	physical_address_restrictions stackPhysicalRestrictions;
1584 	bool doReserveMemory = false;
1585 	switch (wiring) {
1586 		case B_NO_LOCK:
1587 			break;
1588 		case B_FULL_LOCK:
1589 		case B_LAZY_LOCK:
1590 		case B_CONTIGUOUS:
1591 			doReserveMemory = true;
1592 			break;
1593 		case B_ALREADY_WIRED:
1594 			break;
1595 		case B_LOMEM:
1596 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1597 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1598 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1599 			wiring = B_CONTIGUOUS;
1600 			doReserveMemory = true;
1601 			break;
1602 		case B_32_BIT_FULL_LOCK:
1603 			if (B_HAIKU_PHYSICAL_BITS <= 32
1604 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1605 				wiring = B_FULL_LOCK;
1606 				doReserveMemory = true;
1607 				break;
1608 			}
1609 			// TODO: We don't really support this mode efficiently. Just fall
1610 			// through for now ...
1611 		case B_32_BIT_CONTIGUOUS:
1612 			#if B_HAIKU_PHYSICAL_BITS > 32
1613 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1614 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1615 					stackPhysicalRestrictions.high_address
1616 						= (phys_addr_t)1 << 32;
1617 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1618 				}
1619 			#endif
1620 			wiring = B_CONTIGUOUS;
1621 			doReserveMemory = true;
1622 			break;
1623 		default:
1624 			return B_BAD_VALUE;
1625 	}
1626 
1627 	// Optimization: For a single-page contiguous allocation without low/high
1628 	// memory restriction B_FULL_LOCK wiring suffices.
1629 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1630 		&& physicalAddressRestrictions->low_address == 0
1631 		&& physicalAddressRestrictions->high_address == 0) {
1632 		wiring = B_FULL_LOCK;
1633 	}
1634 
1635 	// For full lock or contiguous areas we're also going to map the pages and
1636 	// thus need to reserve pages for the mapping backend upfront.
1637 	addr_t reservedMapPages = 0;
1638 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1639 		AddressSpaceWriteLocker locker;
1640 		status_t status = locker.SetTo(team);
1641 		if (status != B_OK)
1642 			return status;
1643 
1644 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1645 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1646 	}
1647 
1648 	int priority;
1649 	if (team != VMAddressSpace::KernelID())
1650 		priority = VM_PRIORITY_USER;
1651 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1652 		priority = VM_PRIORITY_VIP;
1653 	else
1654 		priority = VM_PRIORITY_SYSTEM;
1655 
1656 	// Reserve memory before acquiring the address space lock. This reduces the
1657 	// chances of failure, since while holding the write lock to the address
1658 	// space (if it is the kernel address space that is), the low memory handler
1659 	// won't be able to free anything for us.
1660 	addr_t reservedMemory = 0;
1661 	if (doReserveMemory) {
1662 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1663 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1664 			return B_NO_MEMORY;
1665 		reservedMemory = size;
1666 		// TODO: We don't reserve the memory for the pages for the page
1667 		// directories/tables. We actually need to, since we currently don't
1668 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1669 		// there are actually fewer physical pages than there should be, which
1670 		// can get the VM into trouble in low memory situations.
1671 	}
1672 
1673 	AddressSpaceWriteLocker locker;
1674 	VMAddressSpace* addressSpace;
1675 	status_t status;
1676 
1677 	// For full lock areas reserve the pages before locking the address
1678 	// space. E.g. block caches can't release their memory while we hold the
1679 	// address space lock.
1680 	page_num_t reservedPages = reservedMapPages;
1681 	if (wiring == B_FULL_LOCK)
1682 		reservedPages += size / B_PAGE_SIZE;
1683 
1684 	vm_page_reservation reservation;
1685 	if (reservedPages > 0) {
1686 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1687 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1688 					priority)) {
1689 				reservedPages = 0;
1690 				status = B_WOULD_BLOCK;
1691 				goto err0;
1692 			}
1693 		} else
1694 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1695 	}
1696 
1697 	if (wiring == B_CONTIGUOUS) {
1698 		// we try to allocate the page run here upfront as this may easily
1699 		// fail for obvious reasons
1700 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1701 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1702 		if (page == NULL) {
1703 			status = B_NO_MEMORY;
1704 			goto err0;
1705 		}
1706 	}
1707 
1708 	// Lock the address space and, if B_EXACT_ADDRESS and
1709 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1710 	// is not wired.
1711 	do {
1712 		status = locker.SetTo(team);
1713 		if (status != B_OK)
1714 			goto err1;
1715 
1716 		addressSpace = locker.AddressSpace();
1717 	} while (virtualAddressRestrictions->address_specification
1718 			== B_EXACT_ADDRESS
1719 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1720 		&& wait_if_address_range_is_wired(addressSpace,
1721 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1722 
1723 	// create an anonymous cache
1724 	// if it's a stack, make sure that two pages are available at least
1725 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1726 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1727 		wiring == B_NO_LOCK, priority);
1728 	if (status != B_OK)
1729 		goto err1;
1730 
1731 	cache->temporary = 1;
1732 	cache->virtual_end = size;
1733 	cache->committed_size = reservedMemory;
1734 		// TODO: This should be done via a method.
1735 	reservedMemory = 0;
1736 
1737 	cache->Lock();
1738 
1739 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1740 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1741 		virtualAddressRestrictions, kernel, &area, _address);
1742 
1743 	if (status != B_OK) {
1744 		cache->ReleaseRefAndUnlock();
1745 		goto err1;
1746 	}
1747 
1748 	locker.DegradeToReadLock();
1749 
1750 	switch (wiring) {
1751 		case B_NO_LOCK:
1752 		case B_LAZY_LOCK:
1753 			// do nothing - the pages are mapped in as needed
1754 			break;
1755 
1756 		case B_FULL_LOCK:
1757 		{
1758 			// Allocate and map all pages for this area
1759 
1760 			off_t offset = 0;
1761 			for (addr_t address = area->Base();
1762 					address < area->Base() + (area->Size() - 1);
1763 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1764 #ifdef DEBUG_KERNEL_STACKS
1765 #	ifdef STACK_GROWS_DOWNWARDS
1766 				if (isStack && address < area->Base()
1767 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1768 #	else
1769 				if (isStack && address >= area->Base() + area->Size()
1770 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1771 #	endif
1772 					continue;
1773 #endif
1774 				vm_page* page = vm_page_allocate_page(&reservation,
1775 					PAGE_STATE_WIRED | pageAllocFlags);
1776 				cache->InsertPage(page, offset);
1777 				map_page(area, page, address, protection, &reservation);
1778 
1779 				DEBUG_PAGE_ACCESS_END(page);
1780 			}
1781 
1782 			break;
1783 		}
1784 
1785 		case B_ALREADY_WIRED:
1786 		{
1787 			// The pages should already be mapped. This is only really useful
1788 			// during boot time. Find the appropriate vm_page objects and stick
1789 			// them in the cache object.
1790 			VMTranslationMap* map = addressSpace->TranslationMap();
1791 			off_t offset = 0;
1792 
1793 			if (!gKernelStartup)
1794 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1795 
1796 			map->Lock();
1797 
1798 			for (addr_t virtualAddress = area->Base();
1799 					virtualAddress < area->Base() + (area->Size() - 1);
1800 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1801 				phys_addr_t physicalAddress;
1802 				uint32 flags;
1803 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1804 				if (status < B_OK) {
1805 					panic("looking up mapping failed for va 0x%lx\n",
1806 						virtualAddress);
1807 				}
1808 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1809 				if (page == NULL) {
1810 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1811 						"\n", physicalAddress);
1812 				}
1813 
1814 				DEBUG_PAGE_ACCESS_START(page);
1815 
1816 				cache->InsertPage(page, offset);
1817 				increment_page_wired_count(page);
1818 				vm_page_set_state(page, PAGE_STATE_WIRED);
1819 				page->busy = false;
1820 
1821 				DEBUG_PAGE_ACCESS_END(page);
1822 			}
1823 
1824 			map->Unlock();
1825 			break;
1826 		}
1827 
1828 		case B_CONTIGUOUS:
1829 		{
1830 			// We have already allocated our contiguous page run, so we can now
1831 			// just map it into the address space.
1832 			VMTranslationMap* map = addressSpace->TranslationMap();
1833 			phys_addr_t physicalAddress
1834 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1835 			addr_t virtualAddress = area->Base();
1836 			off_t offset = 0;
1837 
1838 			map->Lock();
1839 
1840 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1841 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1842 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1843 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1844 				if (page == NULL)
1845 					panic("couldn't lookup physical page just allocated\n");
1846 
1847 				status = map->Map(virtualAddress, physicalAddress, protection,
1848 					area->MemoryType(), &reservation);
1849 				if (status < B_OK)
1850 					panic("couldn't map physical page in page run\n");
1851 
1852 				cache->InsertPage(page, offset);
1853 				increment_page_wired_count(page);
1854 
1855 				DEBUG_PAGE_ACCESS_END(page);
1856 			}
1857 
1858 			map->Unlock();
1859 			break;
1860 		}
1861 
1862 		default:
1863 			break;
1864 	}
1865 
1866 	cache->Unlock();
1867 
1868 	if (reservedPages > 0)
1869 		vm_page_unreserve_pages(&reservation);
1870 
1871 	TRACE(("vm_create_anonymous_area: done\n"));
1872 
1873 	area->cache_type = CACHE_TYPE_RAM;
1874 	return area->id;
1875 
1876 err1:
1877 	if (wiring == B_CONTIGUOUS) {
1878 		// free the contiguous page run we had allocated upfront
1879 		phys_addr_t pageNumber = page->physical_page_number;
1880 		int32 i;
1881 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1882 			page = vm_lookup_page(pageNumber);
1883 			if (page == NULL)
1884 				panic("couldn't lookup physical page just allocated\n");
1885 
1886 			vm_page_set_state(page, PAGE_STATE_FREE);
1887 		}
1888 	}
1889 
1890 err0:
1891 	if (reservedPages > 0)
1892 		vm_page_unreserve_pages(&reservation);
1893 	if (reservedMemory > 0)
1894 		vm_unreserve_memory(reservedMemory);
1895 
1896 	return status;
1897 }
1898 
1899 
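/*!	Maps the given range of physical memory into the team's address space,
	backed by a device cache. The physical address does not have to be page
	aligned; the returned *_address is adjusted by the same in-page offset.
	The memory type can be passed via the B_MTR_* bits of \a addressSpec; if
	none is given, the range is mapped uncached.
	Returns the ID of the newly created area or an error code.
*/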
1900 area_id
1901 vm_map_physical_memory(team_id team, const char* name, void** _address,
1902 	uint32 addressSpec, addr_t size, uint32 protection,
1903 	phys_addr_t physicalAddress, bool alreadyWired)
1904 {
1905 	VMArea* area;
1906 	VMCache* cache;
1907 	addr_t mapOffset;
1908 
1909 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1910 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1911 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1912 		addressSpec, size, protection, physicalAddress));
1913 
1914 	if (!arch_vm_supports_protection(protection))
1915 		return B_NOT_SUPPORTED;
1916 
1917 	AddressSpaceWriteLocker locker(team);
1918 	if (!locker.IsLocked())
1919 		return B_BAD_TEAM_ID;
1920 
1921 	// if the physical address is not page aligned, move the area down
1922 	// so that it starts on a page boundary
1923 	mapOffset = physicalAddress % B_PAGE_SIZE;
1924 	size += mapOffset;
1925 	physicalAddress -= mapOffset;
1926 
1927 	size = PAGE_ALIGN(size);
1928 
1929 	// create a device cache
1930 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1931 	if (status != B_OK)
1932 		return status;
1933 
1934 	cache->virtual_end = size;
1935 
1936 	cache->Lock();
1937 
1938 	virtual_address_restrictions addressRestrictions = {};
1939 	addressRestrictions.address = *_address;
1940 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1941 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1942 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1943 		true, &area, _address);
1944 
1945 	if (status < B_OK)
1946 		cache->ReleaseRefLocked();
1947 
1948 	cache->Unlock();
1949 
1950 	if (status == B_OK) {
1951 		// set requested memory type -- use uncached, if not given
1952 		uint32 memoryType = addressSpec & B_MTR_MASK;
1953 		if (memoryType == 0)
1954 			memoryType = B_MTR_UC;
1955 
1956 		area->SetMemoryType(memoryType);
1957 
1958 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1959 		if (status != B_OK)
1960 			delete_area(locker.AddressSpace(), area, false);
1961 	}
1962 
1963 	if (status != B_OK)
1964 		return status;
1965 
1966 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1967 
1968 	if (alreadyWired) {
1969 		// The area is already mapped, but possibly not with the right
1970 		// memory type.
1971 		map->Lock();
1972 		map->ProtectArea(area, area->protection);
1973 		map->Unlock();
1974 	} else {
1975 		// Map the area completely.
1976 
1977 		// reserve pages needed for the mapping
1978 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1979 			area->Base() + (size - 1));
1980 		vm_page_reservation reservation;
1981 		vm_page_reserve_pages(&reservation, reservePages,
1982 			team == VMAddressSpace::KernelID()
1983 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1984 
1985 		map->Lock();
1986 
1987 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1988 			map->Map(area->Base() + offset, physicalAddress + offset,
1989 				protection, area->MemoryType(), &reservation);
1990 		}
1991 
1992 		map->Unlock();
1993 
1994 		vm_page_unreserve_pages(&reservation);
1995 	}
1996 
1997 	// modify the returned pointer to be offset back into the new area the
1998 	// same way the passed-in physical address was offset
1999 	*_address = (void*)((addr_t)*_address + mapOffset);
2000 
2001 	area->cache_type = CACHE_TYPE_DEVICE;
2002 	return area->id;
2003 }
2004 
2005 
2006 /*!	Don't use!
2007 	TODO: This function was introduced to map physical page vecs to
2008 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
2009 	use a device cache, but does not track vm_page::wired_count!
2010 */
2011 area_id
2012 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2013 	uint32 addressSpec, addr_t* _size, uint32 protection,
2014 	struct generic_io_vec* vecs, uint32 vecCount)
2015 {
2016 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
2017 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
2018 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
2019 		addressSpec, _size, protection, vecs, vecCount));
2020 
2021 	if (!arch_vm_supports_protection(protection)
2022 		|| (addressSpec & B_MTR_MASK) != 0) {
2023 		return B_NOT_SUPPORTED;
2024 	}
2025 
2026 	AddressSpaceWriteLocker locker(team);
2027 	if (!locker.IsLocked())
2028 		return B_BAD_TEAM_ID;
2029 
2030 	if (vecCount == 0)
2031 		return B_BAD_VALUE;
2032 
2033 	addr_t size = 0;
2034 	for (uint32 i = 0; i < vecCount; i++) {
2035 		if (vecs[i].base % B_PAGE_SIZE != 0
2036 			|| vecs[i].length % B_PAGE_SIZE != 0) {
2037 			return B_BAD_VALUE;
2038 		}
2039 
2040 		size += vecs[i].length;
2041 	}
2042 
2043 	// create a device cache
2044 	VMCache* cache;
2045 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
2046 	if (result != B_OK)
2047 		return result;
2048 
2049 	cache->virtual_end = size;
2050 
2051 	cache->Lock();
2052 
2053 	VMArea* area;
2054 	virtual_address_restrictions addressRestrictions = {};
2055 	addressRestrictions.address = *_address;
2056 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
2057 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
2058 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
2059 		&addressRestrictions, true, &area, _address);
2060 
2061 	if (result != B_OK)
2062 		cache->ReleaseRefLocked();
2063 
2064 	cache->Unlock();
2065 
2066 	if (result != B_OK)
2067 		return result;
2068 
2069 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2070 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2071 		area->Base() + (size - 1));
2072 
2073 	vm_page_reservation reservation;
2074 	vm_page_reserve_pages(&reservation, reservePages,
2075 			team == VMAddressSpace::KernelID()
2076 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2077 	map->Lock();
2078 
2079 	uint32 vecIndex = 0;
2080 	size_t vecOffset = 0;
2081 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2082 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2083 			vecOffset = 0;
2084 			vecIndex++;
2085 		}
2086 
2087 		if (vecIndex >= vecCount)
2088 			break;
2089 
2090 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2091 			protection, area->MemoryType(), &reservation);
2092 
2093 		vecOffset += B_PAGE_SIZE;
2094 	}
2095 
2096 	map->Unlock();
2097 	vm_page_unreserve_pages(&reservation);
2098 
2099 	if (_size != NULL)
2100 		*_size = size;
2101 
2102 	area->cache_type = CACHE_TYPE_DEVICE;
2103 	return area->id;
2104 }
2105 
2106 
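/*!	Creates an area backed by a null cache, i.e. an area that only reserves a
	range of address space without any memory backing it. The area is created
	with B_LAZY_LOCK wiring and kernel read-only protection.
*/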
2107 area_id
2108 vm_create_null_area(team_id team, const char* name, void** address,
2109 	uint32 addressSpec, addr_t size, uint32 flags)
2110 {
2111 	size = PAGE_ALIGN(size);
2112 
2113 	// Lock the address space and, if B_EXACT_ADDRESS and
2114 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2115 	// is not wired.
2116 	AddressSpaceWriteLocker locker;
2117 	do {
2118 		if (locker.SetTo(team) != B_OK)
2119 			return B_BAD_TEAM_ID;
2120 	} while (addressSpec == B_EXACT_ADDRESS
2121 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2122 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2123 			(addr_t)*address, size, &locker));
2124 
2125 	// create a null cache
2126 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2127 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2128 	VMCache* cache;
2129 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2130 	if (status != B_OK)
2131 		return status;
2132 
2133 	cache->temporary = 1;
2134 	cache->virtual_end = size;
2135 
2136 	cache->Lock();
2137 
2138 	VMArea* area;
2139 	virtual_address_restrictions addressRestrictions = {};
2140 	addressRestrictions.address = *address;
2141 	addressRestrictions.address_specification = addressSpec;
2142 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2143 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2144 		REGION_NO_PRIVATE_MAP, flags,
2145 		&addressRestrictions, true, &area, address);
2146 
2147 	if (status < B_OK) {
2148 		cache->ReleaseRefAndUnlock();
2149 		return status;
2150 	}
2151 
2152 	cache->Unlock();
2153 
2154 	area->cache_type = CACHE_TYPE_NULL;
2155 	return area->id;
2156 }
2157 
2158 
2159 /*!	Creates the vnode cache for the specified \a vnode.
2160 	The vnode has to be marked busy when calling this function.
2161 */
2162 status_t
2163 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2164 {
2165 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2166 }
2167 
2168 
2169 /*!	\a cache must be locked. The area's address space must be read-locked.
2170 */
2171 static void
2172 pre_map_area_pages(VMArea* area, VMCache* cache,
2173 	vm_page_reservation* reservation, int32 maxCount)
2174 {
2175 	addr_t baseAddress = area->Base();
2176 	addr_t cacheOffset = area->cache_offset;
2177 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2178 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2179 
2180 	VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true);
2181 	vm_page* page;
2182 	while ((page = it.Next()) != NULL && maxCount > 0) {
2183 		if (page->cache_offset >= endPage)
2184 			break;
2185 
2186 		// skip busy and inactive pages
2187 		if (page->busy || (page->usage_count == 0 && !page->accessed))
2188 			continue;
2189 
2190 		DEBUG_PAGE_ACCESS_START(page);
2191 		map_page(area, page,
2192 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2193 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2194 		maxCount--;
2195 		DEBUG_PAGE_ACCESS_END(page);
2196 	}
2197 }
2198 
2199 
2200 /*!	Will map the file specified by \a fd to an area in memory.
2201 	The file will be mirrored beginning at the specified \a offset. The
2202 	\a offset and \a size arguments have to be page aligned.
2203 */
2204 static area_id
2205 _vm_map_file(team_id team, const char* name, void** _address,
2206 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2207 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2208 {
2209 	// TODO: for binary files, we want to make sure that they map a
2210 	//	snapshot of the file at a given point in time, i.e. later changes
2211 	//	should not make it into the mapped copy -- doing this in a nice
2212 	//	way will need quite some changes
2213 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2214 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2215 
2216 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2217 	size = PAGE_ALIGN(size);
2218 
2219 	if (mapping == REGION_NO_PRIVATE_MAP)
2220 		protection |= B_SHARED_AREA;
2221 	if (addressSpec != B_EXACT_ADDRESS)
2222 		unmapAddressRange = false;
2223 
2224 	uint32 mappingFlags = 0;
2225 	if (unmapAddressRange)
2226 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2227 
2228 	if (fd < 0) {
2229 		virtual_address_restrictions virtualRestrictions = {};
2230 		virtualRestrictions.address = *_address;
2231 		virtualRestrictions.address_specification = addressSpec;
2232 		physical_address_restrictions physicalRestrictions = {};
2233 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2234 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2235 			_address);
2236 	}
2237 
2238 	// get the open flags of the FD
2239 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2240 	if (descriptor == NULL)
2241 		return EBADF;
2242 	int32 openMode = descriptor->open_mode;
2243 	put_fd(descriptor);
2244 
2245 	// The FD must be open for reading in any case. For a shared mapping with
2246 	// write access, the FD must additionally be open for writing.
2247 	if ((openMode & O_ACCMODE) == O_WRONLY
2248 		|| (mapping == REGION_NO_PRIVATE_MAP
2249 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2250 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2251 		return EACCES;
2252 	}
2253 
2254 	uint32 protectionMax = 0;
2255 	if (mapping == REGION_NO_PRIVATE_MAP) {
2256 		if ((openMode & O_ACCMODE) == O_RDWR)
2257 			protectionMax = protection | B_USER_PROTECTION;
2258 		else
2259 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2260 	} else if (mapping == REGION_PRIVATE_MAP) {
2261 		// For privately mapped read-only regions, skip committing memory.
2262 		// (If protections are changed later on, memory will be committed then.)
2263 		if ((protection & B_WRITE_AREA) == 0)
2264 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2265 	}
2266 
2267 	// get the vnode for the object, this also grabs a ref to it
2268 	struct vnode* vnode = NULL;
2269 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2270 	if (status < B_OK)
2271 		return status;
2272 	VnodePutter vnodePutter(vnode);
2273 
2274 	// If we're going to pre-map pages, we need to reserve the pages needed by
2275 	// the mapping backend upfront.
2276 	page_num_t reservedPreMapPages = 0;
2277 	vm_page_reservation reservation;
2278 	if ((protection & B_READ_AREA) != 0) {
2279 		AddressSpaceWriteLocker locker;
2280 		status = locker.SetTo(team);
2281 		if (status != B_OK)
2282 			return status;
2283 
2284 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2285 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2286 
2287 		locker.Unlock();
2288 
2289 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2290 			team == VMAddressSpace::KernelID()
2291 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2292 	}
2293 
2294 	struct PageUnreserver {
2295 		PageUnreserver(vm_page_reservation* reservation)
2296 			:
2297 			fReservation(reservation)
2298 		{
2299 		}
2300 
2301 		~PageUnreserver()
2302 		{
2303 			if (fReservation != NULL)
2304 				vm_page_unreserve_pages(fReservation);
2305 		}
2306 
2307 		vm_page_reservation* fReservation;
2308 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2309 
2310 	// Lock the address space and, if the specified address range shall be
2311 	// unmapped, ensure it is not wired.
2312 	AddressSpaceWriteLocker locker;
2313 	do {
2314 		if (locker.SetTo(team) != B_OK)
2315 			return B_BAD_TEAM_ID;
2316 	} while (unmapAddressRange
2317 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2318 			(addr_t)*_address, size, &locker));
2319 
2320 	// TODO: this only works for file systems that use the file cache
2321 	VMCache* cache;
2322 	status = vfs_get_vnode_cache(vnode, &cache, false);
2323 	if (status < B_OK)
2324 		return status;
2325 
2326 	cache->Lock();
2327 
2328 	VMArea* area;
2329 	virtual_address_restrictions addressRestrictions = {};
2330 	addressRestrictions.address = *_address;
2331 	addressRestrictions.address_specification = addressSpec;
2332 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2333 		0, protection, protectionMax, mapping, mappingFlags,
2334 		&addressRestrictions, kernel, &area, _address);
2335 
2336 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2337 		// map_backing_store() cannot know we no longer need the ref
2338 		cache->ReleaseRefLocked();
2339 	}
2340 
2341 	if (status == B_OK && (protection & B_READ_AREA) != 0) {
2342 		// Pre-map at most 10MB worth of pages.
2343 		pre_map_area_pages(area, cache, &reservation,
2344 			(10LL * 1024 * 1024) / B_PAGE_SIZE);
2345 	}
2346 
2347 	cache->Unlock();
2348 
2349 	if (status == B_OK) {
2350 		// TODO: this probably deserves a smarter solution, e.g. triggering
2351 		// the prefetch somewhere else.
2352 
2353 		// Prefetch at most 10MB starting from "offset", but only if the cache
2354 		// doesn't already contain more pages than the prefetch size.
2355 		const size_t prefetch = min_c(size, 10LL * 1024 * 1024);
2356 		if (cache->page_count < (prefetch / B_PAGE_SIZE))
2357 			cache_prefetch_vnode(vnode, offset, prefetch);
2358 	}
2359 
2360 	if (status != B_OK)
2361 		return status;
2362 
2363 	area->cache_type = CACHE_TYPE_VNODE;
2364 	return area->id;
2365 }
2366 
2367 
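/*!	Maps the file specified by \a fd into the given team's address space.
	This is the kernel entry point; after checking that the requested
	protection is supported it forwards to _vm_map_file() with \c kernel set
	to \c true.
*/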
2368 area_id
2369 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2370 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2371 	int fd, off_t offset)
2372 {
2373 	if (!arch_vm_supports_protection(protection))
2374 		return B_NOT_SUPPORTED;
2375 
2376 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2377 		mapping, unmapAddressRange, fd, offset, true);
2378 }
2379 
2380 
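/*!	Returns the area's current cache, locked and with a reference acquired.
	The function retries if the area's cache changes or gets deleted while
	switching from the sAreaCacheLock read lock to the cache lock.
*/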
2381 VMCache*
2382 vm_area_get_locked_cache(VMArea* area)
2383 {
2384 	rw_lock_read_lock(&sAreaCacheLock);
2385 
2386 	while (true) {
2387 		VMCache* cache = area->cache;
2388 
2389 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2390 			// cache has been deleted
2391 			rw_lock_read_lock(&sAreaCacheLock);
2392 			continue;
2393 		}
2394 
2395 		rw_lock_read_lock(&sAreaCacheLock);
2396 
2397 		if (cache == area->cache) {
2398 			cache->AcquireRefLocked();
2399 			rw_lock_read_unlock(&sAreaCacheLock);
2400 			return cache;
2401 		}
2402 
2403 		// the cache changed in the meantime
2404 		cache->Unlock();
2405 	}
2406 }
2407 
2408 
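/*!	Counterpart to vm_area_get_locked_cache(): releases the reference and
	unlocks the cache.
*/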
2409 void
2410 vm_area_put_locked_cache(VMCache* cache)
2411 {
2412 	cache->ReleaseRefAndUnlock();
2413 }
2414 
2415 
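/*!	Clones the area with ID \a sourceID into the address space of \a team,
	using the given \a mapping mode. Both the source and the new area are
	marked B_SHARED_AREA. For B_FULL_LOCK areas all pages are mapped in right
	away. Returns the ID of the new area or an error code.
*/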
2416 area_id
2417 vm_clone_area(team_id team, const char* name, void** address,
2418 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2419 	bool kernel)
2420 {
2421 	VMArea* newArea = NULL;
2422 	VMArea* sourceArea;
2423 
2424 	// Check whether the source area exists and is cloneable. If so, mark it
2425 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2426 	{
2427 		AddressSpaceWriteLocker locker;
2428 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2429 		if (status != B_OK)
2430 			return status;
2431 
2432 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2433 			return B_NOT_ALLOWED;
2434 
2435 		sourceArea->protection |= B_SHARED_AREA;
2436 		protection |= B_SHARED_AREA;
2437 	}
2438 
2439 	// Now lock both address spaces and actually do the cloning.
2440 
2441 	MultiAddressSpaceLocker locker;
2442 	VMAddressSpace* sourceAddressSpace;
2443 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2444 	if (status != B_OK)
2445 		return status;
2446 
2447 	VMAddressSpace* targetAddressSpace;
2448 	status = locker.AddTeam(team, true, &targetAddressSpace);
2449 	if (status != B_OK)
2450 		return status;
2451 
2452 	status = locker.Lock();
2453 	if (status != B_OK)
2454 		return status;
2455 
2456 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2457 	if (sourceArea == NULL)
2458 		return B_BAD_VALUE;
2459 
2460 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2461 		return B_NOT_ALLOWED;
2462 
2463 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2464 
2465 	if (!kernel && sourceAddressSpace != targetAddressSpace
2466 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2467 #if KDEBUG
2468 		Team* team = thread_get_current_thread()->team;
2469 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2470 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2471 #endif
2472 		status = B_NOT_ALLOWED;
2473 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2474 		status = B_NOT_ALLOWED;
2475 	} else {
2476 		virtual_address_restrictions addressRestrictions = {};
2477 		addressRestrictions.address = *address;
2478 		addressRestrictions.address_specification = addressSpec;
2479 		status = map_backing_store(targetAddressSpace, cache,
2480 			sourceArea->cache_offset, name, sourceArea->Size(),
2481 			sourceArea->wiring, protection, sourceArea->protection_max,
2482 			mapping, 0, &addressRestrictions,
2483 			kernel, &newArea, address);
2484 	}
2485 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2486 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2487 		// to create a new cache, and has therefore already acquired a reference
2488 		// to the source cache - but otherwise it has no idea that we need
2489 		// one.
2490 		cache->AcquireRefLocked();
2491 	}
2492 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2493 		// we need to map in everything at this point
2494 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2495 			// we don't have actual pages to map but a physical area
2496 			VMTranslationMap* map
2497 				= sourceArea->address_space->TranslationMap();
2498 			map->Lock();
2499 
2500 			phys_addr_t physicalAddress;
2501 			uint32 oldProtection;
2502 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2503 
2504 			map->Unlock();
2505 
2506 			map = targetAddressSpace->TranslationMap();
2507 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2508 				newArea->Base() + (newArea->Size() - 1));
2509 
2510 			vm_page_reservation reservation;
2511 			vm_page_reserve_pages(&reservation, reservePages,
2512 				targetAddressSpace == VMAddressSpace::Kernel()
2513 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2514 			map->Lock();
2515 
2516 			for (addr_t offset = 0; offset < newArea->Size();
2517 					offset += B_PAGE_SIZE) {
2518 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2519 					protection, newArea->MemoryType(), &reservation);
2520 			}
2521 
2522 			map->Unlock();
2523 			vm_page_unreserve_pages(&reservation);
2524 		} else {
2525 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2526 			size_t reservePages = map->MaxPagesNeededToMap(
2527 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2528 			vm_page_reservation reservation;
2529 			vm_page_reserve_pages(&reservation, reservePages,
2530 				targetAddressSpace == VMAddressSpace::Kernel()
2531 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2532 
2533 			// map in all pages from source
2534 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2535 					vm_page* page = it.Next();) {
2536 				if (!page->busy) {
2537 					DEBUG_PAGE_ACCESS_START(page);
2538 					map_page(newArea, page,
2539 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2540 							- newArea->cache_offset),
2541 						protection, &reservation);
2542 					DEBUG_PAGE_ACCESS_END(page);
2543 				}
2544 			}
2545 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2546 			// ensuring that!
2547 
2548 			vm_page_unreserve_pages(&reservation);
2549 		}
2550 	}
2551 	if (status == B_OK)
2552 		newArea->cache_type = sourceArea->cache_type;
2553 
2554 	vm_area_put_locked_cache(cache);
2555 
2556 	if (status < B_OK)
2557 		return status;
2558 
2559 	return newArea->id;
2560 }
2561 
2562 
2563 /*!	Deletes the specified area of the given address space.
2564 
2565 	The address space must be write-locked.
2566 	The caller must ensure that the area does not have any wired ranges.
2567 
2568 	\param addressSpace The address space containing the area.
2569 	\param area The area to be deleted.
2570 	\param deletingAddressSpace \c true, if the address space is in the process
2571 		of being deleted.
2572 */
2573 static void
2574 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2575 	bool deletingAddressSpace)
2576 {
2577 	ASSERT(!area->IsWired());
2578 
2579 	if (area->id >= 0)
2580 		VMAreas::Remove(area);
2581 
2582 	// At this point the area is removed from the global hash table, but
2583 	// still exists in the area list.
2584 
2585 	// Unmap the virtual address space the area occupied.
2586 	{
2587 		// We need to lock the complete cache chain.
2588 		VMCache* topCache = vm_area_get_locked_cache(area);
2589 		VMCacheChainLocker cacheChainLocker(topCache);
2590 		cacheChainLocker.LockAllSourceCaches();
2591 
2592 		// If the area's top cache is a temporary cache and the area is the only
2593 		// one referencing it (besides us currently holding a second reference),
2594 		// the unmapping code doesn't need to care about preserving the accessed
2595 		// and dirty flags of the top cache page mappings.
2596 		bool ignoreTopCachePageFlags
2597 			= topCache->temporary && topCache->RefCount() == 2;
2598 
2599 		area->address_space->TranslationMap()->UnmapArea(area,
2600 			deletingAddressSpace, ignoreTopCachePageFlags);
2601 	}
2602 
2603 	if (!area->cache->temporary)
2604 		area->cache->WriteModified();
2605 
2606 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2607 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2608 
2609 	arch_vm_unset_memory_type(area);
2610 	addressSpace->RemoveArea(area, allocationFlags);
2611 	addressSpace->Put();
2612 
2613 	area->cache->RemoveArea(area);
2614 	area->cache->ReleaseRef();
2615 
2616 	addressSpace->DeleteArea(area, allocationFlags);
2617 }
2618 
2619 
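/*!	Deletes the area with the given ID from the given team's address space.
	The function waits for any wired ranges covering the area to be unwired
	first. Unless \a kernel is \c true, areas protected with B_KERNEL_AREA
	cannot be deleted.
*/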
2620 status_t
2621 vm_delete_area(team_id team, area_id id, bool kernel)
2622 {
2623 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2624 		team, id));
2625 
2626 	// lock the address space and make sure the area isn't wired
2627 	AddressSpaceWriteLocker locker;
2628 	VMArea* area;
2629 	AreaCacheLocker cacheLocker;
2630 
2631 	do {
2632 		status_t status = locker.SetFromArea(team, id, area);
2633 		if (status != B_OK)
2634 			return status;
2635 
2636 		cacheLocker.SetTo(area);
2637 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2638 
2639 	cacheLocker.Unlock();
2640 
2641 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2642 		return B_NOT_ALLOWED;
2643 
2644 	delete_area(locker.AddressSpace(), area, false);
2645 	return B_OK;
2646 }
2647 
2648 
2649 /*!	Creates a new cache on top of the given cache, moves all areas from
2650 	the old cache to the new one, and changes the protection of all affected
2651 	areas' pages to read-only. If requested, wired pages are moved up to the
2652 	new cache and copies are added to the old cache in their place.
2653 	Preconditions:
2654 	- The given cache must be locked.
2655 	- All of the cache's areas' address spaces must be read locked.
2656 	- Either the cache must not have any wired ranges or a page reservation for
2657 	  all wired pages must be provided, so they can be copied.
2658 
2659 	\param lowerCache The cache on top of which a new cache shall be created.
2660 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2661 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2662 		has wired pages. The wired pages are copied in this case.
2663 */
2664 static status_t
2665 vm_copy_on_write_area(VMCache* lowerCache,
2666 	vm_page_reservation* wiredPagesReservation)
2667 {
2668 	VMCache* upperCache;
2669 
2670 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2671 
2672 	// We need to separate the cache from its areas. The cache goes one level
2673 	// deeper and we create a new cache in between.
2674 
2675 	// create an anonymous cache
2676 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2677 		lowerCache->GuardSize() / B_PAGE_SIZE,
2678 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2679 		VM_PRIORITY_USER);
2680 	if (status != B_OK)
2681 		return status;
2682 
2683 	upperCache->Lock();
2684 
2685 	upperCache->temporary = 1;
2686 	upperCache->virtual_base = lowerCache->virtual_base;
2687 	upperCache->virtual_end = lowerCache->virtual_end;
2688 
2689 	// transfer the lower cache areas to the upper cache
2690 	rw_lock_write_lock(&sAreaCacheLock);
2691 	upperCache->TransferAreas(lowerCache);
2692 	rw_lock_write_unlock(&sAreaCacheLock);
2693 
2694 	lowerCache->AddConsumer(upperCache);
2695 
2696 	// We now need to remap all pages from all of the cache's areas read-only,
2697 	// so that a copy will be created on next write access. If there are wired
2698 	// pages, we keep their protection, move them to the upper cache and create
2699 	// copies for the lower cache.
2700 	if (wiredPagesReservation != NULL) {
2701 		// We need to handle wired pages -- iterate through the cache's pages.
2702 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2703 				vm_page* page = it.Next();) {
2704 			if (page->WiredCount() > 0) {
2705 				// allocate a new page and copy the wired one
2706 				vm_page* copiedPage = vm_page_allocate_page(
2707 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2708 
2709 				vm_memcpy_physical_page(
2710 					copiedPage->physical_page_number * B_PAGE_SIZE,
2711 					page->physical_page_number * B_PAGE_SIZE);
2712 
2713 				// move the wired page to the upper cache (note: removing is OK
2714 				// with the SplayTree iterator) and insert the copy
2715 				upperCache->MovePage(page);
2716 				lowerCache->InsertPage(copiedPage,
2717 					page->cache_offset * B_PAGE_SIZE);
2718 
2719 				DEBUG_PAGE_ACCESS_END(copiedPage);
2720 			} else {
2721 				// Change the protection of this page in all areas.
2722 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2723 						tempArea = tempArea->cache_next) {
2724 					if (!is_page_in_area(tempArea, page))
2725 						continue;
2726 
2727 					// The area must be readable in the same way it was
2728 					// previously writable.
2729 					addr_t address = virtual_page_address(tempArea, page);
2730 					uint32 protection = 0;
2731 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2732 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2733 						protection |= B_KERNEL_READ_AREA;
2734 					if ((pageProtection & B_READ_AREA) != 0)
2735 						protection |= B_READ_AREA;
2736 
2737 					VMTranslationMap* map
2738 						= tempArea->address_space->TranslationMap();
2739 					map->Lock();
2740 					map->ProtectPage(tempArea, address, protection);
2741 					map->Unlock();
2742 				}
2743 			}
2744 		}
2745 	} else {
2746 		ASSERT(lowerCache->WiredPagesCount() == 0);
2747 
2748 		// just change the protection of all areas
2749 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2750 				tempArea = tempArea->cache_next) {
2751 			if (tempArea->page_protections != NULL) {
2752 				// Change the protection of all pages in this area.
2753 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2754 				map->Lock();
2755 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2756 					vm_page* page = it.Next();) {
2757 					if (!is_page_in_area(tempArea, page))
2758 						continue;
2759 
2760 					// The area must be readable in the same way it was
2761 					// previously writable.
2762 					addr_t address = virtual_page_address(tempArea, page);
2763 					uint32 protection = 0;
2764 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2765 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2766 						protection |= B_KERNEL_READ_AREA;
2767 					if ((pageProtection & B_READ_AREA) != 0)
2768 						protection |= B_READ_AREA;
2769 
2770 					map->ProtectPage(tempArea, address, protection);
2771 				}
2772 				map->Unlock();
2773 				continue;
2774 			}
2775 			// The area must be readable in the same way it was previously
2776 			// writable.
2777 			uint32 protection = 0;
2778 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2779 				protection |= B_KERNEL_READ_AREA;
2780 			if ((tempArea->protection & B_READ_AREA) != 0)
2781 				protection |= B_READ_AREA;
2782 
2783 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2784 			map->Lock();
2785 			map->ProtectArea(tempArea, protection);
2786 			map->Unlock();
2787 		}
2788 	}
2789 
2790 	vm_area_put_locked_cache(upperCache);
2791 
2792 	return B_OK;
2793 }
2794 
2795 
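/*!	Creates a copy of the area with ID \a sourceID in the address space of
	\a team. For shared source areas the new area is simply mapped onto the
	same cache; otherwise, if the area is writable, a copy-on-write setup is
	established via vm_copy_on_write_area(), with any wired pages copied
	right away. Returns the ID of the new area or an error code.
*/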
2796 area_id
2797 vm_copy_area(team_id team, const char* name, void** _address,
2798 	uint32 addressSpec, area_id sourceID)
2799 {
2800 	// Do the locking: target address space, all address spaces associated with
2801 	// the source cache, and the cache itself.
2802 	MultiAddressSpaceLocker locker;
2803 	VMAddressSpace* targetAddressSpace;
2804 	VMCache* cache;
2805 	VMArea* source;
2806 	AreaCacheLocker cacheLocker;
2807 	status_t status;
2808 	bool sharedArea;
2809 
2810 	page_num_t wiredPages = 0;
2811 	vm_page_reservation wiredPagesReservation;
2812 
2813 	bool restart;
2814 	do {
2815 		restart = false;
2816 
2817 		locker.Unset();
2818 		status = locker.AddTeam(team, true, &targetAddressSpace);
2819 		if (status == B_OK) {
2820 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2821 				&cache);
2822 		}
2823 		if (status != B_OK)
2824 			return status;
2825 
2826 		cacheLocker.SetTo(cache, true);	// already locked
2827 
2828 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2829 
2830 		page_num_t oldWiredPages = wiredPages;
2831 		wiredPages = 0;
2832 
2833 		// If the source area isn't shared, count the number of wired pages in
2834 		// the cache and reserve as many pages.
2835 		if (!sharedArea) {
2836 			wiredPages = cache->WiredPagesCount();
2837 
2838 			if (wiredPages > oldWiredPages) {
2839 				cacheLocker.Unlock();
2840 				locker.Unlock();
2841 
2842 				if (oldWiredPages > 0)
2843 					vm_page_unreserve_pages(&wiredPagesReservation);
2844 
2845 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2846 					VM_PRIORITY_USER);
2847 
2848 				restart = true;
2849 			}
2850 		} else if (oldWiredPages > 0)
2851 			vm_page_unreserve_pages(&wiredPagesReservation);
2852 	} while (restart);
2853 
2854 	// unreserve pages later
2855 	struct PagesUnreserver {
2856 		PagesUnreserver(vm_page_reservation* reservation)
2857 			:
2858 			fReservation(reservation)
2859 		{
2860 		}
2861 
2862 		~PagesUnreserver()
2863 		{
2864 			if (fReservation != NULL)
2865 				vm_page_unreserve_pages(fReservation);
2866 		}
2867 
2868 	private:
2869 		vm_page_reservation*	fReservation;
2870 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2871 
2872 	bool writableCopy
2873 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2874 	uint8* targetPageProtections = NULL;
2875 
2876 	if (source->page_protections != NULL) {
2877 		size_t bytes = area_page_protections_size(source->Size());
2878 		targetPageProtections = (uint8*)malloc_etc(bytes,
2879 			(source->address_space == VMAddressSpace::Kernel()
2880 					|| targetAddressSpace == VMAddressSpace::Kernel())
2881 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2882 		if (targetPageProtections == NULL)
2883 			return B_NO_MEMORY;
2884 
2885 		memcpy(targetPageProtections, source->page_protections, bytes);
2886 
2887 		if (!writableCopy) {
2888 			for (size_t i = 0; i < bytes; i++) {
2889 				if ((targetPageProtections[i]
2890 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2891 					writableCopy = true;
2892 					break;
2893 				}
2894 			}
2895 		}
2896 	}
2897 
2898 	if (addressSpec == B_CLONE_ADDRESS) {
2899 		addressSpec = B_EXACT_ADDRESS;
2900 		*_address = (void*)source->Base();
2901 	}
2902 
2903 	// First, create a cache on top of the source area, or use the existing
2904 	// one if this is a shared area.
2905 
2906 	VMArea* target;
2907 	virtual_address_restrictions addressRestrictions = {};
2908 	addressRestrictions.address = *_address;
2909 	addressRestrictions.address_specification = addressSpec;
2910 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2911 		name, source->Size(), source->wiring, source->protection,
2912 		source->protection_max,
2913 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2914 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2915 		&addressRestrictions, true, &target, _address);
2916 	if (status < B_OK) {
2917 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2918 		return status;
2919 	}
2920 
2921 	if (targetPageProtections != NULL)
2922 		target->page_protections = targetPageProtections;
2923 
2924 	if (sharedArea) {
2925 		// The new area uses the old area's cache, but map_backing_store()
2926 		// hasn't acquired a ref. So we have to do that now.
2927 		cache->AcquireRefLocked();
2928 	}
2929 
2930 	// If the source area is writable, we need to move it one layer up as well
2931 
2932 	if (!sharedArea) {
2933 		if (writableCopy) {
2934 			// TODO: do something more useful if this fails!
2935 			if (vm_copy_on_write_area(cache,
2936 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2937 				panic("vm_copy_on_write_area() failed!\n");
2938 			}
2939 		}
2940 	}
2941 
2942 	// we return the ID of the newly created area
2943 	return target->id;
2944 }
2945 
2946 
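/*!	Sets the protection of the area with the given ID. Making a read-only
	area writable may require inserting a copy-on-write cache via
	vm_copy_on_write_area(); making a writable area read-only may reduce the
	cache's commitment. Unless \a kernel is \c true, kernel areas and
	protections exceeding the area's protection_max cannot be set.
*/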
2947 status_t
2948 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2949 	bool kernel)
2950 {
2951 	fix_protection(&newProtection);
2952 
2953 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2954 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2955 
2956 	if (!arch_vm_supports_protection(newProtection))
2957 		return B_NOT_SUPPORTED;
2958 
2959 	bool becomesWritable
2960 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2961 
2962 	// lock address spaces and cache
2963 	MultiAddressSpaceLocker locker;
2964 	VMCache* cache;
2965 	VMArea* area;
2966 	status_t status;
2967 	AreaCacheLocker cacheLocker;
2968 	bool isWritable;
2969 
2970 	bool restart;
2971 	do {
2972 		restart = false;
2973 
2974 		locker.Unset();
2975 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2976 		if (status != B_OK)
2977 			return status;
2978 
2979 		cacheLocker.SetTo(cache, true);	// already locked
2980 
2981 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2982 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2983 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2984 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2985 				" (%s)\n", team, newProtection, areaID, area->name);
2986 			return B_NOT_ALLOWED;
2987 		}
2988 		if (!kernel && area->protection_max != 0
2989 			&& (newProtection & area->protection_max)
2990 				!= (newProtection & B_USER_PROTECTION)) {
2991 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2992 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2993 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2994 				area->protection_max, areaID, area->name);
2995 			return B_NOT_ALLOWED;
2996 		}
2997 
2998 		if (team != VMAddressSpace::KernelID()
2999 			&& area->address_space->ID() != team) {
3000 			// unless you're the kernel, you are only allowed to set
3001 			// the protection of your own areas
3002 			return B_NOT_ALLOWED;
3003 		}
3004 
3005 		if (area->protection == newProtection)
3006 			return B_OK;
3007 
3008 		isWritable
3009 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3010 
3011 		// Make sure the area (or, if we're going to call
3012 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
3013 		// wired ranges.
3014 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
3015 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
3016 					otherArea = otherArea->cache_next) {
3017 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
3018 					restart = true;
3019 					break;
3020 				}
3021 			}
3022 		} else {
3023 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
3024 				restart = true;
3025 		}
3026 	} while (restart);
3027 
3028 	bool changePageProtection = true;
3029 	bool changeTopCachePagesOnly = false;
3030 
3031 	if (isWritable && !becomesWritable) {
3032 		// writable -> !writable
3033 
3034 		if (cache->source != NULL && cache->temporary) {
3035 			if (cache->CountWritableAreas(area) == 0) {
3036 				// Since this cache now lives from the pages in its source cache,
3037 				// we can change the cache's commitment to take only those pages
3038 				// into account that really are in this cache.
3039 
3040 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
3041 					team == VMAddressSpace::KernelID()
3042 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3043 
3044 				// TODO: we may be able to join with our source cache, if
3045 				// count == 0
3046 			}
3047 		}
3048 
3049 		// If only the writability changes, we can just remap the pages of the
3050 		// top cache, since the pages of lower caches are mapped read-only
3051 		// anyway. That's only advantageous, though, if the number of pages in
3052 		// the cache is significantly smaller than the number of pages in the
3053 		// area.
3054 		if (newProtection
3055 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
3056 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
3057 			changeTopCachePagesOnly = true;
3058 		}
3059 	} else if (!isWritable && becomesWritable) {
3060 		// !writable -> writable
3061 
3062 		if (!cache->consumers.IsEmpty()) {
3063 			// There are consumers -- we have to insert a new cache. Fortunately
3064 			// vm_copy_on_write_area() does everything that's needed.
3065 			changePageProtection = false;
3066 			status = vm_copy_on_write_area(cache, NULL);
3067 		} else {
3068 			// No consumers, so we don't need to insert a new one.
3069 			if (cache->source != NULL && cache->temporary) {
3070 				// the cache's commitment must contain all possible pages
3071 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3072 					team == VMAddressSpace::KernelID()
3073 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3074 			}
3075 
3076 			if (status == B_OK && cache->source != NULL) {
3077 				// There's a source cache, hence we can't just change all pages'
3078 				// protection or we might allow writing into pages belonging to
3079 				// a lower cache.
3080 				changeTopCachePagesOnly = true;
3081 			}
3082 		}
3083 	} else {
3084 		// we don't have anything special to do in all other cases
3085 	}
3086 
3087 	if (status == B_OK) {
3088 		// remap existing pages in this cache
3089 		if (changePageProtection) {
3090 			VMTranslationMap* map = area->address_space->TranslationMap();
3091 			map->Lock();
3092 
3093 			if (changeTopCachePagesOnly) {
3094 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3095 				page_num_t lastPageOffset
3096 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3097 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3098 						vm_page* page = it.Next();) {
3099 					if (page->cache_offset >= firstPageOffset
3100 						&& page->cache_offset < lastPageOffset) {
3101 						addr_t address = virtual_page_address(area, page);
3102 						map->ProtectPage(area, address, newProtection);
3103 					}
3104 				}
3105 			} else
3106 				map->ProtectArea(area, newProtection);
3107 
3108 			map->Unlock();
3109 		}
3110 
3111 		area->protection = newProtection;
3112 	}
3113 
3114 	return status;
3115 }
3116 
3117 
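/*!	Queries the given team's translation map for the physical address that
	\a vaddr is mapped to and stores it in \a paddr.
*/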
3118 status_t
3119 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3120 {
3121 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3122 	if (addressSpace == NULL)
3123 		return B_BAD_TEAM_ID;
3124 
3125 	VMTranslationMap* map = addressSpace->TranslationMap();
3126 
3127 	map->Lock();
3128 	uint32 dummyFlags;
3129 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3130 	map->Unlock();
3131 
3132 	addressSpace->Put();
3133 	return status;
3134 }
3135 
3136 
3137 /*!	The page's cache must be locked.
3138 */
3139 bool
3140 vm_test_map_modification(vm_page* page)
3141 {
3142 	if (page->modified)
3143 		return true;
3144 
3145 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3146 	vm_page_mapping* mapping;
3147 	while ((mapping = iterator.Next()) != NULL) {
3148 		VMArea* area = mapping->area;
3149 		VMTranslationMap* map = area->address_space->TranslationMap();
3150 
3151 		phys_addr_t physicalAddress;
3152 		uint32 flags;
3153 		map->Lock();
3154 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3155 		map->Unlock();
3156 
3157 		if ((flags & PAGE_MODIFIED) != 0)
3158 			return true;
3159 	}
3160 
3161 	return false;
3162 }
3163 
3164 
3165 /*!	The page's cache must be locked.
3166 */
3167 void
3168 vm_clear_map_flags(vm_page* page, uint32 flags)
3169 {
3170 	if ((flags & PAGE_ACCESSED) != 0)
3171 		page->accessed = false;
3172 	if ((flags & PAGE_MODIFIED) != 0)
3173 		page->modified = false;
3174 
3175 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3176 	vm_page_mapping* mapping;
3177 	while ((mapping = iterator.Next()) != NULL) {
3178 		VMArea* area = mapping->area;
3179 		VMTranslationMap* map = area->address_space->TranslationMap();
3180 
3181 		map->Lock();
3182 		map->ClearFlags(virtual_page_address(area, page), flags);
3183 		map->Unlock();
3184 	}
3185 }
3186 
3187 
3188 /*!	Removes all mappings from a page.
3189 	After you've called this function, the page is unmapped from memory and
3190 	the page's \c accessed and \c modified flags have been updated according
3191 	to the state of the mappings.
3192 	The page's cache must be locked.
3193 */
3194 void
3195 vm_remove_all_page_mappings(vm_page* page)
3196 {
3197 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3198 		VMArea* area = mapping->area;
3199 		VMTranslationMap* map = area->address_space->TranslationMap();
3200 		addr_t address = virtual_page_address(area, page);
3201 		map->UnmapPage(area, address, false);
3202 	}
3203 }
3204 
3205 
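/*!	Clears the accessed flags of all of the page's mappings and of the page
	itself, updating the page's \c modified flag from the mappings.
	The page's cache must be locked.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
*/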
3206 int32
3207 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3208 {
3209 	int32 count = 0;
3210 
3211 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3212 	vm_page_mapping* mapping;
3213 	while ((mapping = iterator.Next()) != NULL) {
3214 		VMArea* area = mapping->area;
3215 		VMTranslationMap* map = area->address_space->TranslationMap();
3216 
3217 		bool modified;
3218 		if (map->ClearAccessedAndModified(area,
3219 				virtual_page_address(area, page), false, modified)) {
3220 			count++;
3221 		}
3222 
3223 		page->modified |= modified;
3224 	}
3225 
3226 
3228 		count++;
3229 		page->accessed = false;
3230 	}
3231 
3232 	return count;
3233 }
3234 
3235 
3236 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3237 	mappings.
3238 	The function iterates through the page mappings and removes them until
3239 	encountering one that has been accessed. From then on it will continue to
3240 	iterate, but only clear the accessed flag of the mapping. The page's
3241 	\c modified bit will be updated accordingly, the \c accessed bit will be
3242 	cleared.
3243 	\return The number of mapping accessed bits encountered, including the
3244 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3245 		of the page have been removed.
3246 */
3247 int32
3248 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3249 {
3250 	ASSERT(page->WiredCount() == 0);
3251 
3252 	if (page->accessed)
3253 		return vm_clear_page_mapping_accessed_flags(page);
3254 
3255 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3256 		VMArea* area = mapping->area;
3257 		VMTranslationMap* map = area->address_space->TranslationMap();
3258 		addr_t address = virtual_page_address(area, page);
3259 		bool modified = false;
3260 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3261 			page->accessed = true;
3262 			page->modified |= modified;
3263 			return vm_clear_page_mapping_accessed_flags(page);
3264 		}
3265 		page->modified |= modified;
3266 	}
3267 
3268 	return 0;
3269 }
3270 
3271 
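/*!	Kernel debugger command backing "dl"/"dw"/"ds"/"db"/"string": dumps
	memory at the given address in the respective item size. With -p or
	--physical the address is interpreted as a physical address and at most
	one page is displayed.
*/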
3272 static int
3273 display_mem(int argc, char** argv)
3274 {
3275 	bool physical = false;
3276 	addr_t copyAddress;
3277 	int32 displayWidth;
3278 	int32 itemSize;
3279 	int32 num = -1;
3280 	addr_t address;
3281 	int i = 1, j;
3282 
3283 	if (argc > 1 && argv[1][0] == '-') {
3284 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3285 			physical = true;
3286 			i++;
3287 		} else
3288 			i = 99;
3289 	}
3290 
3291 	if (argc < i + 1 || argc > i + 2) {
3292 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3293 			"\tdl - 8 bytes\n"
3294 			"\tdw - 4 bytes\n"
3295 			"\tds - 2 bytes\n"
3296 			"\tdb - 1 byte\n"
3297 			"\tstring - a whole string\n"
3298 			"  -p or --physical only allows memory from a single page to be "
3299 			"displayed.\n");
3300 		return 0;
3301 	}
3302 
3303 	address = parse_expression(argv[i]);
3304 
3305 	if (argc > i + 1)
3306 		num = parse_expression(argv[i + 1]);
3307 
3308 	// build the format string
3309 	if (strcmp(argv[0], "db") == 0) {
3310 		itemSize = 1;
3311 		displayWidth = 16;
3312 	} else if (strcmp(argv[0], "ds") == 0) {
3313 		itemSize = 2;
3314 		displayWidth = 8;
3315 	} else if (strcmp(argv[0], "dw") == 0) {
3316 		itemSize = 4;
3317 		displayWidth = 4;
3318 	} else if (strcmp(argv[0], "dl") == 0) {
3319 		itemSize = 8;
3320 		displayWidth = 2;
3321 	} else if (strcmp(argv[0], "string") == 0) {
3322 		itemSize = 1;
3323 		displayWidth = -1;
3324 	} else {
3325 		kprintf("display_mem called in an invalid way!\n");
3326 		return 0;
3327 	}
3328 
3329 	if (num <= 0)
3330 		num = displayWidth;
3331 
3332 	void* physicalPageHandle = NULL;
3333 
3334 	if (physical) {
3335 		int32 offset = address & (B_PAGE_SIZE - 1);
3336 		if (num * itemSize + offset > B_PAGE_SIZE) {
3337 			num = (B_PAGE_SIZE - offset) / itemSize;
3338 			kprintf("NOTE: number of bytes has been cut to page size\n");
3339 		}
3340 
3341 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3342 
3343 		if (vm_get_physical_page_debug(address, &copyAddress,
3344 				&physicalPageHandle) != B_OK) {
3345 			kprintf("getting the hardware page failed.");
3346 			return 0;
3347 		}
3348 
3349 		address += offset;
3350 		copyAddress += offset;
3351 	} else
3352 		copyAddress = address;
3353 
3354 	if (!strcmp(argv[0], "string")) {
3355 		kprintf("%p \"", (char*)copyAddress);
3356 
3357 		// string mode
3358 		for (i = 0; true; i++) {
3359 			char c;
3360 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3361 					!= B_OK
3362 				|| c == '\0') {
3363 				break;
3364 			}
3365 
3366 			if (c == '\n')
3367 				kprintf("\\n");
3368 			else if (c == '\t')
3369 				kprintf("\\t");
3370 			else {
3371 				if (!isprint(c))
3372 					c = '.';
3373 
3374 				kprintf("%c", c);
3375 			}
3376 		}
3377 
3378 		kprintf("\"\n");
3379 	} else {
3380 		// number mode
3381 		for (i = 0; i < num; i++) {
3382 			uint64 value;
3383 
3384 			if ((i % displayWidth) == 0) {
3385 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3386 				if (i != 0)
3387 					kprintf("\n");
3388 
3389 				kprintf("[0x%lx]  ", address + i * itemSize);
3390 
3391 				for (j = 0; j < displayed; j++) {
3392 					char c;
3393 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3394 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3395 						displayed = j;
3396 						break;
3397 					}
3398 					if (!isprint(c))
3399 						c = '.';
3400 
3401 					kprintf("%c", c);
3402 				}
3403 				if (num > displayWidth) {
3404 					// make sure the spacing in the last line is correct
3405 					for (j = displayed; j < displayWidth * itemSize; j++)
3406 						kprintf(" ");
3407 				}
3408 				kprintf("  ");
3409 			}
3410 
3411 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3412 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3413 				kprintf("read fault");
3414 				break;
3415 			}
3416 
3417 			switch (itemSize) {
3418 				case 1:
3419 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3420 					break;
3421 				case 2:
3422 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3423 					break;
3424 				case 4:
3425 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3426 					break;
3427 				case 8:
3428 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3429 					break;
3430 			}
3431 		}
3432 
3433 		kprintf("\n");
3434 	}
3435 
3436 	if (physical) {
3437 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3438 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3439 	}
3440 	return 0;
3441 }
3442 
3443 
3444 static void
3445 dump_cache_tree_recursively(VMCache* cache, int level,
3446 	VMCache* highlightCache)
3447 {
3448 	// print this cache
3449 	for (int i = 0; i < level; i++)
3450 		kprintf("  ");
3451 	if (cache == highlightCache)
3452 		kprintf("%p <--\n", cache);
3453 	else
3454 		kprintf("%p\n", cache);
3455 
3456 	// recursively print its consumers
3457 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3458 			VMCache* consumer = it.Next();) {
3459 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3460 	}
3461 }
3462 
3463 
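/*!	Kernel debugger command: prints the tree of caches the given cache
	belongs to, starting at the root (transitive source) cache and marking
	the given cache with an arrow.
*/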
3464 static int
3465 dump_cache_tree(int argc, char** argv)
3466 {
3467 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3468 		kprintf("usage: %s <address>\n", argv[0]);
3469 		return 0;
3470 	}
3471 
3472 	addr_t address = parse_expression(argv[1]);
3473 	if (address == 0)
3474 		return 0;
3475 
3476 	VMCache* cache = (VMCache*)address;
3477 	VMCache* root = cache;
3478 
3479 	// find the root cache (the transitive source)
3480 	while (root->source != NULL)
3481 		root = root->source;
3482 
3483 	dump_cache_tree_recursively(root, 0, cache);
3484 
3485 	return 0;
3486 }
3487 
3488 
3489 const char*
3490 vm_cache_type_to_string(int32 type)
3491 {
3492 	switch (type) {
3493 		case CACHE_TYPE_RAM:
3494 			return "RAM";
3495 		case CACHE_TYPE_DEVICE:
3496 			return "device";
3497 		case CACHE_TYPE_VNODE:
3498 			return "vnode";
3499 		case CACHE_TYPE_NULL:
3500 			return "null";
3501 
3502 		default:
3503 			return "unknown";
3504 	}
3505 }
3506 
3507 
3508 #if DEBUG_CACHE_LIST
3509 
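/*!	Accumulates the page count and, for RAM caches, the committed size of
	the given cache and all of its consumers into \a info.
*/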
3510 static void
3511 update_cache_info_recursively(VMCache* cache, cache_info& info)
3512 {
3513 	info.page_count += cache->page_count;
3514 	if (cache->type == CACHE_TYPE_RAM)
3515 		info.committed += cache->committed_size;
3516 
3517 	// recurse
3518 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3519 			VMCache* consumer = it.Next();) {
3520 		update_cache_info_recursively(consumer, info);
3521 	}
3522 }
3523 
3524 
3525 static int
3526 cache_info_compare_page_count(const void* _a, const void* _b)
3527 {
3528 	const cache_info* a = (const cache_info*)_a;
3529 	const cache_info* b = (const cache_info*)_b;
3530 	if (a->page_count == b->page_count)
3531 		return 0;
3532 	return a->page_count < b->page_count ? 1 : -1;
3533 }
3534 
3535 
3536 static int
3537 cache_info_compare_committed(const void* _a, const void* _b)
3538 {
3539 	const cache_info* a = (const cache_info*)_a;
3540 	const cache_info* b = (const cache_info*)_b;
3541 	if (a->committed == b->committed)
3542 		return 0;
3543 	return a->committed < b->committed ? 1 : -1;
3544 }
3545 
3546 
3547 static void
3548 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3549 {
3550 	for (int i = 0; i < level; i++)
3551 		kprintf("  ");
3552 
3553 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3554 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3555 		cache->virtual_base, cache->virtual_end, cache->page_count);
3556 
3557 	if (level == 0)
3558 		kprintf("/%lu", info.page_count);
3559 
3560 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3561 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3562 
3563 		if (level == 0)
3564 			kprintf("/%lu", info.committed);
3565 	}
3566 
3567 	// areas
3568 	if (cache->areas != NULL) {
3569 		VMArea* area = cache->areas;
3570 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3571 			area->name, area->address_space->ID());
3572 
3573 		while (area->cache_next != NULL) {
3574 			area = area->cache_next;
3575 			kprintf(", %" B_PRId32, area->id);
3576 		}
3577 	}
3578 
3579 	kputs("\n");
3580 
3581 	// recurse
3582 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3583 			VMCache* consumer = it.Next();) {
3584 		dump_caches_recursively(consumer, info, level + 1);
3585 	}
3586 }
3587 
3588 
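/*!	KDL command: Lists all root VMCaches together with their consumer trees,
	sorted by page count, or by committed size if "-c" is given.
*/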
3589 static int
3590 dump_caches(int argc, char** argv)
3591 {
3592 	if (sCacheInfoTable == NULL) {
3593 		kprintf("No cache info table!\n");
3594 		return 0;
3595 	}
3596 
3597 	bool sortByPageCount = true;
3598 
3599 	for (int32 i = 1; i < argc; i++) {
3600 		if (strcmp(argv[i], "-c") == 0) {
3601 			sortByPageCount = false;
3602 		} else {
3603 			print_debugger_command_usage(argv[0]);
3604 			return 0;
3605 		}
3606 	}
3607 
3608 	uint32 totalCount = 0;
3609 	uint32 rootCount = 0;
3610 	off_t totalCommitted = 0;
3611 	page_num_t totalPages = 0;
3612 
3613 	VMCache* cache = gDebugCacheList;
3614 	while (cache) {
3615 		totalCount++;
3616 		if (cache->source == NULL) {
3617 			cache_info stackInfo;
3618 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3619 				? sCacheInfoTable[rootCount] : stackInfo;
3620 			rootCount++;
3621 			info.cache = cache;
3622 			info.page_count = 0;
3623 			info.committed = 0;
3624 			update_cache_info_recursively(cache, info);
3625 			totalCommitted += info.committed;
3626 			totalPages += info.page_count;
3627 		}
3628 
3629 		cache = cache->debug_next;
3630 	}
3631 
3632 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3633 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3634 			sortByPageCount
3635 				? &cache_info_compare_page_count
3636 				: &cache_info_compare_committed);
3637 	}
3638 
3639 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3640 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3641 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3642 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3643 			"page count" : "committed size");
3644 
3645 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3646 		for (uint32 i = 0; i < rootCount; i++) {
3647 			cache_info& info = sCacheInfoTable[i];
3648 			dump_caches_recursively(info.cache, info, 0);
3649 		}
3650 	} else
3651 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3652 
3653 	return 0;
3654 }
3655 
3656 #endif	// DEBUG_CACHE_LIST
3657 
3658 
3659 static int
3660 dump_cache(int argc, char** argv)
3661 {
3662 	VMCache* cache;
3663 	bool showPages = false;
3664 	int i = 1;
3665 
3666 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3667 		kprintf("usage: %s [-ps] <address>\n"
3668 			"  if -p is specified, all pages are shown; if -s is used,\n"
3669 			"  only the cache info is shown.\n", argv[0]);
3670 		return 0;
3671 	}
3672 	while (argv[i][0] == '-') {
3673 		char* arg = argv[i] + 1;
3674 		while (arg[0]) {
3675 			if (arg[0] == 'p')
3676 				showPages = true;
3677 			arg++;
3678 		}
3679 		i++;
3680 	}
3681 	if (argv[i] == NULL) {
3682 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3683 		return 0;
3684 	}
3685 
3686 	addr_t address = parse_expression(argv[i]);
3687 	if (address == 0)
3688 		return 0;
3689 
3690 	cache = (VMCache*)address;
3691 
3692 	cache->Dump(showPages);
3693 
3694 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3695 
3696 	return 0;
3697 }
3698 
3699 
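/*!	Prints the fields of the given VMArea. If \a mappings is \c true, each
	mapped page is listed; otherwise only the number of mappings is printed.
*/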
3700 static void
3701 dump_area_struct(VMArea* area, bool mappings)
3702 {
3703 	kprintf("AREA: %p\n", area);
3704 	kprintf("name:\t\t'%s'\n", area->name);
3705 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3706 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3707 	kprintf("base:\t\t0x%lx\n", area->Base());
3708 	kprintf("size:\t\t0x%lx\n", area->Size());
3709 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3710 	kprintf("page_protection:%p\n", area->page_protections);
3711 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3712 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3713 	kprintf("cache:\t\t%p\n", area->cache);
3714 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3715 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3716 	kprintf("cache_next:\t%p\n", area->cache_next);
3717 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3718 
3719 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3720 	if (mappings) {
3721 		kprintf("page mappings:\n");
3722 		while (iterator.HasNext()) {
3723 			vm_page_mapping* mapping = iterator.Next();
3724 			kprintf("  %p", mapping->page);
3725 		}
3726 		kprintf("\n");
3727 	} else {
3728 		uint32 count = 0;
3729 		while (iterator.Next() != NULL) {
3730 			count++;
3731 		}
3732 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3733 	}
3734 }
3735 
3736 
3737 static int
3738 dump_area(int argc, char** argv)
3739 {
3740 	bool mappings = false;
3741 	bool found = false;
3742 	int32 index = 1;
3743 	VMArea* area;
3744 	addr_t num;
3745 
3746 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3747 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3748 			"All areas matching the given id/address/name are listed. You can\n"
3749 			"force the command to check only a specific attribute by prefixing\n"
3750 			"the specifier with one of the id/contains/address/name keywords.\n"
3751 			"-m shows the area's mappings as well.\n");
3752 		return 0;
3753 	}
3754 
3755 	if (!strcmp(argv[1], "-m")) {
3756 		mappings = true;
3757 		index++;
3758 	}
3759 
3760 	int32 mode = 0xf;
3761 	if (!strcmp(argv[index], "id"))
3762 		mode = 1;
3763 	else if (!strcmp(argv[index], "contains"))
3764 		mode = 2;
3765 	else if (!strcmp(argv[index], "name"))
3766 		mode = 4;
3767 	else if (!strcmp(argv[index], "address"))
3768 		mode = 0;
3769 	if (mode != 0xf)
3770 		index++;
3771 
3772 	if (index >= argc) {
3773 		kprintf("No area specifier given.\n");
3774 		return 0;
3775 	}
3776 
3777 	num = parse_expression(argv[index]);
3778 
3779 	if (mode == 0) {
3780 		dump_area_struct((struct VMArea*)num, mappings);
3781 	} else {
3782 		// walk through the area list, looking for areas that match the argument
3783 
3784 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3785 		while ((area = it.Next()) != NULL) {
3786 			if (((mode & 4) != 0
3787 					&& !strcmp(argv[index], area->name))
3788 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3789 					|| (((mode & 2) != 0 && area->Base() <= num
3790 						&& area->Base() + area->Size() > num))))) {
3791 				dump_area_struct(area, mappings);
3792 				found = true;
3793 			}
3794 		}
3795 
3796 		if (!found)
3797 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3798 	}
3799 
3800 	return 0;
3801 }
3802 
3803 
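/*!	KDL command: Lists all areas, optionally filtered by the owning team's ID
	or by a substring of the area name.
*/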
3804 static int
3805 dump_area_list(int argc, char** argv)
3806 {
3807 	VMArea* area;
3808 	const char* name = NULL;
3809 	int32 id = 0;
3810 
3811 	if (argc > 1) {
3812 		id = parse_expression(argv[1]);
3813 		if (id == 0)
3814 			name = argv[1];
3815 	}
3816 
3817 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3818 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3819 		B_PRINTF_POINTER_WIDTH, "size");
3820 
3821 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3822 	while ((area = it.Next()) != NULL) {
3823 		if ((id != 0 && area->address_space->ID() != id)
3824 			|| (name != NULL && strstr(area->name, name) == NULL))
3825 			continue;
3826 
3827 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3828 			area->id, (void*)area->Base(), (void*)area->Size(),
3829 			area->protection, area->wiring, area->name);
3830 	}
3831 	return 0;
3832 }
3833 
3834 
3835 static int
3836 dump_available_memory(int argc, char** argv)
3837 {
3838 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3839 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3840 	return 0;
3841 }
3842 
3843 
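/*!	KDL command: Without a flag, prints the translation map entry for the
	given virtual address. With "-r" the argument is a physical address, with
	"-p" the address of a vm_page; in both cases a reverse lookup is done in
	the given team's address space, or in all address spaces.
*/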
3844 static int
3845 dump_mapping_info(int argc, char** argv)
3846 {
3847 	bool reverseLookup = false;
3848 	bool pageLookup = false;
3849 
3850 	int argi = 1;
3851 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3852 		const char* arg = argv[argi];
3853 		if (strcmp(arg, "-r") == 0) {
3854 			reverseLookup = true;
3855 		} else if (strcmp(arg, "-p") == 0) {
3856 			reverseLookup = true;
3857 			pageLookup = true;
3858 		} else {
3859 			print_debugger_command_usage(argv[0]);
3860 			return 0;
3861 		}
3862 	}
3863 
3864 	// We need at least one argument, the address. Optionally a thread ID can be
3865 	// specified.
3866 	if (argi >= argc || argi + 2 < argc) {
3867 		print_debugger_command_usage(argv[0]);
3868 		return 0;
3869 	}
3870 
3871 	uint64 addressValue;
3872 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3873 		return 0;
3874 
3875 	Team* team = NULL;
3876 	if (argi < argc) {
3877 		uint64 threadID;
3878 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3879 			return 0;
3880 
3881 		Thread* thread = Thread::GetDebug(threadID);
3882 		if (thread == NULL) {
3883 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3884 			return 0;
3885 		}
3886 
3887 		team = thread->team;
3888 	}
3889 
3890 	if (reverseLookup) {
3891 		phys_addr_t physicalAddress;
3892 		if (pageLookup) {
3893 			vm_page* page = (vm_page*)(addr_t)addressValue;
3894 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3895 		} else {
3896 			physicalAddress = (phys_addr_t)addressValue;
3897 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3898 		}
3899 
3900 		kprintf("    Team     Virtual Address      Area\n");
3901 		kprintf("--------------------------------------\n");
3902 
3903 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3904 			Callback()
3905 				:
3906 				fAddressSpace(NULL)
3907 			{
3908 			}
3909 
3910 			void SetAddressSpace(VMAddressSpace* addressSpace)
3911 			{
3912 				fAddressSpace = addressSpace;
3913 			}
3914 
3915 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3916 			{
3917 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3918 					virtualAddress);
3919 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3920 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3921 				else
3922 					kprintf("\n");
3923 				return false;
3924 			}
3925 
3926 		private:
3927 			VMAddressSpace*	fAddressSpace;
3928 		} callback;
3929 
3930 		if (team != NULL) {
3931 			// team specified -- get its address space
3932 			VMAddressSpace* addressSpace = team->address_space;
3933 			if (addressSpace == NULL) {
3934 				kprintf("Failed to get address space!\n");
3935 				return 0;
3936 			}
3937 
3938 			callback.SetAddressSpace(addressSpace);
3939 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3940 				physicalAddress, callback);
3941 		} else {
3942 			// no team specified -- iterate through all address spaces
3943 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3944 				addressSpace != NULL;
3945 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3946 				callback.SetAddressSpace(addressSpace);
3947 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3948 					physicalAddress, callback);
3949 			}
3950 		}
3951 	} else {
3952 		// get the address space
3953 		addr_t virtualAddress = (addr_t)addressValue;
3954 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3955 		VMAddressSpace* addressSpace;
3956 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3957 			addressSpace = VMAddressSpace::Kernel();
3958 		} else if (team != NULL) {
3959 			addressSpace = team->address_space;
3960 		} else {
3961 			Thread* thread = debug_get_debugged_thread();
3962 			if (thread == NULL || thread->team == NULL) {
3963 				kprintf("Failed to get team!\n");
3964 				return 0;
3965 			}
3966 
3967 			addressSpace = thread->team->address_space;
3968 		}
3969 
3970 		if (addressSpace == NULL) {
3971 			kprintf("Failed to get address space!\n");
3972 			return 0;
3973 		}
3974 
3975 		// let the translation map implementation do the job
3976 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3977 	}
3978 
3979 	return 0;
3980 }
3981 
3982 
3983 /*!	Deletes all areas and reserved regions in the given address space.
3984 
3985 	The caller must ensure that none of the areas has any wired ranges.
3986 
3987 	\param addressSpace The address space.
3988 	\param deletingAddressSpace \c true, if the address space is in the process
3989 		of being deleted.
3990 */
3991 void
3992 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3993 {
3994 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3995 		addressSpace->ID()));
3996 
3997 	addressSpace->WriteLock();
3998 
3999 	// remove all reserved areas in this address space
4000 	addressSpace->UnreserveAllAddressRanges(0);
4001 
4002 	// remove all areas from the areas map at once (to avoid lock contention)
4003 	VMAreas::WriteLock();
4004 	{
4005 		VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
4006 		while (VMArea* area = it.Next()) {
4007 			VMAreas::Remove(area);
4008 			area->id = INT32_MIN;
4009 		}
4010 	}
4011 	VMAreas::WriteUnlock();
4012 
4013 	// delete all the areas in this address space
4014 	while (VMArea* area = addressSpace->FirstArea()) {
4015 		ASSERT(!area->IsWired());
4016 		delete_area(addressSpace, area, deletingAddressSpace);
4017 	}
4018 
4019 	addressSpace->WriteUnlock();
4020 }
4021 
4022 
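/*!	Returns the ID of the area containing \a address in the calling team's
	address space (for user addresses) or in the kernel address space.
	If \a kernel is \c false, areas reserved for the kernel (B_KERNEL_AREA
	set and no user protection) are not reported.
*/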
4023 static area_id
4024 vm_area_for(addr_t address, bool kernel)
4025 {
4026 	team_id team;
4027 	if (IS_USER_ADDRESS(address)) {
4028 		// we try the user team address space, if any
4029 		team = VMAddressSpace::CurrentID();
4030 		if (team < 0)
4031 			return team;
4032 	} else
4033 		team = VMAddressSpace::KernelID();
4034 
4035 	AddressSpaceReadLocker locker(team);
4036 	if (!locker.IsLocked())
4037 		return B_BAD_TEAM_ID;
4038 
4039 	VMArea* area = locker.AddressSpace()->LookupArea(address);
4040 	if (area != NULL) {
4041 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
4042 				&& (area->protection & B_KERNEL_AREA) != 0)
4043 			return B_ERROR;
4044 
4045 		return area->id;
4046 	}
4047 
4048 	return B_ERROR;
4049 }
4050 
4051 
4052 /*!	Frees physical pages that were used during the boot process.
4053 	\a end is inclusive.
4054 */
4055 static void
4056 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
4057 {
4058 	// free all physical pages in the specified range
4059 
4060 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
4061 		phys_addr_t physicalAddress;
4062 		uint32 flags;
4063 
4064 		if (map->Query(current, &physicalAddress, &flags) == B_OK
4065 			&& (flags & PAGE_PRESENT) != 0) {
4066 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4067 			if (page != NULL && page->State() != PAGE_STATE_FREE
4068 					&& page->State() != PAGE_STATE_CLEAR
4069 					&& page->State() != PAGE_STATE_UNUSED) {
4070 				DEBUG_PAGE_ACCESS_START(page);
4071 				vm_page_set_state(page, PAGE_STATE_FREE);
4072 			}
4073 		}
4074 	}
4075 
4076 	// unmap the memory
4077 	map->Unmap(start, end);
4078 }
4079 
4080 
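/*!	Unmaps and frees the pages in the given kernel virtual range that are not
	covered by any area, i.e. the gaps between the (address sorted) kernel
	areas that were only needed by the boot loader.
*/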
4081 void
4082 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4083 {
4084 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
4085 	addr_t end = start + (size - 1);
4086 	addr_t lastEnd = start;
4087 
4088 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4089 		(void*)start, (void*)end));
4090 
4091 	// The areas are sorted in virtual address space order, so
4092 	// we just have to find the holes between them that fall
4093 	// into the area we should dispose
4094 
4095 	map->Lock();
4096 
4097 	for (VMAddressSpace::AreaIterator it
4098 				= VMAddressSpace::Kernel()->GetAreaIterator();
4099 			VMArea* area = it.Next();) {
4100 		addr_t areaStart = area->Base();
4101 		addr_t areaEnd = areaStart + (area->Size() - 1);
4102 
4103 		if (areaEnd < start)
4104 			continue;
4105 
4106 		if (areaStart > end) {
4107 			// we are done, the area is already beyond what we have to free
4108 			break;
4109 		}
4110 
4111 		if (areaStart > lastEnd) {
4112 			// this is something we can free
4113 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4114 				(void*)areaStart));
4115 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4116 		}
4117 
4118 		if (areaEnd >= end) {
4119 			lastEnd = areaEnd;
4120 				// no +1 to prevent potential overflow
4121 			break;
4122 		}
4123 
4124 		lastEnd = areaEnd + 1;
4125 	}
4126 
4127 	if (lastEnd < end) {
4128 		// we can also get rid of some space at the end of the area
4129 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4130 			(void*)end));
4131 		unmap_and_free_physical_pages(map, lastEnd, end);
4132 	}
4133 
4134 	map->Unlock();
4135 }
4136 
4137 
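/*!	Creates "<name>_text" and "<name>_data" areas for the already wired and
	mapped text and data regions of a preloaded image, so that the memory is
	known to the VM.
*/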
4138 static void
4139 create_preloaded_image_areas(struct preloaded_image* _image)
4140 {
4141 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4142 	char name[B_OS_NAME_LENGTH];
4143 	void* address;
4144 	int32 length;
4145 
4146 	// use file name to create a good area name
4147 	char* fileName = strrchr(image->name, '/');
4148 	if (fileName == NULL)
4149 		fileName = image->name;
4150 	else
4151 		fileName++;
4152 
4153 	length = strlen(fileName);
4154 	// make sure there is enough space for the suffix
4155 	if (length > 25)
4156 		length = 25;
4157 
4158 	memcpy(name, fileName, length);
4159 	strcpy(name + length, "_text");
4160 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4161 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4162 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4163 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4164 		// this will later be remapped read-only/executable by the
4165 		// ELF initialization code
4166 
4167 	strcpy(name + length, "_data");
4168 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4169 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4170 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4171 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4172 }
4173 
4174 
4175 /*!	Frees all kernel args areas that were previously allocated for the
4176 	ranges in the kernel_args structure. Any boot loader resources contained
4177 	in those arguments must not be accessed anymore past this point.
4178 */
4179 void
4180 vm_free_kernel_args(kernel_args* args)
4181 {
4182 	uint32 i;
4183 
4184 	TRACE(("vm_free_kernel_args()\n"));
4185 
4186 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4187 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4188 		if (area >= B_OK)
4189 			delete_area(area);
4190 	}
4191 }
4192 
4193 
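/*!	Creates wired areas covering the kernel_args ranges handed over by the
	boot loader, so that they won't be reused before vm_free_kernel_args()
	is called.
*/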
4194 static void
4195 allocate_kernel_args(kernel_args* args)
4196 {
4197 	TRACE(("allocate_kernel_args()\n"));
4198 
4199 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4200 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4201 
4202 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4203 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4204 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4205 	}
4206 }
4207 
4208 
4209 static void
4210 unreserve_boot_loader_ranges(kernel_args* args)
4211 {
4212 	TRACE(("unreserve_boot_loader_ranges()\n"));
4213 
4214 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4215 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4216 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4217 			args->virtual_allocated_range[i].size);
4218 	}
4219 }
4220 
4221 
4222 static void
4223 reserve_boot_loader_ranges(kernel_args* args)
4224 {
4225 	TRACE(("reserve_boot_loader_ranges()\n"));
4226 
4227 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4228 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4229 
4230 		// If the address is not a kernel address, we just skip it. The
4231 		// architecture specific code has to deal with it.
4232 		if (!IS_KERNEL_ADDRESS(address)) {
4233 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4234 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4235 			continue;
4236 		}
4237 
4238 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4239 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4240 		if (status < B_OK)
4241 			panic("could not reserve boot loader ranges\n");
4242 	}
4243 }
4244 
4245 
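/*!	Finds a gap of \a size bytes (rounded up to whole pages and optionally
	aligned to \a alignment) in the kernel part of the virtual address space,
	using the boot loader's virtual_allocated_range entries. The bordering
	entry is extended (or moved) to cover the allocation. Returns the base
	address, or 0 if no suitable gap was found.
*/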
4246 static addr_t
4247 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4248 {
4249 	size = PAGE_ALIGN(size);
4250 
4251 	// find a slot in the virtual allocation addr range
4252 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4253 		// check to see if the space between this one and the last is big enough
4254 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4255 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4256 			+ args->virtual_allocated_range[i - 1].size;
4257 
4258 		addr_t base = alignment > 0
4259 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4260 
4261 		if (base >= KERNEL_BASE && base < rangeStart
4262 				&& rangeStart - base >= size) {
4263 			args->virtual_allocated_range[i - 1].size
4264 				+= base + size - previousRangeEnd;
4265 			return base;
4266 		}
4267 	}
4268 
4269 	// We haven't found a gap between the allocation ranges. This is OK;
4270 	// see if there's a gap after the last one.
4271 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4272 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4273 		+ args->virtual_allocated_range[lastEntryIndex].size;
4274 	addr_t base = alignment > 0
4275 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4276 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4277 		args->virtual_allocated_range[lastEntryIndex].size
4278 			+= base + size - lastRangeEnd;
4279 		return base;
4280 	}
4281 
4282 	// see if there's a gap before the first one
4283 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4284 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4285 		base = rangeStart - size;
4286 		if (alignment > 0)
4287 			base = ROUNDDOWN(base, alignment);
4288 
4289 		if (base >= KERNEL_BASE) {
4290 			args->virtual_allocated_range[0].start = base;
4291 			args->virtual_allocated_range[0].size += rangeStart - base;
4292 			return base;
4293 		}
4294 	}
4295 
4296 	return 0;
4297 }
4298 
4299 
4300 static bool
4301 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4302 {
4303 	// TODO: horrible brute-force method of determining if the page can be
4304 	// allocated
4305 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4306 		if (address >= args->physical_memory_range[i].start
4307 			&& address < args->physical_memory_range[i].start
4308 				+ args->physical_memory_range[i].size)
4309 			return true;
4310 	}
4311 	return false;
4312 }
4313 
4314 
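/*!	Allocates a single physical page for use before the VM is fully up, by
	extending one of the boot loader's physical_allocated_range entries,
	first upwards, then downwards. Returns the page number, or 0 on failure.
*/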
4315 page_num_t
4316 vm_allocate_early_physical_page(kernel_args* args)
4317 {
4318 	if (args->num_physical_allocated_ranges == 0) {
4319 		panic("early physical page allocations no longer possible!");
4320 		return 0;
4321 	}
4322 
4323 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4324 		phys_addr_t nextPage;
4325 
4326 		nextPage = args->physical_allocated_range[i].start
4327 			+ args->physical_allocated_range[i].size;
4328 		// see if the page after the next allocated paddr run can be allocated
4329 		if (i + 1 < args->num_physical_allocated_ranges
4330 			&& args->physical_allocated_range[i + 1].size != 0) {
4331 			// see if the next page will collide with the next allocated range
4332 			if (nextPage >= args->physical_allocated_range[i+1].start)
4333 				continue;
4334 		}
4335 		// see if the next physical page fits in the memory block
4336 		if (is_page_in_physical_memory_range(args, nextPage)) {
4337 			// we got one!
4338 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4339 			return nextPage / B_PAGE_SIZE;
4340 		}
4341 	}
4342 
4343 	// Expanding upwards didn't work, try going downwards.
4344 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4345 		phys_addr_t nextPage;
4346 
4347 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4348 		// see if the page before this allocated paddr run can be allocated
4349 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4350 			// see if this page would collide with the previous allocated range
4351 			if (nextPage < args->physical_allocated_range[i-1].start
4352 				+ args->physical_allocated_range[i-1].size)
4353 				continue;
4354 		}
4355 		// see if the next physical page fits in the memory block
4356 		if (is_page_in_physical_memory_range(args, nextPage)) {
4357 			// we got one!
4358 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4359 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4360 			return nextPage / B_PAGE_SIZE;
4361 		}
4362 	}
4363 
4364 	return 0;
4365 		// could not allocate a block
4366 }
4367 
4368 
4369 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4370 	allocate some pages before the VM is completely up.
4371 */
4372 addr_t
4373 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4374 	uint32 attributes, addr_t alignment)
4375 {
4376 	if (physicalSize > virtualSize)
4377 		physicalSize = virtualSize;
4378 
4379 	// find the vaddr to allocate at
4380 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4381 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4382 	if (virtualBase == 0) {
4383 		panic("vm_allocate_early: could not allocate virtual address\n");
4384 		return 0;
4385 	}
4386 
4387 	// map the pages
4388 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4389 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4390 		if (physicalAddress == 0)
4391 			panic("error allocating early page!\n");
4392 
4393 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4394 
4395 		status_t status = arch_vm_translation_map_early_map(args,
4396 			virtualBase + i * B_PAGE_SIZE,
4397 			physicalAddress * B_PAGE_SIZE, attributes,
4398 			&vm_allocate_early_physical_page);
4399 		if (status != B_OK)
4400 			panic("error mapping early page!");
4401 	}
4402 
4403 	return virtualBase;
4404 }
4405 
4406 
4407 /*!	The main entrance point to initialize the VM. */
4408 status_t
4409 vm_init(kernel_args* args)
4410 {
4411 	struct preloaded_image* image;
4412 	void* address;
4413 	status_t err = 0;
4414 	uint32 i;
4415 
4416 	TRACE(("vm_init: entry\n"));
4417 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4418 	err = arch_vm_init(args);
4419 
4420 	// initialize some globals
4421 	vm_page_init_num_pages(args);
4422 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4423 
4424 	slab_init(args);
4425 
4426 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4427 	off_t heapSize = INITIAL_HEAP_SIZE;
4428 	// try to accommodate low memory systems
4429 	while (heapSize > sAvailableMemory / 8)
4430 		heapSize /= 2;
4431 	if (heapSize < 1024 * 1024)
4432 		panic("vm_init: go buy some RAM please.");
4433 
4434 	// map in the new heap and initialize it
4435 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4436 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4437 	TRACE(("heap at 0x%lx\n", heapBase));
4438 	heap_init(heapBase, heapSize);
4439 #endif
4440 
4441 	// initialize the free page list and physical page mapper
4442 	vm_page_init(args);
4443 
4444 	// initialize the cache allocators
4445 	vm_cache_init(args);
4446 
4447 	{
4448 		status_t error = VMAreas::Init();
4449 		if (error != B_OK)
4450 			panic("vm_init: error initializing areas map\n");
4451 	}
4452 
4453 	VMAddressSpace::Init();
4454 	reserve_boot_loader_ranges(args);
4455 
4456 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4457 	heap_init_post_area();
4458 #endif
4459 
4460 	// Do any further initialization that the architecture dependent layers may
4461 	// need now
4462 	arch_vm_translation_map_init_post_area(args);
4463 	arch_vm_init_post_area(args);
4464 	vm_page_init_post_area(args);
4465 	slab_init_post_area();
4466 
4467 	// allocate areas to represent stuff that already exists
4468 
4469 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4470 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4471 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4472 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4473 #endif
4474 
4475 	allocate_kernel_args(args);
4476 
4477 	create_preloaded_image_areas(args->kernel_image);
4478 
4479 	// allocate areas for preloaded images
4480 	for (image = args->preloaded_images; image != NULL; image = image->next)
4481 		create_preloaded_image_areas(image);
4482 
4483 	// allocate kernel stacks
4484 	for (i = 0; i < args->num_cpus; i++) {
4485 		char name[64];
4486 
4487 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4488 		address = (void*)args->cpu_kstack[i].start;
4489 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4490 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4491 	}
4492 
4493 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4494 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4495 
4496 #if PARANOID_KERNEL_MALLOC
4497 	vm_block_address_range("uninitialized heap memory",
4498 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4499 #endif
4500 #if PARANOID_KERNEL_FREE
4501 	vm_block_address_range("freed heap memory",
4502 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4503 #endif
4504 
4505 	create_page_mappings_object_caches();
4506 
4507 #if DEBUG_CACHE_LIST
4508 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4509 		virtual_address_restrictions virtualRestrictions = {};
4510 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4511 		physical_address_restrictions physicalRestrictions = {};
4512 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4513 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4514 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4515 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4516 			&physicalRestrictions, (void**)&sCacheInfoTable);
4517 	}
4518 #endif	// DEBUG_CACHE_LIST
4519 
4520 	// add some debugger commands
4521 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4522 	add_debugger_command("area", &dump_area,
4523 		"Dump info about a particular area");
4524 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4525 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4526 #if DEBUG_CACHE_LIST
4527 	if (sCacheInfoTable != NULL) {
4528 		add_debugger_command_etc("caches", &dump_caches,
4529 			"List all VMCache trees",
4530 			"[ \"-c\" ]\n"
4531 			"All cache trees are listed sorted in decreasing order by number "
4532 				"of\n"
4533 			"used pages or, if \"-c\" is specified, by size of committed "
4534 				"memory.\n",
4535 			0);
4536 	}
4537 #endif
4538 	add_debugger_command("avail", &dump_available_memory,
4539 		"Dump available memory");
4540 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4541 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4542 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4543 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4544 	add_debugger_command("string", &display_mem, "dump strings");
4545 
4546 	add_debugger_command_etc("mapping", &dump_mapping_info,
4547 		"Print address mapping information",
4548 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4549 		"Prints low-level page mapping information for a given address. If\n"
4550 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4551 		"address that is looked up in the translation map of the current\n"
4552 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4553 		"\"-r\" is specified, <address> is a physical address that is\n"
4554 		"searched in the translation map of all teams, respectively the team\n"
4555 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4556 		"<address> is the address of a vm_page structure. The behavior is\n"
4557 		"equivalent to specifying \"-r\" with the physical address of that\n"
4558 		"page.\n",
4559 		0);
4560 
4561 	TRACE(("vm_init: exit\n"));
4562 
4563 	vm_cache_init_post_heap();
4564 
4565 	return err;
4566 }
4567 
4568 
4569 status_t
4570 vm_init_post_sem(kernel_args* args)
4571 {
4572 	// This frees all unused boot loader resources and makes their space available
4573 	// again
4574 	arch_vm_init_end(args);
4575 	unreserve_boot_loader_ranges(args);
4576 
4577 	// fill in all of the semaphores that were not allocated before;
4578 	// since we're still single threaded and only the kernel address space
4579 	// exists, it isn't that hard to find all of the ones we need to create
4580 
4581 	arch_vm_translation_map_init_post_sem(args);
4582 
4583 	slab_init_post_sem();
4584 
4585 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4586 	heap_init_post_sem();
4587 #endif
4588 
4589 	return B_OK;
4590 }
4591 
4592 
4593 status_t
4594 vm_init_post_thread(kernel_args* args)
4595 {
4596 	vm_page_init_post_thread(args);
4597 	slab_init_post_thread();
4598 	return heap_init_post_thread();
4599 }
4600 
4601 
4602 status_t
4603 vm_init_post_modules(kernel_args* args)
4604 {
4605 	return arch_vm_init_post_modules(args);
4606 }
4607 
4608 
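/*!	Increments the calling thread's page_faults_allowed counter; the
	counterpart to forbid_page_faults().
*/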
4609 void
4610 permit_page_faults(void)
4611 {
4612 	Thread* thread = thread_get_current_thread();
4613 	if (thread != NULL)
4614 		atomic_add(&thread->page_faults_allowed, 1);
4615 }
4616 
4617 
4618 void
4619 forbid_page_faults(void)
4620 {
4621 	Thread* thread = thread_get_current_thread();
4622 	if (thread != NULL)
4623 		atomic_add(&thread->page_faults_allowed, -1);
4624 }
4625 
4626 
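/*!	Page fault entry point, called by the architecture specific fault
	handler. Determines the affected address space, delegates the actual work
	to vm_soft_fault(), and on failure either redirects a kernel fault to the
	thread's fault handler or arranges for the faulting userland thread to
	receive a SIGSEGV.
*/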
4627 status_t
4628 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4629 	bool isUser, addr_t* newIP)
4630 {
4631 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4632 		faultAddress));
4633 
4634 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4635 
4636 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4637 	VMAddressSpace* addressSpace = NULL;
4638 
4639 	status_t status = B_OK;
4640 	*newIP = 0;
4641 	atomic_add((int32*)&sPageFaults, 1);
4642 
4643 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4644 		addressSpace = VMAddressSpace::GetKernel();
4645 	} else if (IS_USER_ADDRESS(pageAddress)) {
4646 		addressSpace = VMAddressSpace::GetCurrent();
4647 		if (addressSpace == NULL) {
4648 			if (!isUser) {
4649 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4650 					"memory!\n");
4651 				status = B_BAD_ADDRESS;
4652 				TPF(PageFaultError(-1,
4653 					VMPageFaultTracing
4654 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4655 			} else {
4656 				// XXX weird state.
4657 				panic("vm_page_fault: non kernel thread accessing user memory "
4658 					"that doesn't exist!\n");
4659 				status = B_BAD_ADDRESS;
4660 			}
4661 		}
4662 	} else {
4663 		// the hit was probably in the 64k DMZ between kernel and user space
4664 		// this keeps a user space thread from passing a buffer that crosses
4665 		// into kernel space
4666 		status = B_BAD_ADDRESS;
4667 		TPF(PageFaultError(-1,
4668 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4669 	}
4670 
4671 	if (status == B_OK) {
4672 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4673 			isUser, NULL);
4674 	}
4675 
4676 	if (status < B_OK) {
4677 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4678 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4679 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4680 			thread_get_current_thread_id());
4681 		if (!isUser) {
4682 			Thread* thread = thread_get_current_thread();
4683 			if (thread != NULL && thread->fault_handler != 0) {
4684 				// this will cause the arch dependent page fault handler to
4685 				// modify the IP on the interrupt frame or whatever to return
4686 				// to this address
4687 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4688 			} else {
4689 				// unhandled page fault in the kernel
4690 				panic("vm_page_fault: unhandled page fault in kernel space at "
4691 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4692 			}
4693 		} else {
4694 			Thread* thread = thread_get_current_thread();
4695 
4696 #ifdef TRACE_FAULTS
4697 			VMArea* area = NULL;
4698 			if (addressSpace != NULL) {
4699 				addressSpace->ReadLock();
4700 				area = addressSpace->LookupArea(faultAddress);
4701 			}
4702 
4703 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4704 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4705 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4706 				thread->team->Name(), thread->team->id,
4707 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4708 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4709 					area->Base() : 0x0));
4710 
4711 			if (addressSpace != NULL)
4712 				addressSpace->ReadUnlock();
4713 #endif
4714 
4715 			// If the thread has a signal handler for SIGSEGV, we simply
4716 			// send it the signal. Otherwise we notify the user debugger
4717 			// first.
4718 			struct sigaction action;
4719 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4720 					&& action.sa_handler != SIG_DFL
4721 					&& action.sa_handler != SIG_IGN)
4722 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4723 					SIGSEGV)) {
4724 				Signal signal(SIGSEGV,
4725 					status == B_PERMISSION_DENIED
4726 						? SEGV_ACCERR : SEGV_MAPERR,
4727 					EFAULT, thread->team->id);
4728 				signal.SetAddress((void*)address);
4729 				send_signal_to_thread(thread, signal, 0);
4730 			}
4731 		}
4732 	}
4733 
4734 	if (addressSpace != NULL)
4735 		addressSpace->Put();
4736 
4737 	return B_HANDLED_INTERRUPT;
4738 }
4739 
4740 
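/*!	Bundles the state needed while resolving a single page fault: the locked
	address space and cache chain, the page reservation, and the results of
	fault_get_page().
*/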
4741 struct PageFaultContext {
4742 	AddressSpaceReadLocker	addressSpaceLocker;
4743 	VMCacheChainLocker		cacheChainLocker;
4744 
4745 	VMTranslationMap*		map;
4746 	VMCache*				topCache;
4747 	off_t					cacheOffset;
4748 	vm_page_reservation		reservation;
4749 	bool					isWrite;
4750 
4751 	// return values
4752 	vm_page*				page;
4753 	bool					restart;
4754 	bool					pageAllocated;
4755 
4756 
4757 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4758 		:
4759 		addressSpaceLocker(addressSpace, true),
4760 		map(addressSpace->TranslationMap()),
4761 		isWrite(isWrite)
4762 	{
4763 	}
4764 
4765 	~PageFaultContext()
4766 	{
4767 		UnlockAll();
4768 		vm_page_unreserve_pages(&reservation);
4769 	}
4770 
4771 	void Prepare(VMCache* topCache, off_t cacheOffset)
4772 	{
4773 		this->topCache = topCache;
4774 		this->cacheOffset = cacheOffset;
4775 		page = NULL;
4776 		restart = false;
4777 		pageAllocated = false;
4778 
4779 		cacheChainLocker.SetTo(topCache);
4780 	}
4781 
4782 	void UnlockAll(VMCache* exceptCache = NULL)
4783 	{
4784 		topCache = NULL;
4785 		addressSpaceLocker.Unlock();
4786 		cacheChainLocker.Unlock(exceptCache);
4787 	}
4788 };
4789 
4790 
4791 /*!	Gets the page that should be mapped into the area.
4792 	Returns an error code other than \c B_OK, if the page couldn't be found or
4793 	paged in. The locking state of the address space and the caches is undefined
4794 	in that case.
4795 	Returns \c B_OK with \c context.restart set to \c true, if the function
4796 	had to unlock the address space and all caches and is supposed to be called
4797 	again.
4798 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4799 	found. It is returned in \c context.page. The address space will still be
4800 	locked as well as all caches starting from the top cache to at least the
4801 	cache the page lives in.
4802 */
4803 static status_t
4804 fault_get_page(PageFaultContext& context)
4805 {
4806 	VMCache* cache = context.topCache;
4807 	VMCache* lastCache = NULL;
4808 	vm_page* page = NULL;
4809 
4810 	while (cache != NULL) {
4811 		// We already hold the lock of the cache at this point.
4812 
4813 		lastCache = cache;
4814 
4815 		page = cache->LookupPage(context.cacheOffset);
4816 		if (page != NULL && page->busy) {
4817 			// the page is busy -- wait for it to become unbusy
4818 			context.UnlockAll(cache);
4819 			cache->ReleaseRefLocked();
4820 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4821 
4822 			// restart the whole process
4823 			context.restart = true;
4824 			return B_OK;
4825 		}
4826 
4827 		if (page != NULL)
4828 			break;
4829 
4830 		// The current cache does not contain the page we're looking for.
4831 
4832 		// see if the backing store has it
4833 		if (cache->HasPage(context.cacheOffset)) {
4834 			// insert a fresh page and mark it busy -- we're going to read it in
4835 			page = vm_page_allocate_page(&context.reservation,
4836 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4837 			cache->InsertPage(page, context.cacheOffset);
4838 
4839 			// We need to unlock all caches and the address space while reading
4840 			// the page in. Keep a reference to the cache around.
4841 			cache->AcquireRefLocked();
4842 			context.UnlockAll();
4843 
4844 			// read the page in
4845 			generic_io_vec vec;
4846 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4847 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4848 
4849 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4850 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4851 
4852 			cache->Lock();
4853 
4854 			if (status < B_OK) {
4855 				// on error remove and free the page
4856 				dprintf("reading page from cache %p returned: %s!\n",
4857 					cache, strerror(status));
4858 
4859 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4860 				cache->RemovePage(page);
4861 				vm_page_set_state(page, PAGE_STATE_FREE);
4862 
4863 				cache->ReleaseRefAndUnlock();
4864 				return status;
4865 			}
4866 
4867 			// mark the page unbusy again
4868 			cache->MarkPageUnbusy(page);
4869 
4870 			DEBUG_PAGE_ACCESS_END(page);
4871 
4872 			// Since we needed to unlock everything temporarily, the area
4873 			// situation might have changed. So we need to restart the whole
4874 			// process.
4875 			cache->ReleaseRefAndUnlock();
4876 			context.restart = true;
4877 			return B_OK;
4878 		}
4879 
4880 		cache = context.cacheChainLocker.LockSourceCache();
4881 	}
4882 
4883 	if (page == NULL) {
4884 		// There was no adequate page, determine the cache for a clean one.
4885 		// Read-only pages go into the deepest cache; only the topmost cache
4886 		// may have direct write access.
4887 		cache = context.isWrite ? context.topCache : lastCache;
4888 
4889 		// allocate a clean page
4890 		page = vm_page_allocate_page(&context.reservation,
4891 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4892 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4893 			page->physical_page_number));
4894 
4895 		// insert the new page into our cache
4896 		cache->InsertPage(page, context.cacheOffset);
4897 		context.pageAllocated = true;
4898 	} else if (page->Cache() != context.topCache && context.isWrite) {
4899 		// We have a page that has the data we want, but in the wrong cache
4900 		// object so we need to copy it and stick it into the top cache.
4901 		vm_page* sourcePage = page;
4902 
4903 		// TODO: If memory is low, it might be a good idea to steal the page
4904 		// from our source cache -- if possible, that is.
4905 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4906 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4907 
4908 		// To not needlessly kill concurrency we unlock all caches but the top
4909 		// one while copying the page. Lacking another mechanism to ensure that
4910 		// the source page doesn't disappear, we mark it busy.
4911 		sourcePage->busy = true;
4912 		context.cacheChainLocker.UnlockKeepRefs(true);
4913 
4914 		// copy the page
4915 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4916 			sourcePage->physical_page_number * B_PAGE_SIZE);
4917 
4918 		context.cacheChainLocker.RelockCaches(true);
4919 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4920 
4921 		// insert the new page into our cache
4922 		context.topCache->InsertPage(page, context.cacheOffset);
4923 		context.pageAllocated = true;
4924 	} else
4925 		DEBUG_PAGE_ACCESS_START(page);
4926 
4927 	context.page = page;
4928 	return B_OK;
4929 }
4930 
4931 
4932 /*!	Makes sure the address in the given address space is mapped.
4933 
4934 	\param addressSpace The address space.
4935 	\param originalAddress The address. Doesn't need to be page aligned.
4936 	\param isWrite If \c true the address shall be write-accessible.
4937 	\param isUser If \c true the access is requested by a userland team.
4938 	\param wirePage On success, if non \c NULL, the wired count of the page
4939 		mapped at the given address is incremented and the page is returned
4940 		via this parameter.
4941 	\return \c B_OK on success, another error code otherwise.
4942 */
4943 static status_t
4944 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4945 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4946 {
4947 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4948 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4949 		originalAddress, isWrite, isUser));
4950 
4951 	PageFaultContext context(addressSpace, isWrite);
4952 
4953 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4954 	status_t status = B_OK;
4955 
4956 	addressSpace->IncrementFaultCount();
4957 
4958 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4959 	// the pages upfront makes sure we don't have any cache locked, so that the
4960 	// page daemon/thief can do their job without problems.
4961 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4962 		originalAddress);
4963 	context.addressSpaceLocker.Unlock();
4964 	vm_page_reserve_pages(&context.reservation, reservePages,
4965 		addressSpace == VMAddressSpace::Kernel()
4966 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4967 
4968 	while (true) {
4969 		context.addressSpaceLocker.Lock();
4970 
4971 		// get the area the fault was in
4972 		VMArea* area = addressSpace->LookupArea(address);
4973 		if (area == NULL) {
4974 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4975 				"space\n", originalAddress);
4976 			TPF(PageFaultError(-1,
4977 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4978 			status = B_BAD_ADDRESS;
4979 			break;
4980 		}
4981 
4982 		// check permissions
4983 		uint32 protection = get_area_page_protection(area, address);
4984 		if (isUser && (protection & B_USER_PROTECTION) == 0
4985 				&& (area->protection & B_KERNEL_AREA) != 0) {
4986 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4987 				area->id, (void*)originalAddress);
4988 			TPF(PageFaultError(area->id,
4989 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4990 			status = B_PERMISSION_DENIED;
4991 			break;
4992 		}
4993 		if (isWrite && (protection
4994 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4995 			dprintf("write access attempted on write-protected area 0x%"
4996 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4997 			TPF(PageFaultError(area->id,
4998 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4999 			status = B_PERMISSION_DENIED;
5000 			break;
5001 		} else if (isExecute && (protection
5002 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
5003 			dprintf("instruction fetch attempted on execute-protected area 0x%"
5004 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
5005 			TPF(PageFaultError(area->id,
5006 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
5007 			status = B_PERMISSION_DENIED;
5008 			break;
5009 		} else if (!isWrite && !isExecute && (protection
5010 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
5011 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
5012 				" at %p\n", area->id, (void*)originalAddress);
5013 			TPF(PageFaultError(area->id,
5014 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
5015 			status = B_PERMISSION_DENIED;
5016 			break;
5017 		}
5018 
5019 		// We have the area, it was a valid access, so let's try to resolve the
5020 		// page fault now.
5021 		// At first, the top most cache from the area is investigated.
5022 
5023 		context.Prepare(vm_area_get_locked_cache(area),
5024 			address - area->Base() + area->cache_offset);
5025 
5026 		// See if this cache has a fault handler -- this will do all the work
5027 		// for us.
5028 		{
5029 			// Note, since the page fault is resolved with interrupts enabled,
5030 			// the fault handler could be called more than once for the same
5031 			// reason -- the store must take this into account.
5032 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
5033 			if (status != B_BAD_HANDLER)
5034 				break;
5035 		}
5036 
5037 		// The top most cache has no fault handler, so let's see if the cache or
5038 		// its sources already have the page we're searching for (we're going
5039 		// from top to bottom).
5040 		status = fault_get_page(context);
5041 		if (status != B_OK) {
5042 			TPF(PageFaultError(area->id, status));
5043 			break;
5044 		}
5045 
5046 		if (context.restart)
5047 			continue;
5048 
5049 		// All went fine, all there is left to do is to map the page into the
5050 		// address space.
5051 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
5052 			context.page));
5053 
5054 		// If the page doesn't reside in the area's cache, we need to make sure
5055 		// it's mapped in read-only, so that we cannot overwrite someone else's
5056 		// data (copy-on-write)
5057 		uint32 newProtection = protection;
5058 		if (context.page->Cache() != context.topCache && !isWrite)
5059 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
5060 
5061 		bool unmapPage = false;
5062 		bool mapPage = true;
5063 
5064 		// check whether there's already a page mapped at the address
5065 		context.map->Lock();
5066 
5067 		phys_addr_t physicalAddress;
5068 		uint32 flags;
5069 		vm_page* mappedPage = NULL;
5070 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
5071 			&& (flags & PAGE_PRESENT) != 0
5072 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5073 				!= NULL) {
5074 			// Yep there's already a page. If it's ours, we can simply adjust
5075 			// its protection. Otherwise we have to unmap it.
5076 			if (mappedPage == context.page) {
5077 				context.map->ProtectPage(area, address, newProtection);
5078 					// Note: We assume that ProtectPage() is atomic (i.e.
5079 					// the page isn't temporarily unmapped), otherwise we'd have
5080 					// to make sure it isn't wired.
5081 				mapPage = false;
5082 			} else
5083 				unmapPage = true;
5084 		}
5085 
5086 		context.map->Unlock();
5087 
5088 		if (unmapPage) {
5089 			// If the page is wired, we can't unmap it. Wait until it is unwired
5090 			// again and restart. Note that the page cannot be wired for
5091 			// writing, since it isn't in the topmost cache. So we can safely
5092 			// ignore ranges wired for writing (our own and other concurrent
5093 			// wiring attempts in progress) and in fact have to do that to avoid
5094 			// a deadlock.
5095 			VMAreaUnwiredWaiter waiter;
5096 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5097 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5098 				// unlock everything and wait
5099 				if (context.pageAllocated) {
5100 					// ... but since we allocated a page and inserted it into
5101 					// the top cache, remove and free it first. Otherwise we'd
5102 					// have a page from a lower cache mapped while an upper
5103 					// cache has a page that would shadow it.
5104 					context.topCache->RemovePage(context.page);
5105 					vm_page_free_etc(context.topCache, context.page,
5106 						&context.reservation);
5107 				} else
5108 					DEBUG_PAGE_ACCESS_END(context.page);
5109 
5110 				context.UnlockAll();
5111 				waiter.waitEntry.Wait();
5112 				continue;
5113 			}
5114 
5115 			// Note: The mapped page is a page of a lower cache. We are
5116 			// guaranteed to have that cache locked, our new page is a copy of
5117 			// that page, and the page is not busy. The logic for that guarantee
5118 			// is as follows: Since the page is mapped, it must live in the top
5119 			// cache (ruled out above) or any of its lower caches, and there is
5120 			// (was before the new page was inserted) no other page in any
5121 			// cache between the top cache and the page's cache (otherwise that
5122 			// would be mapped instead). That in turn means that our algorithm
5123 			// must have found it and therefore it cannot be busy either.
5124 			DEBUG_PAGE_ACCESS_START(mappedPage);
5125 			unmap_page(area, address);
5126 			DEBUG_PAGE_ACCESS_END(mappedPage);
5127 		}
5128 
5129 		if (mapPage) {
5130 			if (map_page(area, context.page, address, newProtection,
5131 					&context.reservation) != B_OK) {
5132 				// Mapping can only fail when the page mapping object couldn't
5133 				// be allocated. Save for the missing mapping, everything is
5134 				// fine, though. If this was a regular page fault, we'll simply
5135 				// leave and probably fault again. To make sure we'll have more
5136 				// luck then, we ensure that the minimum object reserve is
5137 				// available.
5138 				DEBUG_PAGE_ACCESS_END(context.page);
5139 
5140 				context.UnlockAll();
5141 
5142 				if (object_cache_reserve(page_mapping_object_cache_for(
5143 							context.page->physical_page_number), 1, 0)
5144 						!= B_OK) {
5145 					// Apparently the situation is serious. Let's get ourselves
5146 					// killed.
5147 					status = B_NO_MEMORY;
5148 				} else if (wirePage != NULL) {
5149 					// The caller expects us to wire the page. Since
5150 					// object_cache_reserve() succeeded, we should now be able
5151 					// to allocate a mapping structure. Restart.
5152 					continue;
5153 				}
5154 
5155 				break;
5156 			}
5157 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5158 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5159 
5160 		// also wire the page, if requested
5161 		if (wirePage != NULL && status == B_OK) {
5162 			increment_page_wired_count(context.page);
5163 			*wirePage = context.page;
5164 		}
5165 
5166 		DEBUG_PAGE_ACCESS_END(context.page);
5167 
5168 		break;
5169 	}
5170 
5171 	return status;
5172 }
5173 
5174 
5175 status_t
5176 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5177 {
5178 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5179 }
5180 
5181 status_t
5182 vm_put_physical_page(addr_t vaddr, void* handle)
5183 {
5184 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5185 }
5186 
5187 
5188 status_t
5189 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5190 	void** _handle)
5191 {
5192 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5193 }
5194 
5195 status_t
5196 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5197 {
5198 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5199 }
5200 
5201 
5202 status_t
5203 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5204 {
5205 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5206 }
5207 
5208 status_t
5209 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5210 {
5211 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5212 }
5213 
5214 
5215 void
5216 vm_get_info(system_info* info)
5217 {
5218 	swap_get_info(info);
5219 
5220 	MutexLocker locker(sAvailableMemoryLock);
5221 	info->needed_memory = sNeededMemory;
5222 	info->free_memory = sAvailableMemory;
5223 }
5224 
5225 
5226 uint32
5227 vm_num_page_faults(void)
5228 {
5229 	return sPageFaults;
5230 }
5231 
5232 
5233 off_t
5234 vm_available_memory(void)
5235 {
5236 	MutexLocker locker(sAvailableMemoryLock);
5237 	return sAvailableMemory;
5238 }
5239 
5240 
5241 off_t
5242 vm_available_not_needed_memory(void)
5243 {
5244 	MutexLocker locker(sAvailableMemoryLock);
5245 	return sAvailableMemory - sNeededMemory;
5246 }
5247 
5248 
5249 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5250 	debugger.
5251 */
5252 off_t
5253 vm_available_not_needed_memory_debug(void)
5254 {
5255 	return sAvailableMemory - sNeededMemory;
5256 }
5257 
5258 
5259 size_t
5260 vm_kernel_address_space_left(void)
5261 {
5262 	return VMAddressSpace::Kernel()->FreeSpace();
5263 }
5264 
5265 
5266 void
5267 vm_unreserve_memory(size_t amount)
5268 {
5269 	mutex_lock(&sAvailableMemoryLock);
5270 
5271 	sAvailableMemory += amount;
5272 
5273 	mutex_unlock(&sAvailableMemoryLock);
5274 }
5275 
5276 
5277 status_t
5278 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5279 {
5280 	size_t reserve = kMemoryReserveForPriority[priority];
5281 
5282 	MutexLocker locker(sAvailableMemoryLock);
5283 
5284 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5285 
5286 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5287 		sAvailableMemory -= amount;
5288 		return B_OK;
5289 	}
5290 
5291 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
5292 		// Do not wait for something that will never happen.
5293 		return B_NO_MEMORY;
5294 	}
5295 
5296 	if (timeout <= 0)
5297 		return B_NO_MEMORY;
5298 
5299 	// turn timeout into an absolute timeout
5300 	timeout += system_time();
5301 
5302 	// loop until we've got the memory or the timeout occurs
5303 	do {
5304 		sNeededMemory += amount;
5305 
5306 		// call the low resource manager
5307 		locker.Unlock();
5308 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5309 			B_ABSOLUTE_TIMEOUT, timeout);
5310 		locker.Lock();
5311 
5312 		sNeededMemory -= amount;
5313 
5314 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5315 			sAvailableMemory -= amount;
5316 			return B_OK;
5317 		}
5318 	} while (timeout > system_time());
5319 
5320 	return B_NO_MEMORY;
5321 }
5322 
5323 
5324 status_t
5325 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5326 {
5327 	// NOTE: The caller is responsible for synchronizing calls to this function!
5328 
5329 	AddressSpaceReadLocker locker;
5330 	VMArea* area;
5331 	status_t status = locker.SetFromArea(id, area);
5332 	if (status != B_OK)
5333 		return status;
5334 
5335 	// nothing to do, if the type doesn't change
5336 	uint32 oldType = area->MemoryType();
5337 	if (type == oldType)
5338 		return B_OK;
5339 
5340 	// set the memory type of the area and the mapped pages
5341 	VMTranslationMap* map = area->address_space->TranslationMap();
5342 	map->Lock();
5343 	area->SetMemoryType(type);
5344 	map->ProtectArea(area, area->protection);
5345 	map->Unlock();
5346 
5347 	// set the physical memory type
5348 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5349 	if (error != B_OK) {
5350 		// reset the memory type of the area and the mapped pages
5351 		map->Lock();
5352 		area->SetMemoryType(oldType);
5353 		map->ProtectArea(area, area->protection);
5354 		map->Unlock();
5355 		return error;
5356 	}
5357 
	return B_OK;
}
5361 
5362 
5363 /*!	This function enforces some protection properties:
5364 	 - kernel areas must be W^X (after kernel startup)
5365 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5366 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
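
	For example (an illustrative sketch only):
	\code
	uint32 protection = B_READ_AREA | B_WRITE_AREA;
	fix_protection(&protection);
	// protection now also includes B_KERNEL_READ_AREA and B_KERNEL_WRITE_AREA
	\endcode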
5367 */
5368 static void
5369 fix_protection(uint32* protection)
5370 {
5371 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5372 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5373 			|| (*protection & B_WRITE_AREA) != 0)
5374 		&& !gKernelStartup)
5375 		panic("kernel areas cannot be both writable and executable!");
5376 
5377 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5378 		if ((*protection & B_WRITE_AREA) != 0)
5379 			*protection |= B_KERNEL_WRITE_AREA;
5380 		if ((*protection & B_READ_AREA) != 0)
5381 			*protection |= B_KERNEL_READ_AREA;
5382 	}
5383 }
5384 
5385 
5386 static void
5387 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5388 {
5389 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5390 	info->area = area->id;
5391 	info->address = (void*)area->Base();
5392 	info->size = area->Size();
5393 	info->protection = area->protection;
5394 	info->lock = area->wiring;
5395 	info->team = area->address_space->ID();
5396 	info->copy_count = 0;
5397 	info->in_count = 0;
5398 	info->out_count = 0;
5399 		// TODO: retrieve real values here!
5400 
5401 	VMCache* cache = vm_area_get_locked_cache(area);
5402 
	// Note: this is a simplification; the cache could be larger than this area
5404 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5405 
5406 	vm_area_put_locked_cache(cache);
5407 }
5408 
5409 
5410 static status_t
5411 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5412 {
5413 	// is newSize a multiple of B_PAGE_SIZE?
5414 	if (newSize & (B_PAGE_SIZE - 1))
5415 		return B_BAD_VALUE;
5416 
5417 	// lock all affected address spaces and the cache
5418 	VMArea* area;
5419 	VMCache* cache;
5420 
5421 	MultiAddressSpaceLocker locker;
5422 	AreaCacheLocker cacheLocker;
5423 
5424 	status_t status;
5425 	size_t oldSize;
5426 	bool anyKernelArea;
5427 	bool restart;
5428 
5429 	do {
5430 		anyKernelArea = false;
5431 		restart = false;
5432 
5433 		locker.Unset();
5434 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5435 		if (status != B_OK)
5436 			return status;
5437 		cacheLocker.SetTo(cache, true);	// already locked
5438 
5439 		// enforce restrictions
5440 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5441 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5442 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5443 				"resize kernel area %" B_PRId32 " (%s)\n",
5444 				team_get_current_team_id(), areaID, area->name);
5445 			return B_NOT_ALLOWED;
5446 		}
5447 		// TODO: Enforce all restrictions (team, etc.)!
5448 
5449 		oldSize = area->Size();
5450 		if (newSize == oldSize)
5451 			return B_OK;
5452 
5453 		if (cache->type != CACHE_TYPE_RAM)
5454 			return B_NOT_ALLOWED;
5455 
5456 		if (oldSize < newSize) {
5457 			// We need to check if all areas of this cache can be resized.
5458 			for (VMArea* current = cache->areas; current != NULL;
5459 					current = current->cache_next) {
5460 				if (!current->address_space->CanResizeArea(current, newSize))
5461 					return B_ERROR;
5462 				anyKernelArea
5463 					|= current->address_space == VMAddressSpace::Kernel();
5464 			}
5465 		} else {
5466 			// We're shrinking the areas, so we must make sure the affected
5467 			// ranges are not wired.
5468 			for (VMArea* current = cache->areas; current != NULL;
5469 					current = current->cache_next) {
5470 				anyKernelArea
5471 					|= current->address_space == VMAddressSpace::Kernel();
5472 
5473 				if (wait_if_area_range_is_wired(current,
5474 						current->Base() + newSize, oldSize - newSize, &locker,
5475 						&cacheLocker)) {
5476 					restart = true;
5477 					break;
5478 				}
5479 			}
5480 		}
5481 	} while (restart);
5482 
5483 	// Okay, looks good so far, so let's do it
5484 
5485 	int priority = kernel && anyKernelArea
5486 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5487 	uint32 allocationFlags = kernel && anyKernelArea
5488 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5489 
5490 	if (oldSize < newSize) {
5491 		// Growing the cache can fail, so we do it first.
5492 		status = cache->Resize(cache->virtual_base + newSize, priority);
5493 		if (status != B_OK)
5494 			return status;
5495 	}
5496 
5497 	for (VMArea* current = cache->areas; current != NULL;
5498 			current = current->cache_next) {
5499 		status = current->address_space->ResizeArea(current, newSize,
5500 			allocationFlags);
5501 		if (status != B_OK)
5502 			break;
5503 
5504 		// We also need to unmap all pages beyond the new size, if the area has
5505 		// shrunk
5506 		if (newSize < oldSize) {
5507 			VMCacheChainLocker cacheChainLocker(cache);
5508 			cacheChainLocker.LockAllSourceCaches();
5509 
5510 			unmap_pages(current, current->Base() + newSize,
5511 				oldSize - newSize);
5512 
5513 			cacheChainLocker.Unlock(cache);
5514 		}
5515 	}
5516 
5517 	if (status == B_OK) {
5518 		// Shrink or grow individual page protections if in use.
5519 		if (area->page_protections != NULL) {
5520 			size_t bytes = area_page_protections_size(newSize);
5521 			uint8* newProtections
5522 				= (uint8*)realloc(area->page_protections, bytes);
5523 			if (newProtections == NULL)
5524 				status = B_NO_MEMORY;
5525 			else {
5526 				area->page_protections = newProtections;
5527 
5528 				if (oldSize < newSize) {
5529 					// init the additional page protections to that of the area
5530 					uint32 offset = area_page_protections_size(oldSize);
5531 					uint32 areaProtection = area->protection
5532 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5533 					memset(area->page_protections + offset,
5534 						areaProtection | (areaProtection << 4), bytes - offset);
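					// Two pages share one byte (one nibble each). If the old
					// page count was odd, the high nibble of the last already
					// existing byte belongs to the first newly added page and
					// has to be initialized as well.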
5535 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5536 						uint8& entry = area->page_protections[offset - 1];
5537 						entry = (entry & 0x0f) | (areaProtection << 4);
5538 					}
5539 				}
5540 			}
5541 		}
5542 	}
5543 
5544 	// shrinking the cache can't fail, so we do it now
5545 	if (status == B_OK && newSize < oldSize)
5546 		status = cache->Resize(cache->virtual_base + newSize, priority);
5547 
5548 	if (status != B_OK) {
5549 		// Something failed -- resize the areas back to their original size.
5550 		// This can fail, too, in which case we're seriously screwed.
5551 		for (VMArea* current = cache->areas; current != NULL;
5552 				current = current->cache_next) {
5553 			if (current->address_space->ResizeArea(current, oldSize,
5554 					allocationFlags) != B_OK) {
				panic("vm_resize_area(): Failed and unable to restore original "
					"state.");
5557 			}
5558 		}
5559 
5560 		cache->Resize(cache->virtual_base + oldSize, priority);
5561 	}
5562 
5563 	// TODO: we must honour the lock restrictions of this area
5564 	return status;
5565 }
5566 
5567 
5568 status_t
5569 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5570 {
5571 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5572 }
5573 
5574 
5575 status_t
5576 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5577 {
5578 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5579 }
5580 
5581 
5582 status_t
5583 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5584 	bool user)
5585 {
5586 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5587 }
5588 
5589 
5590 void
5591 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5592 {
5593 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5594 }
5595 
5596 
5597 /*!	Copies a range of memory directly from/to a page that might not be mapped
5598 	at the moment.
5599 
	For \a unsafeMemory the current mapping (if any) is ignored. The function
5601 	walks through the respective area's cache chain to find the physical page
5602 	and copies from/to it directly.
5603 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5604 	must not cross a page boundary.
5605 
5606 	\param teamID The team ID identifying the address space \a unsafeMemory is
5607 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5608 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5609 		is passed, the address space of the thread returned by
5610 		debug_get_debugged_thread() is used.
5611 	\param unsafeMemory The start of the unsafe memory range to be copied
5612 		from/to.
5613 	\param buffer A safely accessible kernel buffer to be copied from/to.
5614 	\param size The number of bytes to be copied.
5615 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5616 		\a unsafeMemory, the other way around otherwise.
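
	A sketch of a possible use from a kernel debugger command (the pointer into
	the debugged team's address space is hypothetical and assumed not to cross
	a page boundary for the given size):
	\code
	uint32 value;
	if (vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafePointer, &value,
			sizeof(value), false) == B_OK) {
		// value now holds the data read from the debugged team's memory
	}
	\endcode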
5617 */
5618 status_t
5619 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5620 	size_t size, bool copyToUnsafe)
5621 {
5622 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5623 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5624 		return B_BAD_VALUE;
5625 	}
5626 
5627 	// get the address space for the debugged thread
5628 	VMAddressSpace* addressSpace;
5629 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5630 		addressSpace = VMAddressSpace::Kernel();
5631 	} else if (teamID == B_CURRENT_TEAM) {
5632 		Thread* thread = debug_get_debugged_thread();
5633 		if (thread == NULL || thread->team == NULL)
5634 			return B_BAD_ADDRESS;
5635 
5636 		addressSpace = thread->team->address_space;
5637 	} else
5638 		addressSpace = VMAddressSpace::DebugGet(teamID);
5639 
5640 	if (addressSpace == NULL)
5641 		return B_BAD_ADDRESS;
5642 
5643 	// get the area
5644 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5645 	if (area == NULL)
5646 		return B_BAD_ADDRESS;
5647 
5648 	// search the page
5649 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5650 		+ area->cache_offset;
5651 	VMCache* cache = area->cache;
5652 	vm_page* page = NULL;
5653 	while (cache != NULL) {
5654 		page = cache->DebugLookupPage(cacheOffset);
5655 		if (page != NULL)
5656 			break;
5657 
5658 		// Page not found in this cache -- if it is paged out, we must not try
5659 		// to get it from lower caches.
5660 		if (cache->DebugHasPage(cacheOffset))
5661 			break;
5662 
5663 		cache = cache->source;
5664 	}
5665 
5666 	if (page == NULL)
5667 		return B_UNSUPPORTED;
5668 
5669 	// copy from/to physical memory
5670 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5671 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5672 
5673 	if (copyToUnsafe) {
5674 		if (page->Cache() != area->cache)
5675 			return B_UNSUPPORTED;
5676 
5677 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5678 	}
5679 
5680 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5681 }
5682 
5683 
/*!	Validate that a memory range is either fully in kernel space, or fully in
	userspace.
*/
5686 static inline bool
5687 validate_memory_range(const void* addr, size_t size)
5688 {
5689 	addr_t address = (addr_t)addr;
5690 
5691 	// Check for overflows on all addresses.
5692 	if ((address + size) < address)
5693 		return false;
5694 
5695 	// Validate that the address range does not cross the kernel/user boundary.
5696 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5697 }
5698 
5699 
5700 //	#pragma mark - kernel public API
5701 
5702 
5703 status_t
5704 user_memcpy(void* to, const void* from, size_t size)
5705 {
5706 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5707 		return B_BAD_ADDRESS;
5708 
5709 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5710 		return B_BAD_ADDRESS;
5711 
5712 	return B_OK;
5713 }
5714 
5715 
5716 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5717 	the string in \a to, NULL-terminating the result.
5718 
5719 	\param to Pointer to the destination C-string.
5720 	\param from Pointer to the source C-string.
5721 	\param size Size in bytes of the string buffer pointed to by \a to.
5722 
	\return The length of the string \a from (i.e. strlen(\a from)), or a
		negative error code on failure.
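
	A sketch of typical use when copying a name from userland (\c userName is
	a hypothetical userland pointer):
	\code
	char name[B_OS_NAME_LENGTH];
	if (user_strlcpy(name, userName, sizeof(name)) < B_OK)
		return B_BAD_ADDRESS;
	\endcode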
5724 */
5725 ssize_t
5726 user_strlcpy(char* to, const char* from, size_t size)
5727 {
5728 	if (to == NULL && size != 0)
5729 		return B_BAD_VALUE;
5730 	if (from == NULL)
5731 		return B_BAD_ADDRESS;
5732 
5733 	// Protect the source address from overflows.
5734 	size_t maxSize = size;
5735 	if ((addr_t)from + maxSize < (addr_t)from)
5736 		maxSize -= (addr_t)from + maxSize;
5737 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5738 		maxSize = USER_TOP - (addr_t)from;
5739 
5740 	if (!validate_memory_range(to, maxSize))
5741 		return B_BAD_ADDRESS;
5742 
5743 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5744 	if (result < 0)
5745 		return result;
5746 
5747 	// If we hit the address overflow boundary, fail.
5748 	if ((size_t)result >= maxSize && maxSize < size)
5749 		return B_BAD_ADDRESS;
5750 
5751 	return result;
5752 }
5753 
5754 
5755 status_t
5756 user_memset(void* s, char c, size_t count)
5757 {
5758 	if (!validate_memory_range(s, count))
5759 		return B_BAD_ADDRESS;
5760 
5761 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5762 		return B_BAD_ADDRESS;
5763 
5764 	return B_OK;
5765 }
5766 
5767 
5768 /*!	Wires a single page at the given address.
5769 
5770 	\param team The team whose address space the address belongs to. Supports
5771 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5772 		parameter is ignored.
	\param address The virtual address to wire down. Does not need to
5774 		be page aligned.
5775 	\param writable If \c true the page shall be writable.
5776 	\param info On success the info is filled in, among other things
5777 		containing the physical address the given virtual one translates to.
	\return \c B_OK when the page could be wired, another error code otherwise.
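
	A sketch of typical use (\c address is a hypothetical virtual address):
	\code
	VMPageWiringInfo info;
	if (vm_wire_page(B_CURRENT_TEAM, address, true, &info) == B_OK) {
		// ... use info.physicalAddress while the page stays wired ...
		vm_unwire_page(&info);
	}
	\endcode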
5779 */
5780 status_t
5781 vm_wire_page(team_id team, addr_t address, bool writable,
5782 	VMPageWiringInfo* info)
5783 {
5784 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5785 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5786 
5787 	// compute the page protection that is required
5788 	bool isUser = IS_USER_ADDRESS(address);
5789 	uint32 requiredProtection = PAGE_PRESENT
5790 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5791 	if (writable)
5792 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5793 
5794 	// get and read lock the address space
5795 	VMAddressSpace* addressSpace = NULL;
5796 	if (isUser) {
5797 		if (team == B_CURRENT_TEAM)
5798 			addressSpace = VMAddressSpace::GetCurrent();
5799 		else
5800 			addressSpace = VMAddressSpace::Get(team);
5801 	} else
5802 		addressSpace = VMAddressSpace::GetKernel();
5803 	if (addressSpace == NULL)
5804 		return B_ERROR;
5805 
5806 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5807 
5808 	VMTranslationMap* map = addressSpace->TranslationMap();
5809 	status_t error = B_OK;
5810 
5811 	// get the area
5812 	VMArea* area = addressSpace->LookupArea(pageAddress);
5813 	if (area == NULL) {
5814 		addressSpace->Put();
5815 		return B_BAD_ADDRESS;
5816 	}
5817 
5818 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5819 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5820 
5821 	// mark the area range wired
5822 	area->Wire(&info->range);
5823 
5824 	// Lock the area's cache chain and the translation map. Needed to look
5825 	// up the page and play with its wired count.
5826 	cacheChainLocker.LockAllSourceCaches();
5827 	map->Lock();
5828 
5829 	phys_addr_t physicalAddress;
5830 	uint32 flags;
5831 	vm_page* page;
5832 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5833 		&& (flags & requiredProtection) == requiredProtection
5834 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5835 			!= NULL) {
5836 		// Already mapped with the correct permissions -- just increment
5837 		// the page's wired count.
5838 		increment_page_wired_count(page);
5839 
5840 		map->Unlock();
5841 		cacheChainLocker.Unlock();
5842 		addressSpaceLocker.Unlock();
5843 	} else {
5844 		// Let vm_soft_fault() map the page for us, if possible. We need
5845 		// to fully unlock to avoid deadlocks. Since we have already
5846 		// wired the area itself, nothing disturbing will happen with it
5847 		// in the meantime.
5848 		map->Unlock();
5849 		cacheChainLocker.Unlock();
5850 		addressSpaceLocker.Unlock();
5851 
5852 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5853 			isUser, &page);
5854 
5855 		if (error != B_OK) {
5856 			// The page could not be mapped -- clean up.
5857 			VMCache* cache = vm_area_get_locked_cache(area);
5858 			area->Unwire(&info->range);
5859 			cache->ReleaseRefAndUnlock();
5860 			addressSpace->Put();
5861 			return error;
5862 		}
5863 	}
5864 
5865 	info->physicalAddress
5866 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5867 			+ address % B_PAGE_SIZE;
5868 	info->page = page;
5869 
5870 	return B_OK;
5871 }
5872 
5873 
5874 /*!	Unwires a single page previously wired via vm_wire_page().
5875 
5876 	\param info The same object passed to vm_wire_page() before.
5877 */
5878 void
5879 vm_unwire_page(VMPageWiringInfo* info)
5880 {
5881 	// lock the address space
5882 	VMArea* area = info->range.area;
5883 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5884 		// takes over our reference
5885 
5886 	// lock the top cache
5887 	VMCache* cache = vm_area_get_locked_cache(area);
5888 	VMCacheChainLocker cacheChainLocker(cache);
5889 
5890 	if (info->page->Cache() != cache) {
5891 		// The page is not in the top cache, so we lock the whole cache chain
5892 		// before touching the page's wired count.
5893 		cacheChainLocker.LockAllSourceCaches();
5894 	}
5895 
5896 	decrement_page_wired_count(info->page);
5897 
	// remove the wired range from the area
5899 	area->Unwire(&info->range);
5900 
5901 	cacheChainLocker.Unlock();
5902 }
5903 
5904 
5905 /*!	Wires down the given address range in the specified team's address space.
5906 
5907 	If successful the function
5908 	- acquires a reference to the specified team's address space,
5909 	- adds respective wired ranges to all areas that intersect with the given
5910 	  address range,
5911 	- makes sure all pages in the given address range are mapped with the
5912 	  requested access permissions and increments their wired count.
5913 
	It fails when \a team doesn't specify a valid address space, when any part
5915 	of the specified address range is not covered by areas, when the concerned
5916 	areas don't allow mapping with the requested permissions, or when mapping
5917 	failed for another reason.
5918 
	When successful the call must be balanced by an unlock_memory_etc() call
	with the exact same parameters.
5921 
	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
		is supported.
5924 	\param address The start of the address range to be wired.
5925 	\param numBytes The size of the address range to be wired.
5926 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5927 		requests that the range must be wired writable ("read from device
5928 		into memory").
5929 	\return \c B_OK on success, another error code otherwise.
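
	A sketch of typical use (\c team, \c buffer and \c length are
	hypothetical):
	\code
	status_t status = lock_memory_etc(team, buffer, length, 0);
	if (status == B_OK) {
		// ... perform I/O on the wired range ...
		unlock_memory_etc(team, buffer, length, 0);
	}
	\endcode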
5930 */
5931 status_t
5932 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5933 {
5934 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5935 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5936 
5937 	// compute the page protection that is required
5938 	bool isUser = IS_USER_ADDRESS(address);
5939 	bool writable = (flags & B_READ_DEVICE) == 0;
5940 	uint32 requiredProtection = PAGE_PRESENT
5941 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5942 	if (writable)
5943 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5944 
5945 	uint32 mallocFlags = isUser
5946 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5947 
5948 	// get and read lock the address space
5949 	VMAddressSpace* addressSpace = NULL;
5950 	if (isUser) {
5951 		if (team == B_CURRENT_TEAM)
5952 			addressSpace = VMAddressSpace::GetCurrent();
5953 		else
5954 			addressSpace = VMAddressSpace::Get(team);
5955 	} else
5956 		addressSpace = VMAddressSpace::GetKernel();
5957 	if (addressSpace == NULL)
5958 		return B_ERROR;
5959 
5960 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5961 		// We get a new address space reference here. The one we got above will
5962 		// be freed by unlock_memory_etc().
5963 
5964 	VMTranslationMap* map = addressSpace->TranslationMap();
5965 	status_t error = B_OK;
5966 
5967 	// iterate through all concerned areas
5968 	addr_t nextAddress = lockBaseAddress;
5969 	while (nextAddress != lockEndAddress) {
5970 		// get the next area
5971 		VMArea* area = addressSpace->LookupArea(nextAddress);
5972 		if (area == NULL) {
5973 			error = B_BAD_ADDRESS;
5974 			break;
5975 		}
5976 
5977 		addr_t areaStart = nextAddress;
5978 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5979 
5980 		// allocate the wired range (do that before locking the cache to avoid
5981 		// deadlocks)
5982 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5983 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5984 		if (range == NULL) {
5985 			error = B_NO_MEMORY;
5986 			break;
5987 		}
5988 
5989 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5990 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5991 
5992 		// mark the area range wired
5993 		area->Wire(range);
5994 
5995 		// Depending on the area cache type and the wiring, we may not need to
5996 		// look at the individual pages.
5997 		if (area->cache_type == CACHE_TYPE_NULL
5998 			|| area->cache_type == CACHE_TYPE_DEVICE
5999 			|| area->wiring == B_FULL_LOCK
6000 			|| area->wiring == B_CONTIGUOUS) {
6001 			nextAddress = areaEnd;
6002 			continue;
6003 		}
6004 
6005 		// Lock the area's cache chain and the translation map. Needed to look
6006 		// up pages and play with their wired count.
6007 		cacheChainLocker.LockAllSourceCaches();
6008 		map->Lock();
6009 
6010 		// iterate through the pages and wire them
6011 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6012 			phys_addr_t physicalAddress;
6013 			uint32 flags;
6014 
6015 			vm_page* page;
6016 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6017 				&& (flags & requiredProtection) == requiredProtection
6018 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6019 					!= NULL) {
6020 				// Already mapped with the correct permissions -- just increment
6021 				// the page's wired count.
6022 				increment_page_wired_count(page);
6023 			} else {
6024 				// Let vm_soft_fault() map the page for us, if possible. We need
6025 				// to fully unlock to avoid deadlocks. Since we have already
6026 				// wired the area itself, nothing disturbing will happen with it
6027 				// in the meantime.
6028 				map->Unlock();
6029 				cacheChainLocker.Unlock();
6030 				addressSpaceLocker.Unlock();
6031 
6032 				error = vm_soft_fault(addressSpace, nextAddress, writable,
6033 					false, isUser, &page);
6034 
6035 				addressSpaceLocker.Lock();
6036 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
6037 				cacheChainLocker.LockAllSourceCaches();
6038 				map->Lock();
6039 			}
6040 
6041 			if (error != B_OK)
6042 				break;
6043 		}
6044 
6045 		map->Unlock();
6046 
6047 		if (error == B_OK) {
6048 			cacheChainLocker.Unlock();
6049 		} else {
6050 			// An error occurred, so abort right here. If the current address
6051 			// is the first in this area, unwire the area, since we won't get
6052 			// to it when reverting what we've done so far.
6053 			if (nextAddress == areaStart) {
6054 				area->Unwire(range);
6055 				cacheChainLocker.Unlock();
6056 				range->~VMAreaWiredRange();
6057 				free_etc(range, mallocFlags);
6058 			} else
6059 				cacheChainLocker.Unlock();
6060 
6061 			break;
6062 		}
6063 	}
6064 
6065 	if (error != B_OK) {
6066 		// An error occurred, so unwire all that we've already wired. Note that
6067 		// even if not a single page was wired, unlock_memory_etc() is called
6068 		// to put the address space reference.
6069 		addressSpaceLocker.Unlock();
6070 		unlock_memory_etc(team, (void*)lockBaseAddress,
6071 			nextAddress - lockBaseAddress, flags);
6072 	}
6073 
6074 	return error;
6075 }
6076 
6077 
6078 status_t
6079 lock_memory(void* address, size_t numBytes, uint32 flags)
6080 {
6081 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6082 }
6083 
6084 
6085 /*!	Unwires an address range previously wired with lock_memory_etc().
6086 
6087 	Note that a call to this function must balance a previous lock_memory_etc()
6088 	call with exactly the same parameters.
6089 */
6090 status_t
6091 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
6092 {
6093 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
6094 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6095 
6096 	// compute the page protection that is required
6097 	bool isUser = IS_USER_ADDRESS(address);
6098 	bool writable = (flags & B_READ_DEVICE) == 0;
6099 	uint32 requiredProtection = PAGE_PRESENT
6100 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6101 	if (writable)
6102 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6103 
6104 	uint32 mallocFlags = isUser
6105 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6106 
6107 	// get and read lock the address space
6108 	VMAddressSpace* addressSpace = NULL;
6109 	if (isUser) {
6110 		if (team == B_CURRENT_TEAM)
6111 			addressSpace = VMAddressSpace::GetCurrent();
6112 		else
6113 			addressSpace = VMAddressSpace::Get(team);
6114 	} else
6115 		addressSpace = VMAddressSpace::GetKernel();
6116 	if (addressSpace == NULL)
6117 		return B_ERROR;
6118 
6119 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6120 		// Take over the address space reference. We don't unlock until we're
6121 		// done.
6122 
6123 	VMTranslationMap* map = addressSpace->TranslationMap();
6124 	status_t error = B_OK;
6125 
6126 	// iterate through all concerned areas
6127 	addr_t nextAddress = lockBaseAddress;
6128 	while (nextAddress != lockEndAddress) {
6129 		// get the next area
6130 		VMArea* area = addressSpace->LookupArea(nextAddress);
6131 		if (area == NULL) {
6132 			error = B_BAD_ADDRESS;
6133 			break;
6134 		}
6135 
6136 		addr_t areaStart = nextAddress;
6137 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6138 
6139 		// Lock the area's top cache. This is a requirement for
6140 		// VMArea::Unwire().
6141 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6142 
6143 		// Depending on the area cache type and the wiring, we may not need to
6144 		// look at the individual pages.
6145 		if (area->cache_type == CACHE_TYPE_NULL
6146 			|| area->cache_type == CACHE_TYPE_DEVICE
6147 			|| area->wiring == B_FULL_LOCK
6148 			|| area->wiring == B_CONTIGUOUS) {
6149 			// unwire the range (to avoid deadlocks we delete the range after
6150 			// unlocking the cache)
6151 			nextAddress = areaEnd;
6152 			VMAreaWiredRange* range = area->Unwire(areaStart,
6153 				areaEnd - areaStart, writable);
6154 			cacheChainLocker.Unlock();
6155 			if (range != NULL) {
6156 				range->~VMAreaWiredRange();
6157 				free_etc(range, mallocFlags);
6158 			}
6159 			continue;
6160 		}
6161 
6162 		// Lock the area's cache chain and the translation map. Needed to look
6163 		// up pages and play with their wired count.
6164 		cacheChainLocker.LockAllSourceCaches();
6165 		map->Lock();
6166 
6167 		// iterate through the pages and unwire them
6168 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6169 			phys_addr_t physicalAddress;
6170 			uint32 flags;
6171 
6172 			vm_page* page;
6173 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6174 				&& (flags & PAGE_PRESENT) != 0
6175 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6176 					!= NULL) {
				// The page is still mapped -- just decrement its wired count.
6179 				decrement_page_wired_count(page);
6180 			} else {
6181 				panic("unlock_memory_etc(): Failed to unwire page: address "
6182 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6183 					nextAddress);
6184 				error = B_BAD_VALUE;
6185 				break;
6186 			}
6187 		}
6188 
6189 		map->Unlock();
6190 
6191 		// All pages are unwired. Remove the area's wired range as well (to
6192 		// avoid deadlocks we delete the range after unlocking the cache).
6193 		VMAreaWiredRange* range = area->Unwire(areaStart,
6194 			areaEnd - areaStart, writable);
6195 
6196 		cacheChainLocker.Unlock();
6197 
6198 		if (range != NULL) {
6199 			range->~VMAreaWiredRange();
6200 			free_etc(range, mallocFlags);
6201 		}
6202 
6203 		if (error != B_OK)
6204 			break;
6205 	}
6206 
6207 	// get rid of the address space reference lock_memory_etc() acquired
6208 	addressSpace->Put();
6209 
6210 	return error;
6211 }
6212 
6213 
6214 status_t
6215 unlock_memory(void* address, size_t numBytes, uint32 flags)
6216 {
6217 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6218 }
6219 
6220 
6221 /*!	Similar to get_memory_map(), but also allows to specify the address space
6222 	for the memory in question and has a saner semantics.
6223 	Returns \c B_OK when the complete range could be translated or
6224 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6225 	case the actual number of entries is written to \c *_numEntries. Any other
6226 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6227 	in this case.
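
	A sketch of typical use (\c buffer and \c length are hypothetical):
	\code
	physical_entry table[8];
	uint32 numEntries = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &numEntries);
	// on B_OK (or B_BUFFER_OVERFLOW) numEntries holds the number of entries
	// that were filled in
	\endcode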
6228 */
6229 status_t
6230 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6231 	physical_entry* table, uint32* _numEntries)
6232 {
6233 	uint32 numEntries = *_numEntries;
6234 	*_numEntries = 0;
6235 
6236 	VMAddressSpace* addressSpace;
6237 	addr_t virtualAddress = (addr_t)address;
6238 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6239 	phys_addr_t physicalAddress;
6240 	status_t status = B_OK;
6241 	int32 index = -1;
6242 	addr_t offset = 0;
6243 	bool interrupts = are_interrupts_enabled();
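	// With interrupts disabled the translation map is not locked below;
	// QueryInterrupt() is used instead of Query().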
6244 
6245 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6246 		"entries)\n", team, address, numBytes, numEntries));
6247 
6248 	if (numEntries == 0 || numBytes == 0)
6249 		return B_BAD_VALUE;
6250 
6251 	// in which address space is the address to be found?
6252 	if (IS_USER_ADDRESS(virtualAddress)) {
6253 		if (team == B_CURRENT_TEAM)
6254 			addressSpace = VMAddressSpace::GetCurrent();
6255 		else
6256 			addressSpace = VMAddressSpace::Get(team);
6257 	} else
6258 		addressSpace = VMAddressSpace::GetKernel();
6259 
6260 	if (addressSpace == NULL)
6261 		return B_ERROR;
6262 
6263 	VMTranslationMap* map = addressSpace->TranslationMap();
6264 
6265 	if (interrupts)
6266 		map->Lock();
6267 
6268 	while (offset < numBytes) {
6269 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6270 		uint32 flags;
6271 
6272 		if (interrupts) {
6273 			status = map->Query((addr_t)address + offset, &physicalAddress,
6274 				&flags);
6275 		} else {
6276 			status = map->QueryInterrupt((addr_t)address + offset,
6277 				&physicalAddress, &flags);
6278 		}
6279 		if (status < B_OK)
6280 			break;
6281 		if ((flags & PAGE_PRESENT) == 0) {
6282 			panic("get_memory_map() called on unmapped memory!");
6283 			return B_BAD_ADDRESS;
6284 		}
6285 
6286 		if (index < 0 && pageOffset > 0) {
6287 			physicalAddress += pageOffset;
6288 			if (bytes > B_PAGE_SIZE - pageOffset)
6289 				bytes = B_PAGE_SIZE - pageOffset;
6290 		}
6291 
6292 		// need to switch to the next physical_entry?
6293 		if (index < 0 || table[index].address
6294 				!= physicalAddress - table[index].size) {
6295 			if ((uint32)++index + 1 > numEntries) {
				// table too small
6297 				break;
6298 			}
6299 			table[index].address = physicalAddress;
6300 			table[index].size = bytes;
6301 		} else {
			// the page is contiguous with the current entry -- extend it
6303 			table[index].size += bytes;
6304 		}
6305 
6306 		offset += bytes;
6307 	}
6308 
6309 	if (interrupts)
6310 		map->Unlock();
6311 
6312 	if (status != B_OK)
6313 		return status;
6314 
6315 	if ((uint32)index + 1 > numEntries) {
6316 		*_numEntries = index;
6317 		return B_BUFFER_OVERFLOW;
6318 	}
6319 
6320 	*_numEntries = index + 1;
6321 	return B_OK;
6322 }
6323 
6324 
6325 /*!	According to the BeBook, this function should always succeed.
6326 	This is no longer the case.
6327 */
6328 extern "C" int32
6329 __get_memory_map_haiku(const void* address, size_t numBytes,
6330 	physical_entry* table, int32 numEntries)
6331 {
6332 	uint32 entriesRead = numEntries;
6333 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6334 		table, &entriesRead);
6335 	if (error != B_OK)
6336 		return error;
6337 
6338 	// close the entry list
6339 
6340 	// if it's only one entry, we will silently accept the missing ending
6341 	if (numEntries == 1)
6342 		return B_OK;
6343 
6344 	if (entriesRead + 1 > (uint32)numEntries)
6345 		return B_BUFFER_OVERFLOW;
6346 
6347 	table[entriesRead].address = 0;
6348 	table[entriesRead].size = 0;
6349 
6350 	return B_OK;
6351 }
6352 
6353 
6354 area_id
6355 area_for(void* address)
6356 {
6357 	return vm_area_for((addr_t)address, true);
6358 }
6359 
6360 
6361 area_id
6362 find_area(const char* name)
6363 {
6364 	return VMAreas::Find(name);
6365 }
6366 
6367 
6368 status_t
6369 _get_area_info(area_id id, area_info* info, size_t size)
6370 {
6371 	if (size != sizeof(area_info) || info == NULL)
6372 		return B_BAD_VALUE;
6373 
6374 	AddressSpaceReadLocker locker;
6375 	VMArea* area;
6376 	status_t status = locker.SetFromArea(id, area);
6377 	if (status != B_OK)
6378 		return status;
6379 
6380 	fill_area_info(area, info, size);
6381 	return B_OK;
6382 }
6383 
6384 
6385 status_t
6386 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6387 {
6388 	addr_t nextBase = *(addr_t*)cookie;
6389 
6390 	// we're already through the list
6391 	if (nextBase == (addr_t)-1)
6392 		return B_ENTRY_NOT_FOUND;
6393 
6394 	if (team == B_CURRENT_TEAM)
6395 		team = team_get_current_team_id();
6396 
6397 	AddressSpaceReadLocker locker(team);
6398 	if (!locker.IsLocked())
6399 		return B_BAD_TEAM_ID;
6400 
6401 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6402 	if (area == NULL) {
6403 		nextBase = (addr_t)-1;
6404 		return B_ENTRY_NOT_FOUND;
6405 	}
6406 
6407 	fill_area_info(area, info, size);
6408 	*cookie = (ssize_t)(area->Base() + 1);
6409 
6410 	return B_OK;
6411 }
6412 
6413 
6414 status_t
6415 set_area_protection(area_id area, uint32 newProtection)
6416 {
6417 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6418 		newProtection, true);
6419 }
6420 
6421 
6422 status_t
6423 resize_area(area_id areaID, size_t newSize)
6424 {
6425 	return vm_resize_area(areaID, newSize, true);
6426 }
6427 
6428 
6429 /*!	Transfers the specified area to a new team. The caller must be the owner
6430 	of the area.
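
	A sketch of typical use (\c targetTeam is a hypothetical team ID; the last
	argument tells whether the call is made on behalf of the kernel):
	\code
	void* address = NULL;
	area_id newArea = transfer_area(id, &address, B_ANY_ADDRESS, targetTeam,
		true);
	\endcode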
6431 */
6432 area_id
6433 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6434 	bool kernel)
6435 {
6436 	area_info info;
6437 	status_t status = get_area_info(id, &info);
6438 	if (status != B_OK)
6439 		return status;
6440 
6441 	if (!kernel && info.team != thread_get_current_thread()->team->id)
6442 		return B_PERMISSION_DENIED;
6443 
6444 	// We need to mark the area cloneable so the following operations work.
6445 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6446 	if (status != B_OK)
6447 		return status;
6448 
6449 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6450 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6451 	if (clonedArea < 0)
6452 		return clonedArea;
6453 
6454 	status = vm_delete_area(info.team, id, kernel);
6455 	if (status != B_OK) {
6456 		vm_delete_area(target, clonedArea, kernel);
6457 		return status;
6458 	}
6459 
6460 	// Now we can reset the protection to whatever it was before.
6461 	set_area_protection(clonedArea, info.protection);
6462 
6463 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6464 
6465 	return clonedArea;
6466 }
6467 
6468 
6469 extern "C" area_id
6470 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6471 	size_t numBytes, uint32 addressSpec, uint32 protection,
6472 	void** _virtualAddress)
6473 {
6474 	if (!arch_vm_supports_protection(protection))
6475 		return B_NOT_SUPPORTED;
6476 
6477 	fix_protection(&protection);
6478 
6479 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6480 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6481 		false);
6482 }
6483 
6484 
6485 area_id
6486 clone_area(const char* name, void** _address, uint32 addressSpec,
6487 	uint32 protection, area_id source)
6488 {
6489 	if ((protection & B_KERNEL_PROTECTION) == 0)
6490 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6491 
6492 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6493 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6494 }
6495 
6496 
6497 area_id
6498 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6499 	uint32 protection, uint32 flags, uint32 guardSize,
6500 	const virtual_address_restrictions* virtualAddressRestrictions,
6501 	const physical_address_restrictions* physicalAddressRestrictions,
6502 	void** _address)
6503 {
6504 	fix_protection(&protection);
6505 
6506 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6507 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6508 		true, _address);
6509 }
6510 
6511 
6512 extern "C" area_id
6513 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6514 	size_t size, uint32 lock, uint32 protection)
6515 {
6516 	fix_protection(&protection);
6517 
6518 	virtual_address_restrictions virtualRestrictions = {};
6519 	virtualRestrictions.address = *_address;
6520 	virtualRestrictions.address_specification = addressSpec;
6521 	physical_address_restrictions physicalRestrictions = {};
6522 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6523 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6524 		true, _address);
6525 }
6526 
6527 
6528 status_t
6529 delete_area(area_id area)
6530 {
6531 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6532 }
6533 
6534 
6535 //	#pragma mark - Userland syscalls
6536 
6537 
6538 status_t
6539 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6540 	addr_t size)
6541 {
6542 	// filter out some unavailable values (for userland)
6543 	switch (addressSpec) {
6544 		case B_ANY_KERNEL_ADDRESS:
6545 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6546 			return B_BAD_VALUE;
6547 	}
6548 
6549 	addr_t address;
6550 
6551 	if (!IS_USER_ADDRESS(userAddress)
6552 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6553 		return B_BAD_ADDRESS;
6554 
6555 	status_t status = vm_reserve_address_range(
6556 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6557 		RESERVED_AVOID_BASE);
6558 	if (status != B_OK)
6559 		return status;
6560 
6561 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6562 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6563 			(void*)address, size);
6564 		return B_BAD_ADDRESS;
6565 	}
6566 
6567 	return B_OK;
6568 }
6569 
6570 
6571 status_t
6572 _user_unreserve_address_range(addr_t address, addr_t size)
6573 {
6574 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6575 		(void*)address, size);
6576 }
6577 
6578 
6579 area_id
6580 _user_area_for(void* address)
6581 {
6582 	return vm_area_for((addr_t)address, false);
6583 }
6584 
6585 
6586 area_id
6587 _user_find_area(const char* userName)
6588 {
6589 	char name[B_OS_NAME_LENGTH];
6590 
6591 	if (!IS_USER_ADDRESS(userName)
6592 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6593 		return B_BAD_ADDRESS;
6594 
6595 	return find_area(name);
6596 }
6597 
6598 
6599 status_t
6600 _user_get_area_info(area_id area, area_info* userInfo)
6601 {
6602 	if (!IS_USER_ADDRESS(userInfo))
6603 		return B_BAD_ADDRESS;
6604 
6605 	area_info info;
6606 	status_t status = get_area_info(area, &info);
6607 	if (status < B_OK)
6608 		return status;
6609 
6610 	// TODO: do we want to prevent userland from seeing kernel protections?
6611 	//info.protection &= B_USER_PROTECTION;
6612 
6613 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6614 		return B_BAD_ADDRESS;
6615 
6616 	return status;
6617 }
6618 
6619 
6620 status_t
6621 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6622 {
6623 	ssize_t cookie;
6624 
6625 	if (!IS_USER_ADDRESS(userCookie)
6626 		|| !IS_USER_ADDRESS(userInfo)
6627 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6628 		return B_BAD_ADDRESS;
6629 
6630 	area_info info;
6631 	status_t status = _get_next_area_info(team, &cookie, &info,
6632 		sizeof(area_info));
6633 	if (status != B_OK)
6634 		return status;
6635 
6636 	//info.protection &= B_USER_PROTECTION;
6637 
6638 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6639 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6640 		return B_BAD_ADDRESS;
6641 
6642 	return status;
6643 }
6644 
6645 
6646 status_t
6647 _user_set_area_protection(area_id area, uint32 newProtection)
6648 {
6649 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6650 		return B_BAD_VALUE;
6651 
6652 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6653 		newProtection, false);
6654 }
6655 
6656 
6657 status_t
6658 _user_resize_area(area_id area, size_t newSize)
6659 {
6660 	// TODO: Since we restrict deleting of areas to those owned by the team,
6661 	// we should also do that for resizing (check other functions, too).
6662 	return vm_resize_area(area, newSize, false);
6663 }
6664 
6665 
6666 area_id
6667 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6668 	team_id target)
6669 {
6670 	// filter out some unavailable values (for userland)
6671 	switch (addressSpec) {
6672 		case B_ANY_KERNEL_ADDRESS:
6673 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6674 			return B_BAD_VALUE;
6675 	}
6676 
6677 	void* address;
6678 	if (!IS_USER_ADDRESS(userAddress)
6679 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6680 		return B_BAD_ADDRESS;
6681 
6682 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6683 	if (newArea < B_OK)
6684 		return newArea;
6685 
6686 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6687 		return B_BAD_ADDRESS;
6688 
6689 	return newArea;
6690 }
6691 
6692 
6693 area_id
6694 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6695 	uint32 protection, area_id sourceArea)
6696 {
6697 	char name[B_OS_NAME_LENGTH];
6698 	void* address;
6699 
6700 	// filter out some unavailable values (for userland)
6701 	switch (addressSpec) {
6702 		case B_ANY_KERNEL_ADDRESS:
6703 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6704 			return B_BAD_VALUE;
6705 	}
6706 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6707 		return B_BAD_VALUE;
6708 
6709 	if (!IS_USER_ADDRESS(userName)
6710 		|| !IS_USER_ADDRESS(userAddress)
6711 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6712 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6713 		return B_BAD_ADDRESS;
6714 
6715 	fix_protection(&protection);
6716 
6717 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6718 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6719 		false);
6720 	if (clonedArea < B_OK)
6721 		return clonedArea;
6722 
6723 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6724 		delete_area(clonedArea);
6725 		return B_BAD_ADDRESS;
6726 	}
6727 
6728 	return clonedArea;
6729 }
6730 
6731 
6732 area_id
6733 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6734 	size_t size, uint32 lock, uint32 protection)
6735 {
6736 	char name[B_OS_NAME_LENGTH];
6737 	void* address;
6738 
6739 	// filter out some unavailable values (for userland)
6740 	switch (addressSpec) {
6741 		case B_ANY_KERNEL_ADDRESS:
6742 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6743 			return B_BAD_VALUE;
6744 	}
6745 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6746 		return B_BAD_VALUE;
6747 
6748 	if (!IS_USER_ADDRESS(userName)
6749 		|| !IS_USER_ADDRESS(userAddress)
6750 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6751 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6752 		return B_BAD_ADDRESS;
6753 
6754 	if (addressSpec == B_EXACT_ADDRESS
6755 		&& IS_KERNEL_ADDRESS(address))
6756 		return B_BAD_VALUE;
6757 
6758 	if (addressSpec == B_ANY_ADDRESS)
6759 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6760 	if (addressSpec == B_BASE_ADDRESS)
6761 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6762 
6763 	fix_protection(&protection);
6764 
6765 	virtual_address_restrictions virtualRestrictions = {};
6766 	virtualRestrictions.address = address;
6767 	virtualRestrictions.address_specification = addressSpec;
6768 	physical_address_restrictions physicalRestrictions = {};
6769 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6770 		size, lock, protection, 0, 0, &virtualRestrictions,
6771 		&physicalRestrictions, false, &address);
6772 
6773 	if (area >= B_OK
6774 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6775 		delete_area(area);
6776 		return B_BAD_ADDRESS;
6777 	}
6778 
6779 	return area;
6780 }
6781 
6782 
6783 status_t
6784 _user_delete_area(area_id area)
6785 {
6786 	// Unlike the BeOS implementation, you can now only delete areas
6787 	// that you have created yourself from userland.
6788 	// The documentation to delete_area() explicitly states that this
6789 	// will be restricted in the future, and so it will.
6790 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6791 }
6792 
6793 
6794 // TODO: create a BeOS style call for this!
6795 
6796 area_id
6797 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6798 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6799 	int fd, off_t offset)
6800 {
6801 	char name[B_OS_NAME_LENGTH];
6802 	void* address;
6803 	area_id area;
6804 
6805 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6806 		return B_BAD_VALUE;
6807 
6808 	fix_protection(&protection);
6809 
6810 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6811 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6812 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6813 		return B_BAD_ADDRESS;
6814 
6815 	if (addressSpec == B_EXACT_ADDRESS) {
6816 		if ((addr_t)address + size < (addr_t)address
6817 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6818 			return B_BAD_VALUE;
6819 		}
6820 		if (!IS_USER_ADDRESS(address)
6821 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6822 			return B_BAD_ADDRESS;
6823 		}
6824 	}
6825 
6826 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6827 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6828 		false);
6829 	if (area < B_OK)
6830 		return area;
6831 
6832 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6833 		return B_BAD_ADDRESS;
6834 
6835 	return area;
6836 }
6837 
6838 
6839 status_t
6840 _user_unmap_memory(void* _address, size_t size)
6841 {
6842 	addr_t address = (addr_t)_address;
6843 
6844 	// check params
6845 	if (size == 0 || (addr_t)address + size < (addr_t)address
6846 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6847 		return B_BAD_VALUE;
6848 	}
6849 
6850 	if (!IS_USER_ADDRESS(address)
6851 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6852 		return B_BAD_ADDRESS;
6853 	}
6854 
6855 	// Write lock the address space and ensure the address range is not wired.
6856 	AddressSpaceWriteLocker locker;
6857 	do {
6858 		status_t status = locker.SetTo(team_get_current_team_id());
6859 		if (status != B_OK)
6860 			return status;
6861 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6862 			size, &locker));
6863 
6864 	// unmap
6865 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6866 }
6867 
6868 
6869 status_t
6870 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6871 {
6872 	// check address range
6873 	addr_t address = (addr_t)_address;
6874 	size = PAGE_ALIGN(size);
6875 
6876 	if ((address % B_PAGE_SIZE) != 0)
6877 		return B_BAD_VALUE;
6878 	if (!is_user_address_range(_address, size)) {
6879 		// weird error code required by POSIX
6880 		return ENOMEM;
6881 	}
6882 
6883 	// extend and check protection
6884 	if ((protection & ~B_USER_PROTECTION) != 0)
6885 		return B_BAD_VALUE;
6886 
6887 	fix_protection(&protection);
6888 
6889 	// We need to write lock the address space, since we're going to play with
6890 	// the areas. Also make sure that none of the areas is wired and that we're
6891 	// actually allowed to change the protection.
6892 	AddressSpaceWriteLocker locker;
6893 
6894 	bool restart;
6895 	do {
6896 		restart = false;
6897 
6898 		status_t status = locker.SetTo(team_get_current_team_id());
6899 		if (status != B_OK)
6900 			return status;
6901 
6902 		// First round: Check whether the whole range is covered by areas and we
6903 		// are allowed to modify them.
6904 		addr_t currentAddress = address;
6905 		size_t sizeLeft = size;
6906 		while (sizeLeft > 0) {
6907 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6908 			if (area == NULL)
6909 				return B_NO_MEMORY;
6910 
6911 			if ((area->protection & B_KERNEL_AREA) != 0)
6912 				return B_NOT_ALLOWED;
6913 			if (area->protection_max != 0
6914 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6915 				return B_NOT_ALLOWED;
6916 			}
6917 
6918 			addr_t offset = currentAddress - area->Base();
6919 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6920 
6921 			AreaCacheLocker cacheLocker(area);
6922 
6923 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6924 					&locker, &cacheLocker)) {
6925 				restart = true;
6926 				break;
6927 			}
6928 
6929 			cacheLocker.Unlock();
6930 
6931 			currentAddress += rangeSize;
6932 			sizeLeft -= rangeSize;
6933 		}
6934 	} while (restart);
6935 
6936 	// Second round: If the protection differs from that of the area, create a
6937 	// page protection array and re-map mapped pages.
6938 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6939 	addr_t currentAddress = address;
6940 	size_t sizeLeft = size;
6941 	while (sizeLeft > 0) {
6942 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6943 		if (area == NULL)
6944 			return B_NO_MEMORY;
6945 
6946 		addr_t offset = currentAddress - area->Base();
6947 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6948 
6949 		currentAddress += rangeSize;
6950 		sizeLeft -= rangeSize;
6951 
6952 		if (area->page_protections == NULL) {
6953 			if (area->protection == protection)
6954 				continue;
6955 			if (offset == 0 && rangeSize == area->Size()) {
6956 				// The whole area is covered: let set_area_protection handle it.
6957 				status_t status = vm_set_area_protection(area->address_space->ID(),
6958 					area->id, protection, false);
6959 				if (status != B_OK)
6960 					return status;
6961 				continue;
6962 			}
6963 
6964 			status_t status = allocate_area_page_protections(area);
6965 			if (status != B_OK)
6966 				return status;
6967 		}
6968 
6969 		// We need to lock the complete cache chain, since we potentially unmap
6970 		// pages of lower caches.
6971 		VMCache* topCache = vm_area_get_locked_cache(area);
6972 		VMCacheChainLocker cacheChainLocker(topCache);
6973 		cacheChainLocker.LockAllSourceCaches();
6974 
6975 		// Adjust the committed size, if necessary.
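		// (The check below matters only for copy-on-write caches, i.e.
		// temporary caches with a source: pages not yet copied into the top
		// cache need committed memory only if they are writable, since a
		// write will create a private copy. Making such a page writable thus
		// raises the needed commitment by one page; making it read-only
		// lowers it.)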
6976 		if (topCache->source != NULL && topCache->temporary) {
6977 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6978 			ssize_t commitmentChange = 0;
6979 			for (addr_t pageAddress = area->Base() + offset;
6980 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6981 				if (topCache->LookupPage(pageAddress) != NULL) {
6982 					// This page should already be accounted for in the commitment.
6983 					continue;
6984 				}
6985 
6986 				const bool isWritable
6987 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6988 
6989 				if (becomesWritable && !isWritable)
6990 					commitmentChange += B_PAGE_SIZE;
6991 				else if (!becomesWritable && isWritable)
6992 					commitmentChange -= B_PAGE_SIZE;
6993 			}
6994 
6995 			if (commitmentChange != 0) {
6996 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6997 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6998 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6999 				if (status != B_OK)
7000 					return status;
7001 			}
7002 		}
7003 
7004 		for (addr_t pageAddress = area->Base() + offset;
7005 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
7006 			map->Lock();
7007 
7008 			set_area_page_protection(area, pageAddress, protection);
7009 
7010 			phys_addr_t physicalAddress;
7011 			uint32 flags;
7012 
7013 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
7014 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
7015 				map->Unlock();
7016 				continue;
7017 			}
7018 
7019 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
7020 			if (page == NULL) {
7021 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
7022 					"\n", area, physicalAddress);
7023 				map->Unlock();
7024 				return B_ERROR;
7025 			}
7026 
7027 			// If the page is not in the topmost cache and write access is
7028 			// requested, we have to unmap it. Otherwise we can re-map it with
7029 			// the new protection.
7030 			bool unmapPage = page->Cache() != topCache
7031 				&& (protection & B_WRITE_AREA) != 0;
7032 
7033 			if (!unmapPage)
7034 				map->ProtectPage(area, pageAddress, protection);
7035 
7036 			map->Unlock();
7037 
7038 			if (unmapPage) {
7039 				DEBUG_PAGE_ACCESS_START(page);
7040 				unmap_page(area, pageAddress);
7041 				DEBUG_PAGE_ACCESS_END(page);
7042 			}
7043 		}
7044 	}
7045 
7046 	return B_OK;
7047 }
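// Userland usage sketch (assumption: mprotect() in libroot forwards here via
// _kern_set_memory_protection()). The address must be page aligned and the
// range fully covered by areas, otherwise the call fails (see the ENOMEM note
// above):
//
//	void* page = mmap(NULL, B_PAGE_SIZE, PROT_READ | PROT_WRITE,
//		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//	mprotect(page, B_PAGE_SIZE, PROT_READ);	// revoke write access again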
7048 
7049 
7050 status_t
7051 _user_sync_memory(void* _address, size_t size, uint32 flags)
7052 {
7053 	addr_t address = (addr_t)_address;
7054 	size = PAGE_ALIGN(size);
7055 
7056 	// check params
7057 	if ((address % B_PAGE_SIZE) != 0)
7058 		return B_BAD_VALUE;
7059 	if (!is_user_address_range(_address, size)) {
7060 		// weird error code required by POSIX
7061 		return ENOMEM;
7062 	}
7063 
7064 	bool writeSync = (flags & MS_SYNC) != 0;
7065 	bool writeAsync = (flags & MS_ASYNC) != 0;
7066 	if (writeSync && writeAsync)
7067 		return B_BAD_VALUE;
7068 
7069 	if (size == 0 || (!writeSync && !writeAsync))
7070 		return B_OK;
7071 
7072 	// iterate through the range and sync all concerned areas
7073 	while (size > 0) {
7074 		// read lock the address space
7075 		AddressSpaceReadLocker locker;
7076 		status_t error = locker.SetTo(team_get_current_team_id());
7077 		if (error != B_OK)
7078 			return error;
7079 
7080 		// get the first area
7081 		VMArea* area = locker.AddressSpace()->LookupArea(address);
7082 		if (area == NULL)
7083 			return B_NO_MEMORY;
7084 
7085 		uint32 offset = address - area->Base();
7086 		size_t rangeSize = min_c(area->Size() - offset, size);
7087 		offset += area->cache_offset;
7088 
7089 		// lock the cache
7090 		AreaCacheLocker cacheLocker(area);
7091 		if (!cacheLocker)
7092 			return B_BAD_VALUE;
7093 		VMCache* cache = area->cache;
7094 
7095 		locker.Unlock();
7096 
7097 		uint32 firstPage = offset >> PAGE_SHIFT;
7098 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
7099 
7100 		// write the pages
7101 		if (cache->type == CACHE_TYPE_VNODE) {
7102 			if (writeSync) {
7103 				// synchronous
7104 				error = vm_page_write_modified_page_range(cache, firstPage,
7105 					endPage);
7106 				if (error != B_OK)
7107 					return error;
7108 			} else {
7109 				// asynchronous
7110 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
7111 				// TODO: This is probably not quite what is supposed to happen.
7112 				// Especially when a lot has to be written, it might take ages
7113 				// until it really hits the disk.
7114 			}
7115 		}
7116 
7117 		address += rangeSize;
7118 		size -= rangeSize;
7119 	}
7120 
7121 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
7122 	// synchronize multiple mappings of the same file. In our VM they never get
7123 	// out of sync, though, so we don't have to do anything.
7124 
7125 	return B_OK;
7126 }
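// Userland usage sketch (assumption: msync() in libroot forwards here via
// _kern_sync_memory()). Only vnode (file) backed caches trigger any writing,
// and MS_SYNC and MS_ASYNC are mutually exclusive:
//
//	int fd = open("/path/to/some/file", O_RDWR);	// placeholder path
//	void* base = mmap(NULL, B_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
//		fd, 0);
//	((char*)base)[0] = 'x';
//	msync(base, B_PAGE_SIZE, MS_SYNC);	// write the dirty page back now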
7127 
7128 
7129 status_t
7130 _user_memory_advice(void* _address, size_t size, uint32 advice)
7131 {
7132 	addr_t address = (addr_t)_address;
7133 	if ((address % B_PAGE_SIZE) != 0)
7134 		return B_BAD_VALUE;
7135 
7136 	size = PAGE_ALIGN(size);
7137 	if (!is_user_address_range(_address, size)) {
7138 		// weird error code required by POSIX
7139 		return B_NO_MEMORY;
7140 	}
7141 
7142 	switch (advice) {
7143 		case MADV_NORMAL:
7144 		case MADV_SEQUENTIAL:
7145 		case MADV_RANDOM:
7146 		case MADV_WILLNEED:
7147 		case MADV_DONTNEED:
7148 			// TODO: Implement!
7149 			break;
7150 
7151 		case MADV_FREE:
7152 		{
7153 			AddressSpaceWriteLocker locker;
7154 			do {
7155 				status_t status = locker.SetTo(team_get_current_team_id());
7156 				if (status != B_OK)
7157 					return status;
7158 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7159 					address, size, &locker));
7160 
7161 			discard_address_range(locker.AddressSpace(), address, size, false);
7162 			break;
7163 		}
7164 
7165 		default:
7166 			return B_BAD_VALUE;
7167 	}
7168 
7169 	return B_OK;
7170 }
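// Userland usage sketch (assumption: madvise() in libroot forwards here via
// _kern_memory_advice()). As the switch above shows, only MADV_FREE currently
// does anything; the other advice values are accepted but ignored:
//
//	madvise(base, B_PAGE_SIZE, MADV_FREE);	// discard the pages, keep the mapping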
7171 
7172 
7173 status_t
7174 _user_get_memory_properties(team_id teamID, const void* address,
7175 	uint32* _protected, uint32* _lock)
7176 {
7177 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7178 		return B_BAD_ADDRESS;
7179 
7180 	AddressSpaceReadLocker locker;
7181 	status_t error = locker.SetTo(teamID);
7182 	if (error != B_OK)
7183 		return error;
7184 
7185 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7186 	if (area == NULL)
7187 		return B_NO_MEMORY;
7188 
7189 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7190 	uint32 wiring = area->wiring;
7191 
7192 	locker.Unlock();
7193 
7194 	error = user_memcpy(_protected, &protection, sizeof(protection));
7195 	if (error != B_OK)
7196 		return error;
7197 
7198 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7199 
7200 	return error;
7201 }
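// Userland sketch (assumption): the matching syscall stub would be
// _kern_get_memory_properties(team, address, &protection, &lock). It reports
// the effective protection of the page containing the address (B_READ_AREA,
// B_WRITE_AREA, ... bits) and the wiring mode of its area (B_NO_LOCK,
// B_FULL_LOCK, ...).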
7202 
7203 
7204 static status_t
7205 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7206 {
7207 #if ENABLE_SWAP_SUPPORT
7208 	// check address range
7209 	addr_t address = (addr_t)_address;
7210 	size = PAGE_ALIGN(size);
7211 
7212 	if ((address % B_PAGE_SIZE) != 0)
7213 		return EINVAL;
7214 	if (!is_user_address_range(_address, size))
7215 		return EINVAL;
7216 
7217 	const addr_t endAddress = address + size;
7218 
7219 	AddressSpaceReadLocker addressSpaceLocker;
7220 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7221 	if (error != B_OK)
7222 		return error;
7223 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7224 
7225 	// iterate through all concerned areas
7226 	addr_t nextAddress = address;
7227 	while (nextAddress != endAddress) {
7228 		// get the next area
7229 		VMArea* area = addressSpace->LookupArea(nextAddress);
7230 		if (area == NULL) {
7231 			error = B_BAD_ADDRESS;
7232 			break;
7233 		}
7234 
7235 		const addr_t areaStart = nextAddress;
7236 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7237 		nextAddress = areaEnd;
7238 
7239 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7240 		if (error != B_OK) {
7241 			// We don't need to unset or reset things on failure.
7242 			break;
7243 		}
7244 
7245 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7246 		VMAnonymousCache* anonCache = NULL;
7247 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7248 			// This memory can never be swapped anyway. Nothing to do.
7249 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7250 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7251 				areaEnd - areaStart, swappable);
7252 		} else {
7253 			// Some other cache type? We cannot affect anything here.
7254 			error = EINVAL;
7255 		}
7256 
7257 		cacheChainLocker.Unlock();
7258 
7259 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7260 		if (error != B_OK)
7261 			break;
7262 	}
7263 
7264 	return error;
7265 #else
7266 	// No swap support? Nothing to do.
7267 	return B_OK;
7268 #endif
7269 }
7270 
7271 
7272 status_t
7273 _user_mlock(const void* _address, size_t size)
7274 {
7275 	return user_set_memory_swappable(_address, size, false);
7276 }
7277 
7278 
7279 status_t
7280 _user_munlock(const void* _address, size_t size)
7281 {
7282 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7283 	// if multiple clones of an area had mlock() called on them,
7284 	// munlock() must also be called on all of them to actually unlock.
7285 	// (At present, the first munlock() will unlock all.)
7286 	// TODO: fork() should automatically unlock memory in the child.
7287 	return user_set_memory_swappable(_address, size, true);
7288 }
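// Userland usage sketch (assumption: mlock()/munlock() in libroot forward
// here via _kern_mlock()/_kern_munlock()). As implemented above, mlock() does
// not leave the range wired; it only marks the anonymous memory as not
// swappable, and munlock() makes it swappable again:
//
//	void* secret = mmap(NULL, B_PAGE_SIZE, PROT_READ | PROT_WRITE,
//		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//	mlock(secret, B_PAGE_SIZE);	// keep this page out of swap
//	// ... store sensitive data ...
//	munlock(secret, B_PAGE_SIZE);
//	munmap(secret, B_PAGE_SIZE);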
7289 
7290 
7291 // #pragma mark -- compatibility
7292 
7293 
7294 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7295 
7296 
7297 struct physical_entry_beos {
7298 	uint32	address;
7299 	uint32	size;
7300 };
7301 
7302 
7303 /*!	The physical_entry structure has changed. We need to translate it to the
7304 	old one.
7305 */
7306 extern "C" int32
7307 __get_memory_map_beos(const void* _address, size_t numBytes,
7308 	physical_entry_beos* table, int32 numEntries)
7309 {
7310 	if (numEntries <= 0)
7311 		return B_BAD_VALUE;
7312 
7313 	const uint8* address = (const uint8*)_address;
7314 
7315 	int32 count = 0;
7316 	while (numBytes > 0 && count < numEntries) {
7317 		physical_entry entry;
7318 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7319 		if (result < 0) {
7320 			if (result != B_BUFFER_OVERFLOW)
7321 				return result;
7322 		}
7323 
7324 		if (entry.address >= (phys_addr_t)1 << 32) {
7325 			panic("get_memory_map(): Address is greater than 4 GB!");
7326 			return B_ERROR;
7327 		}
7328 
7329 		table[count].address = entry.address;
7330 		table[count++].size = entry.size;
7331 
7332 		address += entry.size;
7333 		numBytes -= entry.size;
7334 	}
7335 
7336 	// null-terminate the table, if possible
7337 	if (count < numEntries) {
7338 		table[count].address = 0;
7339 		table[count].size = 0;
7340 	}
7341 
7342 	return B_OK;
7343 }
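// With symbol versioning, a binary built against the old (BASE) ABI, whose
// physical_entry still has 32-bit address and size fields, is bound to the
// wrapper above: it queries the current implementation one entry at a time
// and narrows each result, panicking if a physical address does not fit into
// 32 bits.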
7344 
7345 
7346 /*!	The type of the \a physicalAddress parameter has changed from void* to
7347 	phys_addr_t.
7348 */
7349 extern "C" area_id
7350 __map_physical_memory_beos(const char* name, void* physicalAddress,
7351 	size_t numBytes, uint32 addressSpec, uint32 protection,
7352 	void** _virtualAddress)
7353 {
7354 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7355 		addressSpec, protection, _virtualAddress);
7356 }
7357 
7358 
7359 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7360 	we meddle with the \a lock parameter to force 32 bit physical memory.
7361 */
7362 extern "C" area_id
7363 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7364 	size_t size, uint32 lock, uint32 protection)
7365 {
7366 	switch (lock) {
7367 		case B_NO_LOCK:
7368 			break;
7369 		case B_FULL_LOCK:
7370 		case B_LAZY_LOCK:
7371 			lock = B_32_BIT_FULL_LOCK;
7372 			break;
7373 		case B_CONTIGUOUS:
7374 			lock = B_32_BIT_CONTIGUOUS;
7375 			break;
7376 	}
7377 
7378 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7379 		protection);
7380 }
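// For example, a BASE-versioned (legacy) binary calling
//
//	void* address = NULL;
//	area_id area = create_area("dma buffer", &address, B_ANY_ADDRESS,
//		B_PAGE_SIZE, B_CONTIGUOUS, B_READ_AREA | B_WRITE_AREA);
//
// is routed to the wrapper above and effectively allocates with
// B_32_BIT_CONTIGUOUS, keeping the backing physical memory below 4 GB as the
// old ABI implicitly assumed.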
7381 
7382 
7383 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7384 	"BASE");
7385 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7386 	"map_physical_memory@", "BASE");
7387 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7388 	"BASE");
7389 
7390 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7391 	"get_memory_map@@", "1_ALPHA3");
7392 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7393 	"map_physical_memory@@", "1_ALPHA3");
7394 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7395 	"1_ALPHA3");
7396 
7397 
7398 #else
7399 
7400 
7401 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7402 	"get_memory_map@@", "BASE");
7403 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7404 	"map_physical_memory@@", "BASE");
7405 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7406 	"BASE");
7407 
7408 
7409 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7410