xref: /haiku/src/system/kernel/vm/vm.cpp (revision f97357a6ed57951086b29e0fa0b3a7393feecf72)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
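// Note: AreaCacheLocker never locks a cache itself (Lock() returns false);
// it only adopts a cache that is already locked, either passed in directly
// or obtained via vm_area_get_locked_cache(), and releases it again through
// vm_area_put_locked_cache() when the locker is destroyed or unlocked.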
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
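// Note: VMCacheChainLocker keeps an entire cache chain locked, from the top
// (consumer) cache down to the bottom-most source cache. LockSourceCache()
// walks one step down the source link and remembers the consumer in the
// source's UserData(), so that Unlock() can later walk from fBottomCache back
// up to fTopCache and release the locks in source -> consumer order.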
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int protectionMax, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
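// Layout note: the page_protections array packs two pages per byte. The page
// with the even index uses the low nibble, its odd neighbor the high nibble.
// For example, storing protection 0x3 for an odd page index sets the byte's
// high nibble to 3 and leaves the low nibble (the even neighbor's protection)
// untouched.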
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so we must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active, since
575 		// otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
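// Note on map_page() above: for B_NO_LOCK areas every mapping gets its own
// vm_page_mapping object (allocated from gPageMappingsObjectCache) and is
// added to both the page's and the area's mapping list; for all other wiring
// types no mapping object is created and the mapping is only reflected in the
// page's wired count via increment_page_wired_count().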
586 
587 
588 /*!	The caller must hold the lock of the page's cache.
589 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the lock of all mapped pages' caches.
600 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
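// Worked example for intersect_area(): for an area with Base() 0x1000 and
// Size() 0x3000, a request of address 0x800/size 0x1000 is clipped to
// address 0x1000, size 0x800, offset 0 (the part below the area is dropped),
// while a request of address 0x2000/size 0x8000 becomes size 0x2000,
// offset 0x1000 (clamped at the area's end).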
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the begin section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
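// Summary of the middle-cut case above: the original area is shrunk to the
// part in front of the cut and a second area is created for the part behind
// it. If the area was the sole user of an anonymous (RAM) cache, the pages
// behind the cut are moved into a newly created anonymous cache via Adopt()
// and the original cache is resized; otherwise both areas simply share the
// original cache and only an additional cache reference is acquired.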
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note that in case of error the cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 
945 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
946 		| HEAP_DONT_LOCK_KERNEL_SPACE;
947 	int priority;
948 	if (addressSpace != VMAddressSpace::Kernel()) {
949 		priority = VM_PRIORITY_USER;
950 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
951 		priority = VM_PRIORITY_VIP;
952 		allocationFlags |= HEAP_PRIORITY_VIP;
953 	} else
954 		priority = VM_PRIORITY_SYSTEM;
955 
956 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
957 		allocationFlags);
958 	if (area == NULL)
959 		return B_NO_MEMORY;
960 	if (mapping != REGION_PRIVATE_MAP)
961 		area->protection_max = protectionMax & B_USER_PROTECTION;
962 
963 	status_t status;
964 
965 	// if this is a private map, we need to create a new cache
966 	// to handle the private copies of pages as they are written to
967 	VMCache* sourceCache = cache;
968 	if (mapping == REGION_PRIVATE_MAP) {
969 		VMCache* newCache;
970 
971 		// create an anonymous cache
972 		status = VMCacheFactory::CreateAnonymousCache(newCache,
973 			(protection & B_STACK_AREA) != 0
974 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
975 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
976 		if (status != B_OK)
977 			goto err1;
978 
979 		newCache->Lock();
980 		newCache->temporary = 1;
981 		newCache->virtual_base = offset;
982 		newCache->virtual_end = offset + size;
983 
984 		cache->AddConsumer(newCache);
985 
986 		cache = newCache;
987 	}
988 
989 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
990 		status = cache->SetMinimalCommitment(size, priority);
991 		if (status != B_OK)
992 			goto err2;
993 	}
994 
995 	// check to see if this address space has entered DELETE state
996 	if (addressSpace->IsBeingDeleted()) {
997 		// okay, someone is trying to delete this address space now, so we
998 		// can't insert the area and have to back out
999 		status = B_BAD_TEAM_ID;
1000 		goto err2;
1001 	}
1002 
1003 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1004 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1005 		status = unmap_address_range(addressSpace,
1006 			(addr_t)addressRestrictions->address, size, kernel);
1007 		if (status != B_OK)
1008 			goto err2;
1009 	}
1010 
1011 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1012 		allocationFlags, _virtualAddress);
1013 	if (status == B_NO_MEMORY
1014 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1015 		// TODO: At present, there is no way to notify the low_resource monitor
1016 		// that kernel address space is fragmented, nor does it check for this
1017 		// automatically. Due to how many locks are held, we cannot wait here
1018 		// for space to be freed up, but it would be good to at least notify
1019 		// that we tried and failed to allocate some amount.
1020 	}
1021 	if (status != B_OK)
1022 		goto err2;
1023 
1024 	// attach the cache to the area
1025 	area->cache = cache;
1026 	area->cache_offset = offset;
1027 
1028 	// point the cache back to the area
1029 	cache->InsertAreaLocked(area);
1030 	if (mapping == REGION_PRIVATE_MAP)
1031 		cache->Unlock();
1032 
1033 	// insert the area in the global area hash table
1034 	VMAreaHash::Insert(area);
1035 
1036 	// grab a ref to the address space (the area holds this)
1037 	addressSpace->Get();
1038 
1039 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1040 //		cache, sourceCache, areaName, area);
1041 
1042 	*_area = area;
1043 	return B_OK;
1044 
1045 err2:
1046 	if (mapping == REGION_PRIVATE_MAP) {
1047 		// We created this cache, so we must delete it again. Note that we
1048 		// need to temporarily unlock the source cache or we'll otherwise
1049 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1050 		sourceCache->Unlock();
1051 		cache->ReleaseRefAndUnlock();
1052 		sourceCache->Lock();
1053 	}
1054 err1:
1055 	addressSpace->DeleteArea(area, allocationFlags);
1056 	return status;
1057 }
1058 
1059 
1060 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1061 	  locker1, locker2).
1062 */
1063 template<typename LockerType1, typename LockerType2>
1064 static inline bool
1065 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1066 {
1067 	area->cache->AssertLocked();
1068 
1069 	VMAreaUnwiredWaiter waiter;
1070 	if (!area->AddWaiterIfWired(&waiter))
1071 		return false;
1072 
1073 	// unlock everything and wait
1074 	if (locker1 != NULL)
1075 		locker1->Unlock();
1076 	if (locker2 != NULL)
1077 		locker2->Unlock();
1078 
1079 	waiter.waitEntry.Wait();
1080 
1081 	return true;
1082 }
1083 
1084 
1085 /*!	Checks whether the given area has any wired ranges intersecting with the
1086 	specified range and waits, if so.
1087 
1088 	When it has to wait, the function calls \c Unlock() on both \a locker1
1089 	and \a locker2, if given.
1090 	The area's top cache must be locked and must be unlocked as a side effect
1091 	of calling \c Unlock() on either \a locker1 or \a locker2.
1092 
1093 	If the function does not have to wait it does not modify or unlock any
1094 	object.
1095 
1096 	\param area The area to be checked.
1097 	\param base The base address of the range to check.
1098 	\param size The size of the address range to check.
1099 	\param locker1 An object to be unlocked before starting to wait (may
1100 		be \c NULL).
1101 	\param locker2 An object to be unlocked before starting to wait (may
1102 		be \c NULL).
1103 	\return \c true, if the function had to wait, \c false otherwise.
1104 */
1105 template<typename LockerType1, typename LockerType2>
1106 static inline bool
1107 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1108 	LockerType1* locker1, LockerType2* locker2)
1109 {
1110 	area->cache->AssertLocked();
1111 
1112 	VMAreaUnwiredWaiter waiter;
1113 	if (!area->AddWaiterIfWired(&waiter, base, size))
1114 		return false;
1115 
1116 	// unlock everything and wait
1117 	if (locker1 != NULL)
1118 		locker1->Unlock();
1119 	if (locker2 != NULL)
1120 		locker2->Unlock();
1121 
1122 	waiter.waitEntry.Wait();
1123 
1124 	return true;
1125 }
1126 
1127 
1128 /*!	Checks whether the given address space has any wired ranges intersecting
1129 	with the specified range and waits, if so.
1130 
1131 	Similar to wait_if_area_range_is_wired(), with the following differences:
1132 	- All areas intersecting with the range are checked (respectively all until
1133 	  one is found that contains a wired range intersecting with the given
1134 	  range).
1135 	- The given address space must at least be read-locked and must be unlocked
1136 	  when \c Unlock() is called on \a locker.
1137 	- None of the areas' caches are allowed to be locked.
1138 */
1139 template<typename LockerType>
1140 static inline bool
1141 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1142 	size_t size, LockerType* locker)
1143 {
1144 	for (VMAddressSpace::AreaRangeIterator it
1145 		= addressSpace->GetAreaRangeIterator(base, size);
1146 			VMArea* area = it.Next();) {
1147 
1148 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1149 
1150 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1151 			return true;
1152 	}
1153 
1154 	return false;
1155 }
1156 
1157 
1158 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1159 	It must be called in a situation where the kernel address space may be
1160 	locked.
1161 */
1162 status_t
1163 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1164 {
1165 	AddressSpaceReadLocker locker;
1166 	VMArea* area;
1167 	status_t status = locker.SetFromArea(id, area);
1168 	if (status != B_OK)
1169 		return status;
1170 
1171 	if (area->page_protections == NULL) {
1172 		status = allocate_area_page_protections(area);
1173 		if (status != B_OK)
1174 			return status;
1175 	}
1176 
1177 	*cookie = (void*)area;
1178 	return B_OK;
1179 }
1180 
1181 
1182 /*!	This is a debug helper function that can only be used with very specific
1183 	use cases.
1184 	Sets protection for the given address range to the protection specified.
1185 	If \a protection is 0 then the involved pages will be marked non-present
1186 	in the translation map to cause a fault on access. The pages aren't
1187 	actually unmapped, however, so that they can be marked present again with
1188 	additional calls to this function. For this to work the area must be
1189 	fully locked in memory so that the pages aren't otherwise touched.
1190 	This function does not lock the kernel address space and needs to be
1191 	supplied with a \a cookie retrieved from a successful call to
1192 	vm_prepare_kernel_area_debug_protection().
1193 */
1194 status_t
1195 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1196 	uint32 protection)
1197 {
1198 	// check address range
1199 	addr_t address = (addr_t)_address;
1200 	size = PAGE_ALIGN(size);
1201 
1202 	if ((address % B_PAGE_SIZE) != 0
1203 		|| (addr_t)address + size < (addr_t)address
1204 		|| !IS_KERNEL_ADDRESS(address)
1205 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1206 		return B_BAD_VALUE;
1207 	}
1208 
1209 	// Translate the kernel protection to user protection as we only store that.
1210 	if ((protection & B_KERNEL_READ_AREA) != 0)
1211 		protection |= B_READ_AREA;
1212 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1213 		protection |= B_WRITE_AREA;
1214 
1215 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1216 	VMTranslationMap* map = addressSpace->TranslationMap();
1217 	VMArea* area = (VMArea*)cookie;
1218 
1219 	addr_t offset = address - area->Base();
1220 	if (area->Size() - offset < size) {
1221 		panic("protect range not fully within supplied area");
1222 		return B_BAD_VALUE;
1223 	}
1224 
1225 	if (area->page_protections == NULL) {
1226 		panic("area has no page protections");
1227 		return B_BAD_VALUE;
1228 	}
1229 
1230 	// Invalidate the mapping entries so any access to them will fault or
1231 	// restore the mapping entries unchanged so that lookups will succeed again.
1232 	map->Lock();
1233 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1234 	map->Unlock();
1235 
1236 	// And set the proper page protections so that the fault case will actually
1237 	// fail and not simply try to map a new page.
1238 	for (addr_t pageAddress = address; pageAddress < address + size;
1239 			pageAddress += B_PAGE_SIZE) {
1240 		set_area_page_protection(area, pageAddress, protection);
1241 	}
1242 
1243 	return B_OK;
1244 }
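// Example usage (sketch, with "areaID" and "address" as placeholders for the
// caller's fully locked kernel area and a page-aligned address inside it):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the first page fault on any access
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//		// ... later restore read/write access
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}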
1245 
1246 
1247 status_t
1248 vm_block_address_range(const char* name, void* address, addr_t size)
1249 {
1250 	if (!arch_vm_supports_protection(0))
1251 		return B_NOT_SUPPORTED;
1252 
1253 	AddressSpaceWriteLocker locker;
1254 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1255 	if (status != B_OK)
1256 		return status;
1257 
1258 	VMAddressSpace* addressSpace = locker.AddressSpace();
1259 
1260 	// create an anonymous cache
1261 	VMCache* cache;
1262 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1263 		VM_PRIORITY_SYSTEM);
1264 	if (status != B_OK)
1265 		return status;
1266 
1267 	cache->temporary = 1;
1268 	cache->virtual_end = size;
1269 	cache->Lock();
1270 
1271 	VMArea* area;
1272 	virtual_address_restrictions addressRestrictions = {};
1273 	addressRestrictions.address = address;
1274 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1275 	status = map_backing_store(addressSpace, cache, 0, name, size,
1276 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1277 		true, &area, NULL);
1278 	if (status != B_OK) {
1279 		cache->ReleaseRefAndUnlock();
1280 		return status;
1281 	}
1282 
1283 	cache->Unlock();
1284 	area->cache_type = CACHE_TYPE_RAM;
1285 	return area->id;
1286 }
1287 
1288 
1289 status_t
1290 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1291 {
1292 	AddressSpaceWriteLocker locker(team);
1293 	if (!locker.IsLocked())
1294 		return B_BAD_TEAM_ID;
1295 
1296 	VMAddressSpace* addressSpace = locker.AddressSpace();
1297 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1298 		addressSpace == VMAddressSpace::Kernel()
1299 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1300 }
1301 
1302 
1303 status_t
1304 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1305 	addr_t size, uint32 flags)
1306 {
1307 	if (size == 0)
1308 		return B_BAD_VALUE;
1309 
1310 	AddressSpaceWriteLocker locker(team);
1311 	if (!locker.IsLocked())
1312 		return B_BAD_TEAM_ID;
1313 
1314 	virtual_address_restrictions addressRestrictions = {};
1315 	addressRestrictions.address = *_address;
1316 	addressRestrictions.address_specification = addressSpec;
1317 	VMAddressSpace* addressSpace = locker.AddressSpace();
1318 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1319 		addressSpace == VMAddressSpace::Kernel()
1320 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1321 		_address);
1322 }
1323 
1324 
1325 area_id
1326 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1327 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1328 	const virtual_address_restrictions* virtualAddressRestrictions,
1329 	const physical_address_restrictions* physicalAddressRestrictions,
1330 	bool kernel, void** _address)
1331 {
1332 	VMArea* area;
1333 	VMCache* cache;
1334 	vm_page* page = NULL;
1335 	bool isStack = (protection & B_STACK_AREA) != 0;
1336 	page_num_t guardPages;
1337 	bool canOvercommit = false;
1338 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1339 		? VM_PAGE_ALLOC_CLEAR : 0;
1340 
1341 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1342 		team, name, size));
1343 
1344 	size = PAGE_ALIGN(size);
1345 	guardSize = PAGE_ALIGN(guardSize);
1346 	guardPages = guardSize / B_PAGE_SIZE;
1347 
1348 	if (size == 0 || size < guardSize)
1349 		return B_BAD_VALUE;
1350 	if (!arch_vm_supports_protection(protection))
1351 		return B_NOT_SUPPORTED;
1352 
1353 	if (team == B_CURRENT_TEAM)
1354 		team = VMAddressSpace::CurrentID();
1355 	if (team < 0)
1356 		return B_BAD_TEAM_ID;
1357 
1358 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1359 		canOvercommit = true;
1360 
1361 #ifdef DEBUG_KERNEL_STACKS
1362 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1363 		isStack = true;
1364 #endif
1365 
1366 	// check parameters
1367 	switch (virtualAddressRestrictions->address_specification) {
1368 		case B_ANY_ADDRESS:
1369 		case B_EXACT_ADDRESS:
1370 		case B_BASE_ADDRESS:
1371 		case B_ANY_KERNEL_ADDRESS:
1372 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1373 		case B_RANDOMIZED_ANY_ADDRESS:
1374 		case B_RANDOMIZED_BASE_ADDRESS:
1375 			break;
1376 
1377 		default:
1378 			return B_BAD_VALUE;
1379 	}
1380 
1381 	// If low or high physical address restrictions are given, we force
1382 	// B_CONTIGUOUS wiring, since only then we'll use
1383 	// vm_page_allocate_page_run() which deals with those restrictions.
1384 	if (physicalAddressRestrictions->low_address != 0
1385 		|| physicalAddressRestrictions->high_address != 0) {
1386 		wiring = B_CONTIGUOUS;
1387 	}
1388 
1389 	physical_address_restrictions stackPhysicalRestrictions;
1390 	bool doReserveMemory = false;
1391 	switch (wiring) {
1392 		case B_NO_LOCK:
1393 			break;
1394 		case B_FULL_LOCK:
1395 		case B_LAZY_LOCK:
1396 		case B_CONTIGUOUS:
1397 			doReserveMemory = true;
1398 			break;
1399 		case B_ALREADY_WIRED:
1400 			break;
1401 		case B_LOMEM:
1402 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1403 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1404 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1405 			wiring = B_CONTIGUOUS;
1406 			doReserveMemory = true;
1407 			break;
1408 		case B_32_BIT_FULL_LOCK:
1409 			if (B_HAIKU_PHYSICAL_BITS <= 32
1410 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1411 				wiring = B_FULL_LOCK;
1412 				doReserveMemory = true;
1413 				break;
1414 			}
1415 			// TODO: We don't really support this mode efficiently. Just fall
1416 			// through for now ...
1417 		case B_32_BIT_CONTIGUOUS:
1418 			#if B_HAIKU_PHYSICAL_BITS > 32
1419 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1420 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1421 					stackPhysicalRestrictions.high_address
1422 						= (phys_addr_t)1 << 32;
1423 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1424 				}
1425 			#endif
1426 			wiring = B_CONTIGUOUS;
1427 			doReserveMemory = true;
1428 			break;
1429 		default:
1430 			return B_BAD_VALUE;
1431 	}
1432 
1433 	// Optimization: For a single-page contiguous allocation without low/high
1434 	// memory restriction B_FULL_LOCK wiring suffices.
1435 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1436 		&& physicalAddressRestrictions->low_address == 0
1437 		&& physicalAddressRestrictions->high_address == 0) {
1438 		wiring = B_FULL_LOCK;
1439 	}
1440 
1441 	// For full lock or contiguous areas we're also going to map the pages and
1442 	// thus need to reserve pages for the mapping backend upfront.
1443 	addr_t reservedMapPages = 0;
1444 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1445 		AddressSpaceWriteLocker locker;
1446 		status_t status = locker.SetTo(team);
1447 		if (status != B_OK)
1448 			return status;
1449 
1450 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1451 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1452 	}
1453 
1454 	int priority;
1455 	if (team != VMAddressSpace::KernelID())
1456 		priority = VM_PRIORITY_USER;
1457 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1458 		priority = VM_PRIORITY_VIP;
1459 	else
1460 		priority = VM_PRIORITY_SYSTEM;
1461 
1462 	// Reserve memory before acquiring the address space lock. This reduces the
1463 	// chances of failure, since while holding the write lock to the address
1464 	// space (if it is the kernel address space that is), the low memory handler
1465 	// won't be able to free anything for us.
1466 	addr_t reservedMemory = 0;
1467 	if (doReserveMemory) {
1468 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1469 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1470 			return B_NO_MEMORY;
1471 		reservedMemory = size;
1472 		// TODO: We don't reserve the memory for the pages for the page
1473 		// directories/tables. We actually need to do so, since we currently
1474 		// don't reclaim them (and probably can't reclaim all of them anyway).
1475 		// Thus there are actually fewer physical pages than there should be,
1476 		// which can get the VM into trouble in low memory situations.
1477 	}
1478 
1479 	AddressSpaceWriteLocker locker;
1480 	VMAddressSpace* addressSpace;
1481 	status_t status;
1482 
1483 	// For full lock areas reserve the pages before locking the address
1484 	// space. E.g. block caches can't release their memory while we hold the
1485 	// address space lock.
1486 	page_num_t reservedPages = reservedMapPages;
1487 	if (wiring == B_FULL_LOCK)
1488 		reservedPages += size / B_PAGE_SIZE;
1489 
1490 	vm_page_reservation reservation;
1491 	if (reservedPages > 0) {
1492 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1493 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1494 					priority)) {
1495 				reservedPages = 0;
1496 				status = B_WOULD_BLOCK;
1497 				goto err0;
1498 			}
1499 		} else
1500 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1501 	}
1502 
1503 	if (wiring == B_CONTIGUOUS) {
1504 		// we try to allocate the page run here upfront as this may easily
1505 		// fail for obvious reasons
1506 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1507 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1508 		if (page == NULL) {
1509 			status = B_NO_MEMORY;
1510 			goto err0;
1511 		}
1512 	}
1513 
1514 	// Lock the address space and, if B_EXACT_ADDRESS and
1515 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1516 	// is not wired.
1517 	do {
1518 		status = locker.SetTo(team);
1519 		if (status != B_OK)
1520 			goto err1;
1521 
1522 		addressSpace = locker.AddressSpace();
1523 	} while (virtualAddressRestrictions->address_specification
1524 			== B_EXACT_ADDRESS
1525 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1526 		&& wait_if_address_range_is_wired(addressSpace,
1527 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1528 
1529 	// create an anonymous cache
1530 	// if it's a stack, make sure that at least two pages are available
1531 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1532 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1533 		wiring == B_NO_LOCK, priority);
1534 	if (status != B_OK)
1535 		goto err1;
1536 
1537 	cache->temporary = 1;
1538 	cache->virtual_end = size;
1539 	cache->committed_size = reservedMemory;
1540 		// TODO: This should be done via a method.
1541 	reservedMemory = 0;
1542 
1543 	cache->Lock();
1544 
1545 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1546 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1547 		virtualAddressRestrictions, kernel, &area, _address);
1548 
1549 	if (status != B_OK) {
1550 		cache->ReleaseRefAndUnlock();
1551 		goto err1;
1552 	}
1553 
1554 	locker.DegradeToReadLock();
1555 
1556 	switch (wiring) {
1557 		case B_NO_LOCK:
1558 		case B_LAZY_LOCK:
1559 			// do nothing - the pages are mapped in as needed
1560 			break;
1561 
1562 		case B_FULL_LOCK:
1563 		{
1564 			// Allocate and map all pages for this area
1565 
1566 			off_t offset = 0;
1567 			for (addr_t address = area->Base();
1568 					address < area->Base() + (area->Size() - 1);
1569 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1570 #ifdef DEBUG_KERNEL_STACKS
1571 #	ifdef STACK_GROWS_DOWNWARDS
1572 				if (isStack && address < area->Base()
1573 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1574 #	else
1575 				if (isStack && address >= area->Base() + area->Size()
1576 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1577 #	endif
1578 					continue;
1579 #endif
1580 				vm_page* page = vm_page_allocate_page(&reservation,
1581 					PAGE_STATE_WIRED | pageAllocFlags);
1582 				cache->InsertPage(page, offset);
1583 				map_page(area, page, address, protection, &reservation);
1584 
1585 				DEBUG_PAGE_ACCESS_END(page);
1586 			}
1587 
1588 			break;
1589 		}
1590 
1591 		case B_ALREADY_WIRED:
1592 		{
1593 			// The pages should already be mapped. This is only really useful
1594 			// during boot time. Find the appropriate vm_page objects and stick
1595 			// them in the cache object.
1596 			VMTranslationMap* map = addressSpace->TranslationMap();
1597 			off_t offset = 0;
1598 
1599 			if (!gKernelStartup)
1600 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1601 
1602 			map->Lock();
1603 
1604 			for (addr_t virtualAddress = area->Base();
1605 					virtualAddress < area->Base() + (area->Size() - 1);
1606 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1607 				phys_addr_t physicalAddress;
1608 				uint32 flags;
1609 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1610 				if (status < B_OK) {
1611 					panic("looking up mapping failed for va 0x%lx\n",
1612 						virtualAddress);
1613 				}
1614 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1615 				if (page == NULL) {
1616 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1617 						"\n", physicalAddress);
1618 				}
1619 
1620 				DEBUG_PAGE_ACCESS_START(page);
1621 
1622 				cache->InsertPage(page, offset);
1623 				increment_page_wired_count(page);
1624 				vm_page_set_state(page, PAGE_STATE_WIRED);
1625 				page->busy = false;
1626 
1627 				DEBUG_PAGE_ACCESS_END(page);
1628 			}
1629 
1630 			map->Unlock();
1631 			break;
1632 		}
1633 
1634 		case B_CONTIGUOUS:
1635 		{
1636 			// We have already allocated our contiguous page run, so we can now
1637 			// just map them in the address space
1638 			VMTranslationMap* map = addressSpace->TranslationMap();
1639 			phys_addr_t physicalAddress
1640 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1641 			addr_t virtualAddress = area->Base();
1642 			off_t offset = 0;
1643 
1644 			map->Lock();
1645 
1646 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1647 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1648 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1649 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1650 				if (page == NULL)
1651 					panic("couldn't lookup physical page just allocated\n");
1652 
1653 				status = map->Map(virtualAddress, physicalAddress, protection,
1654 					area->MemoryType(), &reservation);
1655 				if (status < B_OK)
1656 					panic("couldn't map physical page in page run\n");
1657 
1658 				cache->InsertPage(page, offset);
1659 				increment_page_wired_count(page);
1660 
1661 				DEBUG_PAGE_ACCESS_END(page);
1662 			}
1663 
1664 			map->Unlock();
1665 			break;
1666 		}
1667 
1668 		default:
1669 			break;
1670 	}
1671 
1672 	cache->Unlock();
1673 
1674 	if (reservedPages > 0)
1675 		vm_page_unreserve_pages(&reservation);
1676 
1677 	TRACE(("vm_create_anonymous_area: done\n"));
1678 
1679 	area->cache_type = CACHE_TYPE_RAM;
1680 	return area->id;
1681 
1682 err1:
1683 	if (wiring == B_CONTIGUOUS) {
1684 		// we had allocated the contiguous page run upfront, so free it again
1685 		phys_addr_t pageNumber = page->physical_page_number;
1686 		int32 i;
1687 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1688 			page = vm_lookup_page(pageNumber);
1689 			if (page == NULL)
1690 				panic("couldn't lookup physical page just allocated\n");
1691 
1692 			vm_page_set_state(page, PAGE_STATE_FREE);
1693 		}
1694 	}
1695 
1696 err0:
1697 	if (reservedPages > 0)
1698 		vm_page_unreserve_pages(&reservation);
1699 	if (reservedMemory > 0)
1700 		vm_unreserve_memory(reservedMemory);
1701 
1702 	return status;
1703 }
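// Example (sketch): a typical kernel-internal call creating a fully locked
// anonymous area at any kernel address could look roughly like this (name
// and size are placeholders):
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"some buffer", B_PAGE_SIZE * 4, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, true, &address);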
1704 
1705 
1706 area_id
1707 vm_map_physical_memory(team_id team, const char* name, void** _address,
1708 	uint32 addressSpec, addr_t size, uint32 protection,
1709 	phys_addr_t physicalAddress, bool alreadyWired)
1710 {
1711 	VMArea* area;
1712 	VMCache* cache;
1713 	addr_t mapOffset;
1714 
1715 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1716 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1717 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1718 		addressSpec, size, protection, physicalAddress));
1719 
1720 	if (!arch_vm_supports_protection(protection))
1721 		return B_NOT_SUPPORTED;
1722 
1723 	AddressSpaceWriteLocker locker(team);
1724 	if (!locker.IsLocked())
1725 		return B_BAD_TEAM_ID;
1726 
1727 	// if the physical address is not page aligned,
1728 	// move the actual area down to align on a page boundary
1729 	mapOffset = physicalAddress % B_PAGE_SIZE;
1730 	size += mapOffset;
1731 	physicalAddress -= mapOffset;
1732 
1733 	size = PAGE_ALIGN(size);
1734 
1735 	// create a device cache
1736 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1737 	if (status != B_OK)
1738 		return status;
1739 
1740 	cache->virtual_end = size;
1741 
1742 	cache->Lock();
1743 
1744 	virtual_address_restrictions addressRestrictions = {};
1745 	addressRestrictions.address = *_address;
1746 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1747 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1748 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1749 		true, &area, _address);
1750 
1751 	if (status < B_OK)
1752 		cache->ReleaseRefLocked();
1753 
1754 	cache->Unlock();
1755 
1756 	if (status == B_OK) {
1757 		// set requested memory type -- use uncached, if not given
1758 		uint32 memoryType = addressSpec & B_MTR_MASK;
1759 		if (memoryType == 0)
1760 			memoryType = B_MTR_UC;
1761 
1762 		area->SetMemoryType(memoryType);
1763 
1764 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1765 		if (status != B_OK)
1766 			delete_area(locker.AddressSpace(), area, false);
1767 	}
1768 
1769 	if (status != B_OK)
1770 		return status;
1771 
1772 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1773 
1774 	if (alreadyWired) {
1775 		// The area is already mapped, but possibly not with the right
1776 		// memory type.
1777 		map->Lock();
1778 		map->ProtectArea(area, area->protection);
1779 		map->Unlock();
1780 	} else {
1781 		// Map the area completely.
1782 
1783 		// reserve pages needed for the mapping
1784 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1785 			area->Base() + (size - 1));
1786 		vm_page_reservation reservation;
1787 		vm_page_reserve_pages(&reservation, reservePages,
1788 			team == VMAddressSpace::KernelID()
1789 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1790 
1791 		map->Lock();
1792 
1793 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1794 			map->Map(area->Base() + offset, physicalAddress + offset,
1795 				protection, area->MemoryType(), &reservation);
1796 		}
1797 
1798 		map->Unlock();
1799 
1800 		vm_page_unreserve_pages(&reservation);
1801 	}
1802 
1803 	// modify the pointer returned to be offset back into the new area
1804 	// the same way the physical address in was offset
1805 	*_address = (void*)((addr_t)*_address + mapOffset);
1806 
1807 	area->cache_type = CACHE_TYPE_DEVICE;
1808 	return area->id;
1809 }
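
// Illustrative sketch only (not part of the build): how a driver might use
// vm_map_physical_memory() above to map a hypothetical MMIO register block
// into the kernel address space. The helper name and the physical base are
// hypothetical; only the call itself follows the signature above.
#if 0
static area_id
example_map_mmio_registers(phys_addr_t physicalBase, void** _virtualBase)
{
	return vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example mmio registers", _virtualBase, B_ANY_KERNEL_ADDRESS,
		B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase,
		false);
}
#endif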
1810 
1811 
1812 /*!	Don't use!
1813 	TODO: This function was introduced to map physical page vecs to
1814 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1815 	use a device cache and does not track vm_page::wired_count!
1816 */
1817 area_id
1818 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1819 	uint32 addressSpec, addr_t* _size, uint32 protection,
1820 	struct generic_io_vec* vecs, uint32 vecCount)
1821 {
1822 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1823 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1824 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1825 		addressSpec, _size, protection, vecs, vecCount));
1826 
1827 	if (!arch_vm_supports_protection(protection)
1828 		|| (addressSpec & B_MTR_MASK) != 0) {
1829 		return B_NOT_SUPPORTED;
1830 	}
1831 
1832 	AddressSpaceWriteLocker locker(team);
1833 	if (!locker.IsLocked())
1834 		return B_BAD_TEAM_ID;
1835 
1836 	if (vecCount == 0)
1837 		return B_BAD_VALUE;
1838 
1839 	addr_t size = 0;
1840 	for (uint32 i = 0; i < vecCount; i++) {
1841 		if (vecs[i].base % B_PAGE_SIZE != 0
1842 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1843 			return B_BAD_VALUE;
1844 		}
1845 
1846 		size += vecs[i].length;
1847 	}
1848 
1849 	// create a device cache
1850 	VMCache* cache;
1851 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1852 	if (result != B_OK)
1853 		return result;
1854 
1855 	cache->virtual_end = size;
1856 
1857 	cache->Lock();
1858 
1859 	VMArea* area;
1860 	virtual_address_restrictions addressRestrictions = {};
1861 	addressRestrictions.address = *_address;
1862 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1863 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1864 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1865 		&addressRestrictions, true, &area, _address);
1866 
1867 	if (result != B_OK)
1868 		cache->ReleaseRefLocked();
1869 
1870 	cache->Unlock();
1871 
1872 	if (result != B_OK)
1873 		return result;
1874 
1875 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1876 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1877 		area->Base() + (size - 1));
1878 
1879 	vm_page_reservation reservation;
1880 	vm_page_reserve_pages(&reservation, reservePages,
1881 			team == VMAddressSpace::KernelID()
1882 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1883 	map->Lock();
1884 
1885 	uint32 vecIndex = 0;
1886 	size_t vecOffset = 0;
1887 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1888 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1889 			vecOffset = 0;
1890 			vecIndex++;
1891 		}
1892 
1893 		if (vecIndex >= vecCount)
1894 			break;
1895 
1896 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1897 			protection, area->MemoryType(), &reservation);
1898 
1899 		vecOffset += B_PAGE_SIZE;
1900 	}
1901 
1902 	map->Unlock();
1903 	vm_page_unreserve_pages(&reservation);
1904 
1905 	if (_size != NULL)
1906 		*_size = size;
1907 
1908 	area->cache_type = CACHE_TYPE_DEVICE;
1909 	return area->id;
1910 }
1911 
1912 
1913 area_id
1914 vm_create_null_area(team_id team, const char* name, void** address,
1915 	uint32 addressSpec, addr_t size, uint32 flags)
1916 {
1917 	size = PAGE_ALIGN(size);
1918 
1919 	// Lock the address space and, if B_EXACT_ADDRESS and
1920 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1921 	// is not wired.
1922 	AddressSpaceWriteLocker locker;
1923 	do {
1924 		if (locker.SetTo(team) != B_OK)
1925 			return B_BAD_TEAM_ID;
1926 	} while (addressSpec == B_EXACT_ADDRESS
1927 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1928 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1929 			(addr_t)*address, size, &locker));
1930 
1931 	// create a null cache
1932 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1933 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1934 	VMCache* cache;
1935 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1936 	if (status != B_OK)
1937 		return status;
1938 
1939 	cache->temporary = 1;
1940 	cache->virtual_end = size;
1941 
1942 	cache->Lock();
1943 
1944 	VMArea* area;
1945 	virtual_address_restrictions addressRestrictions = {};
1946 	addressRestrictions.address = *address;
1947 	addressRestrictions.address_specification = addressSpec;
1948 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1949 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1950 		REGION_NO_PRIVATE_MAP, flags,
1951 		&addressRestrictions, true, &area, address);
1952 
1953 	if (status < B_OK) {
1954 		cache->ReleaseRefAndUnlock();
1955 		return status;
1956 	}
1957 
1958 	cache->Unlock();
1959 
1960 	area->cache_type = CACHE_TYPE_NULL;
1961 	return area->id;
1962 }
1963 
1964 
1965 /*!	Creates the vnode cache for the specified \a vnode.
1966 	The vnode has to be marked busy when calling this function.
1967 */
1968 status_t
1969 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1970 {
1971 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1972 }
1973 
1974 
1975 /*!	\a cache must be locked. The area's address space must be read-locked.
1976 */
1977 static void
1978 pre_map_area_pages(VMArea* area, VMCache* cache,
1979 	vm_page_reservation* reservation)
1980 {
1981 	addr_t baseAddress = area->Base();
1982 	addr_t cacheOffset = area->cache_offset;
1983 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1984 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1985 
1986 	for (VMCachePagesTree::Iterator it
1987 				= cache->pages.GetIterator(firstPage, true, true);
1988 			vm_page* page = it.Next();) {
1989 		if (page->cache_offset >= endPage)
1990 			break;
1991 
1992 		// skip busy and inactive pages
1993 		if (page->busy || page->usage_count == 0)
1994 			continue;
1995 
1996 		DEBUG_PAGE_ACCESS_START(page);
1997 		map_page(area, page,
1998 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1999 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2000 		DEBUG_PAGE_ACCESS_END(page);
2001 	}
2002 }
2003 
2004 
2005 /*!	Will map the file specified by \a fd to an area in memory.
2006 	The file will be mirrored beginning at the specified \a offset. The
2007 	\a offset and \a size arguments have to be page aligned.
2008 */
2009 static area_id
2010 _vm_map_file(team_id team, const char* name, void** _address,
2011 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2012 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2013 {
2014 	// TODO: for binary files, we want to make sure that they get a
2015 	//	snapshot of the file at mapping time, i.e. later changes should not
2016 	//	make it into the mapped copy -- this will need quite some changes
2017 	//	to be done in a nice way
2018 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2019 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2020 
2021 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2022 	size = PAGE_ALIGN(size);
2023 
2024 	if (mapping == REGION_NO_PRIVATE_MAP)
2025 		protection |= B_SHARED_AREA;
2026 	if (addressSpec != B_EXACT_ADDRESS)
2027 		unmapAddressRange = false;
2028 
2029 	if (fd < 0) {
2030 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2031 		virtual_address_restrictions virtualRestrictions = {};
2032 		virtualRestrictions.address = *_address;
2033 		virtualRestrictions.address_specification = addressSpec;
2034 		physical_address_restrictions physicalRestrictions = {};
2035 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2036 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2037 			_address);
2038 	}
2039 
2040 	// get the open flags of the FD
2041 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2042 	if (descriptor == NULL)
2043 		return EBADF;
2044 	int32 openMode = descriptor->open_mode;
2045 	put_fd(descriptor);
2046 
2047 	// The FD must be open for reading in any case. For a shared mapping with
2048 	// write access, the FD must additionally be open for writing.
2049 	if ((openMode & O_ACCMODE) == O_WRONLY
2050 		|| (mapping == REGION_NO_PRIVATE_MAP
2051 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2052 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2053 		return EACCES;
2054 	}
2055 
2056 	uint32 protectionMax = 0;
2057 	if (mapping != REGION_PRIVATE_MAP) {
2058 		protectionMax = protection | B_READ_AREA;
2059 		if ((openMode & O_ACCMODE) == O_RDWR)
2060 			protectionMax |= B_WRITE_AREA;
2061 	}
2062 
2063 	// get the vnode for the object, this also grabs a ref to it
2064 	struct vnode* vnode = NULL;
2065 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2066 	if (status < B_OK)
2067 		return status;
2068 	VnodePutter vnodePutter(vnode);
2069 
2070 	// If we're going to pre-map pages, we need to reserve the pages needed by
2071 	// the mapping backend upfront.
2072 	page_num_t reservedPreMapPages = 0;
2073 	vm_page_reservation reservation;
2074 	if ((protection & B_READ_AREA) != 0) {
2075 		AddressSpaceWriteLocker locker;
2076 		status = locker.SetTo(team);
2077 		if (status != B_OK)
2078 			return status;
2079 
2080 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2081 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2082 
2083 		locker.Unlock();
2084 
2085 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2086 			team == VMAddressSpace::KernelID()
2087 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2088 	}
2089 
2090 	struct PageUnreserver {
2091 		PageUnreserver(vm_page_reservation* reservation)
2092 			:
2093 			fReservation(reservation)
2094 		{
2095 		}
2096 
2097 		~PageUnreserver()
2098 		{
2099 			if (fReservation != NULL)
2100 				vm_page_unreserve_pages(fReservation);
2101 		}
2102 
2103 		vm_page_reservation* fReservation;
2104 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2105 
2106 	// Lock the address space and, if the specified address range shall be
2107 	// unmapped, ensure it is not wired.
2108 	AddressSpaceWriteLocker locker;
2109 	do {
2110 		if (locker.SetTo(team) != B_OK)
2111 			return B_BAD_TEAM_ID;
2112 	} while (unmapAddressRange
2113 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2114 			(addr_t)*_address, size, &locker));
2115 
2116 	// TODO: this only works for file systems that use the file cache
2117 	VMCache* cache;
2118 	status = vfs_get_vnode_cache(vnode, &cache, false);
2119 	if (status < B_OK)
2120 		return status;
2121 
2122 	cache->Lock();
2123 
2124 	VMArea* area;
2125 	virtual_address_restrictions addressRestrictions = {};
2126 	addressRestrictions.address = *_address;
2127 	addressRestrictions.address_specification = addressSpec;
2128 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2129 		0, protection, protectionMax, mapping,
2130 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2131 		&addressRestrictions, kernel, &area, _address);
2132 
2133 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2134 		// map_backing_store() cannot know we no longer need the ref
2135 		cache->ReleaseRefLocked();
2136 	}
2137 
2138 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2139 		pre_map_area_pages(area, cache, &reservation);
2140 
2141 	cache->Unlock();
2142 
2143 	if (status == B_OK) {
2144 		// TODO: this probably deserves a smarter solution, i.e. don't always
2145 		// prefetch, and probably don't trigger it at this place.
2146 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2147 			// prefetches at max 10 MB starting from "offset"
2148 	}
2149 
2150 	if (status != B_OK)
2151 		return status;
2152 
2153 	area->cache_type = CACHE_TYPE_VNODE;
2154 	return area->id;
2155 }
2156 
2157 
2158 area_id
2159 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2160 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2161 	int fd, off_t offset)
2162 {
2163 	if (!arch_vm_supports_protection(protection))
2164 		return B_NOT_SUPPORTED;
2165 
2166 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2167 		mapping, unmapAddressRange, fd, offset, true);
2168 }
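
// Illustrative sketch only (not compiled): mapping the first 64 KB of an
// already opened file read-only into the kernel team via vm_map_file() above.
// The helper name is hypothetical; the file descriptor and out pointer are
// assumed to come from the caller.
#if 0
static area_id
example_map_file_read_only(int fd, void** _base)
{
	*_base = NULL;
	return vm_map_file(VMAddressSpace::KernelID(), "example file map", _base,
		B_ANY_KERNEL_ADDRESS, 64 * 1024, B_READ_AREA | B_KERNEL_READ_AREA,
		REGION_NO_PRIVATE_MAP, false, fd, 0);
}
#endif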
2169 
2170 
2171 VMCache*
2172 vm_area_get_locked_cache(VMArea* area)
2173 {
2174 	rw_lock_read_lock(&sAreaCacheLock);
2175 
2176 	while (true) {
2177 		VMCache* cache = area->cache;
2178 
2179 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2180 			// cache has been deleted
2181 			rw_lock_read_lock(&sAreaCacheLock);
2182 			continue;
2183 		}
2184 
2185 		rw_lock_read_lock(&sAreaCacheLock);
2186 
2187 		if (cache == area->cache) {
2188 			cache->AcquireRefLocked();
2189 			rw_lock_read_unlock(&sAreaCacheLock);
2190 			return cache;
2191 		}
2192 
2193 		// the cache changed in the meantime
2194 		cache->Unlock();
2195 	}
2196 }
2197 
2198 
2199 void
2200 vm_area_put_locked_cache(VMCache* cache)
2201 {
2202 	cache->ReleaseRefAndUnlock();
2203 }
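
// Typical pairing of the two functions above (a minimal sketch, not taken
// from elsewhere in this file): the returned cache comes back locked and
// referenced, and vm_area_put_locked_cache() undoes both.
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	// ... work on the locked cache ...
//	vm_area_put_locked_cache(cache);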
2204 
2205 
2206 area_id
2207 vm_clone_area(team_id team, const char* name, void** address,
2208 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2209 	bool kernel)
2210 {
2211 	VMArea* newArea = NULL;
2212 	VMArea* sourceArea;
2213 
2214 	// Check whether the source area exists and is cloneable. If so, mark it
2215 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2216 	{
2217 		AddressSpaceWriteLocker locker;
2218 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2219 		if (status != B_OK)
2220 			return status;
2221 
2222 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2223 			return B_NOT_ALLOWED;
2224 
2225 		sourceArea->protection |= B_SHARED_AREA;
2226 		protection |= B_SHARED_AREA;
2227 	}
2228 
2229 	// Now lock both address spaces and actually do the cloning.
2230 
2231 	MultiAddressSpaceLocker locker;
2232 	VMAddressSpace* sourceAddressSpace;
2233 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2234 	if (status != B_OK)
2235 		return status;
2236 
2237 	VMAddressSpace* targetAddressSpace;
2238 	status = locker.AddTeam(team, true, &targetAddressSpace);
2239 	if (status != B_OK)
2240 		return status;
2241 
2242 	status = locker.Lock();
2243 	if (status != B_OK)
2244 		return status;
2245 
2246 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2247 	if (sourceArea == NULL)
2248 		return B_BAD_VALUE;
2249 
2250 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2251 		return B_NOT_ALLOWED;
2252 
2253 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2254 
2255 	if (!kernel && sourceAddressSpace != targetAddressSpace
2256 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2257 #if KDEBUG
2258 		Team* team = thread_get_current_thread()->team;
2259 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2260 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2261 #endif
2262 		status = B_NOT_ALLOWED;
2263 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2264 		status = B_NOT_ALLOWED;
2265 	} else {
2266 		virtual_address_restrictions addressRestrictions = {};
2267 		addressRestrictions.address = *address;
2268 		addressRestrictions.address_specification = addressSpec;
2269 		status = map_backing_store(targetAddressSpace, cache,
2270 			sourceArea->cache_offset, name, sourceArea->Size(),
2271 			sourceArea->wiring, protection, sourceArea->protection_max,
2272 			mapping, 0, &addressRestrictions,
2273 			kernel, &newArea, address);
2274 	}
2275 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2276 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2277 		// to create a new cache, and has therefore already acquired a reference
2278 		// to the source cache - but otherwise it has no idea that we need
2279 		// one.
2280 		cache->AcquireRefLocked();
2281 	}
2282 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2283 		// we need to map in everything at this point
2284 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2285 			// we don't have actual pages to map but a physical area
2286 			VMTranslationMap* map
2287 				= sourceArea->address_space->TranslationMap();
2288 			map->Lock();
2289 
2290 			phys_addr_t physicalAddress;
2291 			uint32 oldProtection;
2292 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2293 
2294 			map->Unlock();
2295 
2296 			map = targetAddressSpace->TranslationMap();
2297 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2298 				newArea->Base() + (newArea->Size() - 1));
2299 
2300 			vm_page_reservation reservation;
2301 			vm_page_reserve_pages(&reservation, reservePages,
2302 				targetAddressSpace == VMAddressSpace::Kernel()
2303 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2304 			map->Lock();
2305 
2306 			for (addr_t offset = 0; offset < newArea->Size();
2307 					offset += B_PAGE_SIZE) {
2308 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2309 					protection, newArea->MemoryType(), &reservation);
2310 			}
2311 
2312 			map->Unlock();
2313 			vm_page_unreserve_pages(&reservation);
2314 		} else {
2315 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2316 			size_t reservePages = map->MaxPagesNeededToMap(
2317 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2318 			vm_page_reservation reservation;
2319 			vm_page_reserve_pages(&reservation, reservePages,
2320 				targetAddressSpace == VMAddressSpace::Kernel()
2321 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2322 
2323 			// map in all pages from source
2324 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2325 					vm_page* page  = it.Next();) {
2326 				if (!page->busy) {
2327 					DEBUG_PAGE_ACCESS_START(page);
2328 					map_page(newArea, page,
2329 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2330 							- newArea->cache_offset),
2331 						protection, &reservation);
2332 					DEBUG_PAGE_ACCESS_END(page);
2333 				}
2334 			}
2335 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2336 			// ensuring that!
2337 
2338 			vm_page_unreserve_pages(&reservation);
2339 		}
2340 	}
2341 	if (status == B_OK)
2342 		newArea->cache_type = sourceArea->cache_type;
2343 
2344 	vm_area_put_locked_cache(cache);
2345 
2346 	if (status < B_OK)
2347 		return status;
2348 
2349 	return newArea->id;
2350 }
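
// Illustrative sketch only (not part of the build): cloning an existing area
// read/write into another team with vm_clone_area() above. The helper name,
// "targetTeam" and "sourceArea" are hypothetical values supplied by a caller.
#if 0
static area_id
example_clone_area(team_id targetTeam, area_id sourceArea)
{
	void* clonedBase = NULL;
	return vm_clone_area(targetTeam, "example clone", &clonedBase,
		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceArea, false);
}
#endif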
2351 
2352 
2353 /*!	Deletes the specified area of the given address space.
2354 
2355 	The address space must be write-locked.
2356 	The caller must ensure that the area does not have any wired ranges.
2357 
2358 	\param addressSpace The address space containing the area.
2359 	\param area The area to be deleted.
2360 	\param deletingAddressSpace \c true, if the address space is in the process
2361 		of being deleted.
2362 */
2363 static void
2364 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2365 	bool deletingAddressSpace)
2366 {
2367 	ASSERT(!area->IsWired());
2368 
2369 	VMAreaHash::Remove(area);
2370 
2371 	// At this point the area is removed from the global hash table, but
2372 	// still exists in the area list.
2373 
2374 	// Unmap the virtual address space the area occupied.
2375 	{
2376 		// We need to lock the complete cache chain.
2377 		VMCache* topCache = vm_area_get_locked_cache(area);
2378 		VMCacheChainLocker cacheChainLocker(topCache);
2379 		cacheChainLocker.LockAllSourceCaches();
2380 
2381 		// If the area's top cache is a temporary cache and the area is the only
2382 		// one referencing it (besides us currently holding a second reference),
2383 		// the unmapping code doesn't need to care about preserving the accessed
2384 		// and dirty flags of the top cache page mappings.
2385 		bool ignoreTopCachePageFlags
2386 			= topCache->temporary && topCache->RefCount() == 2;
2387 
2388 		area->address_space->TranslationMap()->UnmapArea(area,
2389 			deletingAddressSpace, ignoreTopCachePageFlags);
2390 	}
2391 
2392 	if (!area->cache->temporary)
2393 		area->cache->WriteModified();
2394 
2395 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2396 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2397 
2398 	arch_vm_unset_memory_type(area);
2399 	addressSpace->RemoveArea(area, allocationFlags);
2400 	addressSpace->Put();
2401 
2402 	area->cache->RemoveArea(area);
2403 	area->cache->ReleaseRef();
2404 
2405 	addressSpace->DeleteArea(area, allocationFlags);
2406 }
2407 
2408 
2409 status_t
2410 vm_delete_area(team_id team, area_id id, bool kernel)
2411 {
2412 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2413 		team, id));
2414 
2415 	// lock the address space and make sure the area isn't wired
2416 	AddressSpaceWriteLocker locker;
2417 	VMArea* area;
2418 	AreaCacheLocker cacheLocker;
2419 
2420 	do {
2421 		status_t status = locker.SetFromArea(team, id, area);
2422 		if (status != B_OK)
2423 			return status;
2424 
2425 		cacheLocker.SetTo(area);
2426 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2427 
2428 	cacheLocker.Unlock();
2429 
2430 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2431 		return B_NOT_ALLOWED;
2432 
2433 	delete_area(locker.AddressSpace(), area, false);
2434 	return B_OK;
2435 }
2436 
2437 
2438 /*!	Creates a new cache on top of given cache, moves all areas from
2439 	the old cache to the new one, and changes the protection of all affected
2440 	areas' pages to read-only. If requested, wired pages are moved up to the
2441 	new cache and copies are added to the old cache in their place.
2442 	Preconditions:
2443 	- The given cache must be locked.
2444 	- All of the cache's areas' address spaces must be read locked.
2445 	- Either the cache must not have any wired ranges or a page reservation for
2446 	  all wired pages must be provided, so they can be copied.
2447 
2448 	\param lowerCache The cache on top of which a new cache shall be created.
2449 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2450 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2451 		has wired pages. The wired pages are copied in this case.
2452 */
2453 static status_t
2454 vm_copy_on_write_area(VMCache* lowerCache,
2455 	vm_page_reservation* wiredPagesReservation)
2456 {
2457 	VMCache* upperCache;
2458 
2459 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2460 
2461 	// We need to separate the cache from its areas. The cache goes one level
2462 	// deeper and we create a new cache in between.
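	// A minimal sketch of the resulting chain: before this call it is
	//   area(s) -> lowerCache [-> source caches...]
	// and afterwards it is
	//   area(s) -> upperCache (new, temporary) -> lowerCache [-> ...],
	// so subsequent write faults allocate pages in upperCache while
	// lowerCache keeps the now read-only shared pages.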
2463 
2464 	// create an anonymous cache
2465 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2466 		lowerCache->GuardSize() / B_PAGE_SIZE,
2467 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2468 		VM_PRIORITY_USER);
2469 	if (status != B_OK)
2470 		return status;
2471 
2472 	upperCache->Lock();
2473 
2474 	upperCache->temporary = 1;
2475 	upperCache->virtual_base = lowerCache->virtual_base;
2476 	upperCache->virtual_end = lowerCache->virtual_end;
2477 
2478 	// transfer the lower cache areas to the upper cache
2479 	rw_lock_write_lock(&sAreaCacheLock);
2480 	upperCache->TransferAreas(lowerCache);
2481 	rw_lock_write_unlock(&sAreaCacheLock);
2482 
2483 	lowerCache->AddConsumer(upperCache);
2484 
2485 	// We now need to remap all pages from all of the cache's areas read-only,
2486 	// so that a copy will be created on next write access. If there are wired
2487 	// pages, we keep their protection, move them to the upper cache and create
2488 	// copies for the lower cache.
2489 	if (wiredPagesReservation != NULL) {
2490 		// We need to handle wired pages -- iterate through the cache's pages.
2491 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2492 				vm_page* page = it.Next();) {
2493 			if (page->WiredCount() > 0) {
2494 				// allocate a new page and copy the wired one
2495 				vm_page* copiedPage = vm_page_allocate_page(
2496 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2497 
2498 				vm_memcpy_physical_page(
2499 					copiedPage->physical_page_number * B_PAGE_SIZE,
2500 					page->physical_page_number * B_PAGE_SIZE);
2501 
2502 				// move the wired page to the upper cache (note: removing is OK
2503 				// with the SplayTree iterator) and insert the copy
2504 				upperCache->MovePage(page);
2505 				lowerCache->InsertPage(copiedPage,
2506 					page->cache_offset * B_PAGE_SIZE);
2507 
2508 				DEBUG_PAGE_ACCESS_END(copiedPage);
2509 			} else {
2510 				// Change the protection of this page in all areas.
2511 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2512 						tempArea = tempArea->cache_next) {
2513 					// The area must be readable in the same way it was
2514 					// previously writable.
2515 					uint32 protection = B_KERNEL_READ_AREA;
2516 					if ((tempArea->protection & B_READ_AREA) != 0)
2517 						protection |= B_READ_AREA;
2518 
2519 					VMTranslationMap* map
2520 						= tempArea->address_space->TranslationMap();
2521 					map->Lock();
2522 					map->ProtectPage(tempArea,
2523 						virtual_page_address(tempArea, page), protection);
2524 					map->Unlock();
2525 				}
2526 			}
2527 		}
2528 	} else {
2529 		ASSERT(lowerCache->WiredPagesCount() == 0);
2530 
2531 		// just change the protection of all areas
2532 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2533 				tempArea = tempArea->cache_next) {
2534 			// The area must be readable in the same way it was previously
2535 			// writable.
2536 			uint32 protection = B_KERNEL_READ_AREA;
2537 			if ((tempArea->protection & B_READ_AREA) != 0)
2538 				protection |= B_READ_AREA;
2539 
2540 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2541 			map->Lock();
2542 			map->ProtectArea(tempArea, protection);
2543 			map->Unlock();
2544 		}
2545 	}
2546 
2547 	vm_area_put_locked_cache(upperCache);
2548 
2549 	return B_OK;
2550 }
2551 
2552 
2553 area_id
2554 vm_copy_area(team_id team, const char* name, void** _address,
2555 	uint32 addressSpec, area_id sourceID)
2556 {
2557 	// Do the locking: target address space, all address spaces associated with
2558 	// the source cache, and the cache itself.
2559 	MultiAddressSpaceLocker locker;
2560 	VMAddressSpace* targetAddressSpace;
2561 	VMCache* cache;
2562 	VMArea* source;
2563 	AreaCacheLocker cacheLocker;
2564 	status_t status;
2565 	bool sharedArea;
2566 
2567 	page_num_t wiredPages = 0;
2568 	vm_page_reservation wiredPagesReservation;
2569 
2570 	bool restart;
2571 	do {
2572 		restart = false;
2573 
2574 		locker.Unset();
2575 		status = locker.AddTeam(team, true, &targetAddressSpace);
2576 		if (status == B_OK) {
2577 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2578 				&cache);
2579 		}
2580 		if (status != B_OK)
2581 			return status;
2582 
2583 		cacheLocker.SetTo(cache, true);	// already locked
2584 
2585 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2586 
2587 		page_num_t oldWiredPages = wiredPages;
2588 		wiredPages = 0;
2589 
2590 		// If the source area isn't shared, count the number of wired pages in
2591 		// the cache and reserve as many pages.
2592 		if (!sharedArea) {
2593 			wiredPages = cache->WiredPagesCount();
2594 
2595 			if (wiredPages > oldWiredPages) {
2596 				cacheLocker.Unlock();
2597 				locker.Unlock();
2598 
2599 				if (oldWiredPages > 0)
2600 					vm_page_unreserve_pages(&wiredPagesReservation);
2601 
2602 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2603 					VM_PRIORITY_USER);
2604 
2605 				restart = true;
2606 			}
2607 		} else if (oldWiredPages > 0)
2608 			vm_page_unreserve_pages(&wiredPagesReservation);
2609 	} while (restart);
2610 
2611 	// unreserve pages later
2612 	struct PagesUnreserver {
2613 		PagesUnreserver(vm_page_reservation* reservation)
2614 			:
2615 			fReservation(reservation)
2616 		{
2617 		}
2618 
2619 		~PagesUnreserver()
2620 		{
2621 			if (fReservation != NULL)
2622 				vm_page_unreserve_pages(fReservation);
2623 		}
2624 
2625 	private:
2626 		vm_page_reservation*	fReservation;
2627 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2628 
2629 	bool writableCopy
2630 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2631 	uint8* targetPageProtections = NULL;
2632 
2633 	if (source->page_protections != NULL) {
2634 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2635 		targetPageProtections = (uint8*)malloc_etc(bytes,
2636 			HEAP_DONT_LOCK_KERNEL_SPACE);
2637 		if (targetPageProtections == NULL)
2638 			return B_NO_MEMORY;
2639 
2640 		memcpy(targetPageProtections, source->page_protections, bytes);
2641 
2642 		if (!writableCopy) {
2643 			for (size_t i = 0; i < bytes; i++) {
2644 				if ((targetPageProtections[i]
2645 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2646 					writableCopy = true;
2647 					break;
2648 				}
2649 			}
2650 		}
2651 	}
2652 
2653 	if (addressSpec == B_CLONE_ADDRESS) {
2654 		addressSpec = B_EXACT_ADDRESS;
2655 		*_address = (void*)source->Base();
2656 	}
2657 
2658 	// First, create a cache on top of the source area, respectively use the
2659 	// existing one, if this is a shared area.
2660 
2661 	VMArea* target;
2662 	virtual_address_restrictions addressRestrictions = {};
2663 	addressRestrictions.address = *_address;
2664 	addressRestrictions.address_specification = addressSpec;
2665 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2666 		name, source->Size(), source->wiring, source->protection,
2667 		source->protection_max,
2668 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2669 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2670 		&addressRestrictions, true, &target, _address);
2671 	if (status < B_OK) {
2672 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2673 		return status;
2674 	}
2675 
2676 	if (targetPageProtections != NULL)
2677 		target->page_protections = targetPageProtections;
2678 
2679 	if (sharedArea) {
2680 		// The new area uses the old area's cache, but map_backing_store()
2681 		// hasn't acquired a ref. So we have to do that now.
2682 		cache->AcquireRefLocked();
2683 	}
2684 
2685 	// If the source area is writable, we need to move it one layer up as well
2686 
2687 	if (!sharedArea) {
2688 		if (writableCopy) {
2689 			// TODO: do something more useful if this fails!
2690 			if (vm_copy_on_write_area(cache,
2691 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2692 				panic("vm_copy_on_write_area() failed!\n");
2693 			}
2694 		}
2695 	}
2696 
2697 	// we return the ID of the newly created area
2698 	return target->id;
2699 }
2700 
2701 
2702 status_t
2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2704 	bool kernel)
2705 {
2706 	fix_protection(&newProtection);
2707 
2708 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2709 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2710 
2711 	if (!arch_vm_supports_protection(newProtection))
2712 		return B_NOT_SUPPORTED;
2713 
2714 	bool becomesWritable
2715 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2716 
2717 	// lock address spaces and cache
2718 	MultiAddressSpaceLocker locker;
2719 	VMCache* cache;
2720 	VMArea* area;
2721 	status_t status;
2722 	AreaCacheLocker cacheLocker;
2723 	bool isWritable;
2724 
2725 	bool restart;
2726 	do {
2727 		restart = false;
2728 
2729 		locker.Unset();
2730 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2731 		if (status != B_OK)
2732 			return status;
2733 
2734 		cacheLocker.SetTo(cache, true);	// already locked
2735 
2736 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2737 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2738 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2739 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2740 				" (%s)\n", team, newProtection, areaID, area->name);
2741 			return B_NOT_ALLOWED;
2742 		}
2743 		if (!kernel && area->protection_max != 0
2744 			&& (newProtection & area->protection_max)
2745 				!= (newProtection & B_USER_PROTECTION)) {
2746 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2747 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2748 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2749 				area->protection_max, areaID, area->name);
2750 			return B_NOT_ALLOWED;
2751 		}
2752 
2753 		if (area->protection == newProtection)
2754 			return B_OK;
2755 
2756 		if (team != VMAddressSpace::KernelID()
2757 			&& area->address_space->ID() != team) {
2758 			// unless you're the kernel, you are only allowed to set
2759 			// the protection of your own areas
2760 			return B_NOT_ALLOWED;
2761 		}
2762 
2763 		isWritable
2764 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2765 
2766 		// Make sure the area (respectively, if we're going to call
2767 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2768 		// wired ranges.
2769 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2770 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2771 					otherArea = otherArea->cache_next) {
2772 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2773 					restart = true;
2774 					break;
2775 				}
2776 			}
2777 		} else {
2778 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2779 				restart = true;
2780 		}
2781 	} while (restart);
2782 
2783 	bool changePageProtection = true;
2784 	bool changeTopCachePagesOnly = false;
2785 
2786 	if (isWritable && !becomesWritable) {
2787 		// writable -> !writable
2788 
2789 		if (cache->source != NULL && cache->temporary) {
2790 			if (cache->CountWritableAreas(area) == 0) {
2791 				// Since this cache now lives from the pages in its source cache,
2792 				// we can change the cache's commitment to take only those pages
2793 				// into account that really are in this cache.
2794 
2795 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2796 					team == VMAddressSpace::KernelID()
2797 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2798 
2799 				// TODO: we may be able to join with our source cache, if
2800 				// count == 0
2801 			}
2802 		}
2803 
2804 		// If only the writability changes, we can just remap the pages of the
2805 		// top cache, since the pages of lower caches are mapped read-only
2806 		// anyway. That's only advantageous if the number of pages in the cache
2807 		// is significantly smaller than the number of pages in the area,
2808 		// though.
2809 		if (newProtection
2810 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2811 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2812 			changeTopCachePagesOnly = true;
2813 		}
2814 	} else if (!isWritable && becomesWritable) {
2815 		// !writable -> writable
2816 
2817 		if (!cache->consumers.IsEmpty()) {
2818 			// There are consumers -- we have to insert a new cache. Fortunately
2819 			// vm_copy_on_write_area() does everything that's needed.
2820 			changePageProtection = false;
2821 			status = vm_copy_on_write_area(cache, NULL);
2822 		} else {
2823 			// No consumers, so we don't need to insert a new one.
2824 			if (cache->source != NULL && cache->temporary) {
2825 				// the cache's commitment must contain all possible pages
2826 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2827 					team == VMAddressSpace::KernelID()
2828 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2829 			}
2830 
2831 			if (status == B_OK && cache->source != NULL) {
2832 				// There's a source cache, hence we can't just change all pages'
2833 				// protection or we might allow writing into pages belonging to
2834 				// a lower cache.
2835 				changeTopCachePagesOnly = true;
2836 			}
2837 		}
2838 	} else {
2839 		// we don't have anything special to do in all other cases
2840 	}
2841 
2842 	if (status == B_OK) {
2843 		// remap existing pages in this cache
2844 		if (changePageProtection) {
2845 			VMTranslationMap* map = area->address_space->TranslationMap();
2846 			map->Lock();
2847 
2848 			if (changeTopCachePagesOnly) {
2849 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2850 				page_num_t lastPageOffset
2851 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2852 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2853 						vm_page* page = it.Next();) {
2854 					if (page->cache_offset >= firstPageOffset
2855 						&& page->cache_offset <= lastPageOffset) {
2856 						addr_t address = virtual_page_address(area, page);
2857 						map->ProtectPage(area, address, newProtection);
2858 					}
2859 				}
2860 			} else
2861 				map->ProtectArea(area, newProtection);
2862 
2863 			map->Unlock();
2864 		}
2865 
2866 		area->protection = newProtection;
2867 	}
2868 
2869 	return status;
2870 }
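
// Illustrative sketch only (not compiled): dropping write access from a
// hypothetical user area owned by "team" via vm_set_area_protection() above.
#if 0
static status_t
example_make_area_read_only(team_id team, area_id area)
{
	return vm_set_area_protection(team, area, B_READ_AREA, false);
}
#endif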
2871 
2872 
2873 status_t
2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2875 {
2876 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2877 	if (addressSpace == NULL)
2878 		return B_BAD_TEAM_ID;
2879 
2880 	VMTranslationMap* map = addressSpace->TranslationMap();
2881 
2882 	map->Lock();
2883 	uint32 dummyFlags;
2884 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2885 	map->Unlock();
2886 
2887 	addressSpace->Put();
2888 	return status;
2889 }
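
// Illustrative sketch only (not part of the build): translating a kernel
// virtual address to its backing physical address with vm_get_page_mapping()
// above. The helper name and the "buffer" parameter are hypothetical.
#if 0
static status_t
example_virtual_to_physical(void* buffer, phys_addr_t* _physicalAddress)
{
	return vm_get_page_mapping(VMAddressSpace::KernelID(), (addr_t)buffer,
		_physicalAddress);
}
#endif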
2890 
2891 
2892 /*!	The page's cache must be locked.
2893 */
2894 bool
2895 vm_test_map_modification(vm_page* page)
2896 {
2897 	if (page->modified)
2898 		return true;
2899 
2900 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2901 	vm_page_mapping* mapping;
2902 	while ((mapping = iterator.Next()) != NULL) {
2903 		VMArea* area = mapping->area;
2904 		VMTranslationMap* map = area->address_space->TranslationMap();
2905 
2906 		phys_addr_t physicalAddress;
2907 		uint32 flags;
2908 		map->Lock();
2909 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2910 		map->Unlock();
2911 
2912 		if ((flags & PAGE_MODIFIED) != 0)
2913 			return true;
2914 	}
2915 
2916 	return false;
2917 }
2918 
2919 
2920 /*!	The page's cache must be locked.
2921 */
2922 void
2923 vm_clear_map_flags(vm_page* page, uint32 flags)
2924 {
2925 	if ((flags & PAGE_ACCESSED) != 0)
2926 		page->accessed = false;
2927 	if ((flags & PAGE_MODIFIED) != 0)
2928 		page->modified = false;
2929 
2930 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2931 	vm_page_mapping* mapping;
2932 	while ((mapping = iterator.Next()) != NULL) {
2933 		VMArea* area = mapping->area;
2934 		VMTranslationMap* map = area->address_space->TranslationMap();
2935 
2936 		map->Lock();
2937 		map->ClearFlags(virtual_page_address(area, page), flags);
2938 		map->Unlock();
2939 	}
2940 }
2941 
2942 
2943 /*!	Removes all mappings from a page.
2944 	After you've called this function, the page is unmapped from memory and
2945 	the page's \c accessed and \c modified flags have been updated according
2946 	to the state of the mappings.
2947 	The page's cache must be locked.
2948 */
2949 void
2950 vm_remove_all_page_mappings(vm_page* page)
2951 {
2952 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2953 		VMArea* area = mapping->area;
2954 		VMTranslationMap* map = area->address_space->TranslationMap();
2955 		addr_t address = virtual_page_address(area, page);
2956 		map->UnmapPage(area, address, false);
2957 	}
2958 }
2959 
2960 
2961 int32
2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2963 {
2964 	int32 count = 0;
2965 
2966 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2967 	vm_page_mapping* mapping;
2968 	while ((mapping = iterator.Next()) != NULL) {
2969 		VMArea* area = mapping->area;
2970 		VMTranslationMap* map = area->address_space->TranslationMap();
2971 
2972 		bool modified;
2973 		if (map->ClearAccessedAndModified(area,
2974 				virtual_page_address(area, page), false, modified)) {
2975 			count++;
2976 		}
2977 
2978 		page->modified |= modified;
2979 	}
2980 
2981 
2982 	if (page->accessed) {
2983 		count++;
2984 		page->accessed = false;
2985 	}
2986 
2987 	return count;
2988 }
2989 
2990 
2991 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2992 	mappings.
2993 	The function iterates through the page mappings and removes them until
2994 	encountering one that has been accessed. From then on it will continue to
2995 	iterate, but only clear the accessed flag of the mapping. The page's
2996 	\c modified bit will be updated accordingly, the \c accessed bit will be
2997 	cleared.
2998 	\return The number of mapping accessed bits encountered, including the
2999 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3000 		of the page have been removed.
3001 */
3002 int32
3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3004 {
3005 	ASSERT(page->WiredCount() == 0);
3006 
3007 	if (page->accessed)
3008 		return vm_clear_page_mapping_accessed_flags(page);
3009 
3010 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3011 		VMArea* area = mapping->area;
3012 		VMTranslationMap* map = area->address_space->TranslationMap();
3013 		addr_t address = virtual_page_address(area, page);
3014 		bool modified = false;
3015 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3016 			page->accessed = true;
3017 			page->modified |= modified;
3018 			return vm_clear_page_mapping_accessed_flags(page);
3019 		}
3020 		page->modified |= modified;
3021 	}
3022 
3023 	return 0;
3024 }
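
// Interpretation of the return value, as a minimal sketch based on the
// documentation above: a caller scanning for reclaimable pages could treat
// the result like this.
//
//	if (vm_remove_all_page_mappings_if_unaccessed(page) == 0) {
//		// no accessed bit was found -- the page is now fully unmapped
//		// and a candidate for being freed or paged out
//	}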
3025 
3026 
3027 static int
3028 display_mem(int argc, char** argv)
3029 {
3030 	bool physical = false;
3031 	addr_t copyAddress;
3032 	int32 displayWidth;
3033 	int32 itemSize;
3034 	int32 num = -1;
3035 	addr_t address;
3036 	int i = 1, j;
3037 
3038 	if (argc > 1 && argv[1][0] == '-') {
3039 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3040 			physical = true;
3041 			i++;
3042 		} else
3043 			i = 99;
3044 	}
3045 
3046 	if (argc < i + 1 || argc > i + 2) {
3047 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3048 			"\tdl - 8 bytes\n"
3049 			"\tdw - 4 bytes\n"
3050 			"\tds - 2 bytes\n"
3051 			"\tdb - 1 byte\n"
3052 			"\tstring - a whole string\n"
3053 			"  -p or --physical only allows memory from a single page to be "
3054 			"displayed.\n");
3055 		return 0;
3056 	}
3057 
3058 	address = parse_expression(argv[i]);
3059 
3060 	if (argc > i + 1)
3061 		num = parse_expression(argv[i + 1]);
3062 
3063 	// build the format string
3064 	if (strcmp(argv[0], "db") == 0) {
3065 		itemSize = 1;
3066 		displayWidth = 16;
3067 	} else if (strcmp(argv[0], "ds") == 0) {
3068 		itemSize = 2;
3069 		displayWidth = 8;
3070 	} else if (strcmp(argv[0], "dw") == 0) {
3071 		itemSize = 4;
3072 		displayWidth = 4;
3073 	} else if (strcmp(argv[0], "dl") == 0) {
3074 		itemSize = 8;
3075 		displayWidth = 2;
3076 	} else if (strcmp(argv[0], "string") == 0) {
3077 		itemSize = 1;
3078 		displayWidth = -1;
3079 	} else {
3080 		kprintf("display_mem called in an invalid way!\n");
3081 		return 0;
3082 	}
3083 
3084 	if (num <= 0)
3085 		num = displayWidth;
3086 
3087 	void* physicalPageHandle = NULL;
3088 
3089 	if (physical) {
3090 		int32 offset = address & (B_PAGE_SIZE - 1);
3091 		if (num * itemSize + offset > B_PAGE_SIZE) {
3092 			num = (B_PAGE_SIZE - offset) / itemSize;
3093 			kprintf("NOTE: number of bytes has been cut to page size\n");
3094 		}
3095 
3096 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3097 
3098 		if (vm_get_physical_page_debug(address, &copyAddress,
3099 				&physicalPageHandle) != B_OK) {
3100 			kprintf("getting the hardware page failed.\n");
3101 			return 0;
3102 		}
3103 
3104 		address += offset;
3105 		copyAddress += offset;
3106 	} else
3107 		copyAddress = address;
3108 
3109 	if (!strcmp(argv[0], "string")) {
3110 		kprintf("%p \"", (char*)copyAddress);
3111 
3112 		// string mode
3113 		for (i = 0; true; i++) {
3114 			char c;
3115 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3116 					!= B_OK
3117 				|| c == '\0') {
3118 				break;
3119 			}
3120 
3121 			if (c == '\n')
3122 				kprintf("\\n");
3123 			else if (c == '\t')
3124 				kprintf("\\t");
3125 			else {
3126 				if (!isprint(c))
3127 					c = '.';
3128 
3129 				kprintf("%c", c);
3130 			}
3131 		}
3132 
3133 		kprintf("\"\n");
3134 	} else {
3135 		// number mode
3136 		for (i = 0; i < num; i++) {
3137 			uint64 value;
3138 
3139 			if ((i % displayWidth) == 0) {
3140 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3141 				if (i != 0)
3142 					kprintf("\n");
3143 
3144 				kprintf("[0x%lx]  ", address + i * itemSize);
3145 
3146 				for (j = 0; j < displayed; j++) {
3147 					char c;
3148 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3149 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3150 						displayed = j;
3151 						break;
3152 					}
3153 					if (!isprint(c))
3154 						c = '.';
3155 
3156 					kprintf("%c", c);
3157 				}
3158 				if (num > displayWidth) {
3159 					// make sure the spacing in the last line is correct
3160 					for (j = displayed; j < displayWidth * itemSize; j++)
3161 						kprintf(" ");
3162 				}
3163 				kprintf("  ");
3164 			}
3165 
3166 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3167 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3168 				kprintf("read fault");
3169 				break;
3170 			}
3171 
3172 			switch (itemSize) {
3173 				case 1:
3174 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3175 					break;
3176 				case 2:
3177 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3178 					break;
3179 				case 4:
3180 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3181 					break;
3182 				case 8:
3183 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3184 					break;
3185 			}
3186 		}
3187 
3188 		kprintf("\n");
3189 	}
3190 
3191 	if (physical) {
3192 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3193 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3194 	}
3195 	return 0;
3196 }
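
// Example invocations of the debugger command above (addresses made up):
// "dw 0x80001000 8" dumps eight 32-bit values starting at virtual address
// 0x80001000, "db -p 0x9f000 16" dumps 16 bytes from the physical page
// containing 0x9f000, and "string 0x80001000" prints the NUL-terminated
// string found at that address.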
3197 
3198 
3199 static void
3200 dump_cache_tree_recursively(VMCache* cache, int level,
3201 	VMCache* highlightCache)
3202 {
3203 	// print this cache
3204 	for (int i = 0; i < level; i++)
3205 		kprintf("  ");
3206 	if (cache == highlightCache)
3207 		kprintf("%p <--\n", cache);
3208 	else
3209 		kprintf("%p\n", cache);
3210 
3211 	// recursively print its consumers
3212 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3213 			VMCache* consumer = it.Next();) {
3214 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3215 	}
3216 }
3217 
3218 
3219 static int
3220 dump_cache_tree(int argc, char** argv)
3221 {
3222 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3223 		kprintf("usage: %s <address>\n", argv[0]);
3224 		return 0;
3225 	}
3226 
3227 	addr_t address = parse_expression(argv[1]);
3228 	if (address == 0)
3229 		return 0;
3230 
3231 	VMCache* cache = (VMCache*)address;
3232 	VMCache* root = cache;
3233 
3234 	// find the root cache (the transitive source)
3235 	while (root->source != NULL)
3236 		root = root->source;
3237 
3238 	dump_cache_tree_recursively(root, 0, cache);
3239 
3240 	return 0;
3241 }
3242 
3243 
3244 const char*
3245 vm_cache_type_to_string(int32 type)
3246 {
3247 	switch (type) {
3248 		case CACHE_TYPE_RAM:
3249 			return "RAM";
3250 		case CACHE_TYPE_DEVICE:
3251 			return "device";
3252 		case CACHE_TYPE_VNODE:
3253 			return "vnode";
3254 		case CACHE_TYPE_NULL:
3255 			return "null";
3256 
3257 		default:
3258 			return "unknown";
3259 	}
3260 }
3261 
3262 
3263 #if DEBUG_CACHE_LIST
3264 
3265 static void
3266 update_cache_info_recursively(VMCache* cache, cache_info& info)
3267 {
3268 	info.page_count += cache->page_count;
3269 	if (cache->type == CACHE_TYPE_RAM)
3270 		info.committed += cache->committed_size;
3271 
3272 	// recurse
3273 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3274 			VMCache* consumer = it.Next();) {
3275 		update_cache_info_recursively(consumer, info);
3276 	}
3277 }
3278 
3279 
3280 static int
3281 cache_info_compare_page_count(const void* _a, const void* _b)
3282 {
3283 	const cache_info* a = (const cache_info*)_a;
3284 	const cache_info* b = (const cache_info*)_b;
3285 	if (a->page_count == b->page_count)
3286 		return 0;
3287 	return a->page_count < b->page_count ? 1 : -1;
3288 }
3289 
3290 
3291 static int
3292 cache_info_compare_committed(const void* _a, const void* _b)
3293 {
3294 	const cache_info* a = (const cache_info*)_a;
3295 	const cache_info* b = (const cache_info*)_b;
3296 	if (a->committed == b->committed)
3297 		return 0;
3298 	return a->committed < b->committed ? 1 : -1;
3299 }
3300 
3301 
3302 static void
3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3304 {
3305 	for (int i = 0; i < level; i++)
3306 		kprintf("  ");
3307 
3308 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3309 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3310 		cache->virtual_base, cache->virtual_end, cache->page_count);
3311 
3312 	if (level == 0)
3313 		kprintf("/%lu", info.page_count);
3314 
3315 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3316 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3317 
3318 		if (level == 0)
3319 			kprintf("/%lu", info.committed);
3320 	}
3321 
3322 	// areas
3323 	if (cache->areas != NULL) {
3324 		VMArea* area = cache->areas;
3325 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3326 			area->name, area->address_space->ID());
3327 
3328 		while (area->cache_next != NULL) {
3329 			area = area->cache_next;
3330 			kprintf(", %" B_PRId32, area->id);
3331 		}
3332 	}
3333 
3334 	kputs("\n");
3335 
3336 	// recurse
3337 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3338 			VMCache* consumer = it.Next();) {
3339 		dump_caches_recursively(consumer, info, level + 1);
3340 	}
3341 }
3342 
3343 
3344 static int
3345 dump_caches(int argc, char** argv)
3346 {
3347 	if (sCacheInfoTable == NULL) {
3348 		kprintf("No cache info table!\n");
3349 		return 0;
3350 	}
3351 
3352 	bool sortByPageCount = true;
3353 
3354 	for (int32 i = 1; i < argc; i++) {
3355 		if (strcmp(argv[i], "-c") == 0) {
3356 			sortByPageCount = false;
3357 		} else {
3358 			print_debugger_command_usage(argv[0]);
3359 			return 0;
3360 		}
3361 	}
3362 
3363 	uint32 totalCount = 0;
3364 	uint32 rootCount = 0;
3365 	off_t totalCommitted = 0;
3366 	page_num_t totalPages = 0;
3367 
3368 	VMCache* cache = gDebugCacheList;
3369 	while (cache) {
3370 		totalCount++;
3371 		if (cache->source == NULL) {
3372 			cache_info stackInfo;
3373 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3374 				? sCacheInfoTable[rootCount] : stackInfo;
3375 			rootCount++;
3376 			info.cache = cache;
3377 			info.page_count = 0;
3378 			info.committed = 0;
3379 			update_cache_info_recursively(cache, info);
3380 			totalCommitted += info.committed;
3381 			totalPages += info.page_count;
3382 		}
3383 
3384 		cache = cache->debug_next;
3385 	}
3386 
3387 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3388 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3389 			sortByPageCount
3390 				? &cache_info_compare_page_count
3391 				: &cache_info_compare_committed);
3392 	}
3393 
3394 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3395 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3396 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3397 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3398 			"page count" : "committed size");
3399 
3400 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3401 		for (uint32 i = 0; i < rootCount; i++) {
3402 			cache_info& info = sCacheInfoTable[i];
3403 			dump_caches_recursively(info.cache, info, 0);
3404 		}
3405 	} else
3406 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3407 
3408 	return 0;
3409 }
3410 
3411 #endif	// DEBUG_CACHE_LIST
3412 
3413 
3414 static int
3415 dump_cache(int argc, char** argv)
3416 {
3417 	VMCache* cache;
3418 	bool showPages = false;
3419 	int i = 1;
3420 
3421 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3422 		kprintf("usage: %s [-ps] <address>\n"
3423 			"  if -p is specified, all pages are shown; if -s is used,\n"
3424 			"  only the cache info is shown.\n", argv[0]);
3425 		return 0;
3426 	}
3427 	while (argv[i][0] == '-') {
3428 		char* arg = argv[i] + 1;
3429 		while (arg[0]) {
3430 			if (arg[0] == 'p')
3431 				showPages = true;
3432 			arg++;
3433 		}
3434 		i++;
3435 	}
3436 	if (argv[i] == NULL) {
3437 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	addr_t address = parse_expression(argv[i]);
3442 	if (address == 0)
3443 		return 0;
3444 
3445 	cache = (VMCache*)address;
3446 
3447 	cache->Dump(showPages);
3448 
3449 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3450 
3451 	return 0;
3452 }
3453 
3454 
3455 static void
3456 dump_area_struct(VMArea* area, bool mappings)
3457 {
3458 	kprintf("AREA: %p\n", area);
3459 	kprintf("name:\t\t'%s'\n", area->name);
3460 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3461 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3462 	kprintf("base:\t\t0x%lx\n", area->Base());
3463 	kprintf("size:\t\t0x%lx\n", area->Size());
3464 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3465 	kprintf("page_protection:%p\n", area->page_protections);
3466 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3467 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3468 	kprintf("cache:\t\t%p\n", area->cache);
3469 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3470 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3471 	kprintf("cache_next:\t%p\n", area->cache_next);
3472 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3473 
3474 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3475 	if (mappings) {
3476 		kprintf("page mappings:\n");
3477 		while (iterator.HasNext()) {
3478 			vm_page_mapping* mapping = iterator.Next();
3479 			kprintf("  %p", mapping->page);
3480 		}
3481 		kprintf("\n");
3482 	} else {
3483 		uint32 count = 0;
3484 		while (iterator.Next() != NULL) {
3485 			count++;
3486 		}
3487 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3488 	}
3489 }
3490 
3491 
3492 static int
3493 dump_area(int argc, char** argv)
3494 {
3495 	bool mappings = false;
3496 	bool found = false;
3497 	int32 index = 1;
3498 	VMArea* area;
3499 	addr_t num;
3500 
3501 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3502 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3503 			"All areas matching either id/address/name are listed. You can\n"
3504 			"restrict the check to a specific attribute by prefixing the\n"
3505 			"specifier with one of the id/contains/address/name keywords.\n"
3506 			"-m shows the area's mappings as well.\n");
3507 		return 0;
3508 	}
3509 
3510 	if (!strcmp(argv[1], "-m")) {
3511 		mappings = true;
3512 		index++;
3513 	}
3514 
3515 	int32 mode = 0xf;
3516 	if (!strcmp(argv[index], "id"))
3517 		mode = 1;
3518 	else if (!strcmp(argv[index], "contains"))
3519 		mode = 2;
3520 	else if (!strcmp(argv[index], "name"))
3521 		mode = 4;
3522 	else if (!strcmp(argv[index], "address"))
3523 		mode = 0;
3524 	if (mode != 0xf)
3525 		index++;
3526 
3527 	if (index >= argc) {
3528 		kprintf("No area specifier given.\n");
3529 		return 0;
3530 	}
3531 
3532 	num = parse_expression(argv[index]);
3533 
3534 	if (mode == 0) {
3535 		dump_area_struct((struct VMArea*)num, mappings);
3536 	} else {
3537 		// walk through the area list, looking for the arguments as a name
3538 
3539 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3540 		while ((area = it.Next()) != NULL) {
3541 			if (((mode & 4) != 0
3542 					&& !strcmp(argv[index], area->name))
3543 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3544 					|| (((mode & 2) != 0 && area->Base() <= num
3545 						&& area->Base() + area->Size() > num))))) {
3546 				dump_area_struct(area, mappings);
3547 				found = true;
3548 			}
3549 		}
3550 
3551 		if (!found)
3552 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3553 	}
3554 
3555 	return 0;
3556 }
3557 
3558 
3559 static int
3560 dump_area_list(int argc, char** argv)
3561 {
3562 	VMArea* area;
3563 	const char* name = NULL;
3564 	int32 id = 0;
3565 
3566 	if (argc > 1) {
3567 		id = parse_expression(argv[1]);
3568 		if (id == 0)
3569 			name = argv[1];
3570 	}
3571 
3572 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3573 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3574 		B_PRINTF_POINTER_WIDTH, "size");
3575 
3576 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3577 	while ((area = it.Next()) != NULL) {
3578 		if ((id != 0 && area->address_space->ID() != id)
3579 			|| (name != NULL && strstr(area->name, name) == NULL))
3580 			continue;
3581 
3582 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3583 			area->id, (void*)area->Base(), (void*)area->Size(),
3584 			area->protection, area->wiring, area->name);
3585 	}
3586 	return 0;
3587 }
3588 
3589 
3590 static int
3591 dump_available_memory(int argc, char** argv)
3592 {
3593 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3594 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3595 	return 0;
3596 }
3597 
3598 
3599 static int
3600 dump_mapping_info(int argc, char** argv)
3601 {
3602 	bool reverseLookup = false;
3603 	bool pageLookup = false;
3604 
3605 	int argi = 1;
3606 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3607 		const char* arg = argv[argi];
3608 		if (strcmp(arg, "-r") == 0) {
3609 			reverseLookup = true;
3610 		} else if (strcmp(arg, "-p") == 0) {
3611 			reverseLookup = true;
3612 			pageLookup = true;
3613 		} else {
3614 			print_debugger_command_usage(argv[0]);
3615 			return 0;
3616 		}
3617 	}
3618 
3619 	// We need at least one argument, the address. Optionally a thread ID can be
3620 	// specified.
3621 	if (argi >= argc || argi + 2 < argc) {
3622 		print_debugger_command_usage(argv[0]);
3623 		return 0;
3624 	}
3625 
3626 	uint64 addressValue;
3627 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3628 		return 0;
3629 
3630 	Team* team = NULL;
3631 	if (argi < argc) {
3632 		uint64 threadID;
3633 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3634 			return 0;
3635 
3636 		Thread* thread = Thread::GetDebug(threadID);
3637 		if (thread == NULL) {
3638 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3639 			return 0;
3640 		}
3641 
3642 		team = thread->team;
3643 	}
3644 
3645 	if (reverseLookup) {
3646 		phys_addr_t physicalAddress;
3647 		if (pageLookup) {
3648 			vm_page* page = (vm_page*)(addr_t)addressValue;
3649 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3650 		} else {
3651 			physicalAddress = (phys_addr_t)addressValue;
3652 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3653 		}
3654 
3655 		kprintf("    Team     Virtual Address      Area\n");
3656 		kprintf("--------------------------------------\n");
3657 
3658 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3659 			Callback()
3660 				:
3661 				fAddressSpace(NULL)
3662 			{
3663 			}
3664 
3665 			void SetAddressSpace(VMAddressSpace* addressSpace)
3666 			{
3667 				fAddressSpace = addressSpace;
3668 			}
3669 
3670 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3671 			{
3672 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3673 					virtualAddress);
3674 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3675 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3676 				else
3677 					kprintf("\n");
3678 				return false;
3679 			}
3680 
3681 		private:
3682 			VMAddressSpace*	fAddressSpace;
3683 		} callback;
3684 
3685 		if (team != NULL) {
3686 			// team specified -- get its address space
3687 			VMAddressSpace* addressSpace = team->address_space;
3688 			if (addressSpace == NULL) {
3689 				kprintf("Failed to get address space!\n");
3690 				return 0;
3691 			}
3692 
3693 			callback.SetAddressSpace(addressSpace);
3694 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3695 				physicalAddress, callback);
3696 		} else {
3697 			// no team specified -- iterate through all address spaces
3698 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3699 				addressSpace != NULL;
3700 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3701 				callback.SetAddressSpace(addressSpace);
3702 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3703 					physicalAddress, callback);
3704 			}
3705 		}
3706 	} else {
3707 		// get the address space
3708 		addr_t virtualAddress = (addr_t)addressValue;
3709 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3710 		VMAddressSpace* addressSpace;
3711 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3712 			addressSpace = VMAddressSpace::Kernel();
3713 		} else if (team != NULL) {
3714 			addressSpace = team->address_space;
3715 		} else {
3716 			Thread* thread = debug_get_debugged_thread();
3717 			if (thread == NULL || thread->team == NULL) {
3718 				kprintf("Failed to get team!\n");
3719 				return 0;
3720 			}
3721 
3722 			addressSpace = thread->team->address_space;
3723 		}
3724 
3725 		if (addressSpace == NULL) {
3726 			kprintf("Failed to get address space!\n");
3727 			return 0;
3728 		}
3729 
3730 		// let the translation map implementation do the job
3731 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3732 	}
3733 
3734 	return 0;
3735 }
3736 
3737 
3738 /*!	Deletes all areas and reserved regions in the given address space.
3739 
3740 	The caller must ensure that none of the areas has any wired ranges.
3741 
3742 	\param addressSpace The address space.
3743 	\param deletingAddressSpace \c true, if the address space is in the process
3744 		of being deleted.
3745 */
3746 void
3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3748 {
3749 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3750 		addressSpace->ID()));
3751 
3752 	addressSpace->WriteLock();
3753 
3754 	// remove all reserved areas in this address space
3755 	addressSpace->UnreserveAllAddressRanges(0);
3756 
3757 	// delete all the areas in this address space
3758 	while (VMArea* area = addressSpace->FirstArea()) {
3759 		ASSERT(!area->IsWired());
3760 		delete_area(addressSpace, area, deletingAddressSpace);
3761 	}
3762 
3763 	addressSpace->WriteUnlock();
3764 }
3765 
3766 
3767 static area_id
3768 vm_area_for(addr_t address, bool kernel)
3769 {
3770 	team_id team;
3771 	if (IS_USER_ADDRESS(address)) {
3772 		// we try the user team address space, if any
3773 		team = VMAddressSpace::CurrentID();
3774 		if (team < 0)
3775 			return team;
3776 	} else
3777 		team = VMAddressSpace::KernelID();
3778 
3779 	AddressSpaceReadLocker locker(team);
3780 	if (!locker.IsLocked())
3781 		return B_BAD_TEAM_ID;
3782 
3783 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3784 	if (area != NULL) {
3785 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3786 			return B_ERROR;
3787 
3788 		return area->id;
3789 	}
3790 
3791 	return B_ERROR;
3792 }
3793 
3794 
3795 /*!	Frees physical pages that were used during the boot process.
3796 	\a end is inclusive.
3797 */
3798 static void
3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3800 {
3801 	// free all physical pages in the specified range
3802 
3803 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3804 		phys_addr_t physicalAddress;
3805 		uint32 flags;
3806 
3807 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3808 			&& (flags & PAGE_PRESENT) != 0) {
3809 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3810 			if (page != NULL && page->State() != PAGE_STATE_FREE
3811 					&& page->State() != PAGE_STATE_CLEAR
3812 					&& page->State() != PAGE_STATE_UNUSED) {
3813 				DEBUG_PAGE_ACCESS_START(page);
3814 				vm_page_set_state(page, PAGE_STATE_FREE);
3815 			}
3816 		}
3817 	}
3818 
3819 	// unmap the memory
3820 	map->Unmap(start, end);
3821 }
3822 
3823 
3824 void
3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3826 {
3827 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3828 	addr_t end = start + (size - 1);
3829 	addr_t lastEnd = start;
3830 
3831 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3832 		(void*)start, (void*)end));
3833 
3834 	// The areas are sorted in virtual address space order, so
3835 	// we just have to find the holes between them that fall
3836 	// into the range we should dispose of
3837 
3838 	map->Lock();
3839 
3840 	for (VMAddressSpace::AreaIterator it
3841 				= VMAddressSpace::Kernel()->GetAreaIterator();
3842 			VMArea* area = it.Next();) {
3843 		addr_t areaStart = area->Base();
3844 		addr_t areaEnd = areaStart + (area->Size() - 1);
3845 
3846 		if (areaEnd < start)
3847 			continue;
3848 
3849 		if (areaStart > end) {
3850 			// we are done, the area is already beyond what we have to free
3851 			break;
3852 		}
3853 
3854 		if (areaStart > lastEnd) {
3855 			// this is something we can free
3856 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3857 				(void*)areaStart));
3858 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3859 		}
3860 
3861 		if (areaEnd >= end) {
3862 			lastEnd = areaEnd;
3863 				// no +1 to prevent potential overflow
3864 			break;
3865 		}
3866 
3867 		lastEnd = areaEnd + 1;
3868 	}
3869 
3870 	if (lastEnd < end) {
3871 		// we can also get rid of some space at the end of the area
3872 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3873 			(void*)end));
3874 		unmap_and_free_physical_pages(map, lastEnd, end);
3875 	}
3876 
3877 	map->Unlock();
3878 }
3879 
3880 
3881 static void
3882 create_preloaded_image_areas(struct preloaded_image* _image)
3883 {
3884 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3885 	char name[B_OS_NAME_LENGTH];
3886 	void* address;
3887 	int32 length;
3888 
3889 	// use file name to create a good area name
3890 	char* fileName = strrchr(image->name, '/');
3891 	if (fileName == NULL)
3892 		fileName = image->name;
3893 	else
3894 		fileName++;
3895 
3896 	length = strlen(fileName);
3897 	// make sure there is enough space for the suffix
3898 	if (length > 25)
3899 		length = 25;
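		// (25 characters plus the 5-character "_text"/"_data" suffix and the
		// terminating '\0' still fit into B_OS_NAME_LENGTH)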
3900 
3901 	memcpy(name, fileName, length);
3902 	strcpy(name + length, "_text");
3903 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3904 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3905 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3906 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3907 		// this will later be remapped read-only/executable by the
3908 		// ELF initialization code
3909 
3910 	strcpy(name + length, "_data");
3911 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3912 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3913 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3914 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3915 }
3916 
3917 
3918 /*!	Frees all areas previously allocated for the kernel arguments in the
3919 	kernel_args structure. Any boot loader resources contained in those
3920 	arguments must not be accessed anymore past this point.
3921 */
3922 void
3923 vm_free_kernel_args(kernel_args* args)
3924 {
3925 	uint32 i;
3926 
3927 	TRACE(("vm_free_kernel_args()\n"));
3928 
3929 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3930 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3931 		if (area >= B_OK)
3932 			delete_area(area);
3933 	}
3934 }
3935 
3936 
3937 static void
3938 allocate_kernel_args(kernel_args* args)
3939 {
3940 	TRACE(("allocate_kernel_args()\n"));
3941 
3942 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3943 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3944 
3945 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3946 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3947 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3948 	}
3949 }
3950 
3951 
3952 static void
3953 unreserve_boot_loader_ranges(kernel_args* args)
3954 {
3955 	TRACE(("unreserve_boot_loader_ranges()\n"));
3956 
3957 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3958 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3959 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3960 			args->virtual_allocated_range[i].size);
3961 	}
3962 }
3963 
3964 
3965 static void
3966 reserve_boot_loader_ranges(kernel_args* args)
3967 {
3968 	TRACE(("reserve_boot_loader_ranges()\n"));
3969 
3970 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3971 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3972 
3973 		// If the address is not a kernel address, we just skip it. The
3974 		// architecture-specific code has to deal with it.
3975 		if (!IS_KERNEL_ADDRESS(address)) {
3976 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3977 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3978 			continue;
3979 		}
3980 
3981 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3982 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3983 		if (status < B_OK)
3984 			panic("could not reserve boot loader ranges\n");
3985 	}
3986 }
3987 
3988 
3989 static addr_t
3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3991 {
3992 	size = PAGE_ALIGN(size);
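	// The virtual_allocated_range[] entries are kept sorted by start address:
	// look for a sufficiently large gap between two consecutive ranges first,
	// then after the last range, and finally before the first range. The
	// neighbouring range is extended to cover the new allocation.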
3993 
3994 	// find a slot in the virtual allocation addr range
3995 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3996 		// check to see if the space between this one and the last is big enough
3997 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3998 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3999 			+ args->virtual_allocated_range[i - 1].size;
4000 
4001 		addr_t base = alignment > 0
4002 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4003 
4004 		if (base >= KERNEL_BASE && base < rangeStart
4005 				&& rangeStart - base >= size) {
4006 			args->virtual_allocated_range[i - 1].size
4007 				+= base + size - previousRangeEnd;
4008 			return base;
4009 		}
4010 	}
4011 
4012 	// We didn't find a gap between the existing allocation ranges. That's OK;
4013 	// see if there's a gap after the last one.
4014 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4015 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4016 		+ args->virtual_allocated_range[lastEntryIndex].size;
4017 	addr_t base = alignment > 0
4018 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4019 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4020 		args->virtual_allocated_range[lastEntryIndex].size
4021 			+= base + size - lastRangeEnd;
4022 		return base;
4023 	}
4024 
4025 	// see if there's a gap before the first one
4026 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4027 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4028 		base = rangeStart - size;
4029 		if (alignment > 0)
4030 			base = ROUNDDOWN(base, alignment);
4031 
4032 		if (base >= KERNEL_BASE) {
4033 			args->virtual_allocated_range[0].start = base;
4034 			args->virtual_allocated_range[0].size += rangeStart - base;
4035 			return base;
4036 		}
4037 	}
4038 
4039 	return 0;
4040 }
4041 
4042 
4043 static bool
4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4045 {
4046 	// TODO: horrible brute-force method of determining if the page can be
4047 	// allocated
4048 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4049 		if (address >= args->physical_memory_range[i].start
4050 			&& address < args->physical_memory_range[i].start
4051 				+ args->physical_memory_range[i].size)
4052 			return true;
4053 	}
4054 	return false;
4055 }
4056 
4057 
4058 page_num_t
4059 vm_allocate_early_physical_page(kernel_args* args)
4060 {
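	// Try to extend one of the already allocated physical ranges by a single
	// page: first upwards (claim the page right after a range), then
	// downwards (claim the page right before a range).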
4061 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4062 		phys_addr_t nextPage;
4063 
4064 		nextPage = args->physical_allocated_range[i].start
4065 			+ args->physical_allocated_range[i].size;
4066 		// see if the page right after this allocated paddr range can be allocated
4067 		if (i + 1 < args->num_physical_allocated_ranges
4068 			&& args->physical_allocated_range[i + 1].size != 0) {
4069 			// see if the next page will collide with the next allocated range
4070 			if (nextPage >= args->physical_allocated_range[i+1].start)
4071 				continue;
4072 		}
4073 		// see if the next physical page fits in the memory block
4074 		if (is_page_in_physical_memory_range(args, nextPage)) {
4075 			// we got one!
4076 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4077 			return nextPage / B_PAGE_SIZE;
4078 		}
4079 	}
4080 
4081 	// Expanding upwards didn't work, try going downwards.
4082 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4083 		phys_addr_t nextPage;
4084 
4085 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4086 		// see if the page right before this allocated paddr range can be allocated
4087 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4088 			// see if this page would collide with the previous allocated range
4089 			if (nextPage < args->physical_allocated_range[i-1].start
4090 				+ args->physical_allocated_range[i-1].size)
4091 				continue;
4092 		}
4093 		// see if the next physical page fits in the memory block
4094 		if (is_page_in_physical_memory_range(args, nextPage)) {
4095 			// we got one!
4096 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4097 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4098 			return nextPage / B_PAGE_SIZE;
4099 		}
4100 	}
4101 
4102 	return 0;
4103 		// could not allocate a block
4104 }
4105 
4106 
4107 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4108 	allocate some pages before the VM is completely up.
4109 */
4110 addr_t
4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4112 	uint32 attributes, addr_t alignment)
4113 {
4114 	if (physicalSize > virtualSize)
4115 		physicalSize = virtualSize;
4116 
4117 	// find the vaddr to allocate at
4118 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4119 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4120 	if (virtualBase == 0) {
4121 		panic("vm_allocate_early: could not allocate virtual address\n");
4122 		return 0;
4123 	}
4124 
4125 	// map the pages
4126 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4127 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4128 		if (physicalAddress == 0)
4129 			panic("error allocating early page!\n");
4130 
4131 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4132 
4133 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4134 			physicalAddress * B_PAGE_SIZE, attributes,
4135 			&vm_allocate_early_physical_page);
4136 	}
4137 
4138 	return virtualBase;
4139 }
4140 
4141 
4142 /*!	The main entry point to initialize the VM. */
4143 status_t
4144 vm_init(kernel_args* args)
4145 {
4146 	struct preloaded_image* image;
4147 	void* address;
4148 	status_t err = 0;
4149 	uint32 i;
4150 
4151 	TRACE(("vm_init: entry\n"));
4152 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4153 	err = arch_vm_init(args);
4154 
4155 	// initialize some globals
4156 	vm_page_init_num_pages(args);
4157 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4158 
4159 	slab_init(args);
4160 
4161 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4162 	off_t heapSize = INITIAL_HEAP_SIZE;
4163 	// try to accommodate low-memory systems
4164 	while (heapSize > sAvailableMemory / 8)
4165 		heapSize /= 2;
4166 	if (heapSize < 1024 * 1024)
4167 		panic("vm_init: go buy some RAM please.");
4168 
4169 	// map in the new heap and initialize it
4170 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4171 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4172 	TRACE(("heap at 0x%lx\n", heapBase));
4173 	heap_init(heapBase, heapSize);
4174 #endif
4175 
4176 	// initialize the free page list and physical page mapper
4177 	vm_page_init(args);
4178 
4179 	// initialize the cache allocators
4180 	vm_cache_init(args);
4181 
4182 	{
4183 		status_t error = VMAreaHash::Init();
4184 		if (error != B_OK)
4185 			panic("vm_init: error initializing area hash table\n");
4186 	}
4187 
4188 	VMAddressSpace::Init();
4189 	reserve_boot_loader_ranges(args);
4190 
4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4192 	heap_init_post_area();
4193 #endif
4194 
4195 	// Do any further initialization that the architecture-dependent layers
4196 	// may need now
4197 	arch_vm_translation_map_init_post_area(args);
4198 	arch_vm_init_post_area(args);
4199 	vm_page_init_post_area(args);
4200 	slab_init_post_area();
4201 
4202 	// allocate areas to represent stuff that already exists
4203 
4204 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4205 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4206 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4207 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4208 #endif
4209 
4210 	allocate_kernel_args(args);
4211 
4212 	create_preloaded_image_areas(args->kernel_image);
4213 
4214 	// allocate areas for preloaded images
4215 	for (image = args->preloaded_images; image != NULL; image = image->next)
4216 		create_preloaded_image_areas(image);
4217 
4218 	// allocate kernel stacks
4219 	for (i = 0; i < args->num_cpus; i++) {
4220 		char name[64];
4221 
4222 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4223 		address = (void*)args->cpu_kstack[i].start;
4224 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4225 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4226 	}
4227 
4228 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4229 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4230 
4231 #if PARANOID_KERNEL_MALLOC
4232 	vm_block_address_range("uninitialized heap memory",
4233 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4234 #endif
4235 #if PARANOID_KERNEL_FREE
4236 	vm_block_address_range("freed heap memory",
4237 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4238 #endif
4239 
4240 	// create the object cache for the page mappings
4241 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4242 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4243 		NULL, NULL);
4244 	if (gPageMappingsObjectCache == NULL)
4245 		panic("failed to create page mappings object cache");
4246 
4247 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4248 
4249 #if DEBUG_CACHE_LIST
4250 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4251 		virtual_address_restrictions virtualRestrictions = {};
4252 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4253 		physical_address_restrictions physicalRestrictions = {};
4254 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4255 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4256 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4257 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4258 			&physicalRestrictions, (void**)&sCacheInfoTable);
4259 	}
4260 #endif	// DEBUG_CACHE_LIST
4261 
4262 	// add some debugger commands
4263 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4264 	add_debugger_command("area", &dump_area,
4265 		"Dump info about a particular area");
4266 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4267 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4268 #if DEBUG_CACHE_LIST
4269 	if (sCacheInfoTable != NULL) {
4270 		add_debugger_command_etc("caches", &dump_caches,
4271 			"List all VMCache trees",
4272 			"[ \"-c\" ]\n"
4273 			"All cache trees are listed sorted in decreasing order by number "
4274 				"of\n"
4275 			"used pages or, if \"-c\" is specified, by size of committed "
4276 				"memory.\n",
4277 			0);
4278 	}
4279 #endif
4280 	add_debugger_command("avail", &dump_available_memory,
4281 		"Dump available memory");
4282 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4283 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4284 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4285 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4286 	add_debugger_command("string", &display_mem, "dump strings");
4287 
4288 	add_debugger_command_etc("mapping", &dump_mapping_info,
4289 		"Print address mapping information",
4290 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4291 		"Prints low-level page mapping information for a given address. If\n"
4292 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4293 		"address that is looked up in the translation map of the current\n"
4294 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4295 		"\"-r\" is specified, <address> is a physical address that is\n"
4296 		"searched in the translation map of all teams, respectively the team\n"
4297 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4298 		"<address> is the address of a vm_page structure. The behavior is\n"
4299 		"equivalent to specifying \"-r\" with the physical address of that\n"
4300 		"page.\n",
4301 		0);
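	// Example invocations: "mapping 0x80001000" looks up a virtual address in
	// the current team; "mapping -r 0x12345000" searches all address spaces
	// for mappings of that physical page.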
4302 
4303 	TRACE(("vm_init: exit\n"));
4304 
4305 	vm_cache_init_post_heap();
4306 
4307 	return err;
4308 }
4309 
4310 
4311 status_t
4312 vm_init_post_sem(kernel_args* args)
4313 {
4314 	// This frees all unused boot loader resources and makes their space
4315 	// available again
4316 	arch_vm_init_end(args);
4317 	unreserve_boot_loader_ranges(args);
4318 
4319 	// Fill in all of the semaphores that were not allocated before.
4320 	// Since we're still single-threaded and only the kernel address space
4321 	// exists, it isn't that hard to find all of the ones we need to create.
4322 
4323 	arch_vm_translation_map_init_post_sem(args);
4324 
4325 	slab_init_post_sem();
4326 
4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4328 	heap_init_post_sem();
4329 #endif
4330 
4331 	return B_OK;
4332 }
4333 
4334 
4335 status_t
4336 vm_init_post_thread(kernel_args* args)
4337 {
4338 	vm_page_init_post_thread(args);
4339 	slab_init_post_thread();
4340 	return heap_init_post_thread();
4341 }
4342 
4343 
4344 status_t
4345 vm_init_post_modules(kernel_args* args)
4346 {
4347 	return arch_vm_init_post_modules(args);
4348 }
4349 
4350 
4351 void
4352 permit_page_faults(void)
4353 {
4354 	Thread* thread = thread_get_current_thread();
4355 	if (thread != NULL)
4356 		atomic_add(&thread->page_faults_allowed, 1);
4357 }
4358 
4359 
4360 void
4361 forbid_page_faults(void)
4362 {
4363 	Thread* thread = thread_get_current_thread();
4364 	if (thread != NULL)
4365 		atomic_add(&thread->page_faults_allowed, -1);
4366 }
4367 
4368 
4369 status_t
4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4371 	bool isUser, addr_t* newIP)
4372 {
4373 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4374 		faultAddress));
4375 
4376 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4377 
4378 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4379 	VMAddressSpace* addressSpace = NULL;
4380 
4381 	status_t status = B_OK;
4382 	*newIP = 0;
4383 	atomic_add((int32*)&sPageFaults, 1);
4384 
4385 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4386 		addressSpace = VMAddressSpace::GetKernel();
4387 	} else if (IS_USER_ADDRESS(pageAddress)) {
4388 		addressSpace = VMAddressSpace::GetCurrent();
4389 		if (addressSpace == NULL) {
4390 			if (!isUser) {
4391 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4392 					"memory!\n");
4393 				status = B_BAD_ADDRESS;
4394 				TPF(PageFaultError(-1,
4395 					VMPageFaultTracing
4396 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4397 			} else {
4398 				// XXX weird state.
4399 				panic("vm_page_fault: non kernel thread accessing user memory "
4400 					"that doesn't exist!\n");
4401 				status = B_BAD_ADDRESS;
4402 			}
4403 		}
4404 	} else {
4405 		// the hit was probably in the 64k DMZ between kernel and user space
4406 		// this keeps a user space thread from passing a buffer that crosses
4407 		// into kernel space
4408 		status = B_BAD_ADDRESS;
4409 		TPF(PageFaultError(-1,
4410 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4411 	}
4412 
4413 	if (status == B_OK) {
4414 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4415 			isUser, NULL);
4416 	}
4417 
4418 	if (status < B_OK) {
4419 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4420 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4421 			strerror(status), address, faultAddress, isWrite, isUser,
4422 			thread_get_current_thread_id());
4423 		if (!isUser) {
4424 			Thread* thread = thread_get_current_thread();
4425 			if (thread != NULL && thread->fault_handler != 0) {
4426 				// this will cause the arch-dependent page fault handler to
4427 				// modify the IP on the interrupt frame or whatever to return
4428 				// to this address
4429 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4430 			} else {
4431 				// unhandled page fault in the kernel
4432 				panic("vm_page_fault: unhandled page fault in kernel space at "
4433 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4434 			}
4435 		} else {
4436 			Thread* thread = thread_get_current_thread();
4437 
4438 #ifdef TRACE_FAULTS
4439 			VMArea* area = NULL;
4440 			if (addressSpace != NULL) {
4441 				addressSpace->ReadLock();
4442 				area = addressSpace->LookupArea(faultAddress);
4443 			}
4444 
4445 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4446 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4447 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4448 				thread->team->Name(), thread->team->id,
4449 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4450 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4451 					area->Base() : 0x0));
4452 
4453 			if (addressSpace != NULL)
4454 				addressSpace->ReadUnlock();
4455 #endif
4456 
4457 			// If the thread has a signal handler for SIGSEGV, we simply
4458 			// send it the signal. Otherwise we notify the user debugger
4459 			// first.
4460 			struct sigaction action;
4461 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4462 					&& action.sa_handler != SIG_DFL
4463 					&& action.sa_handler != SIG_IGN)
4464 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4465 					SIGSEGV)) {
4466 				Signal signal(SIGSEGV,
4467 					status == B_PERMISSION_DENIED
4468 						? SEGV_ACCERR : SEGV_MAPERR,
4469 					EFAULT, thread->team->id);
4470 				signal.SetAddress((void*)address);
4471 				send_signal_to_thread(thread, signal, 0);
4472 			}
4473 		}
4474 	}
4475 
4476 	if (addressSpace != NULL)
4477 		addressSpace->Put();
4478 
4479 	return B_HANDLED_INTERRUPT;
4480 }
4481 
4482 
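// Bundles the state needed to resolve a single soft fault: the (read locked)
// address space, the locked cache chain, the page reservation, and the page
// found or allocated by fault_get_page().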
4483 struct PageFaultContext {
4484 	AddressSpaceReadLocker	addressSpaceLocker;
4485 	VMCacheChainLocker		cacheChainLocker;
4486 
4487 	VMTranslationMap*		map;
4488 	VMCache*				topCache;
4489 	off_t					cacheOffset;
4490 	vm_page_reservation		reservation;
4491 	bool					isWrite;
4492 
4493 	// return values
4494 	vm_page*				page;
4495 	bool					restart;
4496 	bool					pageAllocated;
4497 
4498 
4499 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4500 		:
4501 		addressSpaceLocker(addressSpace, true),
4502 		map(addressSpace->TranslationMap()),
4503 		isWrite(isWrite)
4504 	{
4505 	}
4506 
4507 	~PageFaultContext()
4508 	{
4509 		UnlockAll();
4510 		vm_page_unreserve_pages(&reservation);
4511 	}
4512 
4513 	void Prepare(VMCache* topCache, off_t cacheOffset)
4514 	{
4515 		this->topCache = topCache;
4516 		this->cacheOffset = cacheOffset;
4517 		page = NULL;
4518 		restart = false;
4519 		pageAllocated = false;
4520 
4521 		cacheChainLocker.SetTo(topCache);
4522 	}
4523 
4524 	void UnlockAll(VMCache* exceptCache = NULL)
4525 	{
4526 		topCache = NULL;
4527 		addressSpaceLocker.Unlock();
4528 		cacheChainLocker.Unlock(exceptCache);
4529 	}
4530 };
4531 
4532 
4533 /*!	Gets the page that should be mapped into the area.
4534 	Returns an error code other than \c B_OK, if the page couldn't be found or
4535 	paged in. The locking state of the address space and the caches is undefined
4536 	in that case.
4537 	Returns \c B_OK with \c context.restart set to \c true, if the function
4538 	had to unlock the address space and all caches and is supposed to be called
4539 	again.
4540 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4541 	found. It is returned in \c context.page. The address space will still be
4542 	locked as well as all caches starting from the top cache to at least the
4543 	cache the page lives in.
4544 */
4545 static status_t
4546 fault_get_page(PageFaultContext& context)
4547 {
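	// Walk the cache chain from the top cache towards the bottommost source
	// cache, locking each cache on the way, until the page at
	// context.cacheOffset is found or read in from a backing store.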
4548 	VMCache* cache = context.topCache;
4549 	VMCache* lastCache = NULL;
4550 	vm_page* page = NULL;
4551 
4552 	while (cache != NULL) {
4553 		// We already hold the lock of the cache at this point.
4554 
4555 		lastCache = cache;
4556 
4557 		page = cache->LookupPage(context.cacheOffset);
4558 		if (page != NULL && page->busy) {
4559 			// the page is busy -- wait for it to become unbusy
4560 			context.UnlockAll(cache);
4561 			cache->ReleaseRefLocked();
4562 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4563 
4564 			// restart the whole process
4565 			context.restart = true;
4566 			return B_OK;
4567 		}
4568 
4569 		if (page != NULL)
4570 			break;
4571 
4572 		// The current cache does not contain the page we're looking for.
4573 
4574 		// see if the backing store has it
4575 		if (cache->HasPage(context.cacheOffset)) {
4576 			// insert a fresh page and mark it busy -- we're going to read it in
4577 			page = vm_page_allocate_page(&context.reservation,
4578 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4579 			cache->InsertPage(page, context.cacheOffset);
4580 
4581 			// We need to unlock all caches and the address space while reading
4582 			// the page in. Keep a reference to the cache around.
4583 			cache->AcquireRefLocked();
4584 			context.UnlockAll();
4585 
4586 			// read the page in
4587 			generic_io_vec vec;
4588 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4589 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4590 
4591 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4592 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4593 
4594 			cache->Lock();
4595 
4596 			if (status < B_OK) {
4597 				// on error remove and free the page
4598 				dprintf("reading page from cache %p returned: %s!\n",
4599 					cache, strerror(status));
4600 
4601 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4602 				cache->RemovePage(page);
4603 				vm_page_set_state(page, PAGE_STATE_FREE);
4604 
4605 				cache->ReleaseRefAndUnlock();
4606 				return status;
4607 			}
4608 
4609 			// mark the page unbusy again
4610 			cache->MarkPageUnbusy(page);
4611 
4612 			DEBUG_PAGE_ACCESS_END(page);
4613 
4614 			// Since we needed to unlock everything temporarily, the area
4615 			// situation might have changed. So we need to restart the whole
4616 			// process.
4617 			cache->ReleaseRefAndUnlock();
4618 			context.restart = true;
4619 			return B_OK;
4620 		}
4621 
4622 		cache = context.cacheChainLocker.LockSourceCache();
4623 	}
4624 
4625 	if (page == NULL) {
4626 		// There was no adequate page; determine the cache for a clean one.
4627 		// Read-only pages go into the deepest cache; only the topmost cache
4628 		// may have direct write access.
4629 		cache = context.isWrite ? context.topCache : lastCache;
4630 
4631 		// allocate a clean page
4632 		page = vm_page_allocate_page(&context.reservation,
4633 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4634 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4635 			page->physical_page_number));
4636 
4637 		// insert the new page into our cache
4638 		cache->InsertPage(page, context.cacheOffset);
4639 		context.pageAllocated = true;
4640 	} else if (page->Cache() != context.topCache && context.isWrite) {
4641 		// We have a page that has the data we want, but in the wrong cache
4642 		// object so we need to copy it and stick it into the top cache.
4643 		vm_page* sourcePage = page;
4644 
4645 		// TODO: If memory is low, it might be a good idea to steal the page
4646 		// from our source cache -- if possible, that is.
4647 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4648 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4649 
4650 		// To not needlessly kill concurrency we unlock all caches but the top
4651 		// one while copying the page. Lacking another mechanism to ensure that
4652 		// the source page doesn't disappear, we mark it busy.
4653 		sourcePage->busy = true;
4654 		context.cacheChainLocker.UnlockKeepRefs(true);
4655 
4656 		// copy the page
4657 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4658 			sourcePage->physical_page_number * B_PAGE_SIZE);
4659 
4660 		context.cacheChainLocker.RelockCaches(true);
4661 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4662 
4663 		// insert the new page into our cache
4664 		context.topCache->InsertPage(page, context.cacheOffset);
4665 		context.pageAllocated = true;
4666 	} else
4667 		DEBUG_PAGE_ACCESS_START(page);
4668 
4669 	context.page = page;
4670 	return B_OK;
4671 }
4672 
4673 
4674 /*!	Makes sure the address in the given address space is mapped.
4675 
4676 	\param addressSpace The address space.
4677 	\param originalAddress The address. Doesn't need to be page aligned.
4678 	\param isWrite If \c true the address shall be write-accessible.
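	\param isExecute If \c true the address shall be executable.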
4679 	\param isUser If \c true the access is requested by a userland team.
4680 	\param wirePage On success, if non \c NULL, the wired count of the page
4681 		mapped at the given address is incremented and the page is returned
4682 		via this parameter.
4683 	\return \c B_OK on success, another error code otherwise.
4684 */
4685 static status_t
4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4687 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4688 {
4689 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4690 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4691 		originalAddress, isWrite, isUser));
4692 
4693 	PageFaultContext context(addressSpace, isWrite);
4694 
4695 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4696 	status_t status = B_OK;
4697 
4698 	addressSpace->IncrementFaultCount();
4699 
4700 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4701 	// the pages upfront makes sure we don't have any cache locked, so that the
4702 	// page daemon/thief can do their job without problems.
4703 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4704 		originalAddress);
4705 	context.addressSpaceLocker.Unlock();
4706 	vm_page_reserve_pages(&context.reservation, reservePages,
4707 		addressSpace == VMAddressSpace::Kernel()
4708 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4709 
4710 	while (true) {
4711 		context.addressSpaceLocker.Lock();
4712 
4713 		// get the area the fault was in
4714 		VMArea* area = addressSpace->LookupArea(address);
4715 		if (area == NULL) {
4716 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4717 				"space\n", originalAddress);
4718 			TPF(PageFaultError(-1,
4719 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4720 			status = B_BAD_ADDRESS;
4721 			break;
4722 		}
4723 
4724 		// check permissions
4725 		uint32 protection = get_area_page_protection(area, address);
4726 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4727 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4728 				area->id, (void*)originalAddress);
4729 			TPF(PageFaultError(area->id,
4730 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4731 			status = B_PERMISSION_DENIED;
4732 			break;
4733 		}
4734 		if (isWrite && (protection
4735 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4736 			dprintf("write access attempted on write-protected area 0x%"
4737 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4738 			TPF(PageFaultError(area->id,
4739 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4740 			status = B_PERMISSION_DENIED;
4741 			break;
4742 		} else if (isExecute && (protection
4743 				& (B_EXECUTE_AREA
4744 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4745 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4746 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4747 			TPF(PageFaultError(area->id,
4748 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4749 			status = B_PERMISSION_DENIED;
4750 			break;
4751 		} else if (!isWrite && !isExecute && (protection
4752 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4753 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4754 				" at %p\n", area->id, (void*)originalAddress);
4755 			TPF(PageFaultError(area->id,
4756 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4757 			status = B_PERMISSION_DENIED;
4758 			break;
4759 		}
4760 
4761 		// We have the area, it was a valid access, so let's try to resolve the
4762 		// page fault now.
4763 		// At first, the top most cache from the area is investigated.
4764 
4765 		context.Prepare(vm_area_get_locked_cache(area),
4766 			address - area->Base() + area->cache_offset);
4767 
4768 		// See if this cache has a fault handler -- this will do all the work
4769 		// for us.
4770 		{
4771 			// Note, since the page fault is resolved with interrupts enabled,
4772 			// the fault handler could be called more than once for the same
4773 			// reason -- the store must take this into account.
4774 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4775 			if (status != B_BAD_HANDLER)
4776 				break;
4777 		}
4778 
4779 		// The top most cache has no fault handler, so let's see if the cache or
4780 		// its sources already have the page we're searching for (we're going
4781 		// from top to bottom).
4782 		status = fault_get_page(context);
4783 		if (status != B_OK) {
4784 			TPF(PageFaultError(area->id, status));
4785 			break;
4786 		}
4787 
4788 		if (context.restart)
4789 			continue;
4790 
4791 		// All went fine, all there is left to do is to map the page into the
4792 		// address space.
4793 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4794 			context.page));
4795 
4796 		// If the page doesn't reside in the area's cache, we need to make sure
4797 		// it's mapped in read-only, so that we cannot overwrite someone else's
4798 		// data (copy-on-write)
4799 		uint32 newProtection = protection;
4800 		if (context.page->Cache() != context.topCache && !isWrite)
4801 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4802 
4803 		bool unmapPage = false;
4804 		bool mapPage = true;
4805 
4806 		// check whether there's already a page mapped at the address
4807 		context.map->Lock();
4808 
4809 		phys_addr_t physicalAddress;
4810 		uint32 flags;
4811 		vm_page* mappedPage = NULL;
4812 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4813 			&& (flags & PAGE_PRESENT) != 0
4814 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4815 				!= NULL) {
4816 			// Yep there's already a page. If it's ours, we can simply adjust
4817 			// its protection. Otherwise we have to unmap it.
4818 			if (mappedPage == context.page) {
4819 				context.map->ProtectPage(area, address, newProtection);
4820 					// Note: We assume that ProtectPage() is atomic (i.e.
4821 					// the page isn't temporarily unmapped), otherwise we'd have
4822 					// to make sure it isn't wired.
4823 				mapPage = false;
4824 			} else
4825 				unmapPage = true;
4826 		}
4827 
4828 		context.map->Unlock();
4829 
4830 		if (unmapPage) {
4831 			// If the page is wired, we can't unmap it. Wait until it is unwired
4832 			// again and restart. Note that the page cannot be wired for
4833 			// writing, since it isn't in the topmost cache. So we can safely
4834 			// ignore ranges wired for writing (our own and other concurrent
4835 			// wiring attempts in progress) and in fact have to do that to avoid
4836 			// a deadlock.
4837 			VMAreaUnwiredWaiter waiter;
4838 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4839 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4840 				// unlock everything and wait
4841 				if (context.pageAllocated) {
4842 					// ... but since we allocated a page and inserted it into
4843 					// the top cache, remove and free it first. Otherwise we'd
4844 					// have a page from a lower cache mapped while an upper
4845 					// cache has a page that would shadow it.
4846 					context.topCache->RemovePage(context.page);
4847 					vm_page_free_etc(context.topCache, context.page,
4848 						&context.reservation);
4849 				} else
4850 					DEBUG_PAGE_ACCESS_END(context.page);
4851 
4852 				context.UnlockAll();
4853 				waiter.waitEntry.Wait();
4854 				continue;
4855 			}
4856 
4857 			// Note: The mapped page is a page of a lower cache. We are
4858 			// guaranteed to have that cache locked, our new page is a copy of
4859 			// that page, and the page is not busy. The logic for that guarantee
4860 			// is as follows: Since the page is mapped, it must live in the top
4861 			// cache (ruled out above) or any of its lower caches, and there is
4862 			// (was before the new page was inserted) no other page in any
4863 			// cache between the top cache and the page's cache (otherwise that
4864 			// would be mapped instead). That in turn means that our algorithm
4865 			// must have found it and therefore it cannot be busy either.
4866 			DEBUG_PAGE_ACCESS_START(mappedPage);
4867 			unmap_page(area, address);
4868 			DEBUG_PAGE_ACCESS_END(mappedPage);
4869 		}
4870 
4871 		if (mapPage) {
4872 			if (map_page(area, context.page, address, newProtection,
4873 					&context.reservation) != B_OK) {
4874 				// Mapping can only fail when the page mapping object couldn't
4875 				// be allocated. Apart from the missing mapping everything is
4876 				// fine, though. If this was a regular page fault, we'll simply
4877 				// leave and probably fault again. To make sure we'll have more
4878 				// luck then, we ensure that the minimum object reserve is
4879 				// available.
4880 				DEBUG_PAGE_ACCESS_END(context.page);
4881 
4882 				context.UnlockAll();
4883 
4884 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4885 						!= B_OK) {
4886 					// Apparently the situation is serious. Let's get ourselves
4887 					// killed.
4888 					status = B_NO_MEMORY;
4889 				} else if (wirePage != NULL) {
4890 					// The caller expects us to wire the page. Since
4891 					// object_cache_reserve() succeeded, we should now be able
4892 					// to allocate a mapping structure. Restart.
4893 					continue;
4894 				}
4895 
4896 				break;
4897 			}
4898 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4899 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4900 
4901 		// also wire the page, if requested
4902 		if (wirePage != NULL && status == B_OK) {
4903 			increment_page_wired_count(context.page);
4904 			*wirePage = context.page;
4905 		}
4906 
4907 		DEBUG_PAGE_ACCESS_END(context.page);
4908 
4909 		break;
4910 	}
4911 
4912 	return status;
4913 }
4914 
4915 
4916 status_t
4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4918 {
4919 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4920 }
4921 
4922 status_t
4923 vm_put_physical_page(addr_t vaddr, void* handle)
4924 {
4925 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4926 }
4927 
4928 
4929 status_t
4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4931 	void** _handle)
4932 {
4933 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4934 }
4935 
4936 status_t
4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4938 {
4939 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4940 }
4941 
4942 
4943 status_t
4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4945 {
4946 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4947 }
4948 
4949 status_t
4950 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4951 {
4952 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4953 }
4954 
4955 
4956 void
4957 vm_get_info(system_info* info)
4958 {
4959 	swap_get_info(info);
4960 
4961 	MutexLocker locker(sAvailableMemoryLock);
4962 	info->needed_memory = sNeededMemory;
4963 	info->free_memory = sAvailableMemory;
4964 }
4965 
4966 
4967 uint32
4968 vm_num_page_faults(void)
4969 {
4970 	return sPageFaults;
4971 }
4972 
4973 
4974 off_t
4975 vm_available_memory(void)
4976 {
4977 	MutexLocker locker(sAvailableMemoryLock);
4978 	return sAvailableMemory;
4979 }
4980 
4981 
4982 off_t
4983 vm_available_not_needed_memory(void)
4984 {
4985 	MutexLocker locker(sAvailableMemoryLock);
4986 	return sAvailableMemory - sNeededMemory;
4987 }
4988 
4989 
4990 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4991 	debugger.
4992 */
4993 off_t
4994 vm_available_not_needed_memory_debug(void)
4995 {
4996 	return sAvailableMemory - sNeededMemory;
4997 }
4998 
4999 
5000 size_t
5001 vm_kernel_address_space_left(void)
5002 {
5003 	return VMAddressSpace::Kernel()->FreeSpace();
5004 }
5005 
5006 
5007 void
5008 vm_unreserve_memory(size_t amount)
5009 {
5010 	mutex_lock(&sAvailableMemoryLock);
5011 
5012 	sAvailableMemory += amount;
5013 
5014 	mutex_unlock(&sAvailableMemoryLock);
5015 }
5016 
5017 
5018 status_t
5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5020 {
5021 	size_t reserve = kMemoryReserveForPriority[priority];
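	// kMemoryReserveForPriority[] keeps an emergency reserve per priority
	// level; the reservation below only succeeds if it leaves that reserve
	// untouched.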
5022 
5023 	MutexLocker locker(sAvailableMemoryLock);
5024 
5025 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5026 
5027 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5028 		sAvailableMemory -= amount;
5029 		return B_OK;
5030 	}
5031 
5032 	if (timeout <= 0)
5033 		return B_NO_MEMORY;
5034 
5035 	// turn timeout into an absolute timeout
5036 	timeout += system_time();
5037 
5038 	// loop until we've got the memory or the timeout occurs
5039 	do {
5040 		sNeededMemory += amount;
5041 
5042 		// call the low resource manager
5043 		locker.Unlock();
5044 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5045 			B_ABSOLUTE_TIMEOUT, timeout);
5046 		locker.Lock();
5047 
5048 		sNeededMemory -= amount;
5049 
5050 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5051 			sAvailableMemory -= amount;
5052 			return B_OK;
5053 		}
5054 	} while (timeout > system_time());
5055 
5056 	return B_NO_MEMORY;
5057 }
5058 
5059 
5060 status_t
5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5062 {
5063 	// NOTE: The caller is responsible for synchronizing calls to this function!
5064 
5065 	AddressSpaceReadLocker locker;
5066 	VMArea* area;
5067 	status_t status = locker.SetFromArea(id, area);
5068 	if (status != B_OK)
5069 		return status;
5070 
5071 	// nothing to do, if the type doesn't change
5072 	uint32 oldType = area->MemoryType();
5073 	if (type == oldType)
5074 		return B_OK;
5075 
5076 	// set the memory type of the area and the mapped pages
5077 	VMTranslationMap* map = area->address_space->TranslationMap();
5078 	map->Lock();
5079 	area->SetMemoryType(type);
5080 	map->ProtectArea(area, area->protection);
5081 	map->Unlock();
5082 
5083 	// set the physical memory type
5084 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5085 	if (error != B_OK) {
5086 		// reset the memory type of the area and the mapped pages
5087 		map->Lock();
5088 		area->SetMemoryType(oldType);
5089 		map->ProtectArea(area, area->protection);
5090 		map->Unlock();
5091 		return error;
5092 	}
5093 
5094 	return B_OK;
5096 }
5097 
5098 
5099 /*!	This function enforces some protection properties:
5100 	 - kernel areas must be W^X (after kernel startup)
5101 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5102 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5103 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5104 	   and B_KERNEL_WRITE_AREA.
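
	For example, a request for just B_READ_AREA | B_WRITE_AREA additionally
	gets B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, while a request for only
	B_READ_AREA gains just B_KERNEL_READ_AREA.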
5105 */
5106 static void
5107 fix_protection(uint32* protection)
5108 {
5109 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5110 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5111 			|| (*protection & B_WRITE_AREA) != 0)
5112 		&& !gKernelStartup)
5113 		panic("kernel areas cannot be both writable and executable!");
5114 
5115 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5116 		if ((*protection & B_USER_PROTECTION) == 0
5117 			|| (*protection & B_WRITE_AREA) != 0)
5118 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5119 		else
5120 			*protection |= B_KERNEL_READ_AREA;
5121 	}
5122 }
5123 
5124 
5125 static void
5126 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5127 {
5128 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5129 	info->area = area->id;
5130 	info->address = (void*)area->Base();
5131 	info->size = area->Size();
5132 	info->protection = area->protection;
5133 	info->lock = area->wiring;
5134 	info->team = area->address_space->ID();
5135 	info->copy_count = 0;
5136 	info->in_count = 0;
5137 	info->out_count = 0;
5138 		// TODO: retrieve real values here!
5139 
5140 	VMCache* cache = vm_area_get_locked_cache(area);
5141 
5142 	// Note, this is a simplification; the cache could be larger than this area
5143 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5144 
5145 	vm_area_put_locked_cache(cache);
5146 }
5147 
5148 
5149 static status_t
5150 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5151 {
5152 	// is newSize a multiple of B_PAGE_SIZE?
5153 	if (newSize & (B_PAGE_SIZE - 1))
5154 		return B_BAD_VALUE;
5155 
5156 	// lock all affected address spaces and the cache
5157 	VMArea* area;
5158 	VMCache* cache;
5159 
5160 	MultiAddressSpaceLocker locker;
5161 	AreaCacheLocker cacheLocker;
5162 
5163 	status_t status;
5164 	size_t oldSize;
5165 	bool anyKernelArea;
5166 	bool restart;
5167 
5168 	do {
5169 		anyKernelArea = false;
5170 		restart = false;
5171 
5172 		locker.Unset();
5173 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5174 		if (status != B_OK)
5175 			return status;
5176 		cacheLocker.SetTo(cache, true);	// already locked
5177 
5178 		// enforce restrictions
5179 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5180 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5181 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5182 				"resize kernel area %" B_PRId32 " (%s)\n",
5183 				team_get_current_team_id(), areaID, area->name);
5184 			return B_NOT_ALLOWED;
5185 		}
5186 		// TODO: Enforce all restrictions (team, etc.)!
5187 
5188 		oldSize = area->Size();
5189 		if (newSize == oldSize)
5190 			return B_OK;
5191 
5192 		if (cache->type != CACHE_TYPE_RAM)
5193 			return B_NOT_ALLOWED;
5194 
5195 		if (oldSize < newSize) {
5196 			// We need to check if all areas of this cache can be resized.
5197 			for (VMArea* current = cache->areas; current != NULL;
5198 					current = current->cache_next) {
5199 				if (!current->address_space->CanResizeArea(current, newSize))
5200 					return B_ERROR;
5201 				anyKernelArea
5202 					|= current->address_space == VMAddressSpace::Kernel();
5203 			}
5204 		} else {
5205 			// We're shrinking the areas, so we must make sure the affected
5206 			// ranges are not wired.
5207 			for (VMArea* current = cache->areas; current != NULL;
5208 					current = current->cache_next) {
5209 				anyKernelArea
5210 					|= current->address_space == VMAddressSpace::Kernel();
5211 
5212 				if (wait_if_area_range_is_wired(current,
5213 						current->Base() + newSize, oldSize - newSize, &locker,
5214 						&cacheLocker)) {
5215 					restart = true;
5216 					break;
5217 				}
5218 			}
5219 		}
5220 	} while (restart);
5221 
5222 	// Okay, looks good so far, so let's do it
5223 
5224 	int priority = kernel && anyKernelArea
5225 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5226 	uint32 allocationFlags = kernel && anyKernelArea
5227 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5228 
5229 	if (oldSize < newSize) {
5230 		// Growing the cache can fail, so we do it first.
5231 		status = cache->Resize(cache->virtual_base + newSize, priority);
5232 		if (status != B_OK)
5233 			return status;
5234 	}
5235 
5236 	for (VMArea* current = cache->areas; current != NULL;
5237 			current = current->cache_next) {
5238 		status = current->address_space->ResizeArea(current, newSize,
5239 			allocationFlags);
5240 		if (status != B_OK)
5241 			break;
5242 
5243 		// We also need to unmap all pages beyond the new size, if the area has
5244 		// shrunk
5245 		if (newSize < oldSize) {
5246 			VMCacheChainLocker cacheChainLocker(cache);
5247 			cacheChainLocker.LockAllSourceCaches();
5248 
5249 			unmap_pages(current, current->Base() + newSize,
5250 				oldSize - newSize);
5251 
5252 			cacheChainLocker.Unlock(cache);
5253 		}
5254 	}
5255 
5256 	if (status == B_OK) {
5257 		// Shrink or grow individual page protections if in use.
5258 		if (area->page_protections != NULL) {
5259 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5260 			uint8* newProtections
5261 				= (uint8*)realloc(area->page_protections, bytes);
5262 			if (newProtections == NULL)
5263 				status = B_NO_MEMORY;
5264 			else {
5265 				area->page_protections = newProtections;
5266 
5267 				if (oldSize < newSize) {
5268 					// init the additional page protections to that of the area
5269 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5270 					uint32 areaProtection = area->protection
5271 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5272 					memset(area->page_protections + offset,
5273 						areaProtection | (areaProtection << 4), bytes - offset);
5274 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5275 						uint8& entry = area->page_protections[offset - 1];
5276 						entry = (entry & 0x0f) | (areaProtection << 4);
5277 					}
5278 				}
5279 			}
5280 		}
5281 	}
5282 
5283 	// shrinking the cache can't fail, so we do it now
5284 	if (status == B_OK && newSize < oldSize)
5285 		status = cache->Resize(cache->virtual_base + newSize, priority);
5286 
5287 	if (status != B_OK) {
5288 		// Something failed -- resize the areas back to their original size.
5289 		// This can fail, too, in which case we're seriously screwed.
5290 		for (VMArea* current = cache->areas; current != NULL;
5291 				current = current->cache_next) {
5292 			if (current->address_space->ResizeArea(current, oldSize,
5293 					allocationFlags) != B_OK) {
5294 			panic("vm_resize_area(): Failed to resize and could not restore "
5295 				"the original state.");
5296 			}
5297 		}
5298 
5299 		cache->Resize(cache->virtual_base + oldSize, priority);
5300 	}
5301 
5302 	// TODO: we must honour the lock restrictions of this area
5303 	return status;
5304 }
5305 
5306 
5307 status_t
5308 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5309 {
5310 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5311 }
5312 
5313 
5314 status_t
5315 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5316 {
5317 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5318 }
5319 
5320 
5321 status_t
5322 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5323 	bool user)
5324 {
5325 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5326 }
5327 
5328 
5329 void
5330 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5331 {
5332 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5333 }
5334 
5335 
5336 /*!	Copies a range of memory directly from/to a page that might not be mapped
5337 	at the moment.
5338 
5339 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5340 	walks through the respective area's cache chain to find the physical page
5341 	and copies from/to it directly.
5342 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5343 	must not cross a page boundary.
5344 
5345 	\param teamID The team ID identifying the address space \a unsafeMemory is
5346 		to be interpreted in. Ignored if \a unsafeMemory is a kernel address
5347 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5348 		is passed, the address space of the thread returned by
5349 		debug_get_debugged_thread() is used.
5350 	\param unsafeMemory The start of the unsafe memory range to be copied
5351 		from/to.
5352 	\param buffer A safely accessible kernel buffer to be copied from/to.
5353 	\param size The number of bytes to be copied.
5354 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5355 		\a unsafeMemory, the other way around otherwise.
5356 */
5357 status_t
5358 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5359 	size_t size, bool copyToUnsafe)
5360 {
5361 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5362 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5363 		return B_BAD_VALUE;
5364 	}
5365 
5366 	// get the address space for the debugged thread
5367 	VMAddressSpace* addressSpace;
5368 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5369 		addressSpace = VMAddressSpace::Kernel();
5370 	} else if (teamID == B_CURRENT_TEAM) {
5371 		Thread* thread = debug_get_debugged_thread();
5372 		if (thread == NULL || thread->team == NULL)
5373 			return B_BAD_ADDRESS;
5374 
5375 		addressSpace = thread->team->address_space;
5376 	} else
5377 		addressSpace = VMAddressSpace::DebugGet(teamID);
5378 
5379 	if (addressSpace == NULL)
5380 		return B_BAD_ADDRESS;
5381 
5382 	// get the area
5383 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5384 	if (area == NULL)
5385 		return B_BAD_ADDRESS;
5386 
5387 	// search the page
5388 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5389 		+ area->cache_offset;
5390 	VMCache* cache = area->cache;
5391 	vm_page* page = NULL;
5392 	while (cache != NULL) {
5393 		page = cache->DebugLookupPage(cacheOffset);
5394 		if (page != NULL)
5395 			break;
5396 
5397 		// Page not found in this cache -- if it is paged out, we must not try
5398 		// to get it from lower caches.
5399 		if (cache->DebugHasPage(cacheOffset))
5400 			break;
5401 
5402 		cache = cache->source;
5403 	}
5404 
5405 	if (page == NULL)
5406 		return B_UNSUPPORTED;
5407 
5408 	// copy from/to physical memory
5409 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5410 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5411 
5412 	if (copyToUnsafe) {
5413 		if (page->Cache() != area->cache)
5414 			return B_UNSUPPORTED;
5415 
5416 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5417 	}
5418 
5419 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5420 }
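

// Usage sketch (editorial addition; the helper is hypothetical and only
// illustrates the call): reading a 32-bit value from a debugged team from
// within a kernel debugger command, where the page may not currently be
// mapped. The address is assumed to be 4-byte aligned, so the range cannot
// cross a page boundary.
static status_t
example_debug_read_user32(team_id team, void* unsafeAddress, uint32* _value)
{
	return vm_debug_copy_page_memory(team, unsafeAddress, _value,
		sizeof(*_value), false);
		// copyToUnsafe == false: copy from the debugged address into *_value
}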
5421 
5422 
5423 static inline bool
5424 validate_user_range(const void* addr, size_t size)
5425 {
5426 	addr_t address = (addr_t)addr;
5427 
5428 	// Check for overflows on all addresses.
5429 	if ((address + size) < address)
5430 		return false;
5431 
5432 	// Validate that the address does not cross the kernel/user boundary.
5433 	if (IS_USER_ADDRESS(address))
5434 		return IS_USER_ADDRESS(address + size);
5435 	else
5436 		return !IS_USER_ADDRESS(address + size);
5437 }
5438 
5439 
5440 //	#pragma mark - kernel public API
5441 
5442 
5443 status_t
5444 user_memcpy(void* to, const void* from, size_t size)
5445 {
5446 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5447 		return B_BAD_ADDRESS;
5448 
5449 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5450 		return B_BAD_ADDRESS;
5451 
5452 	return B_OK;
5453 }
5454 
5455 
5456 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5457 	the string in \a to, NULL-terminating the result.
5458 
5459 	\param to Pointer to the destination C-string.
5460 	\param from Pointer to the source C-string.
5461 	\param size Size in bytes of the string buffer pointed to by \a to.
5462 
5463 	\return strlen(\a from).
5464 */
5465 ssize_t
5466 user_strlcpy(char* to, const char* from, size_t size)
5467 {
5468 	if (to == NULL && size != 0)
5469 		return B_BAD_VALUE;
5470 	if (from == NULL)
5471 		return B_BAD_ADDRESS;
5472 
5473 	// Protect the source address from overflows.
5474 	size_t maxSize = size;
5475 	if ((addr_t)from + maxSize < (addr_t)from)
5476 		maxSize -= (addr_t)from + maxSize;
5477 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5478 		maxSize = USER_TOP - (addr_t)from;
5479 
5480 	if (!validate_user_range(to, maxSize))
5481 		return B_BAD_ADDRESS;
5482 
5483 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5484 	if (result < 0)
5485 		return result;
5486 
5487 	// If we hit the address overflow boundary, fail.
5488 	if ((size_t)result >= maxSize && maxSize < size)
5489 		return B_BAD_ADDRESS;
5490 
5491 	return result;
5492 }
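

// A minimal sketch (editorial addition; the function is hypothetical) of the
// usual pattern for copying a userland string argument, as also used by the
// syscalls further below:
static status_t
example_copy_user_name(const char* userName, char name[B_OS_NAME_LENGTH])
{
	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return B_OK;
}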
5493 
5494 
5495 status_t
5496 user_memset(void* s, char c, size_t count)
5497 {
5498 	if (!validate_user_range(s, count))
5499 		return B_BAD_ADDRESS;
5500 
5501 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5502 		return B_BAD_ADDRESS;
5503 
5504 	return B_OK;
5505 }
5506 
5507 
5508 /*!	Wires a single page at the given address.
5509 
5510 	\param team The team whose address space the address belongs to. Supports
5511 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5512 		parameter is ignored.
5513 	\param address The virtual address to wire down. Does not need to
5514 		be page aligned.
5515 	\param writable If \c true the page shall be writable.
5516 	\param info On success the info is filled in, among other things
5517 		containing the physical address the given virtual one translates to.
5518 	\return \c B_OK if the page could be wired, another error code otherwise.
5519 */
5520 status_t
5521 vm_wire_page(team_id team, addr_t address, bool writable,
5522 	VMPageWiringInfo* info)
5523 {
5524 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5525 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5526 
5527 	// compute the page protection that is required
5528 	bool isUser = IS_USER_ADDRESS(address);
5529 	uint32 requiredProtection = PAGE_PRESENT
5530 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5531 	if (writable)
5532 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5533 
5534 	// get and read lock the address space
5535 	VMAddressSpace* addressSpace = NULL;
5536 	if (isUser) {
5537 		if (team == B_CURRENT_TEAM)
5538 			addressSpace = VMAddressSpace::GetCurrent();
5539 		else
5540 			addressSpace = VMAddressSpace::Get(team);
5541 	} else
5542 		addressSpace = VMAddressSpace::GetKernel();
5543 	if (addressSpace == NULL)
5544 		return B_ERROR;
5545 
5546 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5547 
5548 	VMTranslationMap* map = addressSpace->TranslationMap();
5549 	status_t error = B_OK;
5550 
5551 	// get the area
5552 	VMArea* area = addressSpace->LookupArea(pageAddress);
5553 	if (area == NULL) {
5554 		addressSpace->Put();
5555 		return B_BAD_ADDRESS;
5556 	}
5557 
5558 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5559 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5560 
5561 	// mark the area range wired
5562 	area->Wire(&info->range);
5563 
5564 	// Lock the area's cache chain and the translation map. Needed to look
5565 	// up the page and play with its wired count.
5566 	cacheChainLocker.LockAllSourceCaches();
5567 	map->Lock();
5568 
5569 	phys_addr_t physicalAddress;
5570 	uint32 flags;
5571 	vm_page* page;
5572 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5573 		&& (flags & requiredProtection) == requiredProtection
5574 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5575 			!= NULL) {
5576 		// Already mapped with the correct permissions -- just increment
5577 		// the page's wired count.
5578 		increment_page_wired_count(page);
5579 
5580 		map->Unlock();
5581 		cacheChainLocker.Unlock();
5582 		addressSpaceLocker.Unlock();
5583 	} else {
5584 		// Let vm_soft_fault() map the page for us, if possible. We need
5585 		// to fully unlock to avoid deadlocks. Since we have already
5586 		// wired the area itself, nothing disturbing will happen with it
5587 		// in the meantime.
5588 		map->Unlock();
5589 		cacheChainLocker.Unlock();
5590 		addressSpaceLocker.Unlock();
5591 
5592 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5593 			isUser, &page);
5594 
5595 		if (error != B_OK) {
5596 			// The page could not be mapped -- clean up.
5597 			VMCache* cache = vm_area_get_locked_cache(area);
5598 			area->Unwire(&info->range);
5599 			cache->ReleaseRefAndUnlock();
5600 			addressSpace->Put();
5601 			return error;
5602 		}
5603 	}
5604 
5605 	info->physicalAddress
5606 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5607 			+ address % B_PAGE_SIZE;
5608 	info->page = page;
5609 
5610 	return B_OK;
5611 }
5612 
5613 
5614 /*!	Unwires a single page previously wired via vm_wire_page().
5615 
5616 	\param info The same object passed to vm_wire_page() before.
5617 */
5618 void
5619 vm_unwire_page(VMPageWiringInfo* info)
5620 {
5621 	// lock the address space
5622 	VMArea* area = info->range.area;
5623 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5624 		// takes over our reference
5625 
5626 	// lock the top cache
5627 	VMCache* cache = vm_area_get_locked_cache(area);
5628 	VMCacheChainLocker cacheChainLocker(cache);
5629 
5630 	if (info->page->Cache() != cache) {
5631 		// The page is not in the top cache, so we lock the whole cache chain
5632 		// before touching the page's wired count.
5633 		cacheChainLocker.LockAllSourceCaches();
5634 	}
5635 
5636 	decrement_page_wired_count(info->page);
5637 
5638 	// remove the wired range from the area
5639 	area->Unwire(&info->range);
5640 
5641 	cacheChainLocker.Unlock();
5642 }
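

// Usage sketch (editorial addition; the function is hypothetical and only
// demonstrates the pairing): wiring a single userland page to obtain its
// physical address, using it, and unwiring it with the same info object.
static status_t
example_with_wired_page(addr_t userAddress, bool writable)
{
	VMPageWiringInfo info;
	status_t error = vm_wire_page(B_CURRENT_TEAM, userAddress, writable,
		&info);
	if (error != B_OK)
		return error;

	// info.physicalAddress now holds the translation of userAddress; the page
	// can neither be unmapped nor paged out until vm_unwire_page() is called.
	dprintf("wired %#" B_PRIxADDR " -> %#" B_PRIxPHYSADDR "\n", userAddress,
		info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}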
5643 
5644 
5645 /*!	Wires down the given address range in the specified team's address space.
5646 
5647 	If successful the function
5648 	- acquires a reference to the specified team's address space,
5649 	- adds respective wired ranges to all areas that intersect with the given
5650 	  address range,
5651 	- makes sure all pages in the given address range are mapped with the
5652 	  requested access permissions and increments their wired count.
5653 
5654 	It fails when \a team doesn't specify a valid address space, when any part
5655 	of the specified address range is not covered by areas, when the concerned
5656 	areas don't allow mapping with the requested permissions, or when mapping
5657 	failed for another reason.
5658 
5659 	When successful, the call must be balanced by an unlock_memory_etc() call with
5660 	the exact same parameters.
5661 
5662 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5663 		supported.
5664 	\param address The start of the address range to be wired.
5665 	\param numBytes The size of the address range to be wired.
5666 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5667 		requests that the range must be wired writable ("read from device
5668 		into memory").
5669 	\return \c B_OK on success, another error code otherwise.
5670 */
5671 status_t
5672 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5673 {
5674 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5675 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5676 
5677 	// compute the page protection that is required
5678 	bool isUser = IS_USER_ADDRESS(address);
5679 	bool writable = (flags & B_READ_DEVICE) == 0;
5680 	uint32 requiredProtection = PAGE_PRESENT
5681 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5682 	if (writable)
5683 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5684 
5685 	uint32 mallocFlags = isUser
5686 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5687 
5688 	// get and read lock the address space
5689 	VMAddressSpace* addressSpace = NULL;
5690 	if (isUser) {
5691 		if (team == B_CURRENT_TEAM)
5692 			addressSpace = VMAddressSpace::GetCurrent();
5693 		else
5694 			addressSpace = VMAddressSpace::Get(team);
5695 	} else
5696 		addressSpace = VMAddressSpace::GetKernel();
5697 	if (addressSpace == NULL)
5698 		return B_ERROR;
5699 
5700 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5701 		// We get a new address space reference here. The one we got above will
5702 		// be freed by unlock_memory_etc().
5703 
5704 	VMTranslationMap* map = addressSpace->TranslationMap();
5705 	status_t error = B_OK;
5706 
5707 	// iterate through all concerned areas
5708 	addr_t nextAddress = lockBaseAddress;
5709 	while (nextAddress != lockEndAddress) {
5710 		// get the next area
5711 		VMArea* area = addressSpace->LookupArea(nextAddress);
5712 		if (area == NULL) {
5713 			error = B_BAD_ADDRESS;
5714 			break;
5715 		}
5716 
5717 		addr_t areaStart = nextAddress;
5718 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5719 
5720 		// allocate the wired range (do that before locking the cache to avoid
5721 		// deadlocks)
5722 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5723 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5724 		if (range == NULL) {
5725 			error = B_NO_MEMORY;
5726 			break;
5727 		}
5728 
5729 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5730 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5731 
5732 		// mark the area range wired
5733 		area->Wire(range);
5734 
5735 		// Depending on the area cache type and the wiring, we may not need to
5736 		// look at the individual pages.
5737 		if (area->cache_type == CACHE_TYPE_NULL
5738 			|| area->cache_type == CACHE_TYPE_DEVICE
5739 			|| area->wiring == B_FULL_LOCK
5740 			|| area->wiring == B_CONTIGUOUS) {
5741 			nextAddress = areaEnd;
5742 			continue;
5743 		}
5744 
5745 		// Lock the area's cache chain and the translation map. Needed to look
5746 		// up pages and play with their wired count.
5747 		cacheChainLocker.LockAllSourceCaches();
5748 		map->Lock();
5749 
5750 		// iterate through the pages and wire them
5751 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5752 			phys_addr_t physicalAddress;
5753 			uint32 flags;
5754 
5755 			vm_page* page;
5756 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5757 				&& (flags & requiredProtection) == requiredProtection
5758 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5759 					!= NULL) {
5760 				// Already mapped with the correct permissions -- just increment
5761 				// the page's wired count.
5762 				increment_page_wired_count(page);
5763 			} else {
5764 				// Let vm_soft_fault() map the page for us, if possible. We need
5765 				// to fully unlock to avoid deadlocks. Since we have already
5766 				// wired the area itself, nothing disturbing will happen with it
5767 				// in the meantime.
5768 				map->Unlock();
5769 				cacheChainLocker.Unlock();
5770 				addressSpaceLocker.Unlock();
5771 
5772 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5773 					false, isUser, &page);
5774 
5775 				addressSpaceLocker.Lock();
5776 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5777 				cacheChainLocker.LockAllSourceCaches();
5778 				map->Lock();
5779 			}
5780 
5781 			if (error != B_OK)
5782 				break;
5783 		}
5784 
5785 		map->Unlock();
5786 
5787 		if (error == B_OK) {
5788 			cacheChainLocker.Unlock();
5789 		} else {
5790 			// An error occurred, so abort right here. If the current address
5791 			// is the first in this area, unwire the area, since we won't get
5792 			// to it when reverting what we've done so far.
5793 			if (nextAddress == areaStart) {
5794 				area->Unwire(range);
5795 				cacheChainLocker.Unlock();
5796 				range->~VMAreaWiredRange();
5797 				free_etc(range, mallocFlags);
5798 			} else
5799 				cacheChainLocker.Unlock();
5800 
5801 			break;
5802 		}
5803 	}
5804 
5805 	if (error != B_OK) {
5806 		// An error occurred, so unwire all that we've already wired. Note that
5807 		// even if not a single page was wired, unlock_memory_etc() is called
5808 		// to put the address space reference.
5809 		addressSpaceLocker.Unlock();
5810 		unlock_memory_etc(team, (void*)lockBaseAddress,
5811 			nextAddress - lockBaseAddress, flags);
5812 	}
5813 
5814 	return error;
5815 }
5816 
5817 
5818 status_t
5819 lock_memory(void* address, size_t numBytes, uint32 flags)
5820 {
5821 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5822 }
5823 
5824 
5825 /*!	Unwires an address range previously wired with lock_memory_etc().
5826 
5827 	Note that a call to this function must balance a previous lock_memory_etc()
5828 	call with exactly the same parameters.
5829 */
5830 status_t
5831 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5832 {
5833 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5834 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5835 
5836 	// compute the page protection that is required
5837 	bool isUser = IS_USER_ADDRESS(address);
5838 	bool writable = (flags & B_READ_DEVICE) == 0;
5839 	uint32 requiredProtection = PAGE_PRESENT
5840 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5841 	if (writable)
5842 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5843 
5844 	uint32 mallocFlags = isUser
5845 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5846 
5847 	// get and read lock the address space
5848 	VMAddressSpace* addressSpace = NULL;
5849 	if (isUser) {
5850 		if (team == B_CURRENT_TEAM)
5851 			addressSpace = VMAddressSpace::GetCurrent();
5852 		else
5853 			addressSpace = VMAddressSpace::Get(team);
5854 	} else
5855 		addressSpace = VMAddressSpace::GetKernel();
5856 	if (addressSpace == NULL)
5857 		return B_ERROR;
5858 
5859 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5860 		// Take over the address space reference. We don't unlock until we're
5861 		// done.
5862 
5863 	VMTranslationMap* map = addressSpace->TranslationMap();
5864 	status_t error = B_OK;
5865 
5866 	// iterate through all concerned areas
5867 	addr_t nextAddress = lockBaseAddress;
5868 	while (nextAddress != lockEndAddress) {
5869 		// get the next area
5870 		VMArea* area = addressSpace->LookupArea(nextAddress);
5871 		if (area == NULL) {
5872 			error = B_BAD_ADDRESS;
5873 			break;
5874 		}
5875 
5876 		addr_t areaStart = nextAddress;
5877 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5878 
5879 		// Lock the area's top cache. This is a requirement for
5880 		// VMArea::Unwire().
5881 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5882 
5883 		// Depending on the area cache type and the wiring, we may not need to
5884 		// look at the individual pages.
5885 		if (area->cache_type == CACHE_TYPE_NULL
5886 			|| area->cache_type == CACHE_TYPE_DEVICE
5887 			|| area->wiring == B_FULL_LOCK
5888 			|| area->wiring == B_CONTIGUOUS) {
5889 			// unwire the range (to avoid deadlocks we delete the range after
5890 			// unlocking the cache)
5891 			nextAddress = areaEnd;
5892 			VMAreaWiredRange* range = area->Unwire(areaStart,
5893 				areaEnd - areaStart, writable);
5894 			cacheChainLocker.Unlock();
5895 			if (range != NULL) {
5896 				range->~VMAreaWiredRange();
5897 				free_etc(range, mallocFlags);
5898 			}
5899 			continue;
5900 		}
5901 
5902 		// Lock the area's cache chain and the translation map. Needed to look
5903 		// up pages and play with their wired count.
5904 		cacheChainLocker.LockAllSourceCaches();
5905 		map->Lock();
5906 
5907 		// iterate through the pages and unwire them
5908 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5909 			phys_addr_t physicalAddress;
5910 			uint32 flags;
5911 
5912 			vm_page* page;
5913 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5914 				&& (flags & PAGE_PRESENT) != 0
5915 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5916 					!= NULL) {
5917 				// The page is still mapped -- just decrement
5918 				// its wired count.
5919 				decrement_page_wired_count(page);
5920 			} else {
5921 				panic("unlock_memory_etc(): Failed to unwire page: address "
5922 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5923 					nextAddress);
5924 				error = B_BAD_VALUE;
5925 				break;
5926 			}
5927 		}
5928 
5929 		map->Unlock();
5930 
5931 		// All pages are unwired. Remove the area's wired range as well (to
5932 		// avoid deadlocks we delete the range after unlocking the cache).
5933 		VMAreaWiredRange* range = area->Unwire(areaStart,
5934 			areaEnd - areaStart, writable);
5935 
5936 		cacheChainLocker.Unlock();
5937 
5938 		if (range != NULL) {
5939 			range->~VMAreaWiredRange();
5940 			free_etc(range, mallocFlags);
5941 		}
5942 
5943 		if (error != B_OK)
5944 			break;
5945 	}
5946 
5947 	// get rid of the address space reference lock_memory_etc() acquired
5948 	addressSpace->Put();
5949 
5950 	return error;
5951 }
5952 
5953 
5954 status_t
5955 unlock_memory(void* address, size_t numBytes, uint32 flags)
5956 {
5957 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5958 }
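

// Usage sketch (editorial addition; hypothetical driver-style helper): wiring
// a buffer and unwiring it again with exactly the same parameters, as the
// documentation of lock_memory_etc() above requires. With flags == 0 the
// range is wired writable.
static status_t
example_with_locked_buffer(team_id team, void* buffer, size_t length)
{
	status_t error = lock_memory_etc(team, buffer, length, 0);
	if (error != B_OK)
		return error;

	// ... operate on the wired (unpageable) buffer ...

	return unlock_memory_etc(team, buffer, length, 0);
}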
5959 
5960 
5961 /*!	Similar to get_memory_map(), but also allows specifying the address space
5962 	for the memory in question and has saner semantics.
5963 	Returns \c B_OK when the complete range could be translated or
5964 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5965 	case the actual number of entries is written to \c *_numEntries. Any other
5966 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5967 	in this case.
5968 */
5969 status_t
5970 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5971 	physical_entry* table, uint32* _numEntries)
5972 {
5973 	uint32 numEntries = *_numEntries;
5974 	*_numEntries = 0;
5975 
5976 	VMAddressSpace* addressSpace;
5977 	addr_t virtualAddress = (addr_t)address;
5978 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5979 	phys_addr_t physicalAddress;
5980 	status_t status = B_OK;
5981 	int32 index = -1;
5982 	addr_t offset = 0;
5983 	bool interrupts = are_interrupts_enabled();
5984 
5985 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5986 		"entries)\n", team, address, numBytes, numEntries));
5987 
5988 	if (numEntries == 0 || numBytes == 0)
5989 		return B_BAD_VALUE;
5990 
5991 	// in which address space is the address to be found?
5992 	if (IS_USER_ADDRESS(virtualAddress)) {
5993 		if (team == B_CURRENT_TEAM)
5994 			addressSpace = VMAddressSpace::GetCurrent();
5995 		else
5996 			addressSpace = VMAddressSpace::Get(team);
5997 	} else
5998 		addressSpace = VMAddressSpace::GetKernel();
5999 
6000 	if (addressSpace == NULL)
6001 		return B_ERROR;
6002 
6003 	VMTranslationMap* map = addressSpace->TranslationMap();
6004 
6005 	if (interrupts)
6006 		map->Lock();
6007 
6008 	while (offset < numBytes) {
6009 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6010 		uint32 flags;
6011 
6012 		if (interrupts) {
6013 			status = map->Query((addr_t)address + offset, &physicalAddress,
6014 				&flags);
6015 		} else {
6016 			status = map->QueryInterrupt((addr_t)address + offset,
6017 				&physicalAddress, &flags);
6018 		}
6019 		if (status < B_OK)
6020 			break;
6021 		if ((flags & PAGE_PRESENT) == 0) {
6022 			panic("get_memory_map() called on unmapped memory!");
6023 			return B_BAD_ADDRESS;
6024 		}
6025 
6026 		if (index < 0 && pageOffset > 0) {
6027 			physicalAddress += pageOffset;
6028 			if (bytes > B_PAGE_SIZE - pageOffset)
6029 				bytes = B_PAGE_SIZE - pageOffset;
6030 		}
6031 
6032 		// need to switch to the next physical_entry?
6033 		if (index < 0 || table[index].address
6034 				!= physicalAddress - table[index].size) {
6035 			if ((uint32)++index + 1 > numEntries) {
6036 				// table too small
6037 				break;
6038 			}
6039 			table[index].address = physicalAddress;
6040 			table[index].size = bytes;
6041 		} else {
6042 			// the page fits into the current entry
6043 			table[index].size += bytes;
6044 		}
6045 
6046 		offset += bytes;
6047 	}
6048 
6049 	if (interrupts)
6050 		map->Unlock();
6051 
6052 	if (status != B_OK)
6053 		return status;
6054 
6055 	if ((uint32)index + 1 > numEntries) {
6056 		*_numEntries = index;
6057 		return B_BUFFER_OVERFLOW;
6058 	}
6059 
6060 	*_numEntries = index + 1;
6061 	return B_OK;
6062 }
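

// Usage sketch (editorial addition, hypothetical helper): translating a
// (previously wired) buffer into a small scatter/gather list. On
// B_BUFFER_OVERFLOW the table was merely too small; *_numEntries still tells
// how many entries were filled in.
static status_t
example_print_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 numEntries = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &numEntries);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;

	for (uint32 i = 0; i < numEntries; i++) {
		dprintf("run %" B_PRIu32 ": address %#" B_PRIxPHYSADDR ", size %#"
			B_PRIxPHYSADDR "\n", i, table[i].address, table[i].size);
	}

	return error;
}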
6063 
6064 
6065 /*!	According to the BeBook, this function should always succeed.
6066 	This is no longer the case.
6067 */
6068 extern "C" int32
6069 __get_memory_map_haiku(const void* address, size_t numBytes,
6070 	physical_entry* table, int32 numEntries)
6071 {
6072 	uint32 entriesRead = numEntries;
6073 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6074 		table, &entriesRead);
6075 	if (error != B_OK)
6076 		return error;
6077 
6078 	// close the entry list
6079 
6080 	// if it's only one entry, we will silently accept the missing ending
6081 	if (numEntries == 1)
6082 		return B_OK;
6083 
6084 	if (entriesRead + 1 > (uint32)numEntries)
6085 		return B_BUFFER_OVERFLOW;
6086 
6087 	table[entriesRead].address = 0;
6088 	table[entriesRead].size = 0;
6089 
6090 	return B_OK;
6091 }
6092 
6093 
6094 area_id
6095 area_for(void* address)
6096 {
6097 	return vm_area_for((addr_t)address, true);
6098 }
6099 
6100 
6101 area_id
6102 find_area(const char* name)
6103 {
6104 	return VMAreaHash::Find(name);
6105 }
6106 
6107 
6108 status_t
6109 _get_area_info(area_id id, area_info* info, size_t size)
6110 {
6111 	if (size != sizeof(area_info) || info == NULL)
6112 		return B_BAD_VALUE;
6113 
6114 	AddressSpaceReadLocker locker;
6115 	VMArea* area;
6116 	status_t status = locker.SetFromArea(id, area);
6117 	if (status != B_OK)
6118 		return status;
6119 
6120 	fill_area_info(area, info, size);
6121 	return B_OK;
6122 }
6123 
6124 
6125 status_t
6126 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6127 {
6128 	addr_t nextBase = *(addr_t*)cookie;
6129 
6130 	// we're already through the list
6131 	if (nextBase == (addr_t)-1)
6132 		return B_ENTRY_NOT_FOUND;
6133 
6134 	if (team == B_CURRENT_TEAM)
6135 		team = team_get_current_team_id();
6136 
6137 	AddressSpaceReadLocker locker(team);
6138 	if (!locker.IsLocked())
6139 		return B_BAD_TEAM_ID;
6140 
6141 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6142 	if (area == NULL) {
6143 		nextBase = (addr_t)-1;
6144 		return B_ENTRY_NOT_FOUND;
6145 	}
6146 
6147 	fill_area_info(area, info, size);
6148 	*cookie = (ssize_t)(area->Base() + 1);
6149 
6150 	return B_OK;
6151 }
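

// Usage sketch (editorial addition, hypothetical debug helper): iterating over
// all areas of a team with the cookie-based interface above. The cookie must
// start at 0.
static void
example_dump_team_areas(team_id team)
{
	ssize_t cookie = 0;
	area_info info;
	while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK) {
		dprintf("area %" B_PRId32 " \"%s\": %p, %lu bytes\n", info.area,
			info.name, info.address, (unsigned long)info.size);
	}
}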
6152 
6153 
6154 status_t
6155 set_area_protection(area_id area, uint32 newProtection)
6156 {
6157 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6158 		newProtection, true);
6159 }
6160 
6161 
6162 status_t
6163 resize_area(area_id areaID, size_t newSize)
6164 {
6165 	return vm_resize_area(areaID, newSize, true);
6166 }
6167 
6168 
6169 /*!	Transfers the specified area to a new team. The caller must be the owner
6170 	of the area.
6171 */
6172 area_id
6173 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6174 	bool kernel)
6175 {
6176 	area_info info;
6177 	status_t status = get_area_info(id, &info);
6178 	if (status != B_OK)
6179 		return status;
6180 
6181 	if (info.team != thread_get_current_thread()->team->id)
6182 		return B_PERMISSION_DENIED;
6183 
6184 	// We need to mark the area cloneable so the following operations work.
6185 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6186 	if (status != B_OK)
6187 		return status;
6188 
6189 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6190 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6191 	if (clonedArea < 0)
6192 		return clonedArea;
6193 
6194 	status = vm_delete_area(info.team, id, kernel);
6195 	if (status != B_OK) {
6196 		vm_delete_area(target, clonedArea, kernel);
6197 		return status;
6198 	}
6199 
6200 	// Now we can reset the protection to whatever it was before.
6201 	set_area_protection(clonedArea, info.protection);
6202 
6203 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6204 
6205 	return clonedArea;
6206 }
6207 
6208 
6209 extern "C" area_id
6210 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6211 	size_t numBytes, uint32 addressSpec, uint32 protection,
6212 	void** _virtualAddress)
6213 {
6214 	if (!arch_vm_supports_protection(protection))
6215 		return B_NOT_SUPPORTED;
6216 
6217 	fix_protection(&protection);
6218 
6219 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6220 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6221 		false);
6222 }
6223 
6224 
6225 area_id
6226 clone_area(const char* name, void** _address, uint32 addressSpec,
6227 	uint32 protection, area_id source)
6228 {
6229 	if ((protection & B_KERNEL_PROTECTION) == 0)
6230 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6231 
6232 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6233 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6234 }
6235 
6236 
6237 area_id
6238 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6239 	uint32 protection, uint32 flags, uint32 guardSize,
6240 	const virtual_address_restrictions* virtualAddressRestrictions,
6241 	const physical_address_restrictions* physicalAddressRestrictions,
6242 	void** _address)
6243 {
6244 	fix_protection(&protection);
6245 
6246 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6247 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6248 		true, _address);
6249 }
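

// Usage sketch (editorial addition; name and parameters are illustrative):
// creating a fully locked kernel buffer area via create_area_etc() with
// empty, i.e. unconstrained, address restrictions. \a size is assumed to be
// a multiple of B_PAGE_SIZE.
static area_id
example_create_kernel_buffer(void** _address, size_t size)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return create_area_etc(VMAddressSpace::KernelID(), "example buffer", size,
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
		&virtualRestrictions, &physicalRestrictions, _address);
}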
6250 
6251 
6252 extern "C" area_id
6253 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6254 	size_t size, uint32 lock, uint32 protection)
6255 {
6256 	fix_protection(&protection);
6257 
6258 	virtual_address_restrictions virtualRestrictions = {};
6259 	virtualRestrictions.address = *_address;
6260 	virtualRestrictions.address_specification = addressSpec;
6261 	physical_address_restrictions physicalRestrictions = {};
6262 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6263 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6264 		true, _address);
6265 }
6266 
6267 
6268 status_t
6269 delete_area(area_id area)
6270 {
6271 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6272 }
6273 
6274 
6275 //	#pragma mark - Userland syscalls
6276 
6277 
6278 status_t
6279 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6280 	addr_t size)
6281 {
6282 	// filter out some unavailable values (for userland)
6283 	switch (addressSpec) {
6284 		case B_ANY_KERNEL_ADDRESS:
6285 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6286 			return B_BAD_VALUE;
6287 	}
6288 
6289 	addr_t address;
6290 
6291 	if (!IS_USER_ADDRESS(userAddress)
6292 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6293 		return B_BAD_ADDRESS;
6294 
6295 	status_t status = vm_reserve_address_range(
6296 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6297 		RESERVED_AVOID_BASE);
6298 	if (status != B_OK)
6299 		return status;
6300 
6301 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6302 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6303 			(void*)address, size);
6304 		return B_BAD_ADDRESS;
6305 	}
6306 
6307 	return B_OK;
6308 }
6309 
6310 
6311 status_t
6312 _user_unreserve_address_range(addr_t address, addr_t size)
6313 {
6314 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6315 		(void*)address, size);
6316 }
6317 
6318 
6319 area_id
6320 _user_area_for(void* address)
6321 {
6322 	return vm_area_for((addr_t)address, false);
6323 }
6324 
6325 
6326 area_id
6327 _user_find_area(const char* userName)
6328 {
6329 	char name[B_OS_NAME_LENGTH];
6330 
6331 	if (!IS_USER_ADDRESS(userName)
6332 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6333 		return B_BAD_ADDRESS;
6334 
6335 	return find_area(name);
6336 }
6337 
6338 
6339 status_t
6340 _user_get_area_info(area_id area, area_info* userInfo)
6341 {
6342 	if (!IS_USER_ADDRESS(userInfo))
6343 		return B_BAD_ADDRESS;
6344 
6345 	area_info info;
6346 	status_t status = get_area_info(area, &info);
6347 	if (status < B_OK)
6348 		return status;
6349 
6350 	// TODO: do we want to prevent userland from seeing kernel protections?
6351 	//info.protection &= B_USER_PROTECTION;
6352 
6353 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6354 		return B_BAD_ADDRESS;
6355 
6356 	return status;
6357 }
6358 
6359 
6360 status_t
6361 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6362 {
6363 	ssize_t cookie;
6364 
6365 	if (!IS_USER_ADDRESS(userCookie)
6366 		|| !IS_USER_ADDRESS(userInfo)
6367 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6368 		return B_BAD_ADDRESS;
6369 
6370 	area_info info;
6371 	status_t status = _get_next_area_info(team, &cookie, &info,
6372 		sizeof(area_info));
6373 	if (status != B_OK)
6374 		return status;
6375 
6376 	//info.protection &= B_USER_PROTECTION;
6377 
6378 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6379 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6380 		return B_BAD_ADDRESS;
6381 
6382 	return status;
6383 }
6384 
6385 
6386 status_t
6387 _user_set_area_protection(area_id area, uint32 newProtection)
6388 {
6389 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6390 		return B_BAD_VALUE;
6391 
6392 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6393 		newProtection, false);
6394 }
6395 
6396 
6397 status_t
6398 _user_resize_area(area_id area, size_t newSize)
6399 {
6400 	// TODO: Since we restrict deleting of areas to those owned by the team,
6401 	// we should also do that for resizing (check other functions, too).
6402 	return vm_resize_area(area, newSize, false);
6403 }
6404 
6405 
6406 area_id
6407 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6408 	team_id target)
6409 {
6410 	// filter out some unavailable values (for userland)
6411 	switch (addressSpec) {
6412 		case B_ANY_KERNEL_ADDRESS:
6413 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6414 			return B_BAD_VALUE;
6415 	}
6416 
6417 	void* address;
6418 	if (!IS_USER_ADDRESS(userAddress)
6419 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6420 		return B_BAD_ADDRESS;
6421 
6422 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6423 	if (newArea < B_OK)
6424 		return newArea;
6425 
6426 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6427 		return B_BAD_ADDRESS;
6428 
6429 	return newArea;
6430 }
6431 
6432 
6433 area_id
6434 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6435 	uint32 protection, area_id sourceArea)
6436 {
6437 	char name[B_OS_NAME_LENGTH];
6438 	void* address;
6439 
6440 	// filter out some unavailable values (for userland)
6441 	switch (addressSpec) {
6442 		case B_ANY_KERNEL_ADDRESS:
6443 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6444 			return B_BAD_VALUE;
6445 	}
6446 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6447 		return B_BAD_VALUE;
6448 
6449 	if (!IS_USER_ADDRESS(userName)
6450 		|| !IS_USER_ADDRESS(userAddress)
6451 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6452 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6453 		return B_BAD_ADDRESS;
6454 
6455 	fix_protection(&protection);
6456 
6457 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6458 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6459 		false);
6460 	if (clonedArea < B_OK)
6461 		return clonedArea;
6462 
6463 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6464 		delete_area(clonedArea);
6465 		return B_BAD_ADDRESS;
6466 	}
6467 
6468 	return clonedArea;
6469 }
6470 
6471 
6472 area_id
6473 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6474 	size_t size, uint32 lock, uint32 protection)
6475 {
6476 	char name[B_OS_NAME_LENGTH];
6477 	void* address;
6478 
6479 	// filter out some unavailable values (for userland)
6480 	switch (addressSpec) {
6481 		case B_ANY_KERNEL_ADDRESS:
6482 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6483 			return B_BAD_VALUE;
6484 	}
6485 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6486 		return B_BAD_VALUE;
6487 
6488 	if (!IS_USER_ADDRESS(userName)
6489 		|| !IS_USER_ADDRESS(userAddress)
6490 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6491 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6492 		return B_BAD_ADDRESS;
6493 
6494 	if (addressSpec == B_EXACT_ADDRESS
6495 		&& IS_KERNEL_ADDRESS(address))
6496 		return B_BAD_VALUE;
6497 
6498 	if (addressSpec == B_ANY_ADDRESS)
6499 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6500 	if (addressSpec == B_BASE_ADDRESS)
6501 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6502 
6503 	fix_protection(&protection);
6504 
6505 	virtual_address_restrictions virtualRestrictions = {};
6506 	virtualRestrictions.address = address;
6507 	virtualRestrictions.address_specification = addressSpec;
6508 	physical_address_restrictions physicalRestrictions = {};
6509 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6510 		size, lock, protection, 0, 0, &virtualRestrictions,
6511 		&physicalRestrictions, false, &address);
6512 
6513 	if (area >= B_OK
6514 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6515 		delete_area(area);
6516 		return B_BAD_ADDRESS;
6517 	}
6518 
6519 	return area;
6520 }
6521 
6522 
6523 status_t
6524 _user_delete_area(area_id area)
6525 {
6526 	// Unlike the BeOS implementation, you can now only delete areas
6527 	// that you have created yourself from userland.
6528 	// The documentation for delete_area() explicitly states that this
6529 	// will be restricted in the future, and so it will.
6530 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6531 }
6532 
6533 
6534 // TODO: create a BeOS style call for this!
6535 
6536 area_id
6537 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6538 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6539 	int fd, off_t offset)
6540 {
6541 	char name[B_OS_NAME_LENGTH];
6542 	void* address;
6543 	area_id area;
6544 
6545 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6546 		return B_BAD_VALUE;
6547 
6548 	fix_protection(&protection);
6549 
6550 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6551 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6552 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6553 		return B_BAD_ADDRESS;
6554 
6555 	if (addressSpec == B_EXACT_ADDRESS) {
6556 		if ((addr_t)address + size < (addr_t)address
6557 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6558 			return B_BAD_VALUE;
6559 		}
6560 		if (!IS_USER_ADDRESS(address)
6561 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6562 			return B_BAD_ADDRESS;
6563 		}
6564 	}
6565 
6566 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6567 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6568 		false);
6569 	if (area < B_OK)
6570 		return area;
6571 
6572 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6573 		return B_BAD_ADDRESS;
6574 
6575 	return area;
6576 }
6577 
6578 
6579 status_t
6580 _user_unmap_memory(void* _address, size_t size)
6581 {
6582 	addr_t address = (addr_t)_address;
6583 
6584 	// check params
6585 	if (size == 0 || (addr_t)address + size < (addr_t)address
6586 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6587 		return B_BAD_VALUE;
6588 	}
6589 
6590 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6591 		return B_BAD_ADDRESS;
6592 
6593 	// Write lock the address space and ensure the address range is not wired.
6594 	AddressSpaceWriteLocker locker;
6595 	do {
6596 		status_t status = locker.SetTo(team_get_current_team_id());
6597 		if (status != B_OK)
6598 			return status;
6599 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6600 			size, &locker));
6601 
6602 	// unmap
6603 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6604 }
6605 
6606 
6607 status_t
6608 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6609 {
6610 	// check address range
6611 	addr_t address = (addr_t)_address;
6612 	size = PAGE_ALIGN(size);
6613 
6614 	if ((address % B_PAGE_SIZE) != 0)
6615 		return B_BAD_VALUE;
6616 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6617 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6618 		// weird error code required by POSIX
6619 		return ENOMEM;
6620 	}
6621 
6622 	// extend and check protection
6623 	if ((protection & ~B_USER_PROTECTION) != 0)
6624 		return B_BAD_VALUE;
6625 
6626 	fix_protection(&protection);
6627 
6628 	// We need to write lock the address space, since we're going to play with
6629 	// the areas. Also make sure that none of the areas is wired and that we're
6630 	// actually allowed to change the protection.
6631 	AddressSpaceWriteLocker locker;
6632 
6633 	bool restart;
6634 	do {
6635 		restart = false;
6636 
6637 		status_t status = locker.SetTo(team_get_current_team_id());
6638 		if (status != B_OK)
6639 			return status;
6640 
6641 		// First round: Check whether the whole range is covered by areas and we
6642 		// are allowed to modify them.
6643 		addr_t currentAddress = address;
6644 		size_t sizeLeft = size;
6645 		while (sizeLeft > 0) {
6646 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6647 			if (area == NULL)
6648 				return B_NO_MEMORY;
6649 
6650 			if ((area->protection & B_KERNEL_AREA) != 0)
6651 				return B_NOT_ALLOWED;
6652 			if (area->protection_max != 0
6653 				&& (protection & area->protection_max) != protection) {
6654 				return B_NOT_ALLOWED;
6655 			}
6656 
6657 			addr_t offset = currentAddress - area->Base();
6658 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6659 
6660 			AreaCacheLocker cacheLocker(area);
6661 
6662 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6663 					&locker, &cacheLocker)) {
6664 				restart = true;
6665 				break;
6666 			}
6667 
6668 			cacheLocker.Unlock();
6669 
6670 			currentAddress += rangeSize;
6671 			sizeLeft -= rangeSize;
6672 		}
6673 	} while (restart);
6674 
6675 	// Second round: If the protections differ from that of the area, create a
6676 	// page protection array and re-map mapped pages.
6677 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6678 	addr_t currentAddress = address;
6679 	size_t sizeLeft = size;
6680 	while (sizeLeft > 0) {
6681 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6682 		if (area == NULL)
6683 			return B_NO_MEMORY;
6684 
6685 		addr_t offset = currentAddress - area->Base();
6686 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6687 
6688 		currentAddress += rangeSize;
6689 		sizeLeft -= rangeSize;
6690 
6691 		if (area->page_protections == NULL) {
6692 			if (area->protection == protection)
6693 				continue;
6694 
6695 			status_t status = allocate_area_page_protections(area);
6696 			if (status != B_OK)
6697 				return status;
6698 		}
6699 
6700 		// We need to lock the complete cache chain, since we potentially unmap
6701 		// pages of lower caches.
6702 		VMCache* topCache = vm_area_get_locked_cache(area);
6703 		VMCacheChainLocker cacheChainLocker(topCache);
6704 		cacheChainLocker.LockAllSourceCaches();
6705 
6706 		for (addr_t pageAddress = area->Base() + offset;
6707 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6708 			map->Lock();
6709 
6710 			set_area_page_protection(area, pageAddress, protection);
6711 
6712 			phys_addr_t physicalAddress;
6713 			uint32 flags;
6714 
6715 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6716 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6717 				map->Unlock();
6718 				continue;
6719 			}
6720 
6721 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6722 			if (page == NULL) {
6723 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6724 					"\n", area, physicalAddress);
6725 				map->Unlock();
6726 				return B_ERROR;
6727 			}
6728 
6729 			// If the page is not in the topmost cache and write access is
6730 			// requested, we have to unmap it. Otherwise we can re-map it with
6731 			// the new protection.
6732 			bool unmapPage = page->Cache() != topCache
6733 				&& (protection & B_WRITE_AREA) != 0;
6734 
6735 			if (!unmapPage)
6736 				map->ProtectPage(area, pageAddress, protection);
6737 
6738 			map->Unlock();
6739 
6740 			if (unmapPage) {
6741 				DEBUG_PAGE_ACCESS_START(page);
6742 				unmap_page(area, pageAddress);
6743 				DEBUG_PAGE_ACCESS_END(page);
6744 			}
6745 		}
6746 	}
6747 
6748 	return B_OK;
6749 }
6750 
6751 
6752 status_t
6753 _user_sync_memory(void* _address, size_t size, uint32 flags)
6754 {
6755 	addr_t address = (addr_t)_address;
6756 	size = PAGE_ALIGN(size);
6757 
6758 	// check params
6759 	if ((address % B_PAGE_SIZE) != 0)
6760 		return B_BAD_VALUE;
6761 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6762 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6763 		// weird error code required by POSIX
6764 		return ENOMEM;
6765 	}
6766 
6767 	bool writeSync = (flags & MS_SYNC) != 0;
6768 	bool writeAsync = (flags & MS_ASYNC) != 0;
6769 	if (writeSync && writeAsync)
6770 		return B_BAD_VALUE;
6771 
6772 	if (size == 0 || (!writeSync && !writeAsync))
6773 		return B_OK;
6774 
6775 	// iterate through the range and sync all concerned areas
6776 	while (size > 0) {
6777 		// read lock the address space
6778 		AddressSpaceReadLocker locker;
6779 		status_t error = locker.SetTo(team_get_current_team_id());
6780 		if (error != B_OK)
6781 			return error;
6782 
6783 		// get the first area
6784 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6785 		if (area == NULL)
6786 			return B_NO_MEMORY;
6787 
6788 		uint32 offset = address - area->Base();
6789 		size_t rangeSize = min_c(area->Size() - offset, size);
6790 		offset += area->cache_offset;
6791 
6792 		// lock the cache
6793 		AreaCacheLocker cacheLocker(area);
6794 		if (!cacheLocker)
6795 			return B_BAD_VALUE;
6796 		VMCache* cache = area->cache;
6797 
6798 		locker.Unlock();
6799 
6800 		uint32 firstPage = offset >> PAGE_SHIFT;
6801 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6802 
6803 		// write the pages
6804 		if (cache->type == CACHE_TYPE_VNODE) {
6805 			if (writeSync) {
6806 				// synchronous
6807 				error = vm_page_write_modified_page_range(cache, firstPage,
6808 					endPage);
6809 				if (error != B_OK)
6810 					return error;
6811 			} else {
6812 				// asynchronous
6813 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6814 				// TODO: This is probably not quite what is supposed to happen.
6815 				// Especially when a lot has to be written, it might take ages
6816 				// until it really hits the disk.
6817 			}
6818 		}
6819 
6820 		address += rangeSize;
6821 		size -= rangeSize;
6822 	}
6823 
6824 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6825 	// synchronize multiple mappings of the same file. In our VM they never get
6826 	// out of sync, though, so we don't have to do anything.
6827 
6828 	return B_OK;
6829 }
6830 
6831 
6832 status_t
6833 _user_memory_advice(void* _address, size_t size, uint32 advice)
6834 {
6835 	addr_t address = (addr_t)_address;
6836 	if ((address % B_PAGE_SIZE) != 0)
6837 		return B_BAD_VALUE;
6838 
6839 	size = PAGE_ALIGN(size);
6840 	if (address + size < address || !IS_USER_ADDRESS(address)
6841 		|| !IS_USER_ADDRESS(address + size)) {
6842 		// weird error code required by POSIX
6843 		return B_NO_MEMORY;
6844 	}
6845 
6846 	switch (advice) {
6847 		case MADV_NORMAL:
6848 		case MADV_SEQUENTIAL:
6849 		case MADV_RANDOM:
6850 		case MADV_WILLNEED:
6851 		case MADV_DONTNEED:
6852 			// TODO: Implement!
6853 			break;
6854 
6855 		case MADV_FREE:
6856 		{
6857 			AddressSpaceWriteLocker locker;
6858 			do {
6859 				status_t status = locker.SetTo(team_get_current_team_id());
6860 				if (status != B_OK)
6861 					return status;
6862 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6863 					address, size, &locker));
6864 
6865 			discard_address_range(locker.AddressSpace(), address, size, false);
6866 			break;
6867 		}
6868 
6869 		default:
6870 			return B_BAD_VALUE;
6871 	}
6872 
6873 	return B_OK;
6874 }
6875 
6876 
6877 status_t
6878 _user_get_memory_properties(team_id teamID, const void* address,
6879 	uint32* _protected, uint32* _lock)
6880 {
6881 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6882 		return B_BAD_ADDRESS;
6883 
6884 	AddressSpaceReadLocker locker;
6885 	status_t error = locker.SetTo(teamID);
6886 	if (error != B_OK)
6887 		return error;
6888 
6889 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6890 	if (area == NULL)
6891 		return B_NO_MEMORY;
6892 
6893 
6894 	uint32 protection = area->protection;
6895 	if (area->page_protections != NULL)
6896 		protection = get_area_page_protection(area, (addr_t)address);
6897 
6898 	uint32 wiring = area->wiring;
6899 
6900 	locker.Unlock();
6901 
6902 	error = user_memcpy(_protected, &protection, sizeof(protection));
6903 	if (error != B_OK)
6904 		return error;
6905 
6906 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6907 
6908 	return error;
6909 }
6910 
6911 
6912 // An ordered list of non-overlapping ranges to track mlock/munlock locking.
6913 // It is allowed to call mlock/munlock in unbalanced ways (lock a range
6914 // multiple times, unlock a part of it, lock several consecutive ranges and
6915 // unlock them in one go, etc.). However, the low-level lock_memory() and
6916 // unlock_memory() calls require the locks/unlocks to be balanced (you lock a
6917 // fixed range, and then unlock exactly the same range). This list keeps track
6918 // of exactly what was locked, so that we can unlock the correct ranges.
6919 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> {
6920 	addr_t start;
6921 	addr_t end;
6922 
6923 	status_t LockMemory()
6924 	{
6925 		return lock_memory((void*)start, end - start, 0);
6926 	}
6927 
6928 	status_t UnlockMemory()
6929 	{
6930 		return unlock_memory((void*)start, end - start, 0);
6931 	}
6932 
6933 	status_t Move(addr_t start, addr_t end)
6934 	{
6935 		status_t result = lock_memory((void*)start, end - start, 0);
6936 		if (result != B_OK)
6937 			return result;
6938 
6939 		result = UnlockMemory();
6940 
6941 		if (result != B_OK) {
6942 			// What can we do if the unlock fails?
6943 			panic("Failed to unlock memory: %s", strerror(result));
6944 			return result;
6945 		}
6946 
6947 		this->start = start;
6948 		this->end = end;
6949 
6950 		return B_OK;
6951 	}
6952 };
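

// Editorial illustration (hypothetical addresses) of why the list above is
// needed: userland may lock overlapping ranges and unlock them differently,
//
//	mlock((void*)0x10000, 3 * B_PAGE_SIZE);
//	mlock((void*)0x11000, 4 * B_PAGE_SIZE);
//	munlock((void*)0x10000, 5 * B_PAGE_SIZE);
//
// while lock_memory()/unlock_memory() must be balanced with exactly matching
// ranges. The list records what is currently locked, so the syscalls below
// can translate between the two models.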
6953 
6954 
6955 status_t
6956 _user_mlock(const void* address, size_t size) {
6957 	// Maybe there's nothing to do, in which case, do nothing
6958 	if (size == 0)
6959 		return B_OK;
6960 
6961 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
6962 	// reject the call otherwise)
6963 	if ((addr_t)address % B_PAGE_SIZE != 0)
6964 		return EINVAL;
6965 
6966 	size = ROUNDUP(size, B_PAGE_SIZE);
6967 
6968 	addr_t endAddress = (addr_t)address + size;
6969 
6970 	// Pre-allocate a linked list element we may need (it's simpler to do it
6971 	// now than run out of memory in the middle of changing things)
6972 	LockedPages* newRange = new(std::nothrow) LockedPages();
6973 	if (newRange == NULL)
6974 		return ENOMEM;
6975 
6976 	// Get and lock the team
6977 	Team* team = thread_get_current_thread()->team;
6978 	TeamLocker teamLocker(team);
6979 	teamLocker.Lock();
6980 
6981 	status_t error = B_OK;
6982 	LockedPagesList* lockedPages = &team->locked_pages_list;
6983 
6984 	// Locate the first locked range possibly overlapping ours
6985 	LockedPages* currentRange = lockedPages->Head();
6986 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
6987 		currentRange = lockedPages->GetNext(currentRange);
6988 
6989 	if (currentRange == NULL || currentRange->start >= endAddress) {
6990 		// No existing range is overlapping with ours. We can just lock our
6991 		// range and stop here.
6992 		newRange->start = (addr_t)address;
6993 		newRange->end = endAddress;
6994 		error = newRange->LockMemory();
6995 		if (error != B_OK) {
6996 			delete newRange;
6997 			return error;
6998 		}
6999 		lockedPages->InsertBefore(currentRange, newRange);
7000 		return B_OK;
7001 	}
7002 
7003 	// We get here when there is at least one existing overlapping range.
7004 
7005 	if (currentRange->start <= (addr_t)address) {
7006 		if (currentRange->end >= endAddress) {
7007 			// An existing range is already fully covering the pages we need to
7008 			// lock. Nothing to do then.
7009 			delete newRange;
7010 			return B_OK;
7011 		} else {
7012 			// An existing range covers the start of the area we want to lock.
7013 			// Advance our start address to avoid it.
7014 			address = (void*)currentRange->end;
7015 
7016 			// Move on to the next range for the next step
7017 			currentRange = lockedPages->GetNext(currentRange);
7018 		}
7019 	}
7020 
7021 	// First, lock the new range
7022 	newRange->start = (addr_t)address;
7023 	newRange->end = endAddress;
7024 	error = newRange->LockMemory();
7025 	if (error != B_OK) {
7026 		delete newRange;
7027 		return error;
7028 	}
7029 
7030 	// Unlock and remove all existing ranges fully contained in the new range
7031 	while (currentRange != NULL && currentRange->end < endAddress) {
7032 		// The existing range is fully contained inside the new one we're
7033 		// locking. Unlock and delete it; the single new range takes its
7034 		// place (this limits fragmentation of the range list and is simpler
7035 		// to manage)
7036 		error = currentRange->UnlockMemory();
7037 		if (error != B_OK) {
7038 			panic("Failed to unlock a memory range: %s", strerror(error));
7039 			newRange->UnlockMemory();
7040 			delete newRange;
7041 			return error;
7042 		}
7043 		LockedPages* temp = currentRange;
7044 		currentRange = lockedPages->GetNext(currentRange);
7045 		lockedPages->Remove(temp);
7046 		delete temp;
7047 	}
7048 
7049 	if (currentRange != NULL) {
7050 		// One last range may cover the end of the area we're trying to lock
7051 
7052 		if (currentRange->start == (addr_t)address) {
7053 			// Two existing ranges (one covering the start, this one covering
7054 			// the end) already cover the whole area we're after. Undo our
7055 			// extra lock and free the new allocation
7056 			error = newRange->UnlockMemory();
7057 			delete newRange;
7058 			return error;
7059 		}
7060 
7061 		if (currentRange->start < endAddress) {
7062 			// Avoid overlap by moving the last range's start up to endAddress
7063 			error = currentRange->Move(endAddress, currentRange->end);
7064 			if (error != B_OK) {
7065 				panic("Failed to move a memory range: %s", strerror(error));
7066 				newRange->UnlockMemory();
7067 				delete newRange;
7068 				return error;
7069 			}
7070 		}
7071 	}
7072 
7073 	// Finally, store the new range in the locked list
7074 	lockedPages->InsertBefore(currentRange, newRange);
7075 	return B_OK;
7076 }
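
/* Worked example for the merging logic above (editorial addition): with the
 * list holding [0x1000, 0x2000), [0x3000, 0x4000) and [0x6000, 0x8000), a
 * call covering [0x1000, 0x7000) proceeds as follows: the first range already
 * covers the start, so the new range is trimmed to [0x2000, 0x7000) and
 * locked; [0x3000, 0x4000) is fully contained, so it is unlocked and removed;
 * [0x6000, 0x8000) covers the end, so it is moved to [0x7000, 0x8000). The
 * resulting list is [0x1000, 0x2000), [0x2000, 0x7000), [0x7000, 0x8000),
 * with every page in [0x1000, 0x8000) wired exactly once.
 */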
7077 
7078 
7079 status_t
7080 _user_munlock(const void* address, size_t size)
{
7081 	// Nothing to do for a zero-sized request
7082 	if (size == 0)
7083 		return B_OK;
7084 
7085 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
7086 	// reject the call otherwise)
7087 	if ((addr_t)address % B_PAGE_SIZE != 0)
7088 		return EINVAL;
7089 
7090 	// Round size up to the next page
7091 	size = ROUNDUP(size, B_PAGE_SIZE);
7092 
7093 	addr_t endAddress = (addr_t)address + size;
7094 
7095 	// Get and lock the team
7096 	Team* team = thread_get_current_thread()->team;
7097 	TeamLocker teamLocker(team);
7098 	teamLocker.Lock();
7099 	LockedPagesList* lockedPages = &team->locked_pages_list;
7100 
7101 	status_t error = B_OK;
7102 
7103 	// Locate the first locked range possibly overlapping ours
7104 	LockedPages* currentRange = lockedPages->Head();
7105 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
7106 		currentRange = lockedPages->GetNext(currentRange);
7107 
7108 	if (currentRange == NULL || currentRange->start >= endAddress) {
7109 		// No range is intersecting, nothing to unlock
7110 		return B_OK;
7111 	}
7112 
7113 	if (currentRange->start < (addr_t)address) {
7114 		if (currentRange->end > endAddress) {
7115 			// There is a range fully covering the area we want to unlock,
7116 			// and it extends on both sides. We need to split it in two
7117 			LockedPages* newRange = new(std::nothrow) LockedPages();
7118 			if (newRange == NULL)
7119 				return ENOMEM;
7120 
7121 			newRange->start = endAddress;
7122 			newRange->end = currentRange->end;
7123 
7124 			error = newRange->LockMemory();
7125 			if (error != B_OK) {
7126 				delete newRange;
7127 				return error;
7128 			}
7129 
7130 			error = currentRange->Move(currentRange->start, (addr_t)address);
7131 			if (error != B_OK) {
7132 				delete newRange;
7133 				return error;
7134 			}
7135 
7136 			lockedPages->InsertAfter(currentRange, newRange);
7137 			return B_OK;
7138 		} else {
7139 			// A range overlaps the area we want to unlock and extends before
7140 			// it; shrink it so that it ends at our start address
7141 			error = currentRange->Move(currentRange->start, (addr_t)address);
7142 			if (error != B_OK)
7143 				return error;
7144 		}
7145 	}
7146 
7147 	while (currentRange != NULL && currentRange->end <= endAddress) {
7148 		// Unlock and remove each range fully contained in the unlocked area
7149 		error = currentRange->UnlockMemory();
7150 		if (error != B_OK)
7151 			return error;
7152 		LockedPages* temp = currentRange;
7153 		currentRange = lockedPages->GetNext(currentRange);
7154 		lockedPages->Remove(temp);
7155 		delete temp;
7156 	}
7157 
7158 	// Finally, shrink the last partially overlapping range, if any
7159 	if (currentRange != NULL && currentRange->start < endAddress) {
7160 		error = currentRange->Move(endAddress, currentRange->end);
7161 		if (error != B_OK)
7162 			return error;
7163 	}
7164 
7165 	return B_OK;
7166 }
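
/* Worked example for the splitting logic above (editorial addition): with the
 * list holding the single entry [0x1000, 0x8000), unlocking [0x3000, 0x5000)
 * takes the "extends on both sides" path: a new entry [0x5000, 0x8000) is
 * locked and inserted, and the existing entry is moved to [0x1000, 0x3000).
 * Since Move() locks the new span before unlocking the old one, pages that
 * remain locked are never transiently unwired; only [0x3000, 0x5000) ends up
 * unwired.
 */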
7167 
7168 
7169 // #pragma mark -- compatibility
7170 
7171 
7172 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7173 
7174 
7175 struct physical_entry_beos {
7176 	uint32	address;
7177 	uint32	size;
7178 };
7179 
7180 
7181 /*!	The physical_entry structure has changed. We need to translate it to the
7182 	old one.
7183 */
7184 extern "C" int32
7185 __get_memory_map_beos(const void* _address, size_t numBytes,
7186 	physical_entry_beos* table, int32 numEntries)
7187 {
7188 	if (numEntries <= 0)
7189 		return B_BAD_VALUE;
7190 
7191 	const uint8* address = (const uint8*)_address;
7192 
7193 	int32 count = 0;
7194 	while (numBytes > 0 && count < numEntries) {
7195 		physical_entry entry;
7196 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
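		// Note: B_BUFFER_OVERFLOW only means a single entry cannot describe
		// the whole remaining range; the entry itself is valid, so we keep
		// iterating one contiguous chunk at a time.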
7197 		if (result < 0) {
7198 			if (result != B_BUFFER_OVERFLOW)
7199 				return result;
7200 		}
7201 
7202 		if (entry.address >= (phys_addr_t)1 << 32) {
7203 			panic("get_memory_map(): Address is greater than 4 GB!");
7204 			return B_ERROR;
7205 		}
7206 
7207 		table[count].address = entry.address;
7208 		table[count++].size = entry.size;
7209 
7210 		address += entry.size;
7211 		numBytes -= entry.size;
7212 	}
7213 
7214 	// null-terminate the table, if possible
7215 	// terminate the table with an empty entry, if there is room
7216 		table[count].address = 0;
7217 		table[count].size = 0;
7218 	}
7219 
7220 	return B_OK;
7221 }
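
/* Illustrative note (editorial addition): in this configuration (32-bit x86
 * built with B_HAIKU_PHYSICAL_BITS > 32, e.g. a PAE kernel), the native
 * physical_entry has 64-bit address and size fields, while a BeOS binary
 * expects the 32-bit physical_entry_beos layout above. A native result such
 * as { address = 0x1234000, size = 0x2000 } is simply narrowed field by
 * field; a physical address at or above 4 GB cannot be represented in the old
 * structure and hits the panic() above.
 */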
7222 
7223 
7224 /*!	The type of the \a physicalAddress parameter has changed from void* to
7225 	phys_addr_t.
7226 */
7227 extern "C" area_id
7228 __map_physical_memory_beos(const char* name, void* physicalAddress,
7229 	size_t numBytes, uint32 addressSpec, uint32 protection,
7230 	void** _virtualAddress)
7231 {
7232 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7233 		addressSpec, protection, _virtualAddress);
7234 }
7235 
7236 
7237 /*!	The caller might not be able to deal with physical addresses >= 4 GB, so
7238 	we meddle with the \a lock parameter to force 32 bit.
7239 */
7240 extern "C" area_id
7241 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7242 	size_t size, uint32 lock, uint32 protection)
7243 {
7244 	switch (lock) {
7245 		case B_NO_LOCK:
7246 			break;
7247 		case B_FULL_LOCK:
7248 		case B_LAZY_LOCK:
7249 			lock = B_32_BIT_FULL_LOCK;
7250 			break;
7251 		case B_CONTIGUOUS:
7252 			lock = B_32_BIT_CONTIGUOUS;
7253 			break;
7254 	}
7255 
7256 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7257 		protection);
7258 }
7259 
7260 
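// Editorial note on the symbol versioning below: in ELF symbol versioning, a
// single "@" ("get_memory_map@BASE") exports an additional, non-default
// version of the symbol, while a double "@@" ("get_memory_map@@1_ALPHA3")
// marks the default version that newly linked code binds to. Callers built
// against the old BeOS-compatible ABI therefore resolve to the *_beos
// wrappers, while newer callers get the *_haiku variants directly.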
7261 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7262 	"BASE");
7263 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7264 	"map_physical_memory@", "BASE");
7265 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7266 	"BASE");
7267 
7268 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7269 	"get_memory_map@@", "1_ALPHA3");
7270 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7271 	"map_physical_memory@@", "1_ALPHA3");
7272 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7273 	"1_ALPHA3");
7274 
7275 
7276 #else
7277 
7278 
7279 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7280 	"get_memory_map@@", "BASE");
7281 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7282 	"map_physical_memory@@", "BASE");
7283 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7284 	"BASE");
7285 
7286 
7287 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7288