xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4a32f48e70297d9a634646f01e08c2f451ecd6bd)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
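
// Illustrative usage note (pattern taken from cut_area()/discard_area_range()
// below): an area's top cache is locked first, then the source chain is
// walked downwards, e.g.:
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	...
//	cacheChainLocker.Unlock();
//		// or simply let the destructor unlock the whole chain
//
// Unlock() releases the chain in source -> consumer order to keep the locking
// order consistent (see the comment in Unlock() above).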
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int protectionMax, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
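
// Minimal sketch (hypothetical helper, for illustration only) of the nibble
// layout used by the page protection accessors above: two pages share one
// byte of page_protections, the even page in the low nibble and the odd page
// in the high nibble.
#if 0
static uint32
example_read_protection_nibble(const uint8* pageProtections, uint32 pageIndex)
{
	uint8 entry = pageProtections[pageIndex / 2];
	return pageIndex % 2 == 0 ? (entry & 0x0f) : (entry >> 4);
}
#endif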
520 
521 
522 /*!	The caller must have reserved enough pages the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue, since
575 		// otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	The caller must hold the lock of the page's cache, since the page is
589 	unmapped preserving its modified flag.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all mapped pages' caches, since the
600 	pages are unmapped preserving their modified flags.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
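
// Worked example (illustrative values): for an area with Base() 0x1000 and
// Size() 0x3000, a request with address 0x800 and size 0x1000 is clipped by
// intersect_area() to address 0x1000, size 0x800, offset 0; a request with
// address 0x2000 and size 0x4000 becomes address 0x2000, size 0x2000,
// offset 0x1000.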
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the beginning section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
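
// Overview (illustrative summary of the code above): cut_area() distinguishes
// four cases for the intersection computed by intersect_area():
//
//	[XXXXXXXX]	range covers the whole area	-> delete_area()
//	[....XXXX]	range covers the end		-> ShrinkAreaTail() (+ Resize())
//	[XXXX....]	range covers the beginning	-> ShrinkAreaHead() (+ Rebase())
//	[..XXXX..]	range lies in the middle	-> ShrinkAreaTail() plus a second
//			area (and, for a privately used anonymous cache, a new cache
//			adopting the moved pages)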
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note that in case of error the cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 
945 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
946 		| HEAP_DONT_LOCK_KERNEL_SPACE;
947 	int priority;
948 	if (addressSpace != VMAddressSpace::Kernel()) {
949 		priority = VM_PRIORITY_USER;
950 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
951 		priority = VM_PRIORITY_VIP;
952 		allocationFlags |= HEAP_PRIORITY_VIP;
953 	} else
954 		priority = VM_PRIORITY_SYSTEM;
955 
956 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
957 		allocationFlags);
958 	if (area == NULL)
959 		return B_NO_MEMORY;
960 	if (mapping != REGION_PRIVATE_MAP)
961 		area->protection_max = protectionMax & B_USER_PROTECTION;
962 
963 	status_t status;
964 
965 	// if this is a private map, we need to create a new cache
966 	// to handle the private copies of pages as they are written to
967 	VMCache* sourceCache = cache;
968 	if (mapping == REGION_PRIVATE_MAP) {
969 		VMCache* newCache;
970 
971 		// create an anonymous cache
972 		status = VMCacheFactory::CreateAnonymousCache(newCache,
973 			(protection & B_STACK_AREA) != 0
974 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
975 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
976 		if (status != B_OK)
977 			goto err1;
978 
979 		newCache->Lock();
980 		newCache->temporary = 1;
981 		newCache->virtual_base = offset;
982 		newCache->virtual_end = offset + size;
983 
984 		cache->AddConsumer(newCache);
985 
986 		cache = newCache;
987 	}
988 
989 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
990 		status = cache->SetMinimalCommitment(size, priority);
991 		if (status != B_OK)
992 			goto err2;
993 	}
994 
995 	// check to see if this address space has entered DELETE state
996 	if (addressSpace->IsBeingDeleted()) {
997 		// someone is trying to delete this address space now, so we can't
998 		// insert the area; back out
999 		status = B_BAD_TEAM_ID;
1000 		goto err2;
1001 	}
1002 
1003 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1004 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1005 		status = unmap_address_range(addressSpace,
1006 			(addr_t)addressRestrictions->address, size, kernel);
1007 		if (status != B_OK)
1008 			goto err2;
1009 	}
1010 
1011 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1012 		allocationFlags, _virtualAddress);
1013 	if (status == B_NO_MEMORY
1014 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1015 		// TODO: At present, there is no way to notify the low_resource monitor
1016 		// that kernel address space is fragmented, nor does it check for this
1017 		// automatically. Due to how many locks are held, we cannot wait here
1018 		// for space to be freed up, but it would be good to at least notify
1019 		// that we tried and failed to allocate some amount.
1020 	}
1021 	if (status != B_OK)
1022 		goto err2;
1023 
1024 	// attach the cache to the area
1025 	area->cache = cache;
1026 	area->cache_offset = offset;
1027 
1028 	// point the cache back to the area
1029 	cache->InsertAreaLocked(area);
1030 	if (mapping == REGION_PRIVATE_MAP)
1031 		cache->Unlock();
1032 
1033 	// insert the area in the global area hash table
1034 	VMAreaHash::Insert(area);
1035 
1036 	// grab a ref to the address space (the area holds this)
1037 	addressSpace->Get();
1038 
1039 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1040 //		cache, sourceCache, areaName, area);
1041 
1042 	*_area = area;
1043 	return B_OK;
1044 
1045 err2:
1046 	if (mapping == REGION_PRIVATE_MAP) {
1047 		// We created this cache, so we must delete it again. Note that we
1048 		// need to temporarily unlock the source cache or we'll otherwise
1049 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1050 		sourceCache->Unlock();
1051 		cache->ReleaseRefAndUnlock();
1052 		sourceCache->Lock();
1053 	}
1054 err1:
1055 	addressSpace->DeleteArea(area, allocationFlags);
1056 	return status;
1057 }
1058 
1059 
1060 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1061 	  locker1, locker2).
1062 */
1063 template<typename LockerType1, typename LockerType2>
1064 static inline bool
1065 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1066 {
1067 	area->cache->AssertLocked();
1068 
1069 	VMAreaUnwiredWaiter waiter;
1070 	if (!area->AddWaiterIfWired(&waiter))
1071 		return false;
1072 
1073 	// unlock everything and wait
1074 	if (locker1 != NULL)
1075 		locker1->Unlock();
1076 	if (locker2 != NULL)
1077 		locker2->Unlock();
1078 
1079 	waiter.waitEntry.Wait();
1080 
1081 	return true;
1082 }
1083 
1084 
1085 /*!	Checks whether the given area has any wired ranges intersecting with the
1086 	specified range and waits, if so.
1087 
1088 	When it has to wait, the function calls \c Unlock() on both \a locker1
1089 	and \a locker2, if given.
1090 	The area's top cache must be locked and must be unlocked as a side effect
1091 	of calling \c Unlock() on either \a locker1 or \a locker2.
1092 
1093 	If the function does not have to wait it does not modify or unlock any
1094 	object.
1095 
1096 	\param area The area to be checked.
1097 	\param base The base address of the range to check.
1098 	\param size The size of the address range to check.
1099 	\param locker1 An object to be unlocked before starting to wait (may
1100 		be \c NULL).
1101 	\param locker2 An object to be unlocked before starting to wait (may
1102 		be \c NULL).
1103 	\return \c true, if the function had to wait, \c false otherwise.
1104 */
1105 template<typename LockerType1, typename LockerType2>
1106 static inline bool
1107 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1108 	LockerType1* locker1, LockerType2* locker2)
1109 {
1110 	area->cache->AssertLocked();
1111 
1112 	VMAreaUnwiredWaiter waiter;
1113 	if (!area->AddWaiterIfWired(&waiter, base, size))
1114 		return false;
1115 
1116 	// unlock everything and wait
1117 	if (locker1 != NULL)
1118 		locker1->Unlock();
1119 	if (locker2 != NULL)
1120 		locker2->Unlock();
1121 
1122 	waiter.waitEntry.Wait();
1123 
1124 	return true;
1125 }
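
// Minimal usage sketch (hypothetical; the surrounding loop and the variables
// areaID, base and size are invented for illustration). Since the function
// unlocks the passed lockers when it has to wait, callers retry from scratch
// afterwards:
#if 0
	for (;;) {
		AddressSpaceReadLocker locker;
		VMArea* area;
		if (locker.SetFromArea(areaID, area) != B_OK)
			return B_BAD_VALUE;

		AreaCacheLocker cacheLocker(area);
		if (!wait_if_area_range_is_wired(area, base, size, &locker,
				&cacheLocker)) {
			// nothing in the range is wired -- proceed with both locks held
			break;
		}
		// the function waited and unlocked everything -- retry from scratch
	}
#endif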
1126 
1127 
1128 /*!	Checks whether the given address space has any wired ranges intersecting
1129 	with the specified range and waits, if so.
1130 
1131 	Similar to wait_if_area_range_is_wired(), with the following differences:
1132 	- All areas intersecting with the range are checked (respectively all until
1133 	  one is found that contains a wired range intersecting with the given
1134 	  range).
1135 	- The given address space must at least be read-locked and must be unlocked
1136 	  when \c Unlock() is called on \a locker.
1137 	- None of the areas' caches are allowed to be locked.
1138 */
1139 template<typename LockerType>
1140 static inline bool
1141 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1142 	size_t size, LockerType* locker)
1143 {
1144 	for (VMAddressSpace::AreaRangeIterator it
1145 		= addressSpace->GetAreaRangeIterator(base, size);
1146 			VMArea* area = it.Next();) {
1147 
1148 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1149 
1150 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1151 			return true;
1152 	}
1153 
1154 	return false;
1155 }
1156 
1157 
1158 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1159 	It must be called in a situation where the kernel address space may be
1160 	locked.
1161 */
1162 status_t
1163 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1164 {
1165 	AddressSpaceReadLocker locker;
1166 	VMArea* area;
1167 	status_t status = locker.SetFromArea(id, area);
1168 	if (status != B_OK)
1169 		return status;
1170 
1171 	if (area->page_protections == NULL) {
1172 		status = allocate_area_page_protections(area);
1173 		if (status != B_OK)
1174 			return status;
1175 	}
1176 
1177 	*cookie = (void*)area;
1178 	return B_OK;
1179 }
1180 
1181 
1182 /*!	This is a debug helper function that can only be used with very specific
1183 	use cases.
1184 	Sets protection for the given address range to the protection specified.
1185 	If \a protection is 0 then the involved pages will be marked non-present
1186 	in the translation map to cause a fault on access. The pages aren't
1187 	actually unmapped however so that they can be marked present again with
1188 	additional calls to this function. For this to work the area must be
1189 	fully locked in memory so that the pages aren't otherwise touched.
1190 	This function does not lock the kernel address space and needs to be
1191 	supplied with a \a cookie retrieved from a successful call to
1192 	vm_prepare_kernel_area_debug_protection().
1193 */
1194 status_t
1195 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1196 	uint32 protection)
1197 {
1198 	// check address range
1199 	addr_t address = (addr_t)_address;
1200 	size = PAGE_ALIGN(size);
1201 
1202 	if ((address % B_PAGE_SIZE) != 0
1203 		|| (addr_t)address + size < (addr_t)address
1204 		|| !IS_KERNEL_ADDRESS(address)
1205 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1206 		return B_BAD_VALUE;
1207 	}
1208 
1209 	// Translate the kernel protection to user protection as we only store that.
1210 	if ((protection & B_KERNEL_READ_AREA) != 0)
1211 		protection |= B_READ_AREA;
1212 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1213 		protection |= B_WRITE_AREA;
1214 
1215 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1216 	VMTranslationMap* map = addressSpace->TranslationMap();
1217 	VMArea* area = (VMArea*)cookie;
1218 
1219 	addr_t offset = address - area->Base();
1220 	if (area->Size() - offset < size) {
1221 		panic("protect range not fully within supplied area");
1222 		return B_BAD_VALUE;
1223 	}
1224 
1225 	if (area->page_protections == NULL) {
1226 		panic("area has no page protections");
1227 		return B_BAD_VALUE;
1228 	}
1229 
1230 	// Invalidate the mapping entries so any access to them will fault, or
1231 	// restore the mapping entries unchanged so that lookup will succeed again.
1232 	map->Lock();
1233 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1234 	map->Unlock();
1235 
1236 	// And set the proper page protections so that the fault case will actually
1237 	// fail and not simply try to map a new page.
1238 	for (addr_t pageAddress = address; pageAddress < address + size;
1239 			pageAddress += B_PAGE_SIZE) {
1240 		set_area_page_protection(area, pageAddress, protection);
1241 	}
1242 
1243 	return B_OK;
1244 }
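
// Hypothetical usage sketch (helper name and arguments invented for
// illustration): make one page of a fully locked kernel area fault on access,
// then restore full kernel access later.
#if 0
static status_t
example_guard_page(area_id area, void* pageAddress)
{
	void* cookie;
	status_t status = vm_prepare_kernel_area_debug_protection(area, &cookie);
	if (status != B_OK)
		return status;

	// mark the page non-present -- any access will now cause a page fault
	status = vm_set_kernel_area_debug_protection(cookie, pageAddress,
		B_PAGE_SIZE, 0);
	if (status != B_OK)
		return status;

	// ... later: make the page accessible again
	return vm_set_kernel_area_debug_protection(cookie, pageAddress,
		B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif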
1245 
1246 
1247 status_t
1248 vm_block_address_range(const char* name, void* address, addr_t size)
1249 {
1250 	if (!arch_vm_supports_protection(0))
1251 		return B_NOT_SUPPORTED;
1252 
1253 	AddressSpaceWriteLocker locker;
1254 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1255 	if (status != B_OK)
1256 		return status;
1257 
1258 	VMAddressSpace* addressSpace = locker.AddressSpace();
1259 
1260 	// create an anonymous cache
1261 	VMCache* cache;
1262 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1263 		VM_PRIORITY_SYSTEM);
1264 	if (status != B_OK)
1265 		return status;
1266 
1267 	cache->temporary = 1;
1268 	cache->virtual_end = size;
1269 	cache->Lock();
1270 
1271 	VMArea* area;
1272 	virtual_address_restrictions addressRestrictions = {};
1273 	addressRestrictions.address = address;
1274 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1275 	status = map_backing_store(addressSpace, cache, 0, name, size,
1276 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1277 		true, &area, NULL);
1278 	if (status != B_OK) {
1279 		cache->ReleaseRefAndUnlock();
1280 		return status;
1281 	}
1282 
1283 	cache->Unlock();
1284 	area->cache_type = CACHE_TYPE_RAM;
1285 	return area->id;
1286 }
1287 
1288 
1289 status_t
1290 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1291 {
1292 	AddressSpaceWriteLocker locker(team);
1293 	if (!locker.IsLocked())
1294 		return B_BAD_TEAM_ID;
1295 
1296 	VMAddressSpace* addressSpace = locker.AddressSpace();
1297 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1298 		addressSpace == VMAddressSpace::Kernel()
1299 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1300 }
1301 
1302 
1303 status_t
1304 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1305 	addr_t size, uint32 flags)
1306 {
1307 	if (size == 0)
1308 		return B_BAD_VALUE;
1309 
1310 	AddressSpaceWriteLocker locker(team);
1311 	if (!locker.IsLocked())
1312 		return B_BAD_TEAM_ID;
1313 
1314 	virtual_address_restrictions addressRestrictions = {};
1315 	addressRestrictions.address = *_address;
1316 	addressRestrictions.address_specification = addressSpec;
1317 	VMAddressSpace* addressSpace = locker.AddressSpace();
1318 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1319 		addressSpace == VMAddressSpace::Kernel()
1320 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1321 		_address);
1322 }
1323 
1324 
1325 area_id
1326 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1327 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1328 	const virtual_address_restrictions* virtualAddressRestrictions,
1329 	const physical_address_restrictions* physicalAddressRestrictions,
1330 	bool kernel, void** _address)
1331 {
1332 	VMArea* area;
1333 	VMCache* cache;
1334 	vm_page* page = NULL;
1335 	bool isStack = (protection & B_STACK_AREA) != 0;
1336 	page_num_t guardPages;
1337 	bool canOvercommit = false;
1338 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1339 		? VM_PAGE_ALLOC_CLEAR : 0;
1340 
1341 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1342 		team, name, size));
1343 
1344 	size = PAGE_ALIGN(size);
1345 	guardSize = PAGE_ALIGN(guardSize);
1346 	guardPages = guardSize / B_PAGE_SIZE;
1347 
1348 	if (size == 0 || size < guardSize)
1349 		return B_BAD_VALUE;
1350 	if (!arch_vm_supports_protection(protection))
1351 		return B_NOT_SUPPORTED;
1352 
1353 	if (team == B_CURRENT_TEAM)
1354 		team = VMAddressSpace::CurrentID();
1355 	if (team < 0)
1356 		return B_BAD_TEAM_ID;
1357 
1358 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1359 		canOvercommit = true;
1360 
1361 #ifdef DEBUG_KERNEL_STACKS
1362 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1363 		isStack = true;
1364 #endif
1365 
1366 	// check parameters
1367 	switch (virtualAddressRestrictions->address_specification) {
1368 		case B_ANY_ADDRESS:
1369 		case B_EXACT_ADDRESS:
1370 		case B_BASE_ADDRESS:
1371 		case B_ANY_KERNEL_ADDRESS:
1372 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1373 		case B_RANDOMIZED_ANY_ADDRESS:
1374 		case B_RANDOMIZED_BASE_ADDRESS:
1375 			break;
1376 
1377 		default:
1378 			return B_BAD_VALUE;
1379 	}
1380 
1381 	// If low or high physical address restrictions are given, we force
1382 	// B_CONTIGUOUS wiring, since only then we'll use
1383 	// vm_page_allocate_page_run() which deals with those restrictions.
1384 	if (physicalAddressRestrictions->low_address != 0
1385 		|| physicalAddressRestrictions->high_address != 0) {
1386 		wiring = B_CONTIGUOUS;
1387 	}
1388 
1389 	physical_address_restrictions stackPhysicalRestrictions;
1390 	bool doReserveMemory = false;
1391 	switch (wiring) {
1392 		case B_NO_LOCK:
1393 			break;
1394 		case B_FULL_LOCK:
1395 		case B_LAZY_LOCK:
1396 		case B_CONTIGUOUS:
1397 			doReserveMemory = true;
1398 			break;
1399 		case B_ALREADY_WIRED:
1400 			break;
1401 		case B_LOMEM:
1402 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1403 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1404 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1405 			wiring = B_CONTIGUOUS;
1406 			doReserveMemory = true;
1407 			break;
1408 		case B_32_BIT_FULL_LOCK:
1409 			if (B_HAIKU_PHYSICAL_BITS <= 32
1410 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1411 				wiring = B_FULL_LOCK;
1412 				doReserveMemory = true;
1413 				break;
1414 			}
1415 			// TODO: We don't really support this mode efficiently. Just fall
1416 			// through for now ...
1417 		case B_32_BIT_CONTIGUOUS:
1418 			#if B_HAIKU_PHYSICAL_BITS > 32
1419 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1420 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1421 					stackPhysicalRestrictions.high_address
1422 						= (phys_addr_t)1 << 32;
1423 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1424 				}
1425 			#endif
1426 			wiring = B_CONTIGUOUS;
1427 			doReserveMemory = true;
1428 			break;
1429 		default:
1430 			return B_BAD_VALUE;
1431 	}
1432 
1433 	// Optimization: For a single-page contiguous allocation without low/high
1434 	// memory restrictions, B_FULL_LOCK wiring suffices.
1435 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1436 		&& physicalAddressRestrictions->low_address == 0
1437 		&& physicalAddressRestrictions->high_address == 0) {
1438 		wiring = B_FULL_LOCK;
1439 	}
1440 
1441 	// For full lock or contiguous areas we're also going to map the pages and
1442 	// thus need to reserve pages for the mapping backend upfront.
1443 	addr_t reservedMapPages = 0;
1444 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1445 		AddressSpaceWriteLocker locker;
1446 		status_t status = locker.SetTo(team);
1447 		if (status != B_OK)
1448 			return status;
1449 
1450 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1451 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1452 	}
1453 
1454 	int priority;
1455 	if (team != VMAddressSpace::KernelID())
1456 		priority = VM_PRIORITY_USER;
1457 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1458 		priority = VM_PRIORITY_VIP;
1459 	else
1460 		priority = VM_PRIORITY_SYSTEM;
1461 
1462 	// Reserve memory before acquiring the address space lock. This reduces the
1463 	// chances of failure, since while holding the write lock to the address
1464 	// space (if it is the kernel address space that is), the low memory handler
1465 	// won't be able to free anything for us.
1466 	addr_t reservedMemory = 0;
1467 	if (doReserveMemory) {
1468 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1469 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1470 			return B_NO_MEMORY;
1471 		reservedMemory = size;
1472 		// TODO: We don't reserve the memory for the pages for the page
1473 		// directories/tables. We actually need to do so, since we currently don't
1474 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1475 		// there are actually fewer physical pages than there should be, which
1476 		// can get the VM into trouble in low memory situations.
1477 	}
1478 
1479 	AddressSpaceWriteLocker locker;
1480 	VMAddressSpace* addressSpace;
1481 	status_t status;
1482 
1483 	// For full lock areas reserve the pages before locking the address
1484 	// space. E.g. block caches can't release their memory while we hold the
1485 	// address space lock.
1486 	page_num_t reservedPages = reservedMapPages;
1487 	if (wiring == B_FULL_LOCK)
1488 		reservedPages += size / B_PAGE_SIZE;
1489 
1490 	vm_page_reservation reservation;
1491 	if (reservedPages > 0) {
1492 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1493 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1494 					priority)) {
1495 				reservedPages = 0;
1496 				status = B_WOULD_BLOCK;
1497 				goto err0;
1498 			}
1499 		} else
1500 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1501 	}
1502 
1503 	if (wiring == B_CONTIGUOUS) {
1504 		// we try to allocate the page run here upfront as this may easily
1505 		// fail for obvious reasons
1506 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1507 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1508 		if (page == NULL) {
1509 			status = B_NO_MEMORY;
1510 			goto err0;
1511 		}
1512 	}
1513 
1514 	// Lock the address space and, if B_EXACT_ADDRESS and
1515 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1516 	// is not wired.
1517 	do {
1518 		status = locker.SetTo(team);
1519 		if (status != B_OK)
1520 			goto err1;
1521 
1522 		addressSpace = locker.AddressSpace();
1523 	} while (virtualAddressRestrictions->address_specification
1524 			== B_EXACT_ADDRESS
1525 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1526 		&& wait_if_address_range_is_wired(addressSpace,
1527 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1528 
1529 	// create an anonymous cache
1530 	// if it's a stack, make sure that at least two pages are available
1531 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1532 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1533 		wiring == B_NO_LOCK, priority);
1534 	if (status != B_OK)
1535 		goto err1;
1536 
1537 	cache->temporary = 1;
1538 	cache->virtual_end = size;
1539 	cache->committed_size = reservedMemory;
1540 		// TODO: This should be done via a method.
1541 	reservedMemory = 0;
1542 
1543 	cache->Lock();
1544 
1545 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1546 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1547 		virtualAddressRestrictions, kernel, &area, _address);
1548 
1549 	if (status != B_OK) {
1550 		cache->ReleaseRefAndUnlock();
1551 		goto err1;
1552 	}
1553 
1554 	locker.DegradeToReadLock();
1555 
1556 	switch (wiring) {
1557 		case B_NO_LOCK:
1558 		case B_LAZY_LOCK:
1559 			// do nothing - the pages are mapped in as needed
1560 			break;
1561 
1562 		case B_FULL_LOCK:
1563 		{
1564 			// Allocate and map all pages for this area
1565 
1566 			off_t offset = 0;
1567 			for (addr_t address = area->Base();
1568 					address < area->Base() + (area->Size() - 1);
1569 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1570 #ifdef DEBUG_KERNEL_STACKS
1571 #	ifdef STACK_GROWS_DOWNWARDS
1572 				if (isStack && address < area->Base()
1573 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1574 #	else
1575 				if (isStack && address >= area->Base() + area->Size()
1576 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1577 #	endif
1578 					continue;
1579 #endif
1580 				vm_page* page = vm_page_allocate_page(&reservation,
1581 					PAGE_STATE_WIRED | pageAllocFlags);
1582 				cache->InsertPage(page, offset);
1583 				map_page(area, page, address, protection, &reservation);
1584 
1585 				DEBUG_PAGE_ACCESS_END(page);
1586 			}
1587 
1588 			break;
1589 		}
1590 
1591 		case B_ALREADY_WIRED:
1592 		{
1593 			// The pages should already be mapped. This is only really useful
1594 			// during boot time. Find the appropriate vm_page objects and stick
1595 			// them in the cache object.
1596 			VMTranslationMap* map = addressSpace->TranslationMap();
1597 			off_t offset = 0;
1598 
1599 			if (!gKernelStartup)
1600 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1601 
1602 			map->Lock();
1603 
1604 			for (addr_t virtualAddress = area->Base();
1605 					virtualAddress < area->Base() + (area->Size() - 1);
1606 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1607 				phys_addr_t physicalAddress;
1608 				uint32 flags;
1609 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1610 				if (status < B_OK) {
1611 					panic("looking up mapping failed for va 0x%lx\n",
1612 						virtualAddress);
1613 				}
1614 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1615 				if (page == NULL) {
1616 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1617 						"\n", physicalAddress);
1618 				}
1619 
1620 				DEBUG_PAGE_ACCESS_START(page);
1621 
1622 				cache->InsertPage(page, offset);
1623 				increment_page_wired_count(page);
1624 				vm_page_set_state(page, PAGE_STATE_WIRED);
1625 				page->busy = false;
1626 
1627 				DEBUG_PAGE_ACCESS_END(page);
1628 			}
1629 
1630 			map->Unlock();
1631 			break;
1632 		}
1633 
1634 		case B_CONTIGUOUS:
1635 		{
1636 			// We have already allocated our contiguous page run, so we can now
1637 			// just map the pages into the address space.
1638 			VMTranslationMap* map = addressSpace->TranslationMap();
1639 			phys_addr_t physicalAddress
1640 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1641 			addr_t virtualAddress = area->Base();
1642 			off_t offset = 0;
1643 
1644 			map->Lock();
1645 
1646 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1647 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1648 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1649 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1650 				if (page == NULL)
1651 					panic("couldn't lookup physical page just allocated\n");
1652 
1653 				status = map->Map(virtualAddress, physicalAddress, protection,
1654 					area->MemoryType(), &reservation);
1655 				if (status < B_OK)
1656 					panic("couldn't map physical page in page run\n");
1657 
1658 				cache->InsertPage(page, offset);
1659 				increment_page_wired_count(page);
1660 
1661 				DEBUG_PAGE_ACCESS_END(page);
1662 			}
1663 
1664 			map->Unlock();
1665 			break;
1666 		}
1667 
1668 		default:
1669 			break;
1670 	}
1671 
1672 	cache->Unlock();
1673 
1674 	if (reservedPages > 0)
1675 		vm_page_unreserve_pages(&reservation);
1676 
1677 	TRACE(("vm_create_anonymous_area: done\n"));
1678 
1679 	area->cache_type = CACHE_TYPE_RAM;
1680 	return area->id;
1681 
1682 err1:
1683 	if (wiring == B_CONTIGUOUS) {
1684 		// we had reserved the area space upfront...
1685 		phys_addr_t pageNumber = page->physical_page_number;
1686 		int32 i;
1687 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1688 			page = vm_lookup_page(pageNumber);
1689 			if (page == NULL)
1690 				panic("couldn't lookup physical page just allocated\n");
1691 
1692 			vm_page_set_state(page, PAGE_STATE_FREE);
1693 		}
1694 	}
1695 
1696 err0:
1697 	if (reservedPages > 0)
1698 		vm_page_unreserve_pages(&reservation);
1699 	if (reservedMemory > 0)
1700 		vm_unreserve_memory(reservedMemory);
1701 
1702 	return status;
1703 }
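
// Hypothetical usage sketch (name and size invented for illustration): create
// a fully locked kernel area with default address and physical restrictions,
// roughly the way create_area(B_FULL_LOCK) ultimately does.
#if 0
static area_id
example_create_wired_kernel_buffer(void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return vm_create_anonymous_area(VMAddressSpace::KernelID(),
		"example buffer", 16 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
		&virtualRestrictions, &physicalRestrictions, true, _address);
}
#endif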
1704 
1705 
1706 area_id
1707 vm_map_physical_memory(team_id team, const char* name, void** _address,
1708 	uint32 addressSpec, addr_t size, uint32 protection,
1709 	phys_addr_t physicalAddress, bool alreadyWired)
1710 {
1711 	VMArea* area;
1712 	VMCache* cache;
1713 	addr_t mapOffset;
1714 
1715 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1716 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1717 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1718 		addressSpec, size, protection, physicalAddress));
1719 
1720 	if (!arch_vm_supports_protection(protection))
1721 		return B_NOT_SUPPORTED;
1722 
1723 	AddressSpaceWriteLocker locker(team);
1724 	if (!locker.IsLocked())
1725 		return B_BAD_TEAM_ID;
1726 
1727 	// if the physical address is not page aligned,
1728 	// move the actual area down to align on a page boundary
1729 	mapOffset = physicalAddress % B_PAGE_SIZE;
1730 	size += mapOffset;
1731 	physicalAddress -= mapOffset;
1732 
1733 	size = PAGE_ALIGN(size);
1734 
1735 	// create a device cache
1736 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1737 	if (status != B_OK)
1738 		return status;
1739 
1740 	cache->virtual_end = size;
1741 
1742 	cache->Lock();
1743 
1744 	virtual_address_restrictions addressRestrictions = {};
1745 	addressRestrictions.address = *_address;
1746 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1747 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1748 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1749 		true, &area, _address);
1750 
1751 	if (status < B_OK)
1752 		cache->ReleaseRefLocked();
1753 
1754 	cache->Unlock();
1755 
1756 	if (status == B_OK) {
1757 		// set requested memory type -- use uncached, if not given
1758 		uint32 memoryType = addressSpec & B_MTR_MASK;
1759 		if (memoryType == 0)
1760 			memoryType = B_MTR_UC;
1761 
1762 		area->SetMemoryType(memoryType);
1763 
1764 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1765 		if (status != B_OK)
1766 			delete_area(locker.AddressSpace(), area, false);
1767 	}
1768 
1769 	if (status != B_OK)
1770 		return status;
1771 
1772 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1773 
1774 	if (alreadyWired) {
1775 		// The area is already mapped, but possibly not with the right
1776 		// memory type.
1777 		map->Lock();
1778 		map->ProtectArea(area, area->protection);
1779 		map->Unlock();
1780 	} else {
1781 		// Map the area completely.
1782 
1783 		// reserve pages needed for the mapping
1784 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1785 			area->Base() + (size - 1));
1786 		vm_page_reservation reservation;
1787 		vm_page_reserve_pages(&reservation, reservePages,
1788 			team == VMAddressSpace::KernelID()
1789 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1790 
1791 		map->Lock();
1792 
1793 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1794 			map->Map(area->Base() + offset, physicalAddress + offset,
1795 				protection, area->MemoryType(), &reservation);
1796 		}
1797 
1798 		map->Unlock();
1799 
1800 		vm_page_unreserve_pages(&reservation);
1801 	}
1802 
1803 	// modify the pointer returned to be offset back into the new area
1804 	// the same way the physical address in was offset
1805 	*_address = (void*)((addr_t)*_address + mapOffset);
1806 
1807 	area->cache_type = CACHE_TYPE_DEVICE;
1808 	return area->id;
1809 }
1810 
1811 
1812 /*!	Don't use!
1813 	TODO: This function was introduced to map physical page vecs to
1814 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1815 	use a device cache and does not track vm_page::wired_count!
1816 */
1817 area_id
1818 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1819 	uint32 addressSpec, addr_t* _size, uint32 protection,
1820 	struct generic_io_vec* vecs, uint32 vecCount)
1821 {
1822 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1823 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1824 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1825 		addressSpec, _size, protection, vecs, vecCount));
1826 
1827 	if (!arch_vm_supports_protection(protection)
1828 		|| (addressSpec & B_MTR_MASK) != 0) {
1829 		return B_NOT_SUPPORTED;
1830 	}
1831 
1832 	AddressSpaceWriteLocker locker(team);
1833 	if (!locker.IsLocked())
1834 		return B_BAD_TEAM_ID;
1835 
1836 	if (vecCount == 0)
1837 		return B_BAD_VALUE;
1838 
1839 	addr_t size = 0;
1840 	for (uint32 i = 0; i < vecCount; i++) {
1841 		if (vecs[i].base % B_PAGE_SIZE != 0
1842 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1843 			return B_BAD_VALUE;
1844 		}
1845 
1846 		size += vecs[i].length;
1847 	}
1848 
1849 	// create a device cache
1850 	VMCache* cache;
1851 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1852 	if (result != B_OK)
1853 		return result;
1854 
1855 	cache->virtual_end = size;
1856 
1857 	cache->Lock();
1858 
1859 	VMArea* area;
1860 	virtual_address_restrictions addressRestrictions = {};
1861 	addressRestrictions.address = *_address;
1862 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1863 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1864 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1865 		&addressRestrictions, true, &area, _address);
1866 
1867 	if (result != B_OK)
1868 		cache->ReleaseRefLocked();
1869 
1870 	cache->Unlock();
1871 
1872 	if (result != B_OK)
1873 		return result;
1874 
1875 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1876 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1877 		area->Base() + (size - 1));
1878 
1879 	vm_page_reservation reservation;
1880 	vm_page_reserve_pages(&reservation, reservePages,
1881 			team == VMAddressSpace::KernelID()
1882 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1883 	map->Lock();
1884 
1885 	uint32 vecIndex = 0;
1886 	size_t vecOffset = 0;
1887 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1888 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1889 			vecOffset = 0;
1890 			vecIndex++;
1891 		}
1892 
1893 		if (vecIndex >= vecCount)
1894 			break;
1895 
1896 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1897 			protection, area->MemoryType(), &reservation);
1898 
1899 		vecOffset += B_PAGE_SIZE;
1900 	}
1901 
1902 	map->Unlock();
1903 	vm_page_unreserve_pages(&reservation);
1904 
1905 	if (_size != NULL)
1906 		*_size = size;
1907 
1908 	area->cache_type = CACHE_TYPE_DEVICE;
1909 	return area->id;
1910 }
1911 
1912 
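/*!	Creates an area backed by a "null" cache. Such an area reserves a range
	of the team's address space without mapping any physical memory for it;
	it is typically used to block out an address range.
*/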
1913 area_id
1914 vm_create_null_area(team_id team, const char* name, void** address,
1915 	uint32 addressSpec, addr_t size, uint32 flags)
1916 {
1917 	size = PAGE_ALIGN(size);
1918 
1919 	// Lock the address space and, if B_EXACT_ADDRESS and
1920 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1921 	// is not wired.
1922 	AddressSpaceWriteLocker locker;
1923 	do {
1924 		if (locker.SetTo(team) != B_OK)
1925 			return B_BAD_TEAM_ID;
1926 	} while (addressSpec == B_EXACT_ADDRESS
1927 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1928 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1929 			(addr_t)*address, size, &locker));
1930 
1931 	// create a null cache
1932 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1933 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1934 	VMCache* cache;
1935 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1936 	if (status != B_OK)
1937 		return status;
1938 
1939 	cache->temporary = 1;
1940 	cache->virtual_end = size;
1941 
1942 	cache->Lock();
1943 
1944 	VMArea* area;
1945 	virtual_address_restrictions addressRestrictions = {};
1946 	addressRestrictions.address = *address;
1947 	addressRestrictions.address_specification = addressSpec;
1948 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1949 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1950 		REGION_NO_PRIVATE_MAP, flags,
1951 		&addressRestrictions, true, &area, address);
1952 
1953 	if (status < B_OK) {
1954 		cache->ReleaseRefAndUnlock();
1955 		return status;
1956 	}
1957 
1958 	cache->Unlock();
1959 
1960 	area->cache_type = CACHE_TYPE_NULL;
1961 	return area->id;
1962 }
1963 
1964 
1965 /*!	Creates the vnode cache for the specified \a vnode.
1966 	The vnode has to be marked busy when calling this function.
1967 */
1968 status_t
1969 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1970 {
1971 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1972 }
1973 
1974 
1975 /*!	\a cache must be locked. The area's address space must be read-locked.
1976 */
1977 static void
1978 pre_map_area_pages(VMArea* area, VMCache* cache,
1979 	vm_page_reservation* reservation)
1980 {
1981 	addr_t baseAddress = area->Base();
1982 	addr_t cacheOffset = area->cache_offset;
1983 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1984 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1985 
1986 	for (VMCachePagesTree::Iterator it
1987 				= cache->pages.GetIterator(firstPage, true, true);
1988 			vm_page* page = it.Next();) {
1989 		if (page->cache_offset >= endPage)
1990 			break;
1991 
1992 		// skip busy and inactive pages
1993 		if (page->busy || page->usage_count == 0)
1994 			continue;
1995 
1996 		DEBUG_PAGE_ACCESS_START(page);
1997 		map_page(area, page,
1998 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1999 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2000 		DEBUG_PAGE_ACCESS_END(page);
2001 	}
2002 }
2003 
2004 
2005 /*!	Will map the file specified by \a fd to an area in memory.
2006 	The file will be mirrored beginning at the specified \a offset. The
2007 	\a offset and \a size arguments have to be page aligned.
2008 */
2009 static area_id
2010 _vm_map_file(team_id team, const char* name, void** _address,
2011 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2012 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2013 {
2014 	// TODO: for binary files, we want to make sure that they get a
2015 	//	consistent copy of the file at mapping time, i.e. later changes
2016 	//	should not make it into the mapped copy -- doing this in a nice
2017 	//	way will need quite some changes
2018 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2019 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2020 
2021 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2022 	size = PAGE_ALIGN(size);
2023 
2024 	if (mapping == REGION_NO_PRIVATE_MAP)
2025 		protection |= B_SHARED_AREA;
2026 	if (addressSpec != B_EXACT_ADDRESS)
2027 		unmapAddressRange = false;
2028 
2029 	if (fd < 0) {
2030 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2031 		virtual_address_restrictions virtualRestrictions = {};
2032 		virtualRestrictions.address = *_address;
2033 		virtualRestrictions.address_specification = addressSpec;
2034 		physical_address_restrictions physicalRestrictions = {};
2035 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2036 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2037 			_address);
2038 	}
2039 
2040 	// get the open flags of the FD
2041 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2042 	if (descriptor == NULL)
2043 		return EBADF;
2044 	int32 openMode = descriptor->open_mode;
2045 	put_fd(descriptor);
2046 
2047 	// The FD must be open for reading in any case. For a shared mapping
2048 	// with write access, the FD additionally must be open for writing.
2049 	if ((openMode & O_ACCMODE) == O_WRONLY
2050 		|| (mapping == REGION_NO_PRIVATE_MAP
2051 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2052 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2053 		return EACCES;
2054 	}
2055 
2056 	uint32 protectionMax = 0;
2057 	if (mapping != REGION_PRIVATE_MAP) {
2058 		protectionMax = protection | B_READ_AREA;
2059 		if ((openMode & O_ACCMODE) == O_RDWR)
2060 			protectionMax |= B_WRITE_AREA;
2061 	}
2062 
2063 	// get the vnode for the object, this also grabs a ref to it
2064 	struct vnode* vnode = NULL;
2065 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2066 	if (status < B_OK)
2067 		return status;
2068 	VnodePutter vnodePutter(vnode);
2069 
2070 	// If we're going to pre-map pages, we need to reserve the pages needed by
2071 	// the mapping backend upfront.
2072 	page_num_t reservedPreMapPages = 0;
2073 	vm_page_reservation reservation;
2074 	if ((protection & B_READ_AREA) != 0) {
2075 		AddressSpaceWriteLocker locker;
2076 		status = locker.SetTo(team);
2077 		if (status != B_OK)
2078 			return status;
2079 
2080 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2081 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2082 
2083 		locker.Unlock();
2084 
2085 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2086 			team == VMAddressSpace::KernelID()
2087 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2088 	}
2089 
2090 	struct PageUnreserver {
2091 		PageUnreserver(vm_page_reservation* reservation)
2092 			:
2093 			fReservation(reservation)
2094 		{
2095 		}
2096 
2097 		~PageUnreserver()
2098 		{
2099 			if (fReservation != NULL)
2100 				vm_page_unreserve_pages(fReservation);
2101 		}
2102 
2103 		vm_page_reservation* fReservation;
2104 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2105 
2106 	// Lock the address space and, if the specified address range shall be
2107 	// unmapped, ensure it is not wired.
2108 	AddressSpaceWriteLocker locker;
2109 	do {
2110 		if (locker.SetTo(team) != B_OK)
2111 			return B_BAD_TEAM_ID;
2112 	} while (unmapAddressRange
2113 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2114 			(addr_t)*_address, size, &locker));
2115 
2116 	// TODO: this only works for file systems that use the file cache
2117 	VMCache* cache;
2118 	status = vfs_get_vnode_cache(vnode, &cache, false);
2119 	if (status < B_OK)
2120 		return status;
2121 
2122 	cache->Lock();
2123 
2124 	VMArea* area;
2125 	virtual_address_restrictions addressRestrictions = {};
2126 	addressRestrictions.address = *_address;
2127 	addressRestrictions.address_specification = addressSpec;
2128 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2129 		0, protection, protectionMax, mapping,
2130 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2131 		&addressRestrictions, kernel, &area, _address);
2132 
2133 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2134 		// map_backing_store() cannot know we no longer need the ref
2135 		cache->ReleaseRefLocked();
2136 	}
2137 
2138 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2139 		pre_map_area_pages(area, cache, &reservation);
2140 
2141 	cache->Unlock();
2142 
2143 	if (status == B_OK) {
2144 		// TODO: this probably deserves a smarter solution, i.e. don't always
2145 		// prefetch, and probably don't trigger it at this place either.
2146 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2147 			// prefetches at max 10 MB starting from "offset"
2148 	}
2149 
2150 	if (status != B_OK)
2151 		return status;
2152 
2153 	area->cache_type = CACHE_TYPE_VNODE;
2154 	return area->id;
2155 }
2156 
2157 
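/*!	In-kernel entry point for mapping a file: forwards to _vm_map_file() with
	\c kernel set to \c true.

	Usage sketch (illustrative only; \c fd is assumed to be an already open
	file descriptor and the size is an example value):

	void* address = NULL;
	area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
		&address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, B_KERNEL_READ_AREA,
		REGION_NO_PRIVATE_MAP, false, fd, 0);
*/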
2158 area_id
2159 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2160 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2161 	int fd, off_t offset)
2162 {
2163 	if (!arch_vm_supports_protection(protection))
2164 		return B_NOT_SUPPORTED;
2165 
2166 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2167 		mapping, unmapAddressRange, fd, offset, true);
2168 }
2169 
2170 
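/*!	Returns the area's cache, locked, with a reference acquired for the
	caller. Since the cache may be exchanged while waiting for the cache lock,
	the lookup is retried until the locked cache is still the one the area
	refers to. Counterpart to vm_area_put_locked_cache().
*/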
2171 VMCache*
2172 vm_area_get_locked_cache(VMArea* area)
2173 {
2174 	rw_lock_read_lock(&sAreaCacheLock);
2175 
2176 	while (true) {
2177 		VMCache* cache = area->cache;
2178 
2179 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2180 			// cache has been deleted
2181 			rw_lock_read_lock(&sAreaCacheLock);
2182 			continue;
2183 		}
2184 
2185 		rw_lock_read_lock(&sAreaCacheLock);
2186 
2187 		if (cache == area->cache) {
2188 			cache->AcquireRefLocked();
2189 			rw_lock_read_unlock(&sAreaCacheLock);
2190 			return cache;
2191 		}
2192 
2193 		// the cache changed in the meantime
2194 		cache->Unlock();
2195 	}
2196 }
2197 
2198 
2199 void
2200 vm_area_put_locked_cache(VMCache* cache)
2201 {
2202 	cache->ReleaseRefAndUnlock();
2203 }
2204 
2205 
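/*!	Creates a new area in \a team that mirrors the existing area \a sourceID,
	mapping the source area's cache either shared or privately, depending on
	\a mapping. Both the source and the new area are marked B_SHARED_AREA so
	that copy-on-write does not interfere with the clone.
*/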
2206 area_id
2207 vm_clone_area(team_id team, const char* name, void** address,
2208 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2209 	bool kernel)
2210 {
2211 	VMArea* newArea = NULL;
2212 	VMArea* sourceArea;
2213 
2214 	// Check whether the source area exists and is cloneable. If so, mark it
2215 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2216 	{
2217 		AddressSpaceWriteLocker locker;
2218 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2219 		if (status != B_OK)
2220 			return status;
2221 
2222 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2223 			return B_NOT_ALLOWED;
2224 
2225 		sourceArea->protection |= B_SHARED_AREA;
2226 		protection |= B_SHARED_AREA;
2227 	}
2228 
2229 	// Now lock both address spaces and actually do the cloning.
2230 
2231 	MultiAddressSpaceLocker locker;
2232 	VMAddressSpace* sourceAddressSpace;
2233 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2234 	if (status != B_OK)
2235 		return status;
2236 
2237 	VMAddressSpace* targetAddressSpace;
2238 	status = locker.AddTeam(team, true, &targetAddressSpace);
2239 	if (status != B_OK)
2240 		return status;
2241 
2242 	status = locker.Lock();
2243 	if (status != B_OK)
2244 		return status;
2245 
2246 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2247 	if (sourceArea == NULL)
2248 		return B_BAD_VALUE;
2249 
2250 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2251 		return B_NOT_ALLOWED;
2252 
2253 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2254 
2255 	if (!kernel && sourceAddressSpace != targetAddressSpace
2256 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2257 #if KDEBUG
2258 		Team* team = thread_get_current_thread()->team;
2259 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2260 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2261 #endif
2262 		status = B_NOT_ALLOWED;
2263 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2264 		status = B_NOT_ALLOWED;
2265 	} else {
2266 		virtual_address_restrictions addressRestrictions = {};
2267 		addressRestrictions.address = *address;
2268 		addressRestrictions.address_specification = addressSpec;
2269 		status = map_backing_store(targetAddressSpace, cache,
2270 			sourceArea->cache_offset, name, sourceArea->Size(),
2271 			sourceArea->wiring, protection, sourceArea->protection_max,
2272 			mapping, 0, &addressRestrictions,
2273 			kernel, &newArea, address);
2274 	}
2275 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2276 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2277 		// to create a new cache, and has therefore already acquired a reference
2278 		// to the source cache - but otherwise it has no idea that we need
2279 		// one.
2280 		cache->AcquireRefLocked();
2281 	}
2282 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2283 		// we need to map in everything at this point
2284 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2285 			// we don't have actual pages to map but a physical area
2286 			VMTranslationMap* map
2287 				= sourceArea->address_space->TranslationMap();
2288 			map->Lock();
2289 
2290 			phys_addr_t physicalAddress;
2291 			uint32 oldProtection;
2292 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2293 
2294 			map->Unlock();
2295 
2296 			map = targetAddressSpace->TranslationMap();
2297 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2298 				newArea->Base() + (newArea->Size() - 1));
2299 
2300 			vm_page_reservation reservation;
2301 			vm_page_reserve_pages(&reservation, reservePages,
2302 				targetAddressSpace == VMAddressSpace::Kernel()
2303 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2304 			map->Lock();
2305 
2306 			for (addr_t offset = 0; offset < newArea->Size();
2307 					offset += B_PAGE_SIZE) {
2308 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2309 					protection, newArea->MemoryType(), &reservation);
2310 			}
2311 
2312 			map->Unlock();
2313 			vm_page_unreserve_pages(&reservation);
2314 		} else {
2315 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2316 			size_t reservePages = map->MaxPagesNeededToMap(
2317 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2318 			vm_page_reservation reservation;
2319 			vm_page_reserve_pages(&reservation, reservePages,
2320 				targetAddressSpace == VMAddressSpace::Kernel()
2321 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2322 
2323 			// map in all pages from source
2324 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2325 					vm_page* page  = it.Next();) {
2326 				if (!page->busy) {
2327 					DEBUG_PAGE_ACCESS_START(page);
2328 					map_page(newArea, page,
2329 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2330 							- newArea->cache_offset),
2331 						protection, &reservation);
2332 					DEBUG_PAGE_ACCESS_END(page);
2333 				}
2334 			}
2335 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2336 			// ensuring that!
2337 
2338 			vm_page_unreserve_pages(&reservation);
2339 		}
2340 	}
2341 	if (status == B_OK)
2342 		newArea->cache_type = sourceArea->cache_type;
2343 
2344 	vm_area_put_locked_cache(cache);
2345 
2346 	if (status < B_OK)
2347 		return status;
2348 
2349 	return newArea->id;
2350 }
2351 
2352 
2353 /*!	Deletes the specified area of the given address space.
2354 
2355 	The address space must be write-locked.
2356 	The caller must ensure that the area does not have any wired ranges.
2357 
2358 	\param addressSpace The address space containing the area.
2359 	\param area The area to be deleted.
2360 	\param deletingAddressSpace \c true, if the address space is in the process
2361 		of being deleted.
2362 */
2363 static void
2364 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2365 	bool deletingAddressSpace)
2366 {
2367 	ASSERT(!area->IsWired());
2368 
2369 	VMAreaHash::Remove(area);
2370 
2371 	// At this point the area is removed from the global hash table, but
2372 	// still exists in the area list.
2373 
2374 	// Unmap the virtual address space the area occupied.
2375 	{
2376 		// We need to lock the complete cache chain.
2377 		VMCache* topCache = vm_area_get_locked_cache(area);
2378 		VMCacheChainLocker cacheChainLocker(topCache);
2379 		cacheChainLocker.LockAllSourceCaches();
2380 
2381 		// If the area's top cache is a temporary cache and the area is the only
2382 		// one referencing it (besides us currently holding a second reference),
2383 		// the unmapping code doesn't need to care about preserving the accessed
2384 		// and dirty flags of the top cache page mappings.
2385 		bool ignoreTopCachePageFlags
2386 			= topCache->temporary && topCache->RefCount() == 2;
2387 
2388 		area->address_space->TranslationMap()->UnmapArea(area,
2389 			deletingAddressSpace, ignoreTopCachePageFlags);
2390 	}
2391 
2392 	if (!area->cache->temporary)
2393 		area->cache->WriteModified();
2394 
2395 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2396 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2397 
2398 	arch_vm_unset_memory_type(area);
2399 	addressSpace->RemoveArea(area, allocationFlags);
2400 	addressSpace->Put();
2401 
2402 	area->cache->RemoveArea(area);
2403 	area->cache->ReleaseRef();
2404 
2405 	addressSpace->DeleteArea(area, allocationFlags);
2406 }
2407 
2408 
2409 status_t
2410 vm_delete_area(team_id team, area_id id, bool kernel)
2411 {
2412 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2413 		team, id));
2414 
2415 	// lock the address space and make sure the area isn't wired
2416 	AddressSpaceWriteLocker locker;
2417 	VMArea* area;
2418 	AreaCacheLocker cacheLocker;
2419 
2420 	do {
2421 		status_t status = locker.SetFromArea(team, id, area);
2422 		if (status != B_OK)
2423 			return status;
2424 
2425 		cacheLocker.SetTo(area);
2426 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2427 
2428 	cacheLocker.Unlock();
2429 
2430 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2431 		return B_NOT_ALLOWED;
2432 
2433 	delete_area(locker.AddressSpace(), area, false);
2434 	return B_OK;
2435 }
2436 
2437 
2438 /*!	Creates a new cache on top of the given cache, moves all areas from
2439 	the old cache to the new one, and changes the protection of all affected
2440 	areas' pages to read-only. If requested, wired pages are moved up to the
2441 	new cache and copies are added to the old cache in their place.
2442 	Preconditions:
2443 	- The given cache must be locked.
2444 	- All of the cache's areas' address spaces must be read locked.
2445 	- Either the cache must not have any wired ranges or a page reservation for
2446 	  all wired pages must be provided, so they can be copied.
2447 
2448 	\param lowerCache The cache on top of which a new cache shall be created.
2449 	\param wiredPagesReservation If \c NULL, there must not be any wired
2450 		pages in \a lowerCache. Otherwise as many pages must be reserved as
2451 		the cache has wired pages; the wired pages are copied in this case.
2452 */
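/*	Rough sketch of the resulting cache chain (illustrative):

		before:  area(s) --> lowerCache --> ...
		after:   area(s) --> upperCache (new, temporary) --> lowerCache --> ...

	All pages stay in lowerCache, except for wired ones, which are moved to
	upperCache and replaced by copies. The areas' mappings are made read-only,
	so that the next write fault copies the page into upperCache.
*/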
2453 static status_t
2454 vm_copy_on_write_area(VMCache* lowerCache,
2455 	vm_page_reservation* wiredPagesReservation)
2456 {
2457 	VMCache* upperCache;
2458 
2459 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2460 
2461 	// We need to separate the cache from its areas. The cache goes one level
2462 	// deeper and we create a new cache in between.
2463 
2464 	// create an anonymous cache
2465 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2466 		lowerCache->GuardSize() / B_PAGE_SIZE,
2467 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2468 		VM_PRIORITY_USER);
2469 	if (status != B_OK)
2470 		return status;
2471 
2472 	upperCache->Lock();
2473 
2474 	upperCache->temporary = 1;
2475 	upperCache->virtual_base = lowerCache->virtual_base;
2476 	upperCache->virtual_end = lowerCache->virtual_end;
2477 
2478 	// transfer the lower cache areas to the upper cache
2479 	rw_lock_write_lock(&sAreaCacheLock);
2480 	upperCache->TransferAreas(lowerCache);
2481 	rw_lock_write_unlock(&sAreaCacheLock);
2482 
2483 	lowerCache->AddConsumer(upperCache);
2484 
2485 	// We now need to remap all pages from all of the cache's areas read-only,
2486 	// so that a copy will be created on next write access. If there are wired
2487 	// pages, we keep their protection, move them to the upper cache and create
2488 	// copies for the lower cache.
2489 	if (wiredPagesReservation != NULL) {
2490 		// We need to handle wired pages -- iterate through the cache's pages.
2491 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2492 				vm_page* page = it.Next();) {
2493 			if (page->WiredCount() > 0) {
2494 				// allocate a new page and copy the wired one
2495 				vm_page* copiedPage = vm_page_allocate_page(
2496 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2497 
2498 				vm_memcpy_physical_page(
2499 					copiedPage->physical_page_number * B_PAGE_SIZE,
2500 					page->physical_page_number * B_PAGE_SIZE);
2501 
2502 				// move the wired page to the upper cache (note: removing is OK
2503 				// with the SplayTree iterator) and insert the copy
2504 				upperCache->MovePage(page);
2505 				lowerCache->InsertPage(copiedPage,
2506 					page->cache_offset * B_PAGE_SIZE);
2507 
2508 				DEBUG_PAGE_ACCESS_END(copiedPage);
2509 			} else {
2510 				// Change the protection of this page in all areas.
2511 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2512 						tempArea = tempArea->cache_next) {
2513 					// The page must stay readable: always for the kernel,
2514 					// and for userland only if the area has B_READ_AREA set.
2515 					uint32 protection = B_KERNEL_READ_AREA;
2516 					if ((tempArea->protection & B_READ_AREA) != 0)
2517 						protection |= B_READ_AREA;
2518 
2519 					VMTranslationMap* map
2520 						= tempArea->address_space->TranslationMap();
2521 					map->Lock();
2522 					map->ProtectPage(tempArea,
2523 						virtual_page_address(tempArea, page), protection);
2524 					map->Unlock();
2525 				}
2526 			}
2527 		}
2528 	} else {
2529 		ASSERT(lowerCache->WiredPagesCount() == 0);
2530 
2531 		// just change the protection of all areas
2532 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2533 				tempArea = tempArea->cache_next) {
2534 			// The page must stay readable: always for the kernel, and for
2535 			// userland only if the area has B_READ_AREA set.
2536 			uint32 protection = B_KERNEL_READ_AREA;
2537 			if ((tempArea->protection & B_READ_AREA) != 0)
2538 				protection |= B_READ_AREA;
2539 
2540 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2541 			map->Lock();
2542 			map->ProtectArea(tempArea, protection);
2543 			map->Unlock();
2544 		}
2545 	}
2546 
2547 	vm_area_put_locked_cache(upperCache);
2548 
2549 	return B_OK;
2550 }
2551 
2552 
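/*!	Creates a copy of the area \a sourceID in the given team. If the source
	area is shared, the new area maps the same cache; otherwise the pages are
	shared copy-on-write, with a writable source being pushed one cache level
	down via vm_copy_on_write_area() and wired pages being copied eagerly.
*/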
2553 area_id
2554 vm_copy_area(team_id team, const char* name, void** _address,
2555 	uint32 addressSpec, area_id sourceID)
2556 {
2557 	// Do the locking: target address space, all address spaces associated with
2558 	// the source cache, and the cache itself.
2559 	MultiAddressSpaceLocker locker;
2560 	VMAddressSpace* targetAddressSpace;
2561 	VMCache* cache;
2562 	VMArea* source;
2563 	AreaCacheLocker cacheLocker;
2564 	status_t status;
2565 	bool sharedArea;
2566 
2567 	page_num_t wiredPages = 0;
2568 	vm_page_reservation wiredPagesReservation;
2569 
2570 	bool restart;
2571 	do {
2572 		restart = false;
2573 
2574 		locker.Unset();
2575 		status = locker.AddTeam(team, true, &targetAddressSpace);
2576 		if (status == B_OK) {
2577 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2578 				&cache);
2579 		}
2580 		if (status != B_OK)
2581 			return status;
2582 
2583 		cacheLocker.SetTo(cache, true);	// already locked
2584 
2585 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2586 
2587 		page_num_t oldWiredPages = wiredPages;
2588 		wiredPages = 0;
2589 
2590 		// If the source area isn't shared, count the number of wired pages in
2591 		// the cache and reserve as many pages.
2592 		if (!sharedArea) {
2593 			wiredPages = cache->WiredPagesCount();
2594 
2595 			if (wiredPages > oldWiredPages) {
2596 				cacheLocker.Unlock();
2597 				locker.Unlock();
2598 
2599 				if (oldWiredPages > 0)
2600 					vm_page_unreserve_pages(&wiredPagesReservation);
2601 
2602 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2603 					VM_PRIORITY_USER);
2604 
2605 				restart = true;
2606 			}
2607 		} else if (oldWiredPages > 0)
2608 			vm_page_unreserve_pages(&wiredPagesReservation);
2609 	} while (restart);
2610 
2611 	// unreserve pages later
2612 	struct PagesUnreserver {
2613 		PagesUnreserver(vm_page_reservation* reservation)
2614 			:
2615 			fReservation(reservation)
2616 		{
2617 		}
2618 
2619 		~PagesUnreserver()
2620 		{
2621 			if (fReservation != NULL)
2622 				vm_page_unreserve_pages(fReservation);
2623 		}
2624 
2625 	private:
2626 		vm_page_reservation*	fReservation;
2627 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2628 
2629 	bool writableCopy
2630 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2631 	uint8* targetPageProtections = NULL;
2632 
2633 	if (source->page_protections != NULL) {
2634 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2635 		targetPageProtections = (uint8*)malloc_etc(bytes,
2636 			HEAP_DONT_LOCK_KERNEL_SPACE);
2637 		if (targetPageProtections == NULL)
2638 			return B_NO_MEMORY;
2639 
2640 		memcpy(targetPageProtections, source->page_protections, bytes);
2641 
2642 		if (!writableCopy) {
2643 			for (size_t i = 0; i < bytes; i++) {
2644 				if ((targetPageProtections[i]
2645 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2646 					writableCopy = true;
2647 					break;
2648 				}
2649 			}
2650 		}
2651 	}
2652 
2653 	if (addressSpec == B_CLONE_ADDRESS) {
2654 		addressSpec = B_EXACT_ADDRESS;
2655 		*_address = (void*)source->Base();
2656 	}
2657 
2658 	// First, create a cache on top of the source area, respectively use the
2659 	// existing one, if this is a shared area.
2660 
2661 	VMArea* target;
2662 	virtual_address_restrictions addressRestrictions = {};
2663 	addressRestrictions.address = *_address;
2664 	addressRestrictions.address_specification = addressSpec;
2665 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2666 		name, source->Size(), source->wiring, source->protection,
2667 		source->protection_max,
2668 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2669 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2670 		&addressRestrictions, true, &target, _address);
2671 	if (status < B_OK) {
2672 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2673 		return status;
2674 	}
2675 
2676 	if (targetPageProtections != NULL)
2677 		target->page_protections = targetPageProtections;
2678 
2679 	if (sharedArea) {
2680 		// The new area uses the old area's cache, but map_backing_store()
2681 		// hasn't acquired a ref. So we have to do that now.
2682 		cache->AcquireRefLocked();
2683 	}
2684 
2685 	// If the source area is writable, we need to move it one layer up as well
2686 
2687 	if (!sharedArea) {
2688 		if (writableCopy) {
2689 			// TODO: do something more useful if this fails!
2690 			if (vm_copy_on_write_area(cache,
2691 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2692 				panic("vm_copy_on_write_area() failed!\n");
2693 			}
2694 		}
2695 	}
2696 
2697 	// we return the ID of the newly created area
2698 	return target->id;
2699 }
2700 
2701 
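/*!	Changes the protection of the given area. Making a copy-on-write area
	writable may require inserting a new cache via vm_copy_on_write_area() or
	adjusting the cache's commitment; existing page mappings are remapped to
	match the new protection.
*/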
2702 status_t
2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2704 	bool kernel)
2705 {
2706 	fix_protection(&newProtection);
2707 
2708 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2709 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2710 
2711 	if (!arch_vm_supports_protection(newProtection))
2712 		return B_NOT_SUPPORTED;
2713 
2714 	bool becomesWritable
2715 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2716 
2717 	// lock address spaces and cache
2718 	MultiAddressSpaceLocker locker;
2719 	VMCache* cache;
2720 	VMArea* area;
2721 	status_t status;
2722 	AreaCacheLocker cacheLocker;
2723 	bool isWritable;
2724 
2725 	bool restart;
2726 	do {
2727 		restart = false;
2728 
2729 		locker.Unset();
2730 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2731 		if (status != B_OK)
2732 			return status;
2733 
2734 		cacheLocker.SetTo(cache, true);	// already locked
2735 
2736 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2737 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2738 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2739 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2740 				" (%s)\n", team, newProtection, areaID, area->name);
2741 			return B_NOT_ALLOWED;
2742 		}
2743 		if (!kernel && area->protection_max != 0
2744 			&& (newProtection & area->protection_max)
2745 				!= (newProtection & B_USER_PROTECTION)) {
2746 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2747 				"set protection %#" B_PRIx32 " exceeding the allowed maximum "
2748 				"%#" B_PRIx32 " on area %" B_PRId32 " (%s)\n", team,
2749 				newProtection, area->protection_max, areaID, area->name);
2750 			return B_NOT_ALLOWED;
2751 		}
2752 
2753 		if (area->protection == newProtection)
2754 			return B_OK;
2755 
2756 		if (team != VMAddressSpace::KernelID()
2757 			&& area->address_space->ID() != team) {
2758 			// unless you're the kernel, you are only allowed to set
2759 			// the protection of your own areas
2760 			return B_NOT_ALLOWED;
2761 		}
2762 
2763 		isWritable
2764 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2765 
2766 		// Make sure the area (respectively, if we're going to call
2767 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2768 		// wired ranges.
2769 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2770 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2771 					otherArea = otherArea->cache_next) {
2772 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2773 					restart = true;
2774 					break;
2775 				}
2776 			}
2777 		} else {
2778 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2779 				restart = true;
2780 		}
2781 	} while (restart);
2782 
2783 	bool changePageProtection = true;
2784 	bool changeTopCachePagesOnly = false;
2785 
2786 	if (isWritable && !becomesWritable) {
2787 		// writable -> !writable
2788 
2789 		if (cache->source != NULL && cache->temporary) {
2790 			if (cache->CountWritableAreas(area) == 0) {
2791 				// Since this cache is now backed by the pages of its source
2792 				// cache, we can change its commitment to cover only the
2793 				// pages that are actually in this cache.
2794 
2795 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2796 					team == VMAddressSpace::KernelID()
2797 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2798 
2799 				// TODO: we may be able to join with our source cache, if
2800 				// count == 0
2801 			}
2802 		}
2803 
2804 		// If only the writability changes, we can just remap the pages of the
2805 		// top cache, since the pages of lower caches are mapped read-only
2806 		// anyway. That is only advantageous if the number of pages in the
2807 		// cache is significantly smaller than the number of pages in the
2808 		// area, though.
2809 		if (newProtection
2810 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2811 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2812 			changeTopCachePagesOnly = true;
2813 		}
2814 	} else if (!isWritable && becomesWritable) {
2815 		// !writable -> writable
2816 
2817 		if (!cache->consumers.IsEmpty()) {
2818 			// There are consumers -- we have to insert a new cache. Fortunately
2819 			// vm_copy_on_write_area() does everything that's needed.
2820 			changePageProtection = false;
2821 			status = vm_copy_on_write_area(cache, NULL);
2822 		} else {
2823 			// No consumers, so we don't need to insert a new one.
2824 			if (cache->source != NULL && cache->temporary) {
2825 				// the cache's commitment must contain all possible pages
2826 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2827 					team == VMAddressSpace::KernelID()
2828 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2829 			}
2830 
2831 			if (status == B_OK && cache->source != NULL) {
2832 				// There's a source cache, hence we can't just change all pages'
2833 				// protection or we might allow writing into pages belonging to
2834 				// a lower cache.
2835 				changeTopCachePagesOnly = true;
2836 			}
2837 		}
2838 	} else {
2839 		// we don't have anything special to do in all other cases
2840 	}
2841 
2842 	if (status == B_OK) {
2843 		// remap existing pages in this cache
2844 		if (changePageProtection) {
2845 			VMTranslationMap* map = area->address_space->TranslationMap();
2846 			map->Lock();
2847 
2848 			if (changeTopCachePagesOnly) {
2849 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2850 				page_num_t lastPageOffset
2851 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2852 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2853 						vm_page* page = it.Next();) {
2854 					if (page->cache_offset >= firstPageOffset
2855 						&& page->cache_offset < lastPageOffset) {
2856 						addr_t address = virtual_page_address(area, page);
2857 						map->ProtectPage(area, address, newProtection);
2858 					}
2859 				}
2860 			} else
2861 				map->ProtectArea(area, newProtection);
2862 
2863 			map->Unlock();
2864 		}
2865 
2866 		area->protection = newProtection;
2867 	}
2868 
2869 	return status;
2870 }
2871 
2872 
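/*!	Looks up the physical address that \a vaddr is currently mapped to in the
	given team's address space and stores it in \a paddr.
*/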
2873 status_t
2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2875 {
2876 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2877 	if (addressSpace == NULL)
2878 		return B_BAD_TEAM_ID;
2879 
2880 	VMTranslationMap* map = addressSpace->TranslationMap();
2881 
2882 	map->Lock();
2883 	uint32 dummyFlags;
2884 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2885 	map->Unlock();
2886 
2887 	addressSpace->Put();
2888 	return status;
2889 }
2890 
2891 
2892 /*!	The page's cache must be locked.
2893 */
2894 bool
2895 vm_test_map_modification(vm_page* page)
2896 {
2897 	if (page->modified)
2898 		return true;
2899 
2900 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2901 	vm_page_mapping* mapping;
2902 	while ((mapping = iterator.Next()) != NULL) {
2903 		VMArea* area = mapping->area;
2904 		VMTranslationMap* map = area->address_space->TranslationMap();
2905 
2906 		phys_addr_t physicalAddress;
2907 		uint32 flags;
2908 		map->Lock();
2909 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2910 		map->Unlock();
2911 
2912 		if ((flags & PAGE_MODIFIED) != 0)
2913 			return true;
2914 	}
2915 
2916 	return false;
2917 }
2918 
2919 
2920 /*!	The page's cache must be locked.
2921 */
2922 void
2923 vm_clear_map_flags(vm_page* page, uint32 flags)
2924 {
2925 	if ((flags & PAGE_ACCESSED) != 0)
2926 		page->accessed = false;
2927 	if ((flags & PAGE_MODIFIED) != 0)
2928 		page->modified = false;
2929 
2930 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2931 	vm_page_mapping* mapping;
2932 	while ((mapping = iterator.Next()) != NULL) {
2933 		VMArea* area = mapping->area;
2934 		VMTranslationMap* map = area->address_space->TranslationMap();
2935 
2936 		map->Lock();
2937 		map->ClearFlags(virtual_page_address(area, page), flags);
2938 		map->Unlock();
2939 	}
2940 }
2941 
2942 
2943 /*!	Removes all mappings from a page.
2944 	After you've called this function, the page is unmapped from memory and
2945 	the page's \c accessed and \c modified flags have been updated according
2946 	to the state of the mappings.
2947 	The page's cache must be locked.
2948 */
2949 void
2950 vm_remove_all_page_mappings(vm_page* page)
2951 {
2952 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2953 		VMArea* area = mapping->area;
2954 		VMTranslationMap* map = area->address_space->TranslationMap();
2955 		addr_t address = virtual_page_address(area, page);
2956 		map->UnmapPage(area, address, false);
2957 	}
2958 }
2959 
2960 
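/*!	Clears the accessed flag of all of the page's mappings and of the page
	itself, updating the page's \c modified flag from the mappings on the way.
	The page's cache must be locked.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
*/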
2961 int32
2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2963 {
2964 	int32 count = 0;
2965 
2966 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2967 	vm_page_mapping* mapping;
2968 	while ((mapping = iterator.Next()) != NULL) {
2969 		VMArea* area = mapping->area;
2970 		VMTranslationMap* map = area->address_space->TranslationMap();
2971 
2972 		bool modified;
2973 		if (map->ClearAccessedAndModified(area,
2974 				virtual_page_address(area, page), false, modified)) {
2975 			count++;
2976 		}
2977 
2978 		page->modified |= modified;
2979 	}
2980 
2981 
2982 	if (page->accessed) {
2983 		count++;
2984 		page->accessed = false;
2985 	}
2986 
2987 	return count;
2988 }
2989 
2990 
2991 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2992 	mappings.
2993 	The function iterates through the page mappings and removes them until
2994 	encountering one that has been accessed. From then on it will continue to
2995 	iterate, but only clear the accessed flag of the mapping. The page's
2996 	\c modified bit will be updated accordingly, the \c accessed bit will be
2997 	cleared.
2998 	\return The number of mapping accessed bits encountered, including the
2999 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3000 		of the page have been removed.
3001 */
3002 int32
3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3004 {
3005 	ASSERT(page->WiredCount() == 0);
3006 
3007 	if (page->accessed)
3008 		return vm_clear_page_mapping_accessed_flags(page);
3009 
3010 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3011 		VMArea* area = mapping->area;
3012 		VMTranslationMap* map = area->address_space->TranslationMap();
3013 		addr_t address = virtual_page_address(area, page);
3014 		bool modified = false;
3015 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3016 			page->accessed = true;
3017 			page->modified |= modified;
3018 			return vm_clear_page_mapping_accessed_flags(page);
3019 		}
3020 		page->modified |= modified;
3021 	}
3022 
3023 	return 0;
3024 }
3025 
3026 
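/*	Backs the "dl", "dw", "ds", "db" and "string" KDL commands. Illustrative
	invocations (the addresses are made-up examples): "dw 0x80001000 8" dumps
	eight 32-bit values, "db -p 0x9f000 16" dumps 16 bytes from a physical
	page.
*/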
3027 static int
3028 display_mem(int argc, char** argv)
3029 {
3030 	bool physical = false;
3031 	addr_t copyAddress;
3032 	int32 displayWidth;
3033 	int32 itemSize;
3034 	int32 num = -1;
3035 	addr_t address;
3036 	int i = 1, j;
3037 
3038 	if (argc > 1 && argv[1][0] == '-') {
3039 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3040 			physical = true;
3041 			i++;
3042 		} else
3043 			i = 99;
3044 	}
3045 
3046 	if (argc < i + 1 || argc > i + 2) {
3047 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3048 			"\tdl - 8 bytes\n"
3049 			"\tdw - 4 bytes\n"
3050 			"\tds - 2 bytes\n"
3051 			"\tdb - 1 byte\n"
3052 			"\tstring - a whole string\n"
3053 			"  -p or --physical only allows memory from a single page to be "
3054 			"displayed.\n");
3055 		return 0;
3056 	}
3057 
3058 	address = parse_expression(argv[i]);
3059 
3060 	if (argc > i + 1)
3061 		num = parse_expression(argv[i + 1]);
3062 
3063 	// build the format string
3064 	if (strcmp(argv[0], "db") == 0) {
3065 		itemSize = 1;
3066 		displayWidth = 16;
3067 	} else if (strcmp(argv[0], "ds") == 0) {
3068 		itemSize = 2;
3069 		displayWidth = 8;
3070 	} else if (strcmp(argv[0], "dw") == 0) {
3071 		itemSize = 4;
3072 		displayWidth = 4;
3073 	} else if (strcmp(argv[0], "dl") == 0) {
3074 		itemSize = 8;
3075 		displayWidth = 2;
3076 	} else if (strcmp(argv[0], "string") == 0) {
3077 		itemSize = 1;
3078 		displayWidth = -1;
3079 	} else {
3080 		kprintf("display_mem called in an invalid way!\n");
3081 		return 0;
3082 	}
3083 
3084 	if (num <= 0)
3085 		num = displayWidth;
3086 
3087 	void* physicalPageHandle = NULL;
3088 
3089 	if (physical) {
3090 		int32 offset = address & (B_PAGE_SIZE - 1);
3091 		if (num * itemSize + offset > B_PAGE_SIZE) {
3092 			num = (B_PAGE_SIZE - offset) / itemSize;
3093 			kprintf("NOTE: number of bytes has been cut to page size\n");
3094 		}
3095 
3096 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3097 
3098 		if (vm_get_physical_page_debug(address, &copyAddress,
3099 				&physicalPageHandle) != B_OK) {
3100 			kprintf("getting the hardware page failed.\n");
3101 			return 0;
3102 		}
3103 
3104 		address += offset;
3105 		copyAddress += offset;
3106 	} else
3107 		copyAddress = address;
3108 
3109 	if (!strcmp(argv[0], "string")) {
3110 		kprintf("%p \"", (char*)copyAddress);
3111 
3112 		// string mode
3113 		for (i = 0; true; i++) {
3114 			char c;
3115 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3116 					!= B_OK
3117 				|| c == '\0') {
3118 				break;
3119 			}
3120 
3121 			if (c == '\n')
3122 				kprintf("\\n");
3123 			else if (c == '\t')
3124 				kprintf("\\t");
3125 			else {
3126 				if (!isprint(c))
3127 					c = '.';
3128 
3129 				kprintf("%c", c);
3130 			}
3131 		}
3132 
3133 		kprintf("\"\n");
3134 	} else {
3135 		// number mode
3136 		for (i = 0; i < num; i++) {
3137 			uint64 value;
3138 
3139 			if ((i % displayWidth) == 0) {
3140 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3141 				if (i != 0)
3142 					kprintf("\n");
3143 
3144 				kprintf("[0x%lx]  ", address + i * itemSize);
3145 
3146 				for (j = 0; j < displayed; j++) {
3147 					char c;
3148 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3149 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3150 						displayed = j;
3151 						break;
3152 					}
3153 					if (!isprint(c))
3154 						c = '.';
3155 
3156 					kprintf("%c", c);
3157 				}
3158 				if (num > displayWidth) {
3159 					// make sure the spacing in the last line is correct
3160 					for (j = displayed; j < displayWidth * itemSize; j++)
3161 						kprintf(" ");
3162 				}
3163 				kprintf("  ");
3164 			}
3165 
3166 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3167 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3168 				kprintf("read fault");
3169 				break;
3170 			}
3171 
3172 			switch (itemSize) {
3173 				case 1:
3174 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3175 					break;
3176 				case 2:
3177 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3178 					break;
3179 				case 4:
3180 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3181 					break;
3182 				case 8:
3183 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3184 					break;
3185 			}
3186 		}
3187 
3188 		kprintf("\n");
3189 	}
3190 
3191 	if (physical) {
3192 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3193 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3194 	}
3195 	return 0;
3196 }
3197 
3198 
3199 static void
3200 dump_cache_tree_recursively(VMCache* cache, int level,
3201 	VMCache* highlightCache)
3202 {
3203 	// print this cache
3204 	for (int i = 0; i < level; i++)
3205 		kprintf("  ");
3206 	if (cache == highlightCache)
3207 		kprintf("%p <--\n", cache);
3208 	else
3209 		kprintf("%p\n", cache);
3210 
3211 	// recursively print its consumers
3212 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3213 			VMCache* consumer = it.Next();) {
3214 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3215 	}
3216 }
3217 
3218 
3219 static int
3220 dump_cache_tree(int argc, char** argv)
3221 {
3222 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3223 		kprintf("usage: %s <address>\n", argv[0]);
3224 		return 0;
3225 	}
3226 
3227 	addr_t address = parse_expression(argv[1]);
3228 	if (address == 0)
3229 		return 0;
3230 
3231 	VMCache* cache = (VMCache*)address;
3232 	VMCache* root = cache;
3233 
3234 	// find the root cache (the transitive source)
3235 	while (root->source != NULL)
3236 		root = root->source;
3237 
3238 	dump_cache_tree_recursively(root, 0, cache);
3239 
3240 	return 0;
3241 }
3242 
3243 
3244 const char*
3245 vm_cache_type_to_string(int32 type)
3246 {
3247 	switch (type) {
3248 		case CACHE_TYPE_RAM:
3249 			return "RAM";
3250 		case CACHE_TYPE_DEVICE:
3251 			return "device";
3252 		case CACHE_TYPE_VNODE:
3253 			return "vnode";
3254 		case CACHE_TYPE_NULL:
3255 			return "null";
3256 
3257 		default:
3258 			return "unknown";
3259 	}
3260 }
3261 
3262 
3263 #if DEBUG_CACHE_LIST
3264 
3265 static void
3266 update_cache_info_recursively(VMCache* cache, cache_info& info)
3267 {
3268 	info.page_count += cache->page_count;
3269 	if (cache->type == CACHE_TYPE_RAM)
3270 		info.committed += cache->committed_size;
3271 
3272 	// recurse
3273 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3274 			VMCache* consumer = it.Next();) {
3275 		update_cache_info_recursively(consumer, info);
3276 	}
3277 }
3278 
3279 
3280 static int
3281 cache_info_compare_page_count(const void* _a, const void* _b)
3282 {
3283 	const cache_info* a = (const cache_info*)_a;
3284 	const cache_info* b = (const cache_info*)_b;
3285 	if (a->page_count == b->page_count)
3286 		return 0;
3287 	return a->page_count < b->page_count ? 1 : -1;
3288 }
3289 
3290 
3291 static int
3292 cache_info_compare_committed(const void* _a, const void* _b)
3293 {
3294 	const cache_info* a = (const cache_info*)_a;
3295 	const cache_info* b = (const cache_info*)_b;
3296 	if (a->committed == b->committed)
3297 		return 0;
3298 	return a->committed < b->committed ? 1 : -1;
3299 }
3300 
3301 
3302 static void
3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3304 {
3305 	for (int i = 0; i < level; i++)
3306 		kprintf("  ");
3307 
3308 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3309 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3310 		cache->virtual_base, cache->virtual_end, cache->page_count);
3311 
3312 	if (level == 0)
3313 		kprintf("/%lu", info.page_count);
3314 
3315 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3316 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3317 
3318 		if (level == 0)
3319 			kprintf("/%lu", info.committed);
3320 	}
3321 
3322 	// areas
3323 	if (cache->areas != NULL) {
3324 		VMArea* area = cache->areas;
3325 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3326 			area->name, area->address_space->ID());
3327 
3328 		while (area->cache_next != NULL) {
3329 			area = area->cache_next;
3330 			kprintf(", %" B_PRId32, area->id);
3331 		}
3332 	}
3333 
3334 	kputs("\n");
3335 
3336 	// recurse
3337 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3338 			VMCache* consumer = it.Next();) {
3339 		dump_caches_recursively(consumer, info, level + 1);
3340 	}
3341 }
3342 
3343 
3344 static int
3345 dump_caches(int argc, char** argv)
3346 {
3347 	if (sCacheInfoTable == NULL) {
3348 		kprintf("No cache info table!\n");
3349 		return 0;
3350 	}
3351 
3352 	bool sortByPageCount = true;
3353 
3354 	for (int32 i = 1; i < argc; i++) {
3355 		if (strcmp(argv[i], "-c") == 0) {
3356 			sortByPageCount = false;
3357 		} else {
3358 			print_debugger_command_usage(argv[0]);
3359 			return 0;
3360 		}
3361 	}
3362 
3363 	uint32 totalCount = 0;
3364 	uint32 rootCount = 0;
3365 	off_t totalCommitted = 0;
3366 	page_num_t totalPages = 0;
3367 
3368 	VMCache* cache = gDebugCacheList;
3369 	while (cache) {
3370 		totalCount++;
3371 		if (cache->source == NULL) {
3372 			cache_info stackInfo;
3373 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3374 				? sCacheInfoTable[rootCount] : stackInfo;
3375 			rootCount++;
3376 			info.cache = cache;
3377 			info.page_count = 0;
3378 			info.committed = 0;
3379 			update_cache_info_recursively(cache, info);
3380 			totalCommitted += info.committed;
3381 			totalPages += info.page_count;
3382 		}
3383 
3384 		cache = cache->debug_next;
3385 	}
3386 
3387 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3388 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3389 			sortByPageCount
3390 				? &cache_info_compare_page_count
3391 				: &cache_info_compare_committed);
3392 	}
3393 
3394 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3395 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3396 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3397 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3398 			"page count" : "committed size");
3399 
3400 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3401 		for (uint32 i = 0; i < rootCount; i++) {
3402 			cache_info& info = sCacheInfoTable[i];
3403 			dump_caches_recursively(info.cache, info, 0);
3404 		}
3405 	} else
3406 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3407 
3408 	return 0;
3409 }
3410 
3411 #endif	// DEBUG_CACHE_LIST
3412 
3413 
3414 static int
3415 dump_cache(int argc, char** argv)
3416 {
3417 	VMCache* cache;
3418 	bool showPages = false;
3419 	int i = 1;
3420 
3421 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3422 		kprintf("usage: %s [-ps] <address>\n"
3423 			"  if -p is specified, all pages are shown; if -s is used,\n"
3424 			"  only the cache info is shown.\n", argv[0]);
3425 		return 0;
3426 	}
3427 	while (argv[i][0] == '-') {
3428 		char* arg = argv[i] + 1;
3429 		while (arg[0]) {
3430 			if (arg[0] == 'p')
3431 				showPages = true;
3432 			arg++;
3433 		}
3434 		i++;
3435 	}
3436 	if (argv[i] == NULL) {
3437 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	addr_t address = parse_expression(argv[i]);
3442 	if (address == 0)
3443 		return 0;
3444 
3445 	cache = (VMCache*)address;
3446 
3447 	cache->Dump(showPages);
3448 
3449 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3450 
3451 	return 0;
3452 }
3453 
3454 
3455 static void
3456 dump_area_struct(VMArea* area, bool mappings)
3457 {
3458 	kprintf("AREA: %p\n", area);
3459 	kprintf("name:\t\t'%s'\n", area->name);
3460 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3461 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3462 	kprintf("base:\t\t0x%lx\n", area->Base());
3463 	kprintf("size:\t\t0x%lx\n", area->Size());
3464 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3465 	kprintf("page_protection:%p\n", area->page_protections);
3466 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3467 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3468 	kprintf("cache:\t\t%p\n", area->cache);
3469 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3470 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3471 	kprintf("cache_next:\t%p\n", area->cache_next);
3472 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3473 
3474 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3475 	if (mappings) {
3476 		kprintf("page mappings:\n");
3477 		while (iterator.HasNext()) {
3478 			vm_page_mapping* mapping = iterator.Next();
3479 			kprintf("  %p", mapping->page);
3480 		}
3481 		kprintf("\n");
3482 	} else {
3483 		uint32 count = 0;
3484 		while (iterator.Next() != NULL) {
3485 			count++;
3486 		}
3487 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3488 	}
3489 }
3490 
3491 
3492 static int
3493 dump_area(int argc, char** argv)
3494 {
3495 	bool mappings = false;
3496 	bool found = false;
3497 	int32 index = 1;
3498 	VMArea* area;
3499 	addr_t num;
3500 
3501 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3502 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3503 			"All areas matching either id/address/name are listed. You can\n"
3504 			"restrict the search to a single attribute by prefixing the\n"
3505 			"specifier with one of the id/contains/address/name keywords.\n"
3506 			"-m shows the area's mappings as well.\n");
3507 		return 0;
3508 	}
3509 
3510 	if (!strcmp(argv[1], "-m")) {
3511 		mappings = true;
3512 		index++;
3513 	}
3514 
3515 	int32 mode = 0xf;
3516 	if (!strcmp(argv[index], "id"))
3517 		mode = 1;
3518 	else if (!strcmp(argv[index], "contains"))
3519 		mode = 2;
3520 	else if (!strcmp(argv[index], "name"))
3521 		mode = 4;
3522 	else if (!strcmp(argv[index], "address"))
3523 		mode = 0;
3524 	if (mode != 0xf)
3525 		index++;
3526 
3527 	if (index >= argc) {
3528 		kprintf("No area specifier given.\n");
3529 		return 0;
3530 	}
3531 
3532 	num = parse_expression(argv[index]);
3533 
3534 	if (mode == 0) {
3535 		dump_area_struct((struct VMArea*)num, mappings);
3536 	} else {
3537 		// walk through the area list, looking for the arguments as a name
3538 
3539 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3540 		while ((area = it.Next()) != NULL) {
3541 			if (((mode & 4) != 0
3542 					&& !strcmp(argv[index], area->name))
3543 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3544 					|| (((mode & 2) != 0 && area->Base() <= num
3545 						&& area->Base() + area->Size() > num))))) {
3546 				dump_area_struct(area, mappings);
3547 				found = true;
3548 			}
3549 		}
3550 
3551 		if (!found)
3552 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3553 	}
3554 
3555 	return 0;
3556 }
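
// Illustrative KDL invocations for the command above (a sketch, not text from
// the command itself): "area id 0x2f" restricts matching to the area ID,
// "area -m contains 0x80001234" dumps the area covering that (hypothetical)
// address including its page mappings, and "area swap" matches areas whose
// name is exactly "swap".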
3557 
3558 
3559 static int
3560 dump_area_list(int argc, char** argv)
3561 {
3562 	VMArea* area;
3563 	const char* name = NULL;
3564 	int32 id = 0;
3565 
3566 	if (argc > 1) {
3567 		id = parse_expression(argv[1]);
3568 		if (id == 0)
3569 			name = argv[1];
3570 	}
3571 
3572 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3573 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3574 		B_PRINTF_POINTER_WIDTH, "size");
3575 
3576 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3577 	while ((area = it.Next()) != NULL) {
3578 		if ((id != 0 && area->address_space->ID() != id)
3579 			|| (name != NULL && strstr(area->name, name) == NULL))
3580 			continue;
3581 
3582 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3583 			area->id, (void*)area->Base(), (void*)area->Size(),
3584 			area->protection, area->wiring, area->name);
3585 	}
3586 	return 0;
3587 }
3588 
3589 
3590 static int
3591 dump_available_memory(int argc, char** argv)
3592 {
3593 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3594 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3595 	return 0;
3596 }
3597 
3598 
3599 static int
3600 dump_mapping_info(int argc, char** argv)
3601 {
3602 	bool reverseLookup = false;
3603 	bool pageLookup = false;
3604 
3605 	int argi = 1;
3606 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3607 		const char* arg = argv[argi];
3608 		if (strcmp(arg, "-r") == 0) {
3609 			reverseLookup = true;
3610 		} else if (strcmp(arg, "-p") == 0) {
3611 			reverseLookup = true;
3612 			pageLookup = true;
3613 		} else {
3614 			print_debugger_command_usage(argv[0]);
3615 			return 0;
3616 		}
3617 	}
3618 
3619 	// We need at least one argument, the address. Optionally a thread ID can be
3620 	// specified.
3621 	if (argi >= argc || argi + 2 < argc) {
3622 		print_debugger_command_usage(argv[0]);
3623 		return 0;
3624 	}
3625 
3626 	uint64 addressValue;
3627 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3628 		return 0;
3629 
3630 	Team* team = NULL;
3631 	if (argi < argc) {
3632 		uint64 threadID;
3633 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3634 			return 0;
3635 
3636 		Thread* thread = Thread::GetDebug(threadID);
3637 		if (thread == NULL) {
3638 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3639 			return 0;
3640 		}
3641 
3642 		team = thread->team;
3643 	}
3644 
3645 	if (reverseLookup) {
3646 		phys_addr_t physicalAddress;
3647 		if (pageLookup) {
3648 			vm_page* page = (vm_page*)(addr_t)addressValue;
3649 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3650 		} else {
3651 			physicalAddress = (phys_addr_t)addressValue;
3652 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3653 		}
3654 
3655 		kprintf("    Team     Virtual Address      Area\n");
3656 		kprintf("--------------------------------------\n");
3657 
3658 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3659 			Callback()
3660 				:
3661 				fAddressSpace(NULL)
3662 			{
3663 			}
3664 
3665 			void SetAddressSpace(VMAddressSpace* addressSpace)
3666 			{
3667 				fAddressSpace = addressSpace;
3668 			}
3669 
3670 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3671 			{
3672 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3673 					virtualAddress);
3674 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3675 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3676 				else
3677 					kprintf("\n");
3678 				return false;
3679 			}
3680 
3681 		private:
3682 			VMAddressSpace*	fAddressSpace;
3683 		} callback;
3684 
3685 		if (team != NULL) {
3686 			// team specified -- get its address space
3687 			VMAddressSpace* addressSpace = team->address_space;
3688 			if (addressSpace == NULL) {
3689 				kprintf("Failed to get address space!\n");
3690 				return 0;
3691 			}
3692 
3693 			callback.SetAddressSpace(addressSpace);
3694 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3695 				physicalAddress, callback);
3696 		} else {
3697 			// no team specified -- iterate through all address spaces
3698 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3699 				addressSpace != NULL;
3700 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3701 				callback.SetAddressSpace(addressSpace);
3702 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3703 					physicalAddress, callback);
3704 			}
3705 		}
3706 	} else {
3707 		// get the address space
3708 		addr_t virtualAddress = (addr_t)addressValue;
3709 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3710 		VMAddressSpace* addressSpace;
3711 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3712 			addressSpace = VMAddressSpace::Kernel();
3713 		} else if (team != NULL) {
3714 			addressSpace = team->address_space;
3715 		} else {
3716 			Thread* thread = debug_get_debugged_thread();
3717 			if (thread == NULL || thread->team == NULL) {
3718 				kprintf("Failed to get team!\n");
3719 				return 0;
3720 			}
3721 
3722 			addressSpace = thread->team->address_space;
3723 		}
3724 
3725 		if (addressSpace == NULL) {
3726 			kprintf("Failed to get address space!\n");
3727 			return 0;
3728 		}
3729 
3730 		// let the translation map implementation do the job
3731 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3732 	}
3733 
3734 	return 0;
3735 }
3736 
3737 
3738 /*!	Deletes all areas and reserved regions in the given address space.
3739 
3740 	The caller must ensure that none of the areas has any wired ranges.
3741 
3742 	\param addressSpace The address space.
3743 	\param deletingAddressSpace \c true, if the address space is in the process
3744 		of being deleted.
3745 */
3746 void
3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3748 {
3749 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3750 		addressSpace->ID()));
3751 
3752 	addressSpace->WriteLock();
3753 
3754 	// remove all reserved areas in this address space
3755 	addressSpace->UnreserveAllAddressRanges(0);
3756 
3757 	// delete all the areas in this address space
3758 	while (VMArea* area = addressSpace->FirstArea()) {
3759 		ASSERT(!area->IsWired());
3760 		delete_area(addressSpace, area, deletingAddressSpace);
3761 	}
3762 
3763 	addressSpace->WriteUnlock();
3764 }
3765 
3766 
3767 static area_id
3768 vm_area_for(addr_t address, bool kernel)
3769 {
3770 	team_id team;
3771 	if (IS_USER_ADDRESS(address)) {
3772 		// we try the user team address space, if any
3773 		team = VMAddressSpace::CurrentID();
3774 		if (team < 0)
3775 			return team;
3776 	} else
3777 		team = VMAddressSpace::KernelID();
3778 
3779 	AddressSpaceReadLocker locker(team);
3780 	if (!locker.IsLocked())
3781 		return B_BAD_TEAM_ID;
3782 
3783 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3784 	if (area != NULL) {
3785 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3786 			return B_ERROR;
3787 
3788 		return area->id;
3789 	}
3790 
3791 	return B_ERROR;
3792 }
3793 
3794 
3795 /*!	Frees physical pages that were used during the boot process.
3796 	\a end is inclusive.
3797 */
3798 static void
3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3800 {
3801 	// free all physical pages in the specified range
3802 
3803 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3804 		phys_addr_t physicalAddress;
3805 		uint32 flags;
3806 
3807 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3808 			&& (flags & PAGE_PRESENT) != 0) {
3809 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3810 			if (page != NULL && page->State() != PAGE_STATE_FREE
3811 					&& page->State() != PAGE_STATE_CLEAR
3812 					&& page->State() != PAGE_STATE_UNUSED) {
3813 				DEBUG_PAGE_ACCESS_START(page);
3814 				vm_page_set_state(page, PAGE_STATE_FREE);
3815 			}
3816 		}
3817 	}
3818 
3819 	// unmap the memory
3820 	map->Unmap(start, end);
3821 }
3822 
3823 
3824 void
3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3826 {
3827 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3828 	addr_t end = start + (size - 1);
3829 	addr_t lastEnd = start;
3830 
3831 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3832 		(void*)start, (void*)end));
3833 
3834 	// The areas are sorted in virtual address space order, so
3835 	// we just have to find the holes between them that fall
3836 	// into the range we should dispose of
3837 
3838 	map->Lock();
3839 
3840 	for (VMAddressSpace::AreaIterator it
3841 				= VMAddressSpace::Kernel()->GetAreaIterator();
3842 			VMArea* area = it.Next();) {
3843 		addr_t areaStart = area->Base();
3844 		addr_t areaEnd = areaStart + (area->Size() - 1);
3845 
3846 		if (areaEnd < start)
3847 			continue;
3848 
3849 		if (areaStart > end) {
3850 			// we are done, the area is already beyond what we have to free
3851 			break;
3852 		}
3853 
3854 		if (areaStart > lastEnd) {
3855 			// this is something we can free
3856 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3857 				(void*)areaStart));
3858 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3859 		}
3860 
3861 		if (areaEnd >= end) {
3862 			lastEnd = areaEnd;
3863 				// no +1 to prevent potential overflow
3864 			break;
3865 		}
3866 
3867 		lastEnd = areaEnd + 1;
3868 	}
3869 
3870 	if (lastEnd < end) {
3871 		// we can also get rid of some space at the end of the area
3872 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3873 			(void*)end));
3874 		unmap_and_free_physical_pages(map, lastEnd, end);
3875 	}
3876 
3877 	map->Unlock();
3878 }
3879 
3880 
3881 static void
3882 create_preloaded_image_areas(struct preloaded_image* _image)
3883 {
3884 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3885 	char name[B_OS_NAME_LENGTH];
3886 	void* address;
3887 	int32 length;
3888 
3889 	// use file name to create a good area name
3890 	char* fileName = strrchr(image->name, '/');
3891 	if (fileName == NULL)
3892 		fileName = image->name;
3893 	else
3894 		fileName++;
3895 
3896 	length = strlen(fileName);
3897 	// make sure there is enough space for the suffix
3898 	if (length > 25)
3899 		length = 25;
3900 
3901 	memcpy(name, fileName, length);
3902 	strcpy(name + length, "_text");
3903 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3904 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3905 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3906 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3907 		// this will later be remapped read-only/executable by the
3908 		// ELF initialization code
3909 
3910 	strcpy(name + length, "_data");
3911 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3912 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3913 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3914 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3915 }
3916 
3917 
3918 /*!	Frees all areas previously created for the kernel_args ranges.
3919 	Any boot loader resources contained in those arguments must not be accessed
3920 	anymore past this point.
3921 */
3922 void
3923 vm_free_kernel_args(kernel_args* args)
3924 {
3925 	uint32 i;
3926 
3927 	TRACE(("vm_free_kernel_args()\n"));
3928 
3929 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3930 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3931 		if (area >= B_OK)
3932 			delete_area(area);
3933 	}
3934 }
3935 
3936 
3937 static void
3938 allocate_kernel_args(kernel_args* args)
3939 {
3940 	TRACE(("allocate_kernel_args()\n"));
3941 
3942 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3943 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3944 
3945 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3946 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3947 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3948 	}
3949 }
3950 
3951 
3952 static void
3953 unreserve_boot_loader_ranges(kernel_args* args)
3954 {
3955 	TRACE(("unreserve_boot_loader_ranges()\n"));
3956 
3957 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3958 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3959 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3960 			args->virtual_allocated_range[i].size);
3961 	}
3962 }
3963 
3964 
3965 static void
3966 reserve_boot_loader_ranges(kernel_args* args)
3967 {
3968 	TRACE(("reserve_boot_loader_ranges()\n"));
3969 
3970 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3971 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3972 
3973 		// If the address is not a kernel address, we just skip it. The
3974 		// architecture specific code has to deal with it.
3975 		if (!IS_KERNEL_ADDRESS(address)) {
3976 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3977 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3978 			continue;
3979 		}
3980 
3981 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3982 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3983 		if (status < B_OK)
3984 			panic("could not reserve boot loader ranges\n");
3985 	}
3986 }
3987 
3988 
3989 static addr_t
3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3991 {
3992 	size = PAGE_ALIGN(size);
3993 
3994 	// find a slot in the virtual allocation addr range
3995 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3996 		// check to see if the space between this one and the last is big enough
3997 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3998 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3999 			+ args->virtual_allocated_range[i - 1].size;
4000 
4001 		addr_t base = alignment > 0
4002 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4003 
4004 		if (base >= KERNEL_BASE && base < rangeStart
4005 				&& rangeStart - base >= size) {
4006 			args->virtual_allocated_range[i - 1].size
4007 				+= base + size - previousRangeEnd;
4008 			return base;
4009 		}
4010 	}
4011 
4012 	// We didn't find a gap between the allocated ranges; that's OK.
4013 	// See if there's a gap after the last one.
4014 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4015 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4016 		+ args->virtual_allocated_range[lastEntryIndex].size;
4017 	addr_t base = alignment > 0
4018 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4019 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4020 		args->virtual_allocated_range[lastEntryIndex].size
4021 			+= base + size - lastRangeEnd;
4022 		return base;
4023 	}
4024 
4025 	// see if there's a gap before the first one
4026 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4027 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4028 		base = rangeStart - size;
4029 		if (alignment > 0)
4030 			base = ROUNDDOWN(base, alignment);
4031 
4032 		if (base >= KERNEL_BASE) {
4033 			args->virtual_allocated_range[0].start = base;
4034 			args->virtual_allocated_range[0].size += rangeStart - base;
4035 			return base;
4036 		}
4037 	}
4038 
4039 	return 0;
4040 }
4041 
4042 
4043 static bool
4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4045 {
4046 	// TODO: horrible brute-force method of determining if the page can be
4047 	// allocated
4048 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4049 		if (address >= args->physical_memory_range[i].start
4050 			&& address < args->physical_memory_range[i].start
4051 				+ args->physical_memory_range[i].size)
4052 			return true;
4053 	}
4054 	return false;
4055 }
4056 
4057 
4058 page_num_t
4059 vm_allocate_early_physical_page(kernel_args* args)
4060 {
4061 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4062 		phys_addr_t nextPage;
4063 
4064 		nextPage = args->physical_allocated_range[i].start
4065 			+ args->physical_allocated_range[i].size;
4066 		// see if the page right after this allocated paddr range can be allocated
4067 		if (i + 1 < args->num_physical_allocated_ranges
4068 			&& args->physical_allocated_range[i + 1].size != 0) {
4069 			// see if the next page will collide with the next allocated range
4070 			if (nextPage >= args->physical_allocated_range[i+1].start)
4071 				continue;
4072 		}
4073 		// see if the next physical page fits in the memory block
4074 		if (is_page_in_physical_memory_range(args, nextPage)) {
4075 			// we got one!
4076 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4077 			return nextPage / B_PAGE_SIZE;
4078 		}
4079 	}
4080 
4081 	// Expanding upwards didn't work, try going downwards.
4082 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4083 		phys_addr_t nextPage;
4084 
4085 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4086 		// see if the page right before this allocated paddr range can be allocated
4087 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4088 			// see if that page would collide with the previous allocated range
4089 			if (nextPage < args->physical_allocated_range[i-1].start
4090 				+ args->physical_allocated_range[i-1].size)
4091 				continue;
4092 		}
4093 		// see if the next physical page fits in the memory block
4094 		if (is_page_in_physical_memory_range(args, nextPage)) {
4095 			// we got one!
4096 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4097 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4098 			return nextPage / B_PAGE_SIZE;
4099 		}
4100 	}
4101 
4102 	return 0;
4103 		// could not allocate a block
4104 }
4105 
4106 
4107 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4108 	allocate some pages before the VM is completely up.
4109 */
4110 addr_t
4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4112 	uint32 attributes, addr_t alignment)
4113 {
4114 	if (physicalSize > virtualSize)
4115 		physicalSize = virtualSize;
4116 
4117 	// find the vaddr to allocate at
4118 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4119 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4120 	if (virtualBase == 0) {
4121 		panic("vm_allocate_early: could not allocate virtual address\n");
4122 		return 0;
4123 	}
4124 
4125 	// map the pages
4126 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4127 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4128 		if (physicalAddress == 0)
4129 			panic("error allocating early page!\n");
4130 
4131 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4132 
4133 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4134 			physicalAddress * B_PAGE_SIZE, attributes,
4135 			&vm_allocate_early_physical_page);
4136 	}
4137 
4138 	return virtualBase;
4139 }
4140 
4141 
4142 /*!	The main entrance point to initialize the VM. */
4143 status_t
4144 vm_init(kernel_args* args)
4145 {
4146 	struct preloaded_image* image;
4147 	void* address;
4148 	status_t err = 0;
4149 	uint32 i;
4150 
4151 	TRACE(("vm_init: entry\n"));
4152 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4153 	err = arch_vm_init(args);
4154 
4155 	// initialize some globals
4156 	vm_page_init_num_pages(args);
4157 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4158 
4159 	slab_init(args);
4160 
4161 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4162 	off_t heapSize = INITIAL_HEAP_SIZE;
4163 	// try to accommodate low-memory systems
4164 	while (heapSize > sAvailableMemory / 8)
4165 		heapSize /= 2;
4166 	if (heapSize < 1024 * 1024)
4167 		panic("vm_init: go buy some RAM please.");
4168 
4169 	// map in the new heap and initialize it
4170 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4171 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4172 	TRACE(("heap at 0x%lx\n", heapBase));
4173 	heap_init(heapBase, heapSize);
4174 #endif
4175 
4176 	// initialize the free page list and physical page mapper
4177 	vm_page_init(args);
4178 
4179 	// initialize the cache allocators
4180 	vm_cache_init(args);
4181 
4182 	{
4183 		status_t error = VMAreaHash::Init();
4184 		if (error != B_OK)
4185 			panic("vm_init: error initializing area hash table\n");
4186 	}
4187 
4188 	VMAddressSpace::Init();
4189 	reserve_boot_loader_ranges(args);
4190 
4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4192 	heap_init_post_area();
4193 #endif
4194 
4195 	// Do any further initialization that the architecture-dependent layers may
4196 	// need now
4197 	arch_vm_translation_map_init_post_area(args);
4198 	arch_vm_init_post_area(args);
4199 	vm_page_init_post_area(args);
4200 	slab_init_post_area();
4201 
4202 	// allocate areas to represent stuff that already exists
4203 
4204 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4205 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4206 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4207 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4208 #endif
4209 
4210 	allocate_kernel_args(args);
4211 
4212 	create_preloaded_image_areas(args->kernel_image);
4213 
4214 	// allocate areas for preloaded images
4215 	for (image = args->preloaded_images; image != NULL; image = image->next)
4216 		create_preloaded_image_areas(image);
4217 
4218 	// allocate kernel stacks
4219 	for (i = 0; i < args->num_cpus; i++) {
4220 		char name[64];
4221 
4222 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4223 		address = (void*)args->cpu_kstack[i].start;
4224 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4225 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4226 	}
4227 
4228 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4229 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4230 
4231 #if PARANOID_KERNEL_MALLOC
4232 	vm_block_address_range("uninitialized heap memory",
4233 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4234 #endif
4235 #if PARANOID_KERNEL_FREE
4236 	vm_block_address_range("freed heap memory",
4237 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4238 #endif
4239 
4240 	// create the object cache for the page mappings
4241 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4242 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4243 		NULL, NULL);
4244 	if (gPageMappingsObjectCache == NULL)
4245 		panic("failed to create page mappings object cache");
4246 
4247 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4248 
4249 #if DEBUG_CACHE_LIST
4250 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4251 		virtual_address_restrictions virtualRestrictions = {};
4252 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4253 		physical_address_restrictions physicalRestrictions = {};
4254 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4255 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4256 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4257 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4258 			&physicalRestrictions, (void**)&sCacheInfoTable);
4259 	}
4260 #endif	// DEBUG_CACHE_LIST
4261 
4262 	// add some debugger commands
4263 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4264 	add_debugger_command("area", &dump_area,
4265 		"Dump info about a particular area");
4266 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4267 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4268 #if DEBUG_CACHE_LIST
4269 	if (sCacheInfoTable != NULL) {
4270 		add_debugger_command_etc("caches", &dump_caches,
4271 			"List all VMCache trees",
4272 			"[ \"-c\" ]\n"
4273 			"All cache trees are listed sorted in decreasing order by number "
4274 				"of\n"
4275 			"used pages or, if \"-c\" is specified, by size of committed "
4276 				"memory.\n",
4277 			0);
4278 	}
4279 #endif
4280 	add_debugger_command("avail", &dump_available_memory,
4281 		"Dump available memory");
4282 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4283 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4284 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4285 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4286 	add_debugger_command("string", &display_mem, "dump strings");
4287 
4288 	add_debugger_command_etc("mapping", &dump_mapping_info,
4289 		"Print address mapping information",
4290 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4291 		"Prints low-level page mapping information for a given address. If\n"
4292 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4293 		"address that is looked up in the translation map of the current\n"
4294 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4295 		"\"-r\" is specified, <address> is a physical address that is\n"
4296 		"searched in the translation map of all teams, respectively the team\n"
4297 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4298 		"<address> is the address of a vm_page structure. The behavior is\n"
4299 		"equivalent to specifying \"-r\" with the physical address of that\n"
4300 		"page.\n",
4301 		0);
4302 
4303 	TRACE(("vm_init: exit\n"));
4304 
4305 	vm_cache_init_post_heap();
4306 
4307 	return err;
4308 }
4309 
4310 
4311 status_t
4312 vm_init_post_sem(kernel_args* args)
4313 {
4314 	// This frees all unused boot loader resources and makes their space
4315 	// available again
4316 	arch_vm_init_end(args);
4317 	unreserve_boot_loader_ranges(args);
4318 
4319 	// fill in all of the semaphores that were not allocated before
4320 	// since we're still single threaded and only the kernel address space
4321 	// exists, it isn't that hard to find all of the ones we need to create
4322 
4323 	arch_vm_translation_map_init_post_sem(args);
4324 
4325 	slab_init_post_sem();
4326 
4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4328 	heap_init_post_sem();
4329 #endif
4330 
4331 	return B_OK;
4332 }
4333 
4334 
4335 status_t
4336 vm_init_post_thread(kernel_args* args)
4337 {
4338 	vm_page_init_post_thread(args);
4339 	slab_init_post_thread();
4340 	return heap_init_post_thread();
4341 }
4342 
4343 
4344 status_t
4345 vm_init_post_modules(kernel_args* args)
4346 {
4347 	return arch_vm_init_post_modules(args);
4348 }
4349 
4350 
4351 void
4352 permit_page_faults(void)
4353 {
4354 	Thread* thread = thread_get_current_thread();
4355 	if (thread != NULL)
4356 		atomic_add(&thread->page_faults_allowed, 1);
4357 }
4358 
4359 
4360 void
4361 forbid_page_faults(void)
4362 {
4363 	Thread* thread = thread_get_current_thread();
4364 	if (thread != NULL)
4365 		atomic_add(&thread->page_faults_allowed, -1);
4366 }
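
// A minimal usage sketch for the two helpers above (illustrative; the helper
// name below is hypothetical): code that deliberately touches memory which may
// fault brackets the access, e.g.
//
//	permit_page_faults();
//	status_t error = copy_that_may_fault();	// hypothetical stand-in
//	forbid_page_faults();
//
// The calls nest safely, since they only atomically increment/decrement
// thread->page_faults_allowed.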
4367 
4368 
4369 status_t
4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4371 	bool isUser, addr_t* newIP)
4372 {
4373 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4374 		faultAddress));
4375 
4376 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4377 
4378 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4379 	VMAddressSpace* addressSpace = NULL;
4380 
4381 	status_t status = B_OK;
4382 	*newIP = 0;
4383 	atomic_add((int32*)&sPageFaults, 1);
4384 
4385 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4386 		addressSpace = VMAddressSpace::GetKernel();
4387 	} else if (IS_USER_ADDRESS(pageAddress)) {
4388 		addressSpace = VMAddressSpace::GetCurrent();
4389 		if (addressSpace == NULL) {
4390 			if (!isUser) {
4391 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4392 					"memory!\n");
4393 				status = B_BAD_ADDRESS;
4394 				TPF(PageFaultError(-1,
4395 					VMPageFaultTracing
4396 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4397 			} else {
4398 				// XXX weird state.
4399 				panic("vm_page_fault: non kernel thread accessing user memory "
4400 					"that doesn't exist!\n");
4401 				status = B_BAD_ADDRESS;
4402 			}
4403 		}
4404 	} else {
4405 		// the hit was probably in the 64k DMZ between kernel and user space
4406 		// this keeps a user space thread from passing a buffer that crosses
4407 		// into kernel space
4408 		status = B_BAD_ADDRESS;
4409 		TPF(PageFaultError(-1,
4410 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4411 	}
4412 
4413 	if (status == B_OK) {
4414 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4415 			isUser, NULL);
4416 	}
4417 
4418 	if (status < B_OK) {
4419 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4420 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4421 			strerror(status), address, faultAddress, isWrite, isUser,
4422 			thread_get_current_thread_id());
4423 		if (!isUser) {
4424 			Thread* thread = thread_get_current_thread();
4425 			if (thread != NULL && thread->fault_handler != 0) {
4426 				// this will cause the arch-dependent page fault handler to
4427 				// modify the IP on the interrupt frame or whatever to return
4428 				// to this address
4429 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4430 			} else {
4431 				// unhandled page fault in the kernel
4432 				panic("vm_page_fault: unhandled page fault in kernel space at "
4433 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4434 			}
4435 		} else {
4436 			Thread* thread = thread_get_current_thread();
4437 
4438 #ifdef TRACE_FAULTS
4439 			VMArea* area = NULL;
4440 			if (addressSpace != NULL) {
4441 				addressSpace->ReadLock();
4442 				area = addressSpace->LookupArea(faultAddress);
4443 			}
4444 
4445 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4446 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4447 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4448 				thread->team->Name(), thread->team->id,
4449 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4450 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4451 					area->Base() : 0x0));
4452 
4453 			if (addressSpace != NULL)
4454 				addressSpace->ReadUnlock();
4455 #endif
4456 
4457 			// If the thread has a signal handler for SIGSEGV, we simply
4458 			// send it the signal. Otherwise we notify the user debugger
4459 			// first.
4460 			struct sigaction action;
4461 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4462 					&& action.sa_handler != SIG_DFL
4463 					&& action.sa_handler != SIG_IGN)
4464 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4465 					SIGSEGV)) {
4466 				Signal signal(SIGSEGV,
4467 					status == B_PERMISSION_DENIED
4468 						? SEGV_ACCERR : SEGV_MAPERR,
4469 					EFAULT, thread->team->id);
4470 				signal.SetAddress((void*)address);
4471 				send_signal_to_thread(thread, signal, 0);
4472 			}
4473 		}
4474 	}
4475 
4476 	if (addressSpace != NULL)
4477 		addressSpace->Put();
4478 
4479 	return B_HANDLED_INTERRUPT;
4480 }
4481 
4482 
4483 struct PageFaultContext {
4484 	AddressSpaceReadLocker	addressSpaceLocker;
4485 	VMCacheChainLocker		cacheChainLocker;
4486 
4487 	VMTranslationMap*		map;
4488 	VMCache*				topCache;
4489 	off_t					cacheOffset;
4490 	vm_page_reservation		reservation;
4491 	bool					isWrite;
4492 
4493 	// return values
4494 	vm_page*				page;
4495 	bool					restart;
4496 	bool					pageAllocated;
4497 
4498 
4499 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4500 		:
4501 		addressSpaceLocker(addressSpace, true),
4502 		map(addressSpace->TranslationMap()),
4503 		isWrite(isWrite)
4504 	{
4505 	}
4506 
4507 	~PageFaultContext()
4508 	{
4509 		UnlockAll();
4510 		vm_page_unreserve_pages(&reservation);
4511 	}
4512 
4513 	void Prepare(VMCache* topCache, off_t cacheOffset)
4514 	{
4515 		this->topCache = topCache;
4516 		this->cacheOffset = cacheOffset;
4517 		page = NULL;
4518 		restart = false;
4519 		pageAllocated = false;
4520 
4521 		cacheChainLocker.SetTo(topCache);
4522 	}
4523 
4524 	void UnlockAll(VMCache* exceptCache = NULL)
4525 	{
4526 		topCache = NULL;
4527 		addressSpaceLocker.Unlock();
4528 		cacheChainLocker.Unlock(exceptCache);
4529 	}
4530 };
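
// How PageFaultContext is typically driven (a condensed sketch of the loop in
// vm_soft_fault() below):
//
//	PageFaultContext context(addressSpace, isWrite);
//	vm_page_reserve_pages(&context.reservation, reservePages, priority);
//	while (true) {
//		context.addressSpaceLocker.Lock();
//		// ... look up the area and check permissions ...
//		context.Prepare(vm_area_get_locked_cache(area), cacheOffset);
//		if (fault_get_page(context) != B_OK)
//			break;
//		if (context.restart)
//			continue;	// everything was unlocked -- start over
//		// ... map context.page ...
//		break;
//	}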
4531 
4532 
4533 /*!	Gets the page that should be mapped into the area.
4534 	Returns an error code other than \c B_OK, if the page couldn't be found or
4535 	paged in. The locking state of the address space and the caches is undefined
4536 	in that case.
4537 	Returns \c B_OK with \c context.restart set to \c true, if the function
4538 	had to unlock the address space and all caches and is supposed to be called
4539 	again.
4540 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4541 	found. It is returned in \c context.page. The address space will still be
4542 	locked as well as all caches starting from the top cache to at least the
4543 	cache the page lives in.
4544 */
4545 static status_t
4546 fault_get_page(PageFaultContext& context)
4547 {
4548 	VMCache* cache = context.topCache;
4549 	VMCache* lastCache = NULL;
4550 	vm_page* page = NULL;
4551 
4552 	while (cache != NULL) {
4553 		// We already hold the lock of the cache at this point.
4554 
4555 		lastCache = cache;
4556 
4557 		page = cache->LookupPage(context.cacheOffset);
4558 		if (page != NULL && page->busy) {
4559 			// page must be busy -- wait for it to become unbusy
4560 			context.UnlockAll(cache);
4561 			cache->ReleaseRefLocked();
4562 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4563 
4564 			// restart the whole process
4565 			context.restart = true;
4566 			return B_OK;
4567 		}
4568 
4569 		if (page != NULL)
4570 			break;
4571 
4572 		// The current cache does not contain the page we're looking for.
4573 
4574 		// see if the backing store has it
4575 		if (cache->HasPage(context.cacheOffset)) {
4576 			// insert a fresh page and mark it busy -- we're going to read it in
4577 			page = vm_page_allocate_page(&context.reservation,
4578 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4579 			cache->InsertPage(page, context.cacheOffset);
4580 
4581 			// We need to unlock all caches and the address space while reading
4582 			// the page in. Keep a reference to the cache around.
4583 			cache->AcquireRefLocked();
4584 			context.UnlockAll();
4585 
4586 			// read the page in
4587 			generic_io_vec vec;
4588 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4589 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4590 
4591 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4592 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4593 
4594 			cache->Lock();
4595 
4596 			if (status < B_OK) {
4597 				// on error remove and free the page
4598 				dprintf("reading page from cache %p returned: %s!\n",
4599 					cache, strerror(status));
4600 
4601 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4602 				cache->RemovePage(page);
4603 				vm_page_set_state(page, PAGE_STATE_FREE);
4604 
4605 				cache->ReleaseRefAndUnlock();
4606 				return status;
4607 			}
4608 
4609 			// mark the page unbusy again
4610 			cache->MarkPageUnbusy(page);
4611 
4612 			DEBUG_PAGE_ACCESS_END(page);
4613 
4614 			// Since we needed to unlock everything temporarily, the area
4615 			// situation might have changed. So we need to restart the whole
4616 			// process.
4617 			cache->ReleaseRefAndUnlock();
4618 			context.restart = true;
4619 			return B_OK;
4620 		}
4621 
4622 		cache = context.cacheChainLocker.LockSourceCache();
4623 	}
4624 
4625 	if (page == NULL) {
4626 		// There was no adequate page, determine the cache for a clean one.
4627 		// Read-only pages go into the deepest cache; only the topmost cache
4628 		// may have direct write access.
4629 		cache = context.isWrite ? context.topCache : lastCache;
4630 
4631 		// allocate a clean page
4632 		page = vm_page_allocate_page(&context.reservation,
4633 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4634 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4635 			page->physical_page_number));
4636 
4637 		// insert the new page into our cache
4638 		cache->InsertPage(page, context.cacheOffset);
4639 		context.pageAllocated = true;
4640 	} else if (page->Cache() != context.topCache && context.isWrite) {
4641 		// We have a page that has the data we want, but in the wrong cache
4642 		// object so we need to copy it and stick it into the top cache.
4643 		vm_page* sourcePage = page;
4644 
4645 		// TODO: If memory is low, it might be a good idea to steal the page
4646 		// from our source cache -- if possible, that is.
4647 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4648 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4649 
4650 		// To not needlessly kill concurrency we unlock all caches but the top
4651 		// one while copying the page. Lacking another mechanism to ensure that
4652 		// the source page doesn't disappear, we mark it busy.
4653 		sourcePage->busy = true;
4654 		context.cacheChainLocker.UnlockKeepRefs(true);
4655 
4656 		// copy the page
4657 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4658 			sourcePage->physical_page_number * B_PAGE_SIZE);
4659 
4660 		context.cacheChainLocker.RelockCaches(true);
4661 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4662 
4663 		// insert the new page into our cache
4664 		context.topCache->InsertPage(page, context.cacheOffset);
4665 		context.pageAllocated = true;
4666 	} else
4667 		DEBUG_PAGE_ACCESS_START(page);
4668 
4669 	context.page = page;
4670 	return B_OK;
4671 }
4672 
4673 
4674 /*!	Makes sure the address in the given address space is mapped.
4675 
4676 	\param addressSpace The address space.
4677 	\param originalAddress The address. Doesn't need to be page aligned.
4678 	\param isWrite If \c true the address shall be write-accessible.
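	\param isExecute If \c true the address shall be executable.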
4679 	\param isUser If \c true the access is requested by a userland team.
4680 	\param wirePage On success, if non \c NULL, the wired count of the page
4681 		mapped at the given address is incremented and the page is returned
4682 		via this parameter.
4683 	\return \c B_OK on success, another error code otherwise.
4684 */
4685 static status_t
4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4687 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4688 {
4689 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4690 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4691 		originalAddress, isWrite, isUser));
4692 
4693 	PageFaultContext context(addressSpace, isWrite);
4694 
4695 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4696 	status_t status = B_OK;
4697 
4698 	addressSpace->IncrementFaultCount();
4699 
4700 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4701 	// the pages upfront makes sure we don't have any cache locked, so that the
4702 	// page daemon/thief can do their job without problems.
4703 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4704 		originalAddress);
4705 	context.addressSpaceLocker.Unlock();
4706 	vm_page_reserve_pages(&context.reservation, reservePages,
4707 		addressSpace == VMAddressSpace::Kernel()
4708 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4709 
4710 	while (true) {
4711 		context.addressSpaceLocker.Lock();
4712 
4713 		// get the area the fault was in
4714 		VMArea* area = addressSpace->LookupArea(address);
4715 		if (area == NULL) {
4716 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4717 				"space\n", originalAddress);
4718 			TPF(PageFaultError(-1,
4719 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4720 			status = B_BAD_ADDRESS;
4721 			break;
4722 		}
4723 
4724 		// check permissions
4725 		uint32 protection = get_area_page_protection(area, address);
4726 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4727 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4728 				area->id, (void*)originalAddress);
4729 			TPF(PageFaultError(area->id,
4730 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4731 			status = B_PERMISSION_DENIED;
4732 			break;
4733 		}
4734 		if (isWrite && (protection
4735 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4736 			dprintf("write access attempted on write-protected area 0x%"
4737 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4738 			TPF(PageFaultError(area->id,
4739 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4740 			status = B_PERMISSION_DENIED;
4741 			break;
4742 		} else if (isExecute && (protection
4743 				& (B_EXECUTE_AREA
4744 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4745 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4746 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4747 			TPF(PageFaultError(area->id,
4748 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4749 			status = B_PERMISSION_DENIED;
4750 			break;
4751 		} else if (!isWrite && !isExecute && (protection
4752 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4753 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4754 				" at %p\n", area->id, (void*)originalAddress);
4755 			TPF(PageFaultError(area->id,
4756 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4757 			status = B_PERMISSION_DENIED;
4758 			break;
4759 		}
4760 
4761 		// We have the area, it was a valid access, so let's try to resolve the
4762 		// page fault now.
4763 		// At first, the top most cache from the area is investigated.
4764 
4765 		context.Prepare(vm_area_get_locked_cache(area),
4766 			address - area->Base() + area->cache_offset);
4767 
4768 		// See if this cache has a fault handler -- this will do all the work
4769 		// for us.
4770 		{
4771 			// Note, since the page fault is resolved with interrupts enabled,
4772 			// the fault handler could be called more than once for the same
4773 			// reason -- the store must take this into account.
4774 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4775 			if (status != B_BAD_HANDLER)
4776 				break;
4777 		}
4778 
4779 		// The top most cache has no fault handler, so let's see if the cache or
4780 		// its sources already have the page we're searching for (we're going
4781 		// from top to bottom).
4782 		status = fault_get_page(context);
4783 		if (status != B_OK) {
4784 			TPF(PageFaultError(area->id, status));
4785 			break;
4786 		}
4787 
4788 		if (context.restart)
4789 			continue;
4790 
4791 		// All went fine, all there is left to do is to map the page into the
4792 		// address space.
4793 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4794 			context.page));
4795 
4796 		// If the page doesn't reside in the area's cache, we need to make sure
4797 		// it's mapped read-only, so that we cannot overwrite someone else's
4798 		// data (copy-on-write)
4799 		uint32 newProtection = protection;
4800 		if (context.page->Cache() != context.topCache && !isWrite)
4801 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4802 
4803 		bool unmapPage = false;
4804 		bool mapPage = true;
4805 
4806 		// check whether there's already a page mapped at the address
4807 		context.map->Lock();
4808 
4809 		phys_addr_t physicalAddress;
4810 		uint32 flags;
4811 		vm_page* mappedPage = NULL;
4812 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4813 			&& (flags & PAGE_PRESENT) != 0
4814 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4815 				!= NULL) {
4816 			// Yep there's already a page. If it's ours, we can simply adjust
4817 			// its protection. Otherwise we have to unmap it.
4818 			if (mappedPage == context.page) {
4819 				context.map->ProtectPage(area, address, newProtection);
4820 					// Note: We assume that ProtectPage() is atomic (i.e.
4821 					// the page isn't temporarily unmapped), otherwise we'd have
4822 					// to make sure it isn't wired.
4823 				mapPage = false;
4824 			} else
4825 				unmapPage = true;
4826 		}
4827 
4828 		context.map->Unlock();
4829 
4830 		if (unmapPage) {
4831 			// If the page is wired, we can't unmap it. Wait until it is unwired
4832 			// again and restart. Note that the page cannot be wired for
4833 			// writing, since it isn't in the topmost cache. So we can safely
4834 			// ignore ranges wired for writing (our own and other concurrent
4835 			// wiring attempts in progress) and in fact have to do that to avoid
4836 			// a deadlock.
4837 			VMAreaUnwiredWaiter waiter;
4838 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4839 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4840 				// unlock everything and wait
4841 				if (context.pageAllocated) {
4842 					// ... but since we allocated a page and inserted it into
4843 					// the top cache, remove and free it first. Otherwise we'd
4844 					// have a page from a lower cache mapped while an upper
4845 					// cache has a page that would shadow it.
4846 					context.topCache->RemovePage(context.page);
4847 					vm_page_free_etc(context.topCache, context.page,
4848 						&context.reservation);
4849 				} else
4850 					DEBUG_PAGE_ACCESS_END(context.page);
4851 
4852 				context.UnlockAll();
4853 				waiter.waitEntry.Wait();
4854 				continue;
4855 			}
4856 
4857 			// Note: The mapped page is a page of a lower cache. We are
4858 			// guaranteed to have that cache locked, our new page is a copy of
4859 			// that page, and the page is not busy. The logic for that guarantee
4860 			// is as follows: Since the page is mapped, it must live in the top
4861 			// cache (ruled out above) or any of its lower caches, and there is
4862 			// (was before the new page was inserted) no other page in any
4863 			// cache between the top cache and the page's cache (otherwise that
4864 			// would be mapped instead). That in turn means that our algorithm
4865 			// must have found it and therefore it cannot be busy either.
4866 			DEBUG_PAGE_ACCESS_START(mappedPage);
4867 			unmap_page(area, address);
4868 			DEBUG_PAGE_ACCESS_END(mappedPage);
4869 		}
4870 
4871 		if (mapPage) {
4872 			if (map_page(area, context.page, address, newProtection,
4873 					&context.reservation) != B_OK) {
4874 				// Mapping can only fail when the page mapping object couldn't
4875 				// be allocated. Save for the missing mapping, everything is
4876 				// fine, though. If this was a regular page fault, we'll simply
4877 				// leave and probably fault again. To make sure we'll have more
4878 				// luck then, we ensure that the minimum object reserve is
4879 				// available.
4880 				DEBUG_PAGE_ACCESS_END(context.page);
4881 
4882 				context.UnlockAll();
4883 
4884 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4885 						!= B_OK) {
4886 					// Apparently the situation is serious. Let's get ourselves
4887 					// killed.
4888 					status = B_NO_MEMORY;
4889 				} else if (wirePage != NULL) {
4890 					// The caller expects us to wire the page. Since
4891 					// object_cache_reserve() succeeded, we should now be able
4892 					// to allocate a mapping structure. Restart.
4893 					continue;
4894 				}
4895 
4896 				break;
4897 			}
4898 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4899 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4900 
4901 		// also wire the page, if requested
4902 		if (wirePage != NULL && status == B_OK) {
4903 			increment_page_wired_count(context.page);
4904 			*wirePage = context.page;
4905 		}
4906 
4907 		DEBUG_PAGE_ACCESS_END(context.page);
4908 
4909 		break;
4910 	}
4911 
4912 	return status;
4913 }
4914 
4915 
4916 status_t
4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4918 {
4919 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4920 }
4921 
4922 status_t
4923 vm_put_physical_page(addr_t vaddr, void* handle)
4924 {
4925 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4926 }
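
// A minimal usage sketch for the get/put pair above (illustrative only;
// "physicalAddress" is a caller-supplied, page-aligned physical address):
//
//	addr_t virtualAddress;
//	void* handle;
//	if (vm_get_physical_page(physicalAddress, &virtualAddress, &handle)
//			== B_OK) {
//		memset((void*)virtualAddress, 0, B_PAGE_SIZE);
//		vm_put_physical_page(virtualAddress, handle);
//	}
//
// For bulk accesses the wrappers below (vm_memset_physical(),
// vm_memcpy_from_physical(), ...) avoid the explicit get/put dance.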
4927 
4928 
4929 status_t
4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4931 	void** _handle)
4932 {
4933 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4934 }
4935 
4936 status_t
4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4938 {
4939 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4940 }
4941 
4942 
4943 status_t
4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4945 {
4946 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4947 }
4948 
4949 status_t
4950 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4951 {
4952 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4953 }
4954 
4955 
4956 void
4957 vm_get_info(system_info* info)
4958 {
4959 	swap_get_info(info);
4960 
4961 	MutexLocker locker(sAvailableMemoryLock);
4962 	info->needed_memory = sNeededMemory;
4963 	info->free_memory = sAvailableMemory;
4964 }
4965 
4966 
4967 uint32
4968 vm_num_page_faults(void)
4969 {
4970 	return sPageFaults;
4971 }
4972 
4973 
4974 off_t
4975 vm_available_memory(void)
4976 {
4977 	MutexLocker locker(sAvailableMemoryLock);
4978 	return sAvailableMemory;
4979 }
4980 
4981 
4982 off_t
4983 vm_available_not_needed_memory(void)
4984 {
4985 	MutexLocker locker(sAvailableMemoryLock);
4986 	return sAvailableMemory - sNeededMemory;
4987 }
4988 
4989 
4990 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4991 	debugger.
4992 */
4993 off_t
4994 vm_available_not_needed_memory_debug(void)
4995 {
4996 	return sAvailableMemory - sNeededMemory;
4997 }
4998 
4999 
5000 size_t
5001 vm_kernel_address_space_left(void)
5002 {
5003 	return VMAddressSpace::Kernel()->FreeSpace();
5004 }
5005 
5006 
5007 void
5008 vm_unreserve_memory(size_t amount)
5009 {
5010 	mutex_lock(&sAvailableMemoryLock);
5011 
5012 	sAvailableMemory += amount;
5013 
5014 	mutex_unlock(&sAvailableMemoryLock);
5015 }
5016 
5017 
5018 status_t
5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5020 {
5021 	size_t reserve = kMemoryReserveForPriority[priority];
5022 
5023 	MutexLocker locker(sAvailableMemoryLock);
5024 
5025 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5026 
5027 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5028 		sAvailableMemory -= amount;
5029 		return B_OK;
5030 	}
5031 
5032 	if (timeout <= 0)
5033 		return B_NO_MEMORY;
5034 
5035 	// turn timeout into an absolute timeout
5036 	timeout += system_time();
5037 
5038 	// loop until we've got the memory or the timeout occurs
5039 	do {
5040 		sNeededMemory += amount;
5041 
5042 		// call the low resource manager
5043 		locker.Unlock();
5044 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5045 			B_ABSOLUTE_TIMEOUT, timeout);
5046 		locker.Lock();
5047 
5048 		sNeededMemory -= amount;
5049 
5050 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5051 			sAvailableMemory -= amount;
5052 			return B_OK;
5053 		}
5054 	} while (timeout > system_time());
5055 
5056 	return B_NO_MEMORY;
5057 }
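
// A minimal usage sketch for vm_try_reserve_memory() (illustrative; "size" is
// a caller-chosen commitment in bytes): reservations are given back via
// vm_unreserve_memory() once the memory is no longer committed.
//
//	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
//		return B_NO_MEMORY;
//	// ... memory is accounted as committed ...
//	vm_unreserve_memory(size);
//
// The priority selects the reserve kept back via kMemoryReserveForPriority[],
// and the (relative, microseconds) timeout bounds how long the low resource
// manager gets to free up memory.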
5058 
5059 
5060 status_t
5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5062 {
5063 	// NOTE: The caller is responsible for synchronizing calls to this function!
5064 
5065 	AddressSpaceReadLocker locker;
5066 	VMArea* area;
5067 	status_t status = locker.SetFromArea(id, area);
5068 	if (status != B_OK)
5069 		return status;
5070 
5071 	// nothing to do, if the type doesn't change
5072 	uint32 oldType = area->MemoryType();
5073 	if (type == oldType)
5074 		return B_OK;
5075 
5076 	// set the memory type of the area and the mapped pages
5077 	VMTranslationMap* map = area->address_space->TranslationMap();
5078 	map->Lock();
5079 	area->SetMemoryType(type);
5080 	map->ProtectArea(area, area->protection);
5081 	map->Unlock();
5082 
5083 	// set the physical memory type
5084 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5085 	if (error != B_OK) {
5086 		// reset the memory type of the area and the mapped pages
5087 		map->Lock();
5088 		area->SetMemoryType(oldType);
5089 		map->ProtectArea(area, area->protection);
5090 		map->Unlock();
5091 		return error;
5092 	}
5093 
5094 	return B_OK;
5095 
5096 }
5097 
5098 
5099 /*!	This function enforces some protection properties:
5100 	 - kernel areas must be W^X (after kernel startup)
5101 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5102 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5103 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5104 	   and B_KERNEL_WRITE_AREA.
5105 */
5106 static void
5107 fix_protection(uint32* protection)
5108 {
5109 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5110 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5111 			|| (*protection & B_WRITE_AREA) != 0)
5112 		&& !gKernelStartup)
5113 		panic("kernel areas cannot be both writable and executable!");
5114 
5115 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5116 		if ((*protection & B_USER_PROTECTION) == 0
5117 			|| (*protection & B_WRITE_AREA) != 0)
5118 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5119 		else
5120 			*protection |= B_KERNEL_READ_AREA;
5121 	}
5122 }
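
// Worked examples for the rules above, as implemented by fix_protection():
//  - B_READ_AREA | B_WRITE_AREA additionally gets B_KERNEL_READ_AREA and
//    B_KERNEL_WRITE_AREA (user-writable implies kernel-writable),
//  - B_READ_AREA alone additionally gets B_KERNEL_READ_AREA,
//  - 0 (nothing specified) becomes B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.
// The W^X check only panics once gKernelStartup has been cleared.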
5123 
5124 
5125 static void
5126 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5127 {
5128 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5129 	info->area = area->id;
5130 	info->address = (void*)area->Base();
5131 	info->size = area->Size();
5132 	info->protection = area->protection;
5133 	info->lock = area->wiring;
5134 	info->team = area->address_space->ID();
5135 	info->copy_count = 0;
5136 	info->in_count = 0;
5137 	info->out_count = 0;
5138 		// TODO: retrieve real values here!
5139 
5140 	VMCache* cache = vm_area_get_locked_cache(area);
5141 
5142 	// Note, this is a simplification; the cache could be larger than this area
5143 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5144 
5145 	vm_area_put_locked_cache(cache);
5146 }
5147 
5148 
5149 static status_t
5150 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5151 {
5152 	// is newSize a multiple of B_PAGE_SIZE?
5153 	if (newSize & (B_PAGE_SIZE - 1))
5154 		return B_BAD_VALUE;
5155 
5156 	// lock all affected address spaces and the cache
5157 	VMArea* area;
5158 	VMCache* cache;
5159 
5160 	MultiAddressSpaceLocker locker;
5161 	AreaCacheLocker cacheLocker;
5162 
5163 	status_t status;
5164 	size_t oldSize;
5165 	bool anyKernelArea;
5166 	bool restart;
5167 
5168 	do {
5169 		anyKernelArea = false;
5170 		restart = false;
5171 
5172 		locker.Unset();
5173 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5174 		if (status != B_OK)
5175 			return status;
5176 		cacheLocker.SetTo(cache, true);	// already locked
5177 
5178 		// enforce restrictions
5179 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5180 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5181 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5182 				"resize kernel area %" B_PRId32 " (%s)\n",
5183 				team_get_current_team_id(), areaID, area->name);
5184 			return B_NOT_ALLOWED;
5185 		}
5186 		// TODO: Enforce all restrictions (team, etc.)!
5187 
5188 		oldSize = area->Size();
5189 		if (newSize == oldSize)
5190 			return B_OK;
5191 
5192 		if (cache->type != CACHE_TYPE_RAM)
5193 			return B_NOT_ALLOWED;
5194 
5195 		if (oldSize < newSize) {
5196 			// We need to check if all areas of this cache can be resized.
5197 			for (VMArea* current = cache->areas; current != NULL;
5198 					current = current->cache_next) {
5199 				if (!current->address_space->CanResizeArea(current, newSize))
5200 					return B_ERROR;
5201 				anyKernelArea
5202 					|= current->address_space == VMAddressSpace::Kernel();
5203 			}
5204 		} else {
5205 			// We're shrinking the areas, so we must make sure the affected
5206 			// ranges are not wired.
5207 			for (VMArea* current = cache->areas; current != NULL;
5208 					current = current->cache_next) {
5209 				anyKernelArea
5210 					|= current->address_space == VMAddressSpace::Kernel();
5211 
5212 				if (wait_if_area_range_is_wired(current,
5213 						current->Base() + newSize, oldSize - newSize, &locker,
5214 						&cacheLocker)) {
5215 					restart = true;
5216 					break;
5217 				}
5218 			}
5219 		}
5220 	} while (restart);
5221 
5222 	// Okay, looks good so far, so let's do it
5223 
5224 	int priority = kernel && anyKernelArea
5225 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5226 	uint32 allocationFlags = kernel && anyKernelArea
5227 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5228 
5229 	if (oldSize < newSize) {
5230 		// Growing the cache can fail, so we do it first.
5231 		status = cache->Resize(cache->virtual_base + newSize, priority);
5232 		if (status != B_OK)
5233 			return status;
5234 	}
5235 
5236 	for (VMArea* current = cache->areas; current != NULL;
5237 			current = current->cache_next) {
5238 		status = current->address_space->ResizeArea(current, newSize,
5239 			allocationFlags);
5240 		if (status != B_OK)
5241 			break;
5242 
5243 		// We also need to unmap all pages beyond the new size, if the area has
5244 		// shrunk
5245 		if (newSize < oldSize) {
5246 			VMCacheChainLocker cacheChainLocker(cache);
5247 			cacheChainLocker.LockAllSourceCaches();
5248 
5249 			unmap_pages(current, current->Base() + newSize,
5250 				oldSize - newSize);
5251 
5252 			cacheChainLocker.Unlock(cache);
5253 		}
5254 	}
5255 
5256 	if (status == B_OK) {
5257 		// Shrink or grow individual page protections if in use.
5258 		if (area->page_protections != NULL) {
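			// The protections are stored as two 4-bit entries per byte (one
			// nibble per page), so the array needs (pageCount + 1) / 2 bytes.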
5259 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5260 			uint8* newProtections
5261 				= (uint8*)realloc(area->page_protections, bytes);
5262 			if (newProtections == NULL)
5263 				status = B_NO_MEMORY;
5264 			else {
5265 				area->page_protections = newProtections;
5266 
5267 				if (oldSize < newSize) {
5268 					// init the additional page protections to that of the area
5269 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5270 					uint32 areaProtection = area->protection
5271 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5272 					memset(area->page_protections + offset,
5273 						areaProtection | (areaProtection << 4), bytes - offset);
5274 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5275 						uint8& entry = area->page_protections[offset - 1];
5276 						entry = (entry & 0x0f) | (areaProtection << 4);
5277 					}
5278 				}
5279 			}
5280 		}
5281 	}
5282 
5283 	// shrinking the cache can't fail, so we do it now
5284 	if (status == B_OK && newSize < oldSize)
5285 		status = cache->Resize(cache->virtual_base + newSize, priority);
5286 
5287 	if (status != B_OK) {
5288 		// Something failed -- resize the areas back to their original size.
5289 		// This can fail, too, in which case we're seriously screwed.
5290 		for (VMArea* current = cache->areas; current != NULL;
5291 				current = current->cache_next) {
5292 			if (current->address_space->ResizeArea(current, oldSize,
5293 					allocationFlags) != B_OK) {
5294 				panic("vm_resize_area(): Failed and unable to restore "
5295 					"original state.");
5296 			}
5297 		}
5298 
5299 		cache->Resize(cache->virtual_base + oldSize, priority);
5300 	}
5301 
5302 	// TODO: we must honour the lock restrictions of this area
5303 	return status;
5304 }
5305 
5306 
5307 status_t
5308 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5309 {
5310 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5311 }
5312 
5313 
5314 status_t
5315 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5316 {
5317 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5318 }
5319 
5320 
5321 status_t
5322 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5323 	bool user)
5324 {
5325 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5326 }
5327 
5328 
5329 void
5330 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5331 {
5332 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5333 }
5334 
5335 
5336 /*!	Copies a range of memory directly from/to a page that might not be mapped
5337 	at the moment.
5338 
5339 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5340 	walks through the respective area's cache chain to find the physical page
5341 	and copies from/to it directly.
5342 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5343 	must not cross a page boundary.
5344 
5345 	\param teamID The team ID identifying the address space \a unsafeMemory is
5346 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5347 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5348 		is passed, the address space of the thread returned by
5349 		debug_get_debugged_thread() is used.
5350 	\param unsafeMemory The start of the unsafe memory range to be copied
5351 		from/to.
5352 	\param buffer A safely accessible kernel buffer to be copied from/to.
5353 	\param size The number of bytes to be copied.
5354 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5355 		\a unsafeMemory, the other way around otherwise.
5356 */
5357 status_t
5358 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5359 	size_t size, bool copyToUnsafe)
5360 {
5361 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5362 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5363 		return B_BAD_VALUE;
5364 	}
5365 
5366 	// get the address space for the debugged thread
5367 	VMAddressSpace* addressSpace;
5368 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5369 		addressSpace = VMAddressSpace::Kernel();
5370 	} else if (teamID == B_CURRENT_TEAM) {
5371 		Thread* thread = debug_get_debugged_thread();
5372 		if (thread == NULL || thread->team == NULL)
5373 			return B_BAD_ADDRESS;
5374 
5375 		addressSpace = thread->team->address_space;
5376 	} else
5377 		addressSpace = VMAddressSpace::DebugGet(teamID);
5378 
5379 	if (addressSpace == NULL)
5380 		return B_BAD_ADDRESS;
5381 
5382 	// get the area
5383 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5384 	if (area == NULL)
5385 		return B_BAD_ADDRESS;
5386 
5387 	// search the page
5388 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5389 		+ area->cache_offset;
5390 	VMCache* cache = area->cache;
5391 	vm_page* page = NULL;
5392 	while (cache != NULL) {
5393 		page = cache->DebugLookupPage(cacheOffset);
5394 		if (page != NULL)
5395 			break;
5396 
5397 		// Page not found in this cache -- if it is paged out, we must not try
5398 		// to get it from lower caches.
5399 		if (cache->DebugHasPage(cacheOffset))
5400 			break;
5401 
5402 		cache = cache->source;
5403 	}
5404 
5405 	if (page == NULL)
5406 		return B_UNSUPPORTED;
5407 
5408 	// copy from/to physical memory
5409 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5410 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5411 
5412 	if (copyToUnsafe) {
5413 		if (page->Cache() != area->cache)
5414 			return B_UNSUPPORTED;
5415 
5416 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5417 	}
5418 
5419 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5420 }
5421 
5422 
5423 /*!	Validate that a memory range is either fully in kernel space, or fully in
5424 	userspace. */
5425 static inline bool
5426 validate_memory_range(const void* addr, size_t size)
5427 {
5428 	addr_t address = (addr_t)addr;
5429 
5430 	// Check for overflows on all addresses.
5431 	if ((address + size) < address)
5432 		return false;
5433 
5434 	// Validate that the address range does not cross the kernel/user boundary.
5435 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5436 }
5437 
5438 
5439 /*!	Validate that a memory range is fully in userspace. */
5440 static inline bool
5441 validate_user_memory_range(const void* addr, size_t size)
5442 {
5443 	addr_t address = (addr_t)addr;
5444 
5445 	// Check for overflows on all addresses.
5446 	if ((address + size) < address)
5447 		return false;
5448 
5449 	// Validate that both the start and end address are in userspace
5450 	return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1);
5451 }
5452 
5453 
5454 //	#pragma mark - kernel public API
5455 
5456 
5457 status_t
5458 user_memcpy(void* to, const void* from, size_t size)
5459 {
5460 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5461 		return B_BAD_ADDRESS;
5462 
5463 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5464 		return B_BAD_ADDRESS;
5465 
5466 	return B_OK;
5467 }
5468 
5469 
5470 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5471 	the string in \a to, NULL-terminating the result.
5472 
5473 	\param to Pointer to the destination C-string.
5474 	\param from Pointer to the source C-string.
5475 	\param size Size in bytes of the string buffer pointed to by \a to.
5476 
5477 	\return strlen(\a from).
5478 */
5479 ssize_t
5480 user_strlcpy(char* to, const char* from, size_t size)
5481 {
5482 	if (to == NULL && size != 0)
5483 		return B_BAD_VALUE;
5484 	if (from == NULL)
5485 		return B_BAD_ADDRESS;
5486 
5487 	// Protect the source address from overflows.
5488 	size_t maxSize = size;
5489 	if ((addr_t)from + maxSize < (addr_t)from)
5490 		maxSize -= (addr_t)from + maxSize;
5491 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5492 		maxSize = USER_TOP - (addr_t)from;
5493 
5494 	if (!validate_memory_range(to, maxSize))
5495 		return B_BAD_ADDRESS;
5496 
5497 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5498 	if (result < 0)
5499 		return result;
5500 
5501 	// If we hit the address overflow boundary, fail.
5502 	if ((size_t)result >= maxSize && maxSize < size)
5503 		return B_BAD_ADDRESS;
5504 
5505 	return result;
5506 }
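
// A minimal usage sketch (illustrative; `userName` stands for a hypothetical
// userland pointer): copy a user string into a fixed-size kernel buffer and
// detect truncation by comparing the result against the buffer size:
//
//	char name[B_OS_NAME_LENGTH];
//	ssize_t length = user_strlcpy(name, userName, sizeof(name));
//	if (length < 0)
//		return B_BAD_ADDRESS;
//	if ((size_t)length >= sizeof(name))
//		return B_NAME_TOO_LONG;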
5507 
5508 
5509 status_t
5510 user_memset(void* s, char c, size_t count)
5511 {
5512 	if (!validate_memory_range(s, count))
5513 		return B_BAD_ADDRESS;
5514 
5515 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5516 		return B_BAD_ADDRESS;
5517 
5518 	return B_OK;
5519 }
5520 
5521 
5522 /*!	Wires a single page at the given address.
5523 
5524 	\param team The team whose address space the address belongs to. Supports
5525 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5526 		parameter is ignored.
5527 	\param address The virtual address to wire down. Does not need to
5528 		be page aligned.
5529 	\param writable If \c true the page shall be writable.
5530 	\param info On success the info is filled in, among other things
5531 		containing the physical address the given virtual one translates to.
5532 	\return \c B_OK, when the page could be wired, another error code otherwise.
5533 */
5534 status_t
5535 vm_wire_page(team_id team, addr_t address, bool writable,
5536 	VMPageWiringInfo* info)
5537 {
5538 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5539 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5540 
5541 	// compute the page protection that is required
5542 	bool isUser = IS_USER_ADDRESS(address);
5543 	uint32 requiredProtection = PAGE_PRESENT
5544 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5545 	if (writable)
5546 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5547 
5548 	// get and read lock the address space
5549 	VMAddressSpace* addressSpace = NULL;
5550 	if (isUser) {
5551 		if (team == B_CURRENT_TEAM)
5552 			addressSpace = VMAddressSpace::GetCurrent();
5553 		else
5554 			addressSpace = VMAddressSpace::Get(team);
5555 	} else
5556 		addressSpace = VMAddressSpace::GetKernel();
5557 	if (addressSpace == NULL)
5558 		return B_ERROR;
5559 
5560 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5561 
5562 	VMTranslationMap* map = addressSpace->TranslationMap();
5563 	status_t error = B_OK;
5564 
5565 	// get the area
5566 	VMArea* area = addressSpace->LookupArea(pageAddress);
5567 	if (area == NULL) {
5568 		addressSpace->Put();
5569 		return B_BAD_ADDRESS;
5570 	}
5571 
5572 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5573 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5574 
5575 	// mark the area range wired
5576 	area->Wire(&info->range);
5577 
5578 	// Lock the area's cache chain and the translation map. Needed to look
5579 	// up the page and play with its wired count.
5580 	cacheChainLocker.LockAllSourceCaches();
5581 	map->Lock();
5582 
5583 	phys_addr_t physicalAddress;
5584 	uint32 flags;
5585 	vm_page* page;
5586 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5587 		&& (flags & requiredProtection) == requiredProtection
5588 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5589 			!= NULL) {
5590 		// Already mapped with the correct permissions -- just increment
5591 		// the page's wired count.
5592 		increment_page_wired_count(page);
5593 
5594 		map->Unlock();
5595 		cacheChainLocker.Unlock();
5596 		addressSpaceLocker.Unlock();
5597 	} else {
5598 		// Let vm_soft_fault() map the page for us, if possible. We need
5599 		// to fully unlock to avoid deadlocks. Since we have already
5600 		// wired the area itself, nothing disturbing will happen with it
5601 		// in the meantime.
5602 		map->Unlock();
5603 		cacheChainLocker.Unlock();
5604 		addressSpaceLocker.Unlock();
5605 
5606 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5607 			isUser, &page);
5608 
5609 		if (error != B_OK) {
5610 			// The page could not be mapped -- clean up.
5611 			VMCache* cache = vm_area_get_locked_cache(area);
5612 			area->Unwire(&info->range);
5613 			cache->ReleaseRefAndUnlock();
5614 			addressSpace->Put();
5615 			return error;
5616 		}
5617 	}
5618 
5619 	info->physicalAddress
5620 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5621 			+ address % B_PAGE_SIZE;
5622 	info->page = page;
5623 
5624 	return B_OK;
5625 }
5626 
5627 
5628 /*!	Unwires a single page previously wired via vm_wire_page().
5629 
5630 	\param info The same object passed to vm_wire_page() before.
5631 */
5632 void
5633 vm_unwire_page(VMPageWiringInfo* info)
5634 {
5635 	// lock the address space
5636 	VMArea* area = info->range.area;
5637 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5638 		// takes over our reference
5639 
5640 	// lock the top cache
5641 	VMCache* cache = vm_area_get_locked_cache(area);
5642 	VMCacheChainLocker cacheChainLocker(cache);
5643 
5644 	if (info->page->Cache() != cache) {
5645 		// The page is not in the top cache, so we lock the whole cache chain
5646 		// before touching the page's wired count.
5647 		cacheChainLocker.LockAllSourceCaches();
5648 	}
5649 
5650 	decrement_page_wired_count(info->page);
5651 
5652 	// remove the wired range from the area
5653 	area->Unwire(&info->range);
5654 
5655 	cacheChainLocker.Unlock();
5656 }
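
// A minimal pairing sketch (illustrative; `team` and `address` are
// hypothetical): wire a single page writable, use its physical address, and
// balance the call with vm_unwire_page():
//
//	VMPageWiringInfo info;
//	if (vm_wire_page(team, address, true, &info) == B_OK) {
//		// ... use info.physicalAddress / info.page ...
//		vm_unwire_page(&info);
//	}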
5657 
5658 
5659 /*!	Wires down the given address range in the specified team's address space.
5660 
5661 	If successful the function
5662 	- acquires a reference to the specified team's address space,
5663 	- adds respective wired ranges to all areas that intersect with the given
5664 	  address range,
5665 	- makes sure all pages in the given address range are mapped with the
5666 	  requested access permissions and increments their wired count.
5667 
5668 	It fails when \a team doesn't specify a valid address space, when any part
5669 	of the specified address range is not covered by areas, when the concerned
5670 	areas don't allow mapping with the requested permissions, or when mapping
5671 	failed for another reason.
5672 
5673 	When successful the call must be balanced by an unlock_memory_etc() call with
5674 	the exact same parameters.
5675 
5676 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5677 		supported.
5678 	\param address The start of the address range to be wired.
5679 	\param numBytes The size of the address range to be wired.
5680 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5681 		requests that the range must be wired writable ("read from device
5682 		into memory").
5683 	\return \c B_OK on success, another error code otherwise.
5684 */
5685 status_t
5686 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5687 {
5688 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5689 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5690 
5691 	// compute the page protection that is required
5692 	bool isUser = IS_USER_ADDRESS(address);
5693 	bool writable = (flags & B_READ_DEVICE) == 0;
5694 	uint32 requiredProtection = PAGE_PRESENT
5695 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5696 	if (writable)
5697 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5698 
5699 	uint32 mallocFlags = isUser
5700 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5701 
5702 	// get and read lock the address space
5703 	VMAddressSpace* addressSpace = NULL;
5704 	if (isUser) {
5705 		if (team == B_CURRENT_TEAM)
5706 			addressSpace = VMAddressSpace::GetCurrent();
5707 		else
5708 			addressSpace = VMAddressSpace::Get(team);
5709 	} else
5710 		addressSpace = VMAddressSpace::GetKernel();
5711 	if (addressSpace == NULL)
5712 		return B_ERROR;
5713 
5714 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5715 		// We get a new address space reference here. The one we got above will
5716 		// be freed by unlock_memory_etc().
5717 
5718 	VMTranslationMap* map = addressSpace->TranslationMap();
5719 	status_t error = B_OK;
5720 
5721 	// iterate through all concerned areas
5722 	addr_t nextAddress = lockBaseAddress;
5723 	while (nextAddress != lockEndAddress) {
5724 		// get the next area
5725 		VMArea* area = addressSpace->LookupArea(nextAddress);
5726 		if (area == NULL) {
5727 			error = B_BAD_ADDRESS;
5728 			break;
5729 		}
5730 
5731 		addr_t areaStart = nextAddress;
5732 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5733 
5734 		// allocate the wired range (do that before locking the cache to avoid
5735 		// deadlocks)
5736 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5737 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5738 		if (range == NULL) {
5739 			error = B_NO_MEMORY;
5740 			break;
5741 		}
5742 
5743 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5744 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5745 
5746 		// mark the area range wired
5747 		area->Wire(range);
5748 
5749 		// Depending on the area cache type and the wiring, we may not need to
5750 		// look at the individual pages.
5751 		if (area->cache_type == CACHE_TYPE_NULL
5752 			|| area->cache_type == CACHE_TYPE_DEVICE
5753 			|| area->wiring == B_FULL_LOCK
5754 			|| area->wiring == B_CONTIGUOUS) {
5755 			nextAddress = areaEnd;
5756 			continue;
5757 		}
5758 
5759 		// Lock the area's cache chain and the translation map. Needed to look
5760 		// up pages and play with their wired count.
5761 		cacheChainLocker.LockAllSourceCaches();
5762 		map->Lock();
5763 
5764 		// iterate through the pages and wire them
5765 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5766 			phys_addr_t physicalAddress;
5767 			uint32 flags;
5768 
5769 			vm_page* page;
5770 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5771 				&& (flags & requiredProtection) == requiredProtection
5772 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5773 					!= NULL) {
5774 				// Already mapped with the correct permissions -- just increment
5775 				// the page's wired count.
5776 				increment_page_wired_count(page);
5777 			} else {
5778 				// Let vm_soft_fault() map the page for us, if possible. We need
5779 				// to fully unlock to avoid deadlocks. Since we have already
5780 				// wired the area itself, nothing disturbing will happen with it
5781 				// in the meantime.
5782 				map->Unlock();
5783 				cacheChainLocker.Unlock();
5784 				addressSpaceLocker.Unlock();
5785 
5786 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5787 					false, isUser, &page);
5788 
5789 				addressSpaceLocker.Lock();
5790 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5791 				cacheChainLocker.LockAllSourceCaches();
5792 				map->Lock();
5793 			}
5794 
5795 			if (error != B_OK)
5796 				break;
5797 		}
5798 
5799 		map->Unlock();
5800 
5801 		if (error == B_OK) {
5802 			cacheChainLocker.Unlock();
5803 		} else {
5804 			// An error occurred, so abort right here. If the current address
5805 			// is the first in this area, unwire the area, since we won't get
5806 			// to it when reverting what we've done so far.
5807 			if (nextAddress == areaStart) {
5808 				area->Unwire(range);
5809 				cacheChainLocker.Unlock();
5810 				range->~VMAreaWiredRange();
5811 				free_etc(range, mallocFlags);
5812 			} else
5813 				cacheChainLocker.Unlock();
5814 
5815 			break;
5816 		}
5817 	}
5818 
5819 	if (error != B_OK) {
5820 		// An error occurred, so unwire all that we've already wired. Note that
5821 		// even if not a single page was wired, unlock_memory_etc() is called
5822 		// to put the address space reference.
5823 		addressSpaceLocker.Unlock();
5824 		unlock_memory_etc(team, (void*)lockBaseAddress,
5825 			nextAddress - lockBaseAddress, flags);
5826 	}
5827 
5828 	return error;
5829 }
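
// A minimal usage sketch (illustrative; `team`, `buffer` and `length` are
// hypothetical): wire a range writable (flags == 0) and balance the call with
// unlock_memory_etc() using exactly the same parameters:
//
//	status_t error = lock_memory_etc(team, buffer, length, 0);
//	if (error == B_OK) {
//		// ... the range is wired; I/O into it is safe now ...
//		unlock_memory_etc(team, buffer, length, 0);
//	}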
5830 
5831 
5832 status_t
5833 lock_memory(void* address, size_t numBytes, uint32 flags)
5834 {
5835 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5836 }
5837 
5838 
5839 /*!	Unwires an address range previously wired with lock_memory_etc().
5840 
5841 	Note that a call to this function must balance a previous lock_memory_etc()
5842 	call with exactly the same parameters.
5843 */
5844 status_t
5845 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5846 {
5847 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5848 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5849 
5850 	// compute the page protection that is required
5851 	bool isUser = IS_USER_ADDRESS(address);
5852 	bool writable = (flags & B_READ_DEVICE) == 0;
5853 	uint32 requiredProtection = PAGE_PRESENT
5854 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5855 	if (writable)
5856 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5857 
5858 	uint32 mallocFlags = isUser
5859 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5860 
5861 	// get and read lock the address space
5862 	VMAddressSpace* addressSpace = NULL;
5863 	if (isUser) {
5864 		if (team == B_CURRENT_TEAM)
5865 			addressSpace = VMAddressSpace::GetCurrent();
5866 		else
5867 			addressSpace = VMAddressSpace::Get(team);
5868 	} else
5869 		addressSpace = VMAddressSpace::GetKernel();
5870 	if (addressSpace == NULL)
5871 		return B_ERROR;
5872 
5873 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5874 		// Take over the address space reference. We don't unlock until we're
5875 		// done.
5876 
5877 	VMTranslationMap* map = addressSpace->TranslationMap();
5878 	status_t error = B_OK;
5879 
5880 	// iterate through all concerned areas
5881 	addr_t nextAddress = lockBaseAddress;
5882 	while (nextAddress != lockEndAddress) {
5883 		// get the next area
5884 		VMArea* area = addressSpace->LookupArea(nextAddress);
5885 		if (area == NULL) {
5886 			error = B_BAD_ADDRESS;
5887 			break;
5888 		}
5889 
5890 		addr_t areaStart = nextAddress;
5891 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5892 
5893 		// Lock the area's top cache. This is a requirement for
5894 		// VMArea::Unwire().
5895 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5896 
5897 		// Depending on the area cache type and the wiring, we may not need to
5898 		// look at the individual pages.
5899 		if (area->cache_type == CACHE_TYPE_NULL
5900 			|| area->cache_type == CACHE_TYPE_DEVICE
5901 			|| area->wiring == B_FULL_LOCK
5902 			|| area->wiring == B_CONTIGUOUS) {
5903 			// unwire the range (to avoid deadlocks we delete the range after
5904 			// unlocking the cache)
5905 			nextAddress = areaEnd;
5906 			VMAreaWiredRange* range = area->Unwire(areaStart,
5907 				areaEnd - areaStart, writable);
5908 			cacheChainLocker.Unlock();
5909 			if (range != NULL) {
5910 				range->~VMAreaWiredRange();
5911 				free_etc(range, mallocFlags);
5912 			}
5913 			continue;
5914 		}
5915 
5916 		// Lock the area's cache chain and the translation map. Needed to look
5917 		// up pages and play with their wired count.
5918 		cacheChainLocker.LockAllSourceCaches();
5919 		map->Lock();
5920 
5921 		// iterate through the pages and unwire them
5922 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5923 			phys_addr_t physicalAddress;
5924 			uint32 flags;
5925 
5926 			vm_page* page;
5927 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5928 				&& (flags & PAGE_PRESENT) != 0
5929 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5930 					!= NULL) {
5931 				// The page is still mapped -- just decrement
5932 				// its wired count.
5933 				decrement_page_wired_count(page);
5934 			} else {
5935 				panic("unlock_memory_etc(): Failed to unwire page: address "
5936 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5937 					nextAddress);
5938 				error = B_BAD_VALUE;
5939 				break;
5940 			}
5941 		}
5942 
5943 		map->Unlock();
5944 
5945 		// All pages are unwired. Remove the area's wired range as well (to
5946 		// avoid deadlocks we delete the range after unlocking the cache).
5947 		VMAreaWiredRange* range = area->Unwire(areaStart,
5948 			areaEnd - areaStart, writable);
5949 
5950 		cacheChainLocker.Unlock();
5951 
5952 		if (range != NULL) {
5953 			range->~VMAreaWiredRange();
5954 			free_etc(range, mallocFlags);
5955 		}
5956 
5957 		if (error != B_OK)
5958 			break;
5959 	}
5960 
5961 	// get rid of the address space reference lock_memory_etc() acquired
5962 	addressSpace->Put();
5963 
5964 	return error;
5965 }
5966 
5967 
5968 status_t
5969 unlock_memory(void* address, size_t numBytes, uint32 flags)
5970 {
5971 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5972 }
5973 
5974 
5975 /*!	Similar to get_memory_map(), but also allows specifying the address space
5976 	for the memory in question and has saner semantics.
5977 	Returns \c B_OK when the complete range could be translated or
5978 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5979 	case the actual number of entries is written to \c *_numEntries. Any other
5980 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5981 	in this case.
5982 */
5983 status_t
5984 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5985 	physical_entry* table, uint32* _numEntries)
5986 {
5987 	uint32 numEntries = *_numEntries;
5988 	*_numEntries = 0;
5989 
5990 	VMAddressSpace* addressSpace;
5991 	addr_t virtualAddress = (addr_t)address;
5992 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5993 	phys_addr_t physicalAddress;
5994 	status_t status = B_OK;
5995 	int32 index = -1;
5996 	addr_t offset = 0;
5997 	bool interrupts = are_interrupts_enabled();
5998 
5999 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6000 		"entries)\n", team, address, numBytes, numEntries));
6001 
6002 	if (numEntries == 0 || numBytes == 0)
6003 		return B_BAD_VALUE;
6004 
6005 	// in which address space is the address to be found?
6006 	if (IS_USER_ADDRESS(virtualAddress)) {
6007 		if (team == B_CURRENT_TEAM)
6008 			addressSpace = VMAddressSpace::GetCurrent();
6009 		else
6010 			addressSpace = VMAddressSpace::Get(team);
6011 	} else
6012 		addressSpace = VMAddressSpace::GetKernel();
6013 
6014 	if (addressSpace == NULL)
6015 		return B_ERROR;
6016 
6017 	VMTranslationMap* map = addressSpace->TranslationMap();
6018 
6019 	if (interrupts)
6020 		map->Lock();
6021 
6022 	while (offset < numBytes) {
6023 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6024 		uint32 flags;
6025 
6026 		if (interrupts) {
6027 			status = map->Query((addr_t)address + offset, &physicalAddress,
6028 				&flags);
6029 		} else {
6030 			status = map->QueryInterrupt((addr_t)address + offset,
6031 				&physicalAddress, &flags);
6032 		}
6033 		if (status < B_OK)
6034 			break;
6035 		if ((flags & PAGE_PRESENT) == 0) {
6036 			panic("get_memory_map() called on unmapped memory!");
6037 			return B_BAD_ADDRESS;
6038 		}
6039 
6040 		if (index < 0 && pageOffset > 0) {
6041 			physicalAddress += pageOffset;
6042 			if (bytes > B_PAGE_SIZE - pageOffset)
6043 				bytes = B_PAGE_SIZE - pageOffset;
6044 		}
6045 
6046 		// need to switch to the next physical_entry?
6047 		if (index < 0 || table[index].address
6048 				!= physicalAddress - table[index].size) {
6049 			if ((uint32)++index + 1 > numEntries) {
6050 				// table too small
6051 				break;
6052 			}
6053 			table[index].address = physicalAddress;
6054 			table[index].size = bytes;
6055 		} else {
6056 			// the page is contiguous with the current entry -- extend it
6057 			table[index].size += bytes;
6058 		}
6059 
6060 		offset += bytes;
6061 	}
6062 
6063 	if (interrupts)
6064 		map->Unlock();
6065 
6066 	if (status != B_OK)
6067 		return status;
6068 
6069 	if ((uint32)index + 1 > numEntries) {
6070 		*_numEntries = index;
6071 		return B_BUFFER_OVERFLOW;
6072 	}
6073 
6074 	*_numEntries = index + 1;
6075 	return B_OK;
6076 }
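
// A minimal usage sketch (illustrative; `buffer` and `length` are hypothetical
// and assumed to be locked/wired by the caller): translate a virtual range
// into physical runs:
//
//	physical_entry entries[8];
//	uint32 count = 8;
//	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
//		entries, &count);
//	// B_OK: the whole range fit; B_BUFFER_OVERFLOW: the table was too small.
//	// In both cases `count` now holds the number of entries actually filled.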
6077 
6078 
6079 /*!	According to the BeBook, this function should always succeed.
6080 	This is no longer the case.
6081 */
6082 extern "C" int32
6083 __get_memory_map_haiku(const void* address, size_t numBytes,
6084 	physical_entry* table, int32 numEntries)
6085 {
6086 	uint32 entriesRead = numEntries;
6087 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6088 		table, &entriesRead);
6089 	if (error != B_OK)
6090 		return error;
6091 
6092 	// close the entry list
6093 
6094 	// if it's only one entry, we will silently accept the missing ending
6095 	if (numEntries == 1)
6096 		return B_OK;
6097 
6098 	if (entriesRead + 1 > (uint32)numEntries)
6099 		return B_BUFFER_OVERFLOW;
6100 
6101 	table[entriesRead].address = 0;
6102 	table[entriesRead].size = 0;
6103 
6104 	return B_OK;
6105 }
6106 
6107 
6108 area_id
6109 area_for(void* address)
6110 {
6111 	return vm_area_for((addr_t)address, true);
6112 }
6113 
6114 
6115 area_id
6116 find_area(const char* name)
6117 {
6118 	return VMAreaHash::Find(name);
6119 }
6120 
6121 
6122 status_t
6123 _get_area_info(area_id id, area_info* info, size_t size)
6124 {
6125 	if (size != sizeof(area_info) || info == NULL)
6126 		return B_BAD_VALUE;
6127 
6128 	AddressSpaceReadLocker locker;
6129 	VMArea* area;
6130 	status_t status = locker.SetFromArea(id, area);
6131 	if (status != B_OK)
6132 		return status;
6133 
6134 	fill_area_info(area, info, size);
6135 	return B_OK;
6136 }
6137 
6138 
6139 status_t
6140 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6141 {
6142 	addr_t nextBase = *(addr_t*)cookie;
6143 
6144 	// we're already through the list
6145 	if (nextBase == (addr_t)-1)
6146 		return B_ENTRY_NOT_FOUND;
6147 
6148 	if (team == B_CURRENT_TEAM)
6149 		team = team_get_current_team_id();
6150 
6151 	AddressSpaceReadLocker locker(team);
6152 	if (!locker.IsLocked())
6153 		return B_BAD_TEAM_ID;
6154 
6155 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6156 	if (area == NULL) {
6157 		nextBase = (addr_t)-1;
6158 		return B_ENTRY_NOT_FOUND;
6159 	}
6160 
6161 	fill_area_info(area, info, size);
6162 	*cookie = (ssize_t)(area->Base() + 1);
6163 
6164 	return B_OK;
6165 }
6166 
6167 
6168 status_t
6169 set_area_protection(area_id area, uint32 newProtection)
6170 {
6171 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6172 		newProtection, true);
6173 }
6174 
6175 
6176 status_t
6177 resize_area(area_id areaID, size_t newSize)
6178 {
6179 	return vm_resize_area(areaID, newSize, true);
6180 }
6181 
6182 
6183 /*!	Transfers the specified area to a new team. The caller must be the owner
6184 	of the area.
6185 */
6186 area_id
6187 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6188 	bool kernel)
6189 {
6190 	area_info info;
6191 	status_t status = get_area_info(id, &info);
6192 	if (status != B_OK)
6193 		return status;
6194 
6195 	if (info.team != thread_get_current_thread()->team->id)
6196 		return B_PERMISSION_DENIED;
6197 
6198 	// We need to mark the area cloneable so the following operations work.
6199 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6200 	if (status != B_OK)
6201 		return status;
6202 
6203 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6204 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6205 	if (clonedArea < 0)
6206 		return clonedArea;
6207 
6208 	status = vm_delete_area(info.team, id, kernel);
6209 	if (status != B_OK) {
6210 		vm_delete_area(target, clonedArea, kernel);
6211 		return status;
6212 	}
6213 
6214 	// Now we can reset the protection to whatever it was before.
6215 	set_area_protection(clonedArea, info.protection);
6216 
6217 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6218 
6219 	return clonedArea;
6220 }
6221 
6222 
6223 extern "C" area_id
6224 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6225 	size_t numBytes, uint32 addressSpec, uint32 protection,
6226 	void** _virtualAddress)
6227 {
6228 	if (!arch_vm_supports_protection(protection))
6229 		return B_NOT_SUPPORTED;
6230 
6231 	fix_protection(&protection);
6232 
6233 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6234 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6235 		false);
6236 }
6237 
6238 
6239 area_id
6240 clone_area(const char* name, void** _address, uint32 addressSpec,
6241 	uint32 protection, area_id source)
6242 {
6243 	if ((protection & B_KERNEL_PROTECTION) == 0)
6244 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6245 
6246 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6247 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6248 }
6249 
6250 
6251 area_id
6252 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6253 	uint32 protection, uint32 flags, uint32 guardSize,
6254 	const virtual_address_restrictions* virtualAddressRestrictions,
6255 	const physical_address_restrictions* physicalAddressRestrictions,
6256 	void** _address)
6257 {
6258 	fix_protection(&protection);
6259 
6260 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6261 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6262 		true, _address);
6263 }
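
// A minimal usage sketch (illustrative; the name and size are hypothetical):
// create a fully locked kernel area without any particular address or
// physical placement restrictions:
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id area = create_area_etc(B_SYSTEM_TEAM, "example buffer",
//		16 * B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
//		&virtualRestrictions, &physicalRestrictions, &address);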
6264 
6265 
6266 extern "C" area_id
6267 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6268 	size_t size, uint32 lock, uint32 protection)
6269 {
6270 	fix_protection(&protection);
6271 
6272 	virtual_address_restrictions virtualRestrictions = {};
6273 	virtualRestrictions.address = *_address;
6274 	virtualRestrictions.address_specification = addressSpec;
6275 	physical_address_restrictions physicalRestrictions = {};
6276 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6277 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6278 		true, _address);
6279 }
6280 
6281 
6282 status_t
6283 delete_area(area_id area)
6284 {
6285 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6286 }
6287 
6288 
6289 //	#pragma mark - Userland syscalls
6290 
6291 
6292 status_t
6293 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6294 	addr_t size)
6295 {
6296 	// filter out some unavailable values (for userland)
6297 	switch (addressSpec) {
6298 		case B_ANY_KERNEL_ADDRESS:
6299 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6300 			return B_BAD_VALUE;
6301 	}
6302 
6303 	addr_t address;
6304 
6305 	if (!IS_USER_ADDRESS(userAddress)
6306 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6307 		return B_BAD_ADDRESS;
6308 
6309 	status_t status = vm_reserve_address_range(
6310 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6311 		RESERVED_AVOID_BASE);
6312 	if (status != B_OK)
6313 		return status;
6314 
6315 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6316 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6317 			(void*)address, size);
6318 		return B_BAD_ADDRESS;
6319 	}
6320 
6321 	return B_OK;
6322 }
6323 
6324 
6325 status_t
6326 _user_unreserve_address_range(addr_t address, addr_t size)
6327 {
6328 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6329 		(void*)address, size);
6330 }
6331 
6332 
6333 area_id
6334 _user_area_for(void* address)
6335 {
6336 	return vm_area_for((addr_t)address, false);
6337 }
6338 
6339 
6340 area_id
6341 _user_find_area(const char* userName)
6342 {
6343 	char name[B_OS_NAME_LENGTH];
6344 
6345 	if (!IS_USER_ADDRESS(userName)
6346 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6347 		return B_BAD_ADDRESS;
6348 
6349 	return find_area(name);
6350 }
6351 
6352 
6353 status_t
6354 _user_get_area_info(area_id area, area_info* userInfo)
6355 {
6356 	if (!IS_USER_ADDRESS(userInfo))
6357 		return B_BAD_ADDRESS;
6358 
6359 	area_info info;
6360 	status_t status = get_area_info(area, &info);
6361 	if (status < B_OK)
6362 		return status;
6363 
6364 	// TODO: do we want to prevent userland from seeing kernel protections?
6365 	//info.protection &= B_USER_PROTECTION;
6366 
6367 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6368 		return B_BAD_ADDRESS;
6369 
6370 	return status;
6371 }
6372 
6373 
6374 status_t
6375 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6376 {
6377 	ssize_t cookie;
6378 
6379 	if (!IS_USER_ADDRESS(userCookie)
6380 		|| !IS_USER_ADDRESS(userInfo)
6381 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6382 		return B_BAD_ADDRESS;
6383 
6384 	area_info info;
6385 	status_t status = _get_next_area_info(team, &cookie, &info,
6386 		sizeof(area_info));
6387 	if (status != B_OK)
6388 		return status;
6389 
6390 	//info.protection &= B_USER_PROTECTION;
6391 
6392 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6393 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6394 		return B_BAD_ADDRESS;
6395 
6396 	return status;
6397 }
6398 
6399 
6400 status_t
6401 _user_set_area_protection(area_id area, uint32 newProtection)
6402 {
6403 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6404 		return B_BAD_VALUE;
6405 
6406 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6407 		newProtection, false);
6408 }
6409 
6410 
6411 status_t
6412 _user_resize_area(area_id area, size_t newSize)
6413 {
6414 	// TODO: Since we restrict deleting of areas to those owned by the team,
6415 	// we should also do that for resizing (check other functions, too).
6416 	return vm_resize_area(area, newSize, false);
6417 }
6418 
6419 
6420 area_id
6421 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6422 	team_id target)
6423 {
6424 	// filter out some unavailable values (for userland)
6425 	switch (addressSpec) {
6426 		case B_ANY_KERNEL_ADDRESS:
6427 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6428 			return B_BAD_VALUE;
6429 	}
6430 
6431 	void* address;
6432 	if (!IS_USER_ADDRESS(userAddress)
6433 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6434 		return B_BAD_ADDRESS;
6435 
6436 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6437 	if (newArea < B_OK)
6438 		return newArea;
6439 
6440 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6441 		return B_BAD_ADDRESS;
6442 
6443 	return newArea;
6444 }
6445 
6446 
6447 area_id
6448 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6449 	uint32 protection, area_id sourceArea)
6450 {
6451 	char name[B_OS_NAME_LENGTH];
6452 	void* address;
6453 
6454 	// filter out some unavailable values (for userland)
6455 	switch (addressSpec) {
6456 		case B_ANY_KERNEL_ADDRESS:
6457 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6458 			return B_BAD_VALUE;
6459 	}
6460 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6461 		return B_BAD_VALUE;
6462 
6463 	if (!IS_USER_ADDRESS(userName)
6464 		|| !IS_USER_ADDRESS(userAddress)
6465 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6466 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6467 		return B_BAD_ADDRESS;
6468 
6469 	fix_protection(&protection);
6470 
6471 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6472 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6473 		false);
6474 	if (clonedArea < B_OK)
6475 		return clonedArea;
6476 
6477 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6478 		delete_area(clonedArea);
6479 		return B_BAD_ADDRESS;
6480 	}
6481 
6482 	return clonedArea;
6483 }
6484 
6485 
6486 area_id
6487 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6488 	size_t size, uint32 lock, uint32 protection)
6489 {
6490 	char name[B_OS_NAME_LENGTH];
6491 	void* address;
6492 
6493 	// filter out some unavailable values (for userland)
6494 	switch (addressSpec) {
6495 		case B_ANY_KERNEL_ADDRESS:
6496 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6497 			return B_BAD_VALUE;
6498 	}
6499 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6500 		return B_BAD_VALUE;
6501 
6502 	if (!IS_USER_ADDRESS(userName)
6503 		|| !IS_USER_ADDRESS(userAddress)
6504 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6505 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6506 		return B_BAD_ADDRESS;
6507 
6508 	if (addressSpec == B_EXACT_ADDRESS
6509 		&& IS_KERNEL_ADDRESS(address))
6510 		return B_BAD_VALUE;
6511 
6512 	if (addressSpec == B_ANY_ADDRESS)
6513 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6514 	if (addressSpec == B_BASE_ADDRESS)
6515 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6516 
6517 	fix_protection(&protection);
6518 
6519 	virtual_address_restrictions virtualRestrictions = {};
6520 	virtualRestrictions.address = address;
6521 	virtualRestrictions.address_specification = addressSpec;
6522 	physical_address_restrictions physicalRestrictions = {};
6523 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6524 		size, lock, protection, 0, 0, &virtualRestrictions,
6525 		&physicalRestrictions, false, &address);
6526 
6527 	if (area >= B_OK
6528 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6529 		delete_area(area);
6530 		return B_BAD_ADDRESS;
6531 	}
6532 
6533 	return area;
6534 }
6535 
6536 
6537 status_t
6538 _user_delete_area(area_id area)
6539 {
6540 	// Unlike the BeOS implementation, you can now only delete areas
6541 	// that you have created yourself from userland.
6542 	// The documentation to delete_area() explicitly states that this
6543 	// will be restricted in the future, and so it will.
6544 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6545 }
6546 
6547 
6548 // TODO: create a BeOS style call for this!
6549 
6550 area_id
6551 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6552 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6553 	int fd, off_t offset)
6554 {
6555 	char name[B_OS_NAME_LENGTH];
6556 	void* address;
6557 	area_id area;
6558 
6559 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6560 		return B_BAD_VALUE;
6561 
6562 	fix_protection(&protection);
6563 
6564 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6565 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6566 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6567 		return B_BAD_ADDRESS;
6568 
6569 	if (addressSpec == B_EXACT_ADDRESS) {
6570 		if ((addr_t)address + size < (addr_t)address
6571 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6572 			return B_BAD_VALUE;
6573 		}
6574 		if (!IS_USER_ADDRESS(address)
6575 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6576 			return B_BAD_ADDRESS;
6577 		}
6578 	}
6579 
6580 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6581 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6582 		false);
6583 	if (area < B_OK)
6584 		return area;
6585 
6586 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6587 		return B_BAD_ADDRESS;
6588 
6589 	return area;
6590 }
6591 
6592 
6593 status_t
6594 _user_unmap_memory(void* _address, size_t size)
6595 {
6596 	addr_t address = (addr_t)_address;
6597 
6598 	// check params
6599 	if (size == 0 || (addr_t)address + size < (addr_t)address
6600 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6601 		return B_BAD_VALUE;
6602 	}
6603 
6604 	if (!IS_USER_ADDRESS(address)
6605 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6606 		return B_BAD_ADDRESS;
6607 	}
6608 
6609 	// Write lock the address space and ensure the address range is not wired.
6610 	AddressSpaceWriteLocker locker;
6611 	do {
6612 		status_t status = locker.SetTo(team_get_current_team_id());
6613 		if (status != B_OK)
6614 			return status;
6615 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6616 			size, &locker));
6617 
6618 	// unmap
6619 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6620 }
6621 
6622 
6623 status_t
6624 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6625 {
6626 	// check address range
6627 	addr_t address = (addr_t)_address;
6628 	size = PAGE_ALIGN(size);
6629 
6630 	if ((address % B_PAGE_SIZE) != 0)
6631 		return B_BAD_VALUE;
6632 	if (!validate_user_memory_range(_address, size)) {
6633 		// weird error code required by POSIX
6634 		return ENOMEM;
6635 	}
6636 
6637 	// extend and check protection
6638 	if ((protection & ~B_USER_PROTECTION) != 0)
6639 		return B_BAD_VALUE;
6640 
6641 	fix_protection(&protection);
6642 
6643 	// We need to write lock the address space, since we're going to play with
6644 	// the areas. Also make sure that none of the areas is wired and that we're
6645 	// actually allowed to change the protection.
6646 	AddressSpaceWriteLocker locker;
6647 
6648 	bool restart;
6649 	do {
6650 		restart = false;
6651 
6652 		status_t status = locker.SetTo(team_get_current_team_id());
6653 		if (status != B_OK)
6654 			return status;
6655 
6656 		// First round: Check whether the whole range is covered by areas and
6657 		// whether we are allowed to modify them.
6658 		addr_t currentAddress = address;
6659 		size_t sizeLeft = size;
6660 		while (sizeLeft > 0) {
6661 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6662 			if (area == NULL)
6663 				return B_NO_MEMORY;
6664 
6665 			if ((area->protection & B_KERNEL_AREA) != 0)
6666 				return B_NOT_ALLOWED;
6667 			if (area->protection_max != 0
6668 				&& (protection & area->protection_max) != protection) {
6669 				return B_NOT_ALLOWED;
6670 			}
6671 
6672 			addr_t offset = currentAddress - area->Base();
6673 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6674 
6675 			AreaCacheLocker cacheLocker(area);
6676 
6677 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6678 					&locker, &cacheLocker)) {
6679 				restart = true;
6680 				break;
6681 			}
6682 
6683 			cacheLocker.Unlock();
6684 
6685 			currentAddress += rangeSize;
6686 			sizeLeft -= rangeSize;
6687 		}
6688 	} while (restart);
6689 
6690 	// Second round: If the protections differ from that of the area, create a
6691 	// page protection array and re-map mapped pages.
6692 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6693 	addr_t currentAddress = address;
6694 	size_t sizeLeft = size;
6695 	while (sizeLeft > 0) {
6696 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6697 		if (area == NULL)
6698 			return B_NO_MEMORY;
6699 
6700 		addr_t offset = currentAddress - area->Base();
6701 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6702 
6703 		currentAddress += rangeSize;
6704 		sizeLeft -= rangeSize;
6705 
6706 		if (area->page_protections == NULL) {
6707 			if (area->protection == protection)
6708 				continue;
6709 
6710 			status_t status = allocate_area_page_protections(area);
6711 			if (status != B_OK)
6712 				return status;
6713 		}
6714 
6715 		// We need to lock the complete cache chain, since we potentially unmap
6716 		// pages of lower caches.
6717 		VMCache* topCache = vm_area_get_locked_cache(area);
6718 		VMCacheChainLocker cacheChainLocker(topCache);
6719 		cacheChainLocker.LockAllSourceCaches();
6720 
6721 		for (addr_t pageAddress = area->Base() + offset;
6722 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6723 			map->Lock();
6724 
6725 			set_area_page_protection(area, pageAddress, protection);
6726 
6727 			phys_addr_t physicalAddress;
6728 			uint32 flags;
6729 
6730 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6731 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6732 				map->Unlock();
6733 				continue;
6734 			}
6735 
6736 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6737 			if (page == NULL) {
6738 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6739 					"\n", area, physicalAddress);
6740 				map->Unlock();
6741 				return B_ERROR;
6742 			}
6743 
6744 			// If the page is not in the topmost cache and write access is
6745 			// requested, we have to unmap it. Otherwise we can re-map it with
6746 			// the new protection.
6747 			bool unmapPage = page->Cache() != topCache
6748 				&& (protection & B_WRITE_AREA) != 0;
6749 
6750 			if (!unmapPage)
6751 				map->ProtectPage(area, pageAddress, protection);
6752 
6753 			map->Unlock();
6754 
6755 			if (unmapPage) {
6756 				DEBUG_PAGE_ACCESS_START(page);
6757 				unmap_page(area, pageAddress);
6758 				DEBUG_PAGE_ACCESS_END(page);
6759 			}
6760 		}
6761 	}
6762 
6763 	return B_OK;
6764 }
6765 
6766 
6767 status_t
6768 _user_sync_memory(void* _address, size_t size, uint32 flags)
6769 {
6770 	addr_t address = (addr_t)_address;
6771 	size = PAGE_ALIGN(size);
6772 
6773 	// check params
6774 	if ((address % B_PAGE_SIZE) != 0)
6775 		return B_BAD_VALUE;
6776 	if (!validate_user_memory_range(_address, size)) {
6777 		// weird error code required by POSIX
6778 		return ENOMEM;
6779 	}
6780 
6781 	bool writeSync = (flags & MS_SYNC) != 0;
6782 	bool writeAsync = (flags & MS_ASYNC) != 0;
6783 	if (writeSync && writeAsync)
6784 		return B_BAD_VALUE;
6785 
6786 	if (size == 0 || (!writeSync && !writeAsync))
6787 		return B_OK;
6788 
6789 	// iterate through the range and sync all concerned areas
6790 	while (size > 0) {
6791 		// read lock the address space
6792 		AddressSpaceReadLocker locker;
6793 		status_t error = locker.SetTo(team_get_current_team_id());
6794 		if (error != B_OK)
6795 			return error;
6796 
6797 		// get the first area
6798 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6799 		if (area == NULL)
6800 			return B_NO_MEMORY;
6801 
6802 		uint32 offset = address - area->Base();
6803 		size_t rangeSize = min_c(area->Size() - offset, size);
6804 		offset += area->cache_offset;
6805 
6806 		// lock the cache
6807 		AreaCacheLocker cacheLocker(area);
6808 		if (!cacheLocker)
6809 			return B_BAD_VALUE;
6810 		VMCache* cache = area->cache;
6811 
6812 		locker.Unlock();
6813 
6814 		uint32 firstPage = offset >> PAGE_SHIFT;
6815 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6816 
6817 		// write the pages
6818 		if (cache->type == CACHE_TYPE_VNODE) {
6819 			if (writeSync) {
6820 				// synchronous
6821 				error = vm_page_write_modified_page_range(cache, firstPage,
6822 					endPage);
6823 				if (error != B_OK)
6824 					return error;
6825 			} else {
6826 				// asynchronous
6827 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6828 				// TODO: This is probably not quite what is supposed to happen.
6829 				// Especially when a lot has to be written, it might take ages
6830 				// until it really hits the disk.
6831 			}
6832 		}
6833 
6834 		address += rangeSize;
6835 		size -= rangeSize;
6836 	}
6837 
6838 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6839 	// synchronize multiple mappings of the same file. In our VM they never get
6840 	// out of sync, though, so we don't have to do anything.
6841 
6842 	return B_OK;
6843 }
6844 
6845 
6846 status_t
6847 _user_memory_advice(void* _address, size_t size, uint32 advice)
6848 {
6849 	addr_t address = (addr_t)_address;
6850 	if ((address % B_PAGE_SIZE) != 0)
6851 		return B_BAD_VALUE;
6852 
6853 	size = PAGE_ALIGN(size);
6854 	if (!validate_user_memory_range(_address, size)) {
6855 		// weird error code required by POSIX
6856 		return B_NO_MEMORY;
6857 	}
6858 
6859 	switch (advice) {
6860 		case MADV_NORMAL:
6861 		case MADV_SEQUENTIAL:
6862 		case MADV_RANDOM:
6863 		case MADV_WILLNEED:
6864 		case MADV_DONTNEED:
6865 			// TODO: Implement!
6866 			break;
6867 
6868 		case MADV_FREE:
6869 		{
6870 			AddressSpaceWriteLocker locker;
6871 			do {
6872 				status_t status = locker.SetTo(team_get_current_team_id());
6873 				if (status != B_OK)
6874 					return status;
6875 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6876 					address, size, &locker));
6877 
6878 			discard_address_range(locker.AddressSpace(), address, size, false);
6879 			break;
6880 		}
6881 
6882 		default:
6883 			return B_BAD_VALUE;
6884 	}
6885 
6886 	return B_OK;
6887 }
6888 
6889 
6890 status_t
6891 _user_get_memory_properties(team_id teamID, const void* address,
6892 	uint32* _protected, uint32* _lock)
6893 {
6894 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6895 		return B_BAD_ADDRESS;
6896 
6897 	AddressSpaceReadLocker locker;
6898 	status_t error = locker.SetTo(teamID);
6899 	if (error != B_OK)
6900 		return error;
6901 
6902 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6903 	if (area == NULL)
6904 		return B_NO_MEMORY;
6905 
6906 
6907 	uint32 protection = area->protection;
6908 	if (area->page_protections != NULL)
6909 		protection = get_area_page_protection(area, (addr_t)address);
6910 
6911 	uint32 wiring = area->wiring;
6912 
6913 	locker.Unlock();
6914 
6915 	error = user_memcpy(_protected, &protection, sizeof(protection));
6916 	if (error != B_OK)
6917 		return error;
6918 
6919 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6920 
6921 	return error;
6922 }
6923 
6924 
6925 // An ordered list of non-overlapping ranges to track mlock/munlock locking.
6926 // It is allowed to call mlock/munlock in unbalanced ways (lock a range
6927 // multiple times, unlock a part of it, lock several consecutive ranges and
6928 // unlock them in one go, etc.). However, the low-level lock_memory() and
6929 // unlock_memory() calls require the locks/unlocks to be balanced (you lock a
6930 // fixed range, and then unlock exactly the same range). This list allows us
6931 // to keep track of exactly what was locked, so we can unlock the right ranges.
6932 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> {
6933 	addr_t start;
6934 	addr_t end;
6935 
6936 	status_t LockMemory()
6937 	{
6938 		return lock_memory((void*)start, end - start, 0);
6939 	}
6940 
6941 	status_t UnlockMemory()
6942 	{
6943 		return unlock_memory((void*)start, end - start, 0);
6944 	}
6945 
6946 	status_t Move(addr_t start, addr_t end)
6947 	{
6948 		status_t result = lock_memory((void*)start, end - start, 0);
6949 		if (result != B_OK)
6950 			return result;
6951 
6952 		result = UnlockMemory();
6953 
6954 		if (result != B_OK) {
6955 			// What can we do if the unlock fails?
6956 			panic("Failed to unlock memory: %s", strerror(result));
6957 			return result;
6958 		}
6959 
6960 		this->start = start;
6961 		this->end = end;
6962 
6963 		return B_OK;
6964 	}
6965 };
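
// An illustrative walk-through of the bookkeeping (addresses are
// hypothetical): mlock() of [0x1000, 0x3000) followed by mlock() of
// [0x2000, 0x5000) ends up as the two non-overlapping entries
// [0x1000, 0x3000) and [0x3000, 0x5000), each backed by one balanced
// lock_memory() call, so that later munlock() calls can be resolved into
// matching unlock_memory() calls.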
6966 
6967 
6968 status_t
6969 _user_mlock(const void* address, size_t size) {
6970 	// Maybe there's nothing to do, in which case, do nothing
6971 	if (size == 0)
6972 		return B_OK;
6973 
6974 	// Make sure the address is multiple of B_PAGE_SIZE (POSIX allows us to
6975 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
6976 	if ((addr_t)address % B_PAGE_SIZE != 0)
6977 		return EINVAL;
6978 
6979 	size = ROUNDUP(size, B_PAGE_SIZE);
6980 
6981 	addr_t endAddress = (addr_t)address + size;
6982 
6983 	// Pre-allocate a linked list element we may need (it's simpler to do it
6984 	// now than to run out of memory in the middle of changing things)
6985 	LockedPages* newRange = new(std::nothrow) LockedPages();
6986 	if (newRange == NULL)
6987 		return ENOMEM;
6988 
6989 	// Get and lock the team
6990 	Team* team = thread_get_current_thread()->team;
6991 	TeamLocker teamLocker(team);
6992 	teamLocker.Lock();
6993 
6994 	status_t error = B_OK;
6995 	LockedPagesList* lockedPages = &team->locked_pages_list;
6996 
6997 	// Locate the first locked range possibly overlapping ours
6998 	LockedPages* currentRange = lockedPages->Head();
6999 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
7000 		currentRange = lockedPages->GetNext(currentRange);
7001 
7002 	if (currentRange == NULL || currentRange->start >= endAddress) {
7003 		// No existing range is overlapping with ours. We can just lock our
7004 		// range and stop here.
7005 		newRange->start = (addr_t)address;
7006 		newRange->end = endAddress;
7007 		error = newRange->LockMemory();
7008 		if (error != B_OK) {
7009 			delete newRange;
7010 			return error;
7011 		}
7012 		lockedPages->InsertBefore(currentRange, newRange);
7013 		return B_OK;
7014 	}
7015 
7016 	// We get here when there is at least one existing overlapping range.
7017 
7018 	if (currentRange->start <= (addr_t)address) {
7019 		if (currentRange->end >= endAddress) {
7020 			// An existing range is already fully covering the pages we need to
7021 			// lock. Nothing to do then.
7022 			delete newRange;
7023 			return B_OK;
7024 		} else {
7025 			// An existing range covers the start of the area we want to lock.
7026 			// Advance our start address to avoid it.
7027 			address = (void*)currentRange->end;
7028 
7029 			// Move on to the next range for the next step
7030 			currentRange = lockedPages->GetNext(currentRange);
7031 		}
7032 	}
7033 
7034 	// First, lock the new range
7035 	newRange->start = (addr_t)address;
7036 	newRange->end = endAddress;
7037 	error = newRange->LockMemory();
7038 	if (error != B_OK) {
7039 		delete newRange;
7040 		return error;
7041 	}
7042 
7043 	// Unlock all ranges fully overlapping with the area we need to lock
7044 	while (currentRange != NULL && currentRange->end < endAddress) {
7045 		// The existing range is fully contained inside the new one we're
7046 		// trying to lock. Delete/unlock it, and replace it with a new one
7047 		// (this limits fragmentation of the range list, and is simpler to
7048 		// manage)
7049 		error = currentRange->UnlockMemory();
7050 		if (error != B_OK) {
7051 			panic("Failed to unlock a memory range: %s", strerror(error));
7052 			newRange->UnlockMemory();
7053 			delete newRange;
7054 			return error;
7055 		}
7056 		LockedPages* temp = currentRange;
7057 		currentRange = lockedPages->GetNext(currentRange);
7058 		lockedPages->Remove(temp);
7059 		delete temp;
7060 	}
7061 
7062 	if (currentRange != NULL) {
7063 		// One last range may cover the end of the area we're trying to lock
7064 
7065 		if (currentRange->start == (addr_t)address) {
7066 			// In case two overlapping ranges (one at the start and the other
7067 			// at the end) already cover the area we're after, there's nothing
7068 			// more to do. So we destroy our new extra allocation
7069 			error = newRange->UnlockMemory();
7070 			delete newRange;
7071 			return error;
7072 		}
7073 
7074 		if (currentRange->start < endAddress) {
7075 			// Make sure the last range is not overlapping, by moving its start
7076 			error = currentRange->Move(endAddress, currentRange->end);
7077 			if (error != B_OK) {
7078 				panic("Failed to move a memory range: %s", strerror(error));
7079 				newRange->UnlockMemory();
7080 				delete newRange;
7081 				return error;
7082 			}
7083 		}
7084 	}
7085 
7086 	// Finally, store the new range in the locked list
7087 	lockedPages->InsertBefore(currentRange, newRange);
7088 	return B_OK;
7089 }
7090 
7091 
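/*!	Backs the POSIX munlock() call: unlocks the given page-aligned range and
	updates the team's locked_pages_list, removing, shrinking or splitting
	the tracked ranges as needed.
*/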
7092 status_t
7093 _user_munlock(const void* address, size_t size)
{
7094 	// Nothing to do for an empty range
7095 	if (size == 0)
7096 		return B_OK;
7097 
7098 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
7099 	// reject the call otherwise)
7100 	if ((addr_t)address % B_PAGE_SIZE != 0)
7101 		return EINVAL;
7102 
7103 	// Round size up to the next page
7104 	size = ROUNDUP(size, B_PAGE_SIZE);
7105 
7106 	addr_t endAddress = (addr_t)address + size;
7107 
7108 	// Get and lock the team
7109 	Team* team = thread_get_current_thread()->team;
7110 	TeamLocker teamLocker(team);
7111 	teamLocker.Lock();
7112 	LockedPagesList* lockedPages = &team->locked_pages_list;
7113 
7114 	status_t error = B_OK;
7115 
7116 	// Locate the first locked range possibly overlapping ours
7117 	LockedPages* currentRange = lockedPages->Head();
7118 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
7119 		currentRange = lockedPages->GetNext(currentRange);
7120 
7121 	if (currentRange == NULL || currentRange->start >= endAddress) {
7122 		// No range is intersecting, nothing to unlock
7123 		return B_OK;
7124 	}
7125 
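	// Example (illustrative only, hypothetical addresses, 4 KiB pages): with
	// [0x1000, 0x5000) tracked, munlock(0x2000, 0x1000) takes the split branch
	// below: it locks a new [0x3000, 0x5000) range, then shrinks the existing
	// entry to [0x1000, 0x2000) via Move(), so every low-level lock_memory()
	// call still ends up matched by exactly one unlock_memory().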
7126 	if (currentRange->start < (addr_t)address) {
7127 		if (currentRange->end > endAddress) {
7128 			// There is a range fully covering the area we want to unlock,
7129 			// and it extends on both sides. We need to split it in two
7130 			LockedPages* newRange = new(std::nothrow) LockedPages();
7131 			if (newRange == NULL)
7132 				return ENOMEM;
7133 
7134 			newRange->start = endAddress;
7135 			newRange->end = currentRange->end;
7136 
7137 			error = newRange->LockMemory();
7138 			if (error != B_OK) {
7139 				delete newRange;
7140 				return error;
7141 			}
7142 
7143 			error = currentRange->Move(currentRange->start, (addr_t)address);
7144 			if (error != B_OK) {
7145 				delete newRange;
7146 				return error;
7147 			}
7148 
7149 			lockedPages->InsertAfter(currentRange, newRange);
7150 			return B_OK;
7151 		} else {
7152 			// There is a range that overlaps and extends before the one we
7153 			// want to unlock, we need to shrink it
7154 			error = currentRange->Move(currentRange->start, (addr_t)address);
7155 			if (error != B_OK)
7156 				return error;
7157 		}
7158 	}
7159 
7160 	while (currentRange != NULL && currentRange->end <= endAddress) {
7161 		// Unlock all fully overlapping ranges
7162 		error = currentRange->UnlockMemory();
7163 		if (error != B_OK)
7164 			return error;
7165 		LockedPages* temp = currentRange;
7166 		currentRange = lockedPages->GetNext(currentRange);
7167 		lockedPages->Remove(temp);
7168 		delete temp;
7169 	}
7170 
7171 	// Finally split the last partially overlapping range if any
7172 	if (currentRange != NULL && currentRange->start < endAddress) {
7173 		error = currentRange->Move(endAddress, currentRange->end);
7174 		if (error != B_OK)
7175 			return error;
7176 	}
7177 
7178 	return B_OK;
7179 }
7180 
7181 
7182 // #pragma mark -- compatibility
7183 
7184 
7185 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7186 
7187 
7188 struct physical_entry_beos {
7189 	uint32	address;
7190 	uint32	size;
7191 };
7192 
7193 
7194 /*!	The physical_entry structure has changed. We need to translate it to the
7195 	old one.
7196 */
7197 extern "C" int32
7198 __get_memory_map_beos(const void* _address, size_t numBytes,
7199 	physical_entry_beos* table, int32 numEntries)
7200 {
7201 	if (numEntries <= 0)
7202 		return B_BAD_VALUE;
7203 
7204 	const uint8* address = (const uint8*)_address;
7205 
7206 	int32 count = 0;
7207 	while (numBytes > 0 && count < numEntries) {
7208 		physical_entry entry;
7209 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7210 		if (result < 0) {
7211 			if (result != B_BUFFER_OVERFLOW)
7212 				return result;
7213 		}
7214 
7215 		if (entry.address >= (phys_addr_t)1 << 32) {
7216 			panic("get_memory_map(): Address is greater than 4 GB!");
7217 			return B_ERROR;
7218 		}
7219 
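		// The check above guarantees that the address fits into 32 bits, so
		// the truncating assignment to the BeOS physical_entry is safe.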
7220 		table[count].address = entry.address;
7221 		table[count++].size = entry.size;
7222 
7223 		address += entry.size;
7224 		numBytes -= entry.size;
7225 	}
7226 
7227 	// null-terminate the table, if possible
7228 	if (count < numEntries) {
7229 		table[count].address = 0;
7230 		table[count].size = 0;
7231 	}
7232 
7233 	return B_OK;
7234 }
7235 
7236 
7237 /*!	The type of the \a physicalAddress parameter has changed from void* to
7238 	phys_addr_t.
7239 */
7240 extern "C" area_id
7241 __map_physical_memory_beos(const char* name, void* physicalAddress,
7242 	size_t numBytes, uint32 addressSpec, uint32 protection,
7243 	void** _virtualAddress)
7244 {
7245 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7246 		addressSpec, protection, _virtualAddress);
7247 }
7248 
7249 
7250 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7251 	we meddle with the \a lock parameter to force 32 bit.
7252 */
7253 extern "C" area_id
7254 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7255 	size_t size, uint32 lock, uint32 protection)
7256 {
7257 	switch (lock) {
7258 		case B_NO_LOCK:
7259 			break;
7260 		case B_FULL_LOCK:
7261 		case B_LAZY_LOCK:
7262 			lock = B_32_BIT_FULL_LOCK;
7263 			break;
7264 		case B_CONTIGUOUS:
7265 			lock = B_32_BIT_CONTIGUOUS;
7266 			break;
7267 	}
7268 
7269 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7270 		protection);
7271 }
7272 
7273 
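// Export the BeOS-compatible wrappers under the old "BASE" symbol version and
// the current implementations as the default ("@@") versions under "1_ALPHA3".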
7274 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7275 	"BASE");
7276 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7277 	"map_physical_memory@", "BASE");
7278 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7279 	"BASE");
7280 
7281 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7282 	"get_memory_map@@", "1_ALPHA3");
7283 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7284 	"map_physical_memory@@", "1_ALPHA3");
7285 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7286 	"1_ALPHA3");
7287 
7288 
7289 #else
7290 
7291 
7292 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7293 	"get_memory_map@@", "BASE");
7294 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7295 	"map_physical_memory@@", "BASE");
7296 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7297 	"BASE");
7298 
7299 
7300 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7301