xref: /haiku/src/system/kernel/vm/vm.cpp (revision 52f7c9389475e19fc21487b38064b4390eeb6fea)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
116 
117 
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligable for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
234 
235 } // namespace
236 
237 
238 // The memory reserve an allocation of the certain priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
244 
245 
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
435 
436 
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreaHash::ReadLock();
450 
451 	VMArea* area = VMAreaHash::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreaHash::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
493 
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	uint32 kernelProtection = 0;
509 	if ((protection & B_READ_AREA) != 0)
510 		kernelProtection |= B_KERNEL_READ_AREA;
511 	if ((protection & B_WRITE_AREA) != 0)
512 		kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 	// If this is a kernel area we return only the kernel flags.
515 	if (area->address_space == VMAddressSpace::Kernel())
516 		return kernelProtection;
517 
518 	return protection | kernelProtection;
519 }
520 
521 
522 /*!	The caller must have reserved enough pages the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so we must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active, since
575 		// otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
589 	page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
600 	mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the begin section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check, whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note, that in case of error your cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 
945 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
946 		| HEAP_DONT_LOCK_KERNEL_SPACE;
947 	int priority;
948 	if (addressSpace != VMAddressSpace::Kernel()) {
949 		priority = VM_PRIORITY_USER;
950 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
951 		priority = VM_PRIORITY_VIP;
952 		allocationFlags |= HEAP_PRIORITY_VIP;
953 	} else
954 		priority = VM_PRIORITY_SYSTEM;
955 
956 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
957 		allocationFlags);
958 	if (mapping != REGION_PRIVATE_MAP)
959 		area->protection_max = protectionMax & B_USER_PROTECTION;
960 	if (area == NULL)
961 		return B_NO_MEMORY;
962 
963 	status_t status;
964 
965 	// if this is a private map, we need to create a new cache
966 	// to handle the private copies of pages as they are written to
967 	VMCache* sourceCache = cache;
968 	if (mapping == REGION_PRIVATE_MAP) {
969 		VMCache* newCache;
970 
971 		// create an anonymous cache
972 		status = VMCacheFactory::CreateAnonymousCache(newCache,
973 			(protection & B_STACK_AREA) != 0
974 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
975 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
976 		if (status != B_OK)
977 			goto err1;
978 
979 		newCache->Lock();
980 		newCache->temporary = 1;
981 		newCache->virtual_base = offset;
982 		newCache->virtual_end = offset + size;
983 
984 		cache->AddConsumer(newCache);
985 
986 		cache = newCache;
987 	}
988 
989 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
990 		status = cache->SetMinimalCommitment(size, priority);
991 		if (status != B_OK)
992 			goto err2;
993 	}
994 
995 	// check to see if this address space has entered DELETE state
996 	if (addressSpace->IsBeingDeleted()) {
997 		// okay, someone is trying to delete this address space now, so we can't
998 		// insert the area, so back out
999 		status = B_BAD_TEAM_ID;
1000 		goto err2;
1001 	}
1002 
1003 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1004 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1005 		status = unmap_address_range(addressSpace,
1006 			(addr_t)addressRestrictions->address, size, kernel);
1007 		if (status != B_OK)
1008 			goto err2;
1009 	}
1010 
1011 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1012 		allocationFlags, _virtualAddress);
1013 	if (status == B_NO_MEMORY
1014 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1015 		// Due to how many locks are held, we cannot wait here for space to be
1016 		// freed up, but we can at least notify the low_resource handler.
1017 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1018 	}
1019 	if (status != B_OK)
1020 		goto err2;
1021 
1022 	// attach the cache to the area
1023 	area->cache = cache;
1024 	area->cache_offset = offset;
1025 
1026 	// point the cache back to the area
1027 	cache->InsertAreaLocked(area);
1028 	if (mapping == REGION_PRIVATE_MAP)
1029 		cache->Unlock();
1030 
1031 	// insert the area in the global area hash table
1032 	VMAreaHash::Insert(area);
1033 
1034 	// grab a ref to the address space (the area holds this)
1035 	addressSpace->Get();
1036 
1037 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1038 //		cache, sourceCache, areaName, area);
1039 
1040 	*_area = area;
1041 	return B_OK;
1042 
1043 err2:
1044 	if (mapping == REGION_PRIVATE_MAP) {
1045 		// We created this cache, so we must delete it again. Note, that we
1046 		// need to temporarily unlock the source cache or we'll otherwise
1047 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1048 		sourceCache->Unlock();
1049 		cache->ReleaseRefAndUnlock();
1050 		sourceCache->Lock();
1051 	}
1052 err1:
1053 	addressSpace->DeleteArea(area, allocationFlags);
1054 	return status;
1055 }
1056 
1057 
1058 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1059 	  locker1, locker2).
1060 */
1061 template<typename LockerType1, typename LockerType2>
1062 static inline bool
1063 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1064 {
1065 	area->cache->AssertLocked();
1066 
1067 	VMAreaUnwiredWaiter waiter;
1068 	if (!area->AddWaiterIfWired(&waiter))
1069 		return false;
1070 
1071 	// unlock everything and wait
1072 	if (locker1 != NULL)
1073 		locker1->Unlock();
1074 	if (locker2 != NULL)
1075 		locker2->Unlock();
1076 
1077 	waiter.waitEntry.Wait();
1078 
1079 	return true;
1080 }
1081 
1082 
1083 /*!	Checks whether the given area has any wired ranges intersecting with the
1084 	specified range and waits, if so.
1085 
1086 	When it has to wait, the function calls \c Unlock() on both \a locker1
1087 	and \a locker2, if given.
1088 	The area's top cache must be locked and must be unlocked as a side effect
1089 	of calling \c Unlock() on either \a locker1 or \a locker2.
1090 
1091 	If the function does not have to wait it does not modify or unlock any
1092 	object.
1093 
1094 	\param area The area to be checked.
1095 	\param base The base address of the range to check.
1096 	\param size The size of the address range to check.
1097 	\param locker1 An object to be unlocked when before starting to wait (may
1098 		be \c NULL).
1099 	\param locker2 An object to be unlocked when before starting to wait (may
1100 		be \c NULL).
1101 	\return \c true, if the function had to wait, \c false otherwise.
1102 */
1103 template<typename LockerType1, typename LockerType2>
1104 static inline bool
1105 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1106 	LockerType1* locker1, LockerType2* locker2)
1107 {
1108 	area->cache->AssertLocked();
1109 
1110 	VMAreaUnwiredWaiter waiter;
1111 	if (!area->AddWaiterIfWired(&waiter, base, size))
1112 		return false;
1113 
1114 	// unlock everything and wait
1115 	if (locker1 != NULL)
1116 		locker1->Unlock();
1117 	if (locker2 != NULL)
1118 		locker2->Unlock();
1119 
1120 	waiter.waitEntry.Wait();
1121 
1122 	return true;
1123 }
1124 
1125 
1126 /*!	Checks whether the given address space has any wired ranges intersecting
1127 	with the specified range and waits, if so.
1128 
1129 	Similar to wait_if_area_range_is_wired(), with the following differences:
1130 	- All areas intersecting with the range are checked (respectively all until
1131 	  one is found that contains a wired range intersecting with the given
1132 	  range).
1133 	- The given address space must at least be read-locked and must be unlocked
1134 	  when \c Unlock() is called on \a locker.
1135 	- None of the areas' caches are allowed to be locked.
1136 */
1137 template<typename LockerType>
1138 static inline bool
1139 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1140 	size_t size, LockerType* locker)
1141 {
1142 	for (VMAddressSpace::AreaRangeIterator it
1143 		= addressSpace->GetAreaRangeIterator(base, size);
1144 			VMArea* area = it.Next();) {
1145 
1146 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1147 
1148 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1149 			return true;
1150 	}
1151 
1152 	return false;
1153 }
1154 
1155 
1156 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1157 	It must be called in a situation where the kernel address space may be
1158 	locked.
1159 */
1160 status_t
1161 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1162 {
1163 	AddressSpaceReadLocker locker;
1164 	VMArea* area;
1165 	status_t status = locker.SetFromArea(id, area);
1166 	if (status != B_OK)
1167 		return status;
1168 
1169 	if (area->page_protections == NULL) {
1170 		status = allocate_area_page_protections(area);
1171 		if (status != B_OK)
1172 			return status;
1173 	}
1174 
1175 	*cookie = (void*)area;
1176 	return B_OK;
1177 }
1178 
1179 
1180 /*!	This is a debug helper function that can only be used with very specific
1181 	use cases.
1182 	Sets protection for the given address range to the protection specified.
1183 	If \a protection is 0 then the involved pages will be marked non-present
1184 	in the translation map to cause a fault on access. The pages aren't
1185 	actually unmapped however so that they can be marked present again with
1186 	additional calls to this function. For this to work the area must be
1187 	fully locked in memory so that the pages aren't otherwise touched.
1188 	This function does not lock the kernel address space and needs to be
1189 	supplied with a \a cookie retrieved from a successful call to
1190 	vm_prepare_kernel_area_debug_protection().
1191 */
1192 status_t
1193 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1194 	uint32 protection)
1195 {
1196 	// check address range
1197 	addr_t address = (addr_t)_address;
1198 	size = PAGE_ALIGN(size);
1199 
1200 	if ((address % B_PAGE_SIZE) != 0
1201 		|| (addr_t)address + size < (addr_t)address
1202 		|| !IS_KERNEL_ADDRESS(address)
1203 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1204 		return B_BAD_VALUE;
1205 	}
1206 
1207 	// Translate the kernel protection to user protection as we only store that.
1208 	if ((protection & B_KERNEL_READ_AREA) != 0)
1209 		protection |= B_READ_AREA;
1210 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1211 		protection |= B_WRITE_AREA;
1212 
1213 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1214 	VMTranslationMap* map = addressSpace->TranslationMap();
1215 	VMArea* area = (VMArea*)cookie;
1216 
1217 	addr_t offset = address - area->Base();
1218 	if (area->Size() - offset < size) {
1219 		panic("protect range not fully within supplied area");
1220 		return B_BAD_VALUE;
1221 	}
1222 
1223 	if (area->page_protections == NULL) {
1224 		panic("area has no page protections");
1225 		return B_BAD_VALUE;
1226 	}
1227 
1228 	// Invalidate the mapping entries so any access to them will fault or
1229 	// restore the mapping entries unchanged so that lookup will success again.
1230 	map->Lock();
1231 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1232 	map->Unlock();
1233 
1234 	// And set the proper page protections so that the fault case will actually
1235 	// fail and not simply try to map a new page.
1236 	for (addr_t pageAddress = address; pageAddress < address + size;
1237 			pageAddress += B_PAGE_SIZE) {
1238 		set_area_page_protection(area, pageAddress, protection);
1239 	}
1240 
1241 	return B_OK;
1242 }
1243 
1244 
1245 status_t
1246 vm_block_address_range(const char* name, void* address, addr_t size)
1247 {
1248 	if (!arch_vm_supports_protection(0))
1249 		return B_NOT_SUPPORTED;
1250 
1251 	AddressSpaceWriteLocker locker;
1252 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1253 	if (status != B_OK)
1254 		return status;
1255 
1256 	VMAddressSpace* addressSpace = locker.AddressSpace();
1257 
1258 	// create an anonymous cache
1259 	VMCache* cache;
1260 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1261 		VM_PRIORITY_SYSTEM);
1262 	if (status != B_OK)
1263 		return status;
1264 
1265 	cache->temporary = 1;
1266 	cache->virtual_end = size;
1267 	cache->Lock();
1268 
1269 	VMArea* area;
1270 	virtual_address_restrictions addressRestrictions = {};
1271 	addressRestrictions.address = address;
1272 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1273 	status = map_backing_store(addressSpace, cache, 0, name, size,
1274 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1275 		true, &area, NULL);
1276 	if (status != B_OK) {
1277 		cache->ReleaseRefAndUnlock();
1278 		return status;
1279 	}
1280 
1281 	cache->Unlock();
1282 	area->cache_type = CACHE_TYPE_RAM;
1283 	return area->id;
1284 }
1285 
1286 
1287 status_t
1288 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1289 {
1290 	AddressSpaceWriteLocker locker(team);
1291 	if (!locker.IsLocked())
1292 		return B_BAD_TEAM_ID;
1293 
1294 	VMAddressSpace* addressSpace = locker.AddressSpace();
1295 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1296 		addressSpace == VMAddressSpace::Kernel()
1297 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1298 }
1299 
1300 
1301 status_t
1302 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1303 	addr_t size, uint32 flags)
1304 {
1305 	if (size == 0)
1306 		return B_BAD_VALUE;
1307 
1308 	AddressSpaceWriteLocker locker(team);
1309 	if (!locker.IsLocked())
1310 		return B_BAD_TEAM_ID;
1311 
1312 	virtual_address_restrictions addressRestrictions = {};
1313 	addressRestrictions.address = *_address;
1314 	addressRestrictions.address_specification = addressSpec;
1315 	VMAddressSpace* addressSpace = locker.AddressSpace();
1316 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1317 		addressSpace == VMAddressSpace::Kernel()
1318 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1319 		_address);
1320 }
1321 
1322 
1323 area_id
1324 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1325 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1326 	const virtual_address_restrictions* virtualAddressRestrictions,
1327 	const physical_address_restrictions* physicalAddressRestrictions,
1328 	bool kernel, void** _address)
1329 {
1330 	VMArea* area;
1331 	VMCache* cache;
1332 	vm_page* page = NULL;
1333 	bool isStack = (protection & B_STACK_AREA) != 0;
1334 	page_num_t guardPages;
1335 	bool canOvercommit = false;
1336 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1337 		? VM_PAGE_ALLOC_CLEAR : 0;
1338 
1339 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1340 		team, name, size));
1341 
1342 	size = PAGE_ALIGN(size);
1343 	guardSize = PAGE_ALIGN(guardSize);
1344 	guardPages = guardSize / B_PAGE_SIZE;
1345 
1346 	if (size == 0 || size < guardSize)
1347 		return B_BAD_VALUE;
1348 	if (!arch_vm_supports_protection(protection))
1349 		return B_NOT_SUPPORTED;
1350 
1351 	if (team == B_CURRENT_TEAM)
1352 		team = VMAddressSpace::CurrentID();
1353 	if (team < 0)
1354 		return B_BAD_TEAM_ID;
1355 
1356 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1357 		canOvercommit = true;
1358 
1359 #ifdef DEBUG_KERNEL_STACKS
1360 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1361 		isStack = true;
1362 #endif
1363 
1364 	// check parameters
1365 	switch (virtualAddressRestrictions->address_specification) {
1366 		case B_ANY_ADDRESS:
1367 		case B_EXACT_ADDRESS:
1368 		case B_BASE_ADDRESS:
1369 		case B_ANY_KERNEL_ADDRESS:
1370 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1371 		case B_RANDOMIZED_ANY_ADDRESS:
1372 		case B_RANDOMIZED_BASE_ADDRESS:
1373 			break;
1374 
1375 		default:
1376 			return B_BAD_VALUE;
1377 	}
1378 
1379 	// If low or high physical address restrictions are given, we force
1380 	// B_CONTIGUOUS wiring, since only then we'll use
1381 	// vm_page_allocate_page_run() which deals with those restrictions.
1382 	if (physicalAddressRestrictions->low_address != 0
1383 		|| physicalAddressRestrictions->high_address != 0) {
1384 		wiring = B_CONTIGUOUS;
1385 	}
1386 
1387 	physical_address_restrictions stackPhysicalRestrictions;
1388 	bool doReserveMemory = false;
1389 	switch (wiring) {
1390 		case B_NO_LOCK:
1391 			break;
1392 		case B_FULL_LOCK:
1393 		case B_LAZY_LOCK:
1394 		case B_CONTIGUOUS:
1395 			doReserveMemory = true;
1396 			break;
1397 		case B_ALREADY_WIRED:
1398 			break;
1399 		case B_LOMEM:
1400 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1401 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1402 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1403 			wiring = B_CONTIGUOUS;
1404 			doReserveMemory = true;
1405 			break;
1406 		case B_32_BIT_FULL_LOCK:
1407 			if (B_HAIKU_PHYSICAL_BITS <= 32
1408 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1409 				wiring = B_FULL_LOCK;
1410 				doReserveMemory = true;
1411 				break;
1412 			}
1413 			// TODO: We don't really support this mode efficiently. Just fall
1414 			// through for now ...
1415 		case B_32_BIT_CONTIGUOUS:
1416 			#if B_HAIKU_PHYSICAL_BITS > 32
1417 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1418 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1419 					stackPhysicalRestrictions.high_address
1420 						= (phys_addr_t)1 << 32;
1421 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1422 				}
1423 			#endif
1424 			wiring = B_CONTIGUOUS;
1425 			doReserveMemory = true;
1426 			break;
1427 		default:
1428 			return B_BAD_VALUE;
1429 	}
1430 
1431 	// Optimization: For a single-page contiguous allocation without low/high
1432 	// memory restriction B_FULL_LOCK wiring suffices.
1433 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1434 		&& physicalAddressRestrictions->low_address == 0
1435 		&& physicalAddressRestrictions->high_address == 0) {
1436 		wiring = B_FULL_LOCK;
1437 	}
1438 
1439 	// For full lock or contiguous areas we're also going to map the pages and
1440 	// thus need to reserve pages for the mapping backend upfront.
1441 	addr_t reservedMapPages = 0;
1442 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1443 		AddressSpaceWriteLocker locker;
1444 		status_t status = locker.SetTo(team);
1445 		if (status != B_OK)
1446 			return status;
1447 
1448 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1449 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1450 	}
1451 
1452 	int priority;
1453 	if (team != VMAddressSpace::KernelID())
1454 		priority = VM_PRIORITY_USER;
1455 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1456 		priority = VM_PRIORITY_VIP;
1457 	else
1458 		priority = VM_PRIORITY_SYSTEM;
1459 
1460 	// Reserve memory before acquiring the address space lock. This reduces the
1461 	// chances of failure, since while holding the write lock to the address
1462 	// space (if it is the kernel address space that is), the low memory handler
1463 	// won't be able to free anything for us.
1464 	addr_t reservedMemory = 0;
1465 	if (doReserveMemory) {
1466 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1467 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1468 			return B_NO_MEMORY;
1469 		reservedMemory = size;
1470 		// TODO: We don't reserve the memory for the pages for the page
1471 		// directories/tables. We actually need to do since we currently don't
1472 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1473 		// there are actually less physical pages than there should be, which
1474 		// can get the VM into trouble in low memory situations.
1475 	}
1476 
1477 	AddressSpaceWriteLocker locker;
1478 	VMAddressSpace* addressSpace;
1479 	status_t status;
1480 
1481 	// For full lock areas reserve the pages before locking the address
1482 	// space. E.g. block caches can't release their memory while we hold the
1483 	// address space lock.
1484 	page_num_t reservedPages = reservedMapPages;
1485 	if (wiring == B_FULL_LOCK)
1486 		reservedPages += size / B_PAGE_SIZE;
1487 
1488 	vm_page_reservation reservation;
1489 	if (reservedPages > 0) {
1490 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1491 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1492 					priority)) {
1493 				reservedPages = 0;
1494 				status = B_WOULD_BLOCK;
1495 				goto err0;
1496 			}
1497 		} else
1498 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1499 	}
1500 
1501 	if (wiring == B_CONTIGUOUS) {
1502 		// we try to allocate the page run here upfront as this may easily
1503 		// fail for obvious reasons
1504 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1505 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1506 		if (page == NULL) {
1507 			status = B_NO_MEMORY;
1508 			goto err0;
1509 		}
1510 	}
1511 
1512 	// Lock the address space and, if B_EXACT_ADDRESS and
1513 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1514 	// is not wired.
1515 	do {
1516 		status = locker.SetTo(team);
1517 		if (status != B_OK)
1518 			goto err1;
1519 
1520 		addressSpace = locker.AddressSpace();
1521 	} while (virtualAddressRestrictions->address_specification
1522 			== B_EXACT_ADDRESS
1523 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1524 		&& wait_if_address_range_is_wired(addressSpace,
1525 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1526 
1527 	// create an anonymous cache
1528 	// if it's a stack, make sure that two pages are available at least
1529 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1530 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1531 		wiring == B_NO_LOCK, priority);
1532 	if (status != B_OK)
1533 		goto err1;
1534 
1535 	cache->temporary = 1;
1536 	cache->virtual_end = size;
1537 	cache->committed_size = reservedMemory;
1538 		// TODO: This should be done via a method.
1539 	reservedMemory = 0;
1540 
1541 	cache->Lock();
1542 
1543 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1544 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1545 		virtualAddressRestrictions, kernel, &area, _address);
1546 
1547 	if (status != B_OK) {
1548 		cache->ReleaseRefAndUnlock();
1549 		goto err1;
1550 	}
1551 
1552 	locker.DegradeToReadLock();
1553 
1554 	switch (wiring) {
1555 		case B_NO_LOCK:
1556 		case B_LAZY_LOCK:
1557 			// do nothing - the pages are mapped in as needed
1558 			break;
1559 
1560 		case B_FULL_LOCK:
1561 		{
1562 			// Allocate and map all pages for this area
1563 
1564 			off_t offset = 0;
1565 			for (addr_t address = area->Base();
1566 					address < area->Base() + (area->Size() - 1);
1567 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1568 #ifdef DEBUG_KERNEL_STACKS
1569 #	ifdef STACK_GROWS_DOWNWARDS
1570 				if (isStack && address < area->Base()
1571 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1572 #	else
1573 				if (isStack && address >= area->Base() + area->Size()
1574 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1575 #	endif
1576 					continue;
1577 #endif
1578 				vm_page* page = vm_page_allocate_page(&reservation,
1579 					PAGE_STATE_WIRED | pageAllocFlags);
1580 				cache->InsertPage(page, offset);
1581 				map_page(area, page, address, protection, &reservation);
1582 
1583 				DEBUG_PAGE_ACCESS_END(page);
1584 			}
1585 
1586 			break;
1587 		}
1588 
1589 		case B_ALREADY_WIRED:
1590 		{
1591 			// The pages should already be mapped. This is only really useful
1592 			// during boot time. Find the appropriate vm_page objects and stick
1593 			// them in the cache object.
1594 			VMTranslationMap* map = addressSpace->TranslationMap();
1595 			off_t offset = 0;
1596 
1597 			if (!gKernelStartup)
1598 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1599 
1600 			map->Lock();
1601 
1602 			for (addr_t virtualAddress = area->Base();
1603 					virtualAddress < area->Base() + (area->Size() - 1);
1604 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1605 				phys_addr_t physicalAddress;
1606 				uint32 flags;
1607 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1608 				if (status < B_OK) {
1609 					panic("looking up mapping failed for va 0x%lx\n",
1610 						virtualAddress);
1611 				}
1612 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1613 				if (page == NULL) {
1614 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1615 						"\n", physicalAddress);
1616 				}
1617 
1618 				DEBUG_PAGE_ACCESS_START(page);
1619 
1620 				cache->InsertPage(page, offset);
1621 				increment_page_wired_count(page);
1622 				vm_page_set_state(page, PAGE_STATE_WIRED);
1623 				page->busy = false;
1624 
1625 				DEBUG_PAGE_ACCESS_END(page);
1626 			}
1627 
1628 			map->Unlock();
1629 			break;
1630 		}
1631 
1632 		case B_CONTIGUOUS:
1633 		{
1634 			// We have already allocated our continuous pages run, so we can now
1635 			// just map them in the address space
1636 			VMTranslationMap* map = addressSpace->TranslationMap();
1637 			phys_addr_t physicalAddress
1638 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1639 			addr_t virtualAddress = area->Base();
1640 			off_t offset = 0;
1641 
1642 			map->Lock();
1643 
1644 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1645 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1646 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1647 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1648 				if (page == NULL)
1649 					panic("couldn't lookup physical page just allocated\n");
1650 
1651 				status = map->Map(virtualAddress, physicalAddress, protection,
1652 					area->MemoryType(), &reservation);
1653 				if (status < B_OK)
1654 					panic("couldn't map physical page in page run\n");
1655 
1656 				cache->InsertPage(page, offset);
1657 				increment_page_wired_count(page);
1658 
1659 				DEBUG_PAGE_ACCESS_END(page);
1660 			}
1661 
1662 			map->Unlock();
1663 			break;
1664 		}
1665 
1666 		default:
1667 			break;
1668 	}
1669 
1670 	cache->Unlock();
1671 
1672 	if (reservedPages > 0)
1673 		vm_page_unreserve_pages(&reservation);
1674 
1675 	TRACE(("vm_create_anonymous_area: done\n"));
1676 
1677 	area->cache_type = CACHE_TYPE_RAM;
1678 	return area->id;
1679 
1680 err1:
1681 	if (wiring == B_CONTIGUOUS) {
1682 		// we had reserved the area space upfront...
1683 		phys_addr_t pageNumber = page->physical_page_number;
1684 		int32 i;
1685 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1686 			page = vm_lookup_page(pageNumber);
1687 			if (page == NULL)
1688 				panic("couldn't lookup physical page just allocated\n");
1689 
1690 			vm_page_set_state(page, PAGE_STATE_FREE);
1691 		}
1692 	}
1693 
1694 err0:
1695 	if (reservedPages > 0)
1696 		vm_page_unreserve_pages(&reservation);
1697 	if (reservedMemory > 0)
1698 		vm_unreserve_memory(reservedMemory);
1699 
1700 	return status;
1701 }
1702 
1703 
1704 area_id
1705 vm_map_physical_memory(team_id team, const char* name, void** _address,
1706 	uint32 addressSpec, addr_t size, uint32 protection,
1707 	phys_addr_t physicalAddress, bool alreadyWired)
1708 {
1709 	VMArea* area;
1710 	VMCache* cache;
1711 	addr_t mapOffset;
1712 
1713 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1714 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1715 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1716 		addressSpec, size, protection, physicalAddress));
1717 
1718 	if (!arch_vm_supports_protection(protection))
1719 		return B_NOT_SUPPORTED;
1720 
1721 	AddressSpaceWriteLocker locker(team);
1722 	if (!locker.IsLocked())
1723 		return B_BAD_TEAM_ID;
1724 
1725 	// if the physical address is somewhat inside a page,
1726 	// move the actual area down to align on a page boundary
1727 	mapOffset = physicalAddress % B_PAGE_SIZE;
1728 	size += mapOffset;
1729 	physicalAddress -= mapOffset;
1730 
1731 	size = PAGE_ALIGN(size);
1732 
1733 	// create a device cache
1734 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1735 	if (status != B_OK)
1736 		return status;
1737 
1738 	cache->virtual_end = size;
1739 
1740 	cache->Lock();
1741 
1742 	virtual_address_restrictions addressRestrictions = {};
1743 	addressRestrictions.address = *_address;
1744 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1745 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1746 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1747 		true, &area, _address);
1748 
1749 	if (status < B_OK)
1750 		cache->ReleaseRefLocked();
1751 
1752 	cache->Unlock();
1753 
1754 	if (status == B_OK) {
1755 		// set requested memory type -- use uncached, if not given
1756 		uint32 memoryType = addressSpec & B_MTR_MASK;
1757 		if (memoryType == 0)
1758 			memoryType = B_MTR_UC;
1759 
1760 		area->SetMemoryType(memoryType);
1761 
1762 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1763 		if (status != B_OK)
1764 			delete_area(locker.AddressSpace(), area, false);
1765 	}
1766 
1767 	if (status != B_OK)
1768 		return status;
1769 
1770 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1771 
1772 	if (alreadyWired) {
1773 		// The area is already mapped, but possibly not with the right
1774 		// memory type.
1775 		map->Lock();
1776 		map->ProtectArea(area, area->protection);
1777 		map->Unlock();
1778 	} else {
1779 		// Map the area completely.
1780 
1781 		// reserve pages needed for the mapping
1782 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1783 			area->Base() + (size - 1));
1784 		vm_page_reservation reservation;
1785 		vm_page_reserve_pages(&reservation, reservePages,
1786 			team == VMAddressSpace::KernelID()
1787 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1788 
1789 		map->Lock();
1790 
1791 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1792 			map->Map(area->Base() + offset, physicalAddress + offset,
1793 				protection, area->MemoryType(), &reservation);
1794 		}
1795 
1796 		map->Unlock();
1797 
1798 		vm_page_unreserve_pages(&reservation);
1799 	}
1800 
1801 	// modify the pointer returned to be offset back into the new area
1802 	// the same way the physical address in was offset
1803 	*_address = (void*)((addr_t)*_address + mapOffset);
1804 
1805 	area->cache_type = CACHE_TYPE_DEVICE;
1806 	return area->id;
1807 }
1808 
1809 
1810 /*!	Don't use!
1811 	TODO: This function was introduced to map physical page vecs to
1812 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1813 	use a device cache and does not track vm_page::wired_count!
1814 */
1815 area_id
1816 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1817 	uint32 addressSpec, addr_t* _size, uint32 protection,
1818 	struct generic_io_vec* vecs, uint32 vecCount)
1819 {
1820 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1821 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1822 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1823 		addressSpec, _size, protection, vecs, vecCount));
1824 
1825 	if (!arch_vm_supports_protection(protection)
1826 		|| (addressSpec & B_MTR_MASK) != 0) {
1827 		return B_NOT_SUPPORTED;
1828 	}
1829 
1830 	AddressSpaceWriteLocker locker(team);
1831 	if (!locker.IsLocked())
1832 		return B_BAD_TEAM_ID;
1833 
1834 	if (vecCount == 0)
1835 		return B_BAD_VALUE;
1836 
1837 	addr_t size = 0;
1838 	for (uint32 i = 0; i < vecCount; i++) {
1839 		if (vecs[i].base % B_PAGE_SIZE != 0
1840 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1841 			return B_BAD_VALUE;
1842 		}
1843 
1844 		size += vecs[i].length;
1845 	}
1846 
1847 	// create a device cache
1848 	VMCache* cache;
1849 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1850 	if (result != B_OK)
1851 		return result;
1852 
1853 	cache->virtual_end = size;
1854 
1855 	cache->Lock();
1856 
1857 	VMArea* area;
1858 	virtual_address_restrictions addressRestrictions = {};
1859 	addressRestrictions.address = *_address;
1860 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1861 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1862 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1863 		&addressRestrictions, true, &area, _address);
1864 
1865 	if (result != B_OK)
1866 		cache->ReleaseRefLocked();
1867 
1868 	cache->Unlock();
1869 
1870 	if (result != B_OK)
1871 		return result;
1872 
1873 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1874 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1875 		area->Base() + (size - 1));
1876 
1877 	vm_page_reservation reservation;
1878 	vm_page_reserve_pages(&reservation, reservePages,
1879 			team == VMAddressSpace::KernelID()
1880 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1881 	map->Lock();
1882 
1883 	uint32 vecIndex = 0;
1884 	size_t vecOffset = 0;
1885 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1886 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1887 			vecOffset = 0;
1888 			vecIndex++;
1889 		}
1890 
1891 		if (vecIndex >= vecCount)
1892 			break;
1893 
1894 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1895 			protection, area->MemoryType(), &reservation);
1896 
1897 		vecOffset += B_PAGE_SIZE;
1898 	}
1899 
1900 	map->Unlock();
1901 	vm_page_unreserve_pages(&reservation);
1902 
1903 	if (_size != NULL)
1904 		*_size = size;
1905 
1906 	area->cache_type = CACHE_TYPE_DEVICE;
1907 	return area->id;
1908 }
1909 
1910 
1911 area_id
1912 vm_create_null_area(team_id team, const char* name, void** address,
1913 	uint32 addressSpec, addr_t size, uint32 flags)
1914 {
1915 	size = PAGE_ALIGN(size);
1916 
1917 	// Lock the address space and, if B_EXACT_ADDRESS and
1918 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1919 	// is not wired.
1920 	AddressSpaceWriteLocker locker;
1921 	do {
1922 		if (locker.SetTo(team) != B_OK)
1923 			return B_BAD_TEAM_ID;
1924 	} while (addressSpec == B_EXACT_ADDRESS
1925 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1926 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1927 			(addr_t)*address, size, &locker));
1928 
1929 	// create a null cache
1930 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1931 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1932 	VMCache* cache;
1933 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1934 	if (status != B_OK)
1935 		return status;
1936 
1937 	cache->temporary = 1;
1938 	cache->virtual_end = size;
1939 
1940 	cache->Lock();
1941 
1942 	VMArea* area;
1943 	virtual_address_restrictions addressRestrictions = {};
1944 	addressRestrictions.address = *address;
1945 	addressRestrictions.address_specification = addressSpec;
1946 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1947 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1948 		REGION_NO_PRIVATE_MAP, flags,
1949 		&addressRestrictions, true, &area, address);
1950 
1951 	if (status < B_OK) {
1952 		cache->ReleaseRefAndUnlock();
1953 		return status;
1954 	}
1955 
1956 	cache->Unlock();
1957 
1958 	area->cache_type = CACHE_TYPE_NULL;
1959 	return area->id;
1960 }
1961 
1962 
1963 /*!	Creates the vnode cache for the specified \a vnode.
1964 	The vnode has to be marked busy when calling this function.
1965 */
1966 status_t
1967 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1968 {
1969 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1970 }
1971 
1972 
1973 /*!	\a cache must be locked. The area's address space must be read-locked.
1974 */
1975 static void
1976 pre_map_area_pages(VMArea* area, VMCache* cache,
1977 	vm_page_reservation* reservation)
1978 {
1979 	addr_t baseAddress = area->Base();
1980 	addr_t cacheOffset = area->cache_offset;
1981 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1982 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1983 
1984 	for (VMCachePagesTree::Iterator it
1985 				= cache->pages.GetIterator(firstPage, true, true);
1986 			vm_page* page = it.Next();) {
1987 		if (page->cache_offset >= endPage)
1988 			break;
1989 
1990 		// skip busy and inactive pages
1991 		if (page->busy || page->usage_count == 0)
1992 			continue;
1993 
1994 		DEBUG_PAGE_ACCESS_START(page);
1995 		map_page(area, page,
1996 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1997 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1998 		DEBUG_PAGE_ACCESS_END(page);
1999 	}
2000 }
2001 
2002 
2003 /*!	Will map the file specified by \a fd to an area in memory.
2004 	The file will be mirrored beginning at the specified \a offset. The
2005 	\a offset and \a size arguments have to be page aligned.
2006 */
2007 static area_id
2008 _vm_map_file(team_id team, const char* name, void** _address,
2009 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2010 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2011 {
2012 	// TODO: for binary files, we want to make sure that they get the
2013 	//	copy of a file at a given time, ie. later changes should not
2014 	//	make it into the mapped copy -- this will need quite some changes
2015 	//	to be done in a nice way
2016 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2017 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2018 
2019 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2020 	size = PAGE_ALIGN(size);
2021 
2022 	if (mapping == REGION_NO_PRIVATE_MAP)
2023 		protection |= B_SHARED_AREA;
2024 	if (addressSpec != B_EXACT_ADDRESS)
2025 		unmapAddressRange = false;
2026 
2027 	if (fd < 0) {
2028 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2029 		virtual_address_restrictions virtualRestrictions = {};
2030 		virtualRestrictions.address = *_address;
2031 		virtualRestrictions.address_specification = addressSpec;
2032 		physical_address_restrictions physicalRestrictions = {};
2033 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2034 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2035 			_address);
2036 	}
2037 
2038 	// get the open flags of the FD
2039 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2040 	if (descriptor == NULL)
2041 		return EBADF;
2042 	int32 openMode = descriptor->open_mode;
2043 	put_fd(descriptor);
2044 
2045 	// The FD must open for reading at any rate. For shared mapping with write
2046 	// access, additionally the FD must be open for writing.
2047 	if ((openMode & O_ACCMODE) == O_WRONLY
2048 		|| (mapping == REGION_NO_PRIVATE_MAP
2049 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2050 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2051 		return EACCES;
2052 	}
2053 
2054 	uint32 protectionMax = 0;
2055 	if (mapping != REGION_PRIVATE_MAP) {
2056 		protectionMax = protection | B_READ_AREA;
2057 		if ((openMode & O_ACCMODE) == O_RDWR)
2058 			protectionMax |= B_WRITE_AREA;
2059 	}
2060 
2061 	// get the vnode for the object, this also grabs a ref to it
2062 	struct vnode* vnode = NULL;
2063 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2064 	if (status < B_OK)
2065 		return status;
2066 	VnodePutter vnodePutter(vnode);
2067 
2068 	// If we're going to pre-map pages, we need to reserve the pages needed by
2069 	// the mapping backend upfront.
2070 	page_num_t reservedPreMapPages = 0;
2071 	vm_page_reservation reservation;
2072 	if ((protection & B_READ_AREA) != 0) {
2073 		AddressSpaceWriteLocker locker;
2074 		status = locker.SetTo(team);
2075 		if (status != B_OK)
2076 			return status;
2077 
2078 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2079 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2080 
2081 		locker.Unlock();
2082 
2083 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2084 			team == VMAddressSpace::KernelID()
2085 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2086 	}
2087 
2088 	struct PageUnreserver {
2089 		PageUnreserver(vm_page_reservation* reservation)
2090 			:
2091 			fReservation(reservation)
2092 		{
2093 		}
2094 
2095 		~PageUnreserver()
2096 		{
2097 			if (fReservation != NULL)
2098 				vm_page_unreserve_pages(fReservation);
2099 		}
2100 
2101 		vm_page_reservation* fReservation;
2102 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2103 
2104 	// Lock the address space and, if the specified address range shall be
2105 	// unmapped, ensure it is not wired.
2106 	AddressSpaceWriteLocker locker;
2107 	do {
2108 		if (locker.SetTo(team) != B_OK)
2109 			return B_BAD_TEAM_ID;
2110 	} while (unmapAddressRange
2111 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2112 			(addr_t)*_address, size, &locker));
2113 
2114 	// TODO: this only works for file systems that use the file cache
2115 	VMCache* cache;
2116 	status = vfs_get_vnode_cache(vnode, &cache, false);
2117 	if (status < B_OK)
2118 		return status;
2119 
2120 	cache->Lock();
2121 
2122 	VMArea* area;
2123 	virtual_address_restrictions addressRestrictions = {};
2124 	addressRestrictions.address = *_address;
2125 	addressRestrictions.address_specification = addressSpec;
2126 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2127 		0, protection, protectionMax, mapping,
2128 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2129 		&addressRestrictions, kernel, &area, _address);
2130 
2131 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2132 		// map_backing_store() cannot know we no longer need the ref
2133 		cache->ReleaseRefLocked();
2134 	}
2135 
2136 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2137 		pre_map_area_pages(area, cache, &reservation);
2138 
2139 	cache->Unlock();
2140 
2141 	if (status == B_OK) {
2142 		// TODO: this probably deserves a smarter solution, ie. don't always
2143 		// prefetch stuff, and also, probably don't trigger it at this place.
2144 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2145 			// prefetches at max 10 MB starting from "offset"
2146 	}
2147 
2148 	if (status != B_OK)
2149 		return status;
2150 
2151 	area->cache_type = CACHE_TYPE_VNODE;
2152 	return area->id;
2153 }
2154 
2155 
2156 area_id
2157 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2158 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2159 	int fd, off_t offset)
2160 {
2161 	if (!arch_vm_supports_protection(protection))
2162 		return B_NOT_SUPPORTED;
2163 
2164 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2165 		mapping, unmapAddressRange, fd, offset, true);
2166 }
2167 
2168 
2169 VMCache*
2170 vm_area_get_locked_cache(VMArea* area)
2171 {
2172 	rw_lock_read_lock(&sAreaCacheLock);
2173 
2174 	while (true) {
2175 		VMCache* cache = area->cache;
2176 
2177 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2178 			// cache has been deleted
2179 			rw_lock_read_lock(&sAreaCacheLock);
2180 			continue;
2181 		}
2182 
2183 		rw_lock_read_lock(&sAreaCacheLock);
2184 
2185 		if (cache == area->cache) {
2186 			cache->AcquireRefLocked();
2187 			rw_lock_read_unlock(&sAreaCacheLock);
2188 			return cache;
2189 		}
2190 
2191 		// the cache changed in the meantime
2192 		cache->Unlock();
2193 	}
2194 }
2195 
2196 
2197 void
2198 vm_area_put_locked_cache(VMCache* cache)
2199 {
2200 	cache->ReleaseRefAndUnlock();
2201 }
2202 
2203 
2204 area_id
2205 vm_clone_area(team_id team, const char* name, void** address,
2206 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2207 	bool kernel)
2208 {
2209 	VMArea* newArea = NULL;
2210 	VMArea* sourceArea;
2211 
2212 	// Check whether the source area exists and is cloneable. If so, mark it
2213 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2214 	{
2215 		AddressSpaceWriteLocker locker;
2216 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2217 		if (status != B_OK)
2218 			return status;
2219 
2220 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2221 			return B_NOT_ALLOWED;
2222 
2223 		sourceArea->protection |= B_SHARED_AREA;
2224 		protection |= B_SHARED_AREA;
2225 	}
2226 
2227 	// Now lock both address spaces and actually do the cloning.
2228 
2229 	MultiAddressSpaceLocker locker;
2230 	VMAddressSpace* sourceAddressSpace;
2231 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2232 	if (status != B_OK)
2233 		return status;
2234 
2235 	VMAddressSpace* targetAddressSpace;
2236 	status = locker.AddTeam(team, true, &targetAddressSpace);
2237 	if (status != B_OK)
2238 		return status;
2239 
2240 	status = locker.Lock();
2241 	if (status != B_OK)
2242 		return status;
2243 
2244 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2245 	if (sourceArea == NULL)
2246 		return B_BAD_VALUE;
2247 
2248 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2249 		return B_NOT_ALLOWED;
2250 
2251 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2252 
2253 	if (!kernel && sourceAddressSpace != targetAddressSpace
2254 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2255 #if KDEBUG
2256 		Team* team = thread_get_current_thread()->team;
2257 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2258 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2259 #endif
2260 		status = B_NOT_ALLOWED;
2261 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2262 		status = B_NOT_ALLOWED;
2263 	} else {
2264 		virtual_address_restrictions addressRestrictions = {};
2265 		addressRestrictions.address = *address;
2266 		addressRestrictions.address_specification = addressSpec;
2267 		status = map_backing_store(targetAddressSpace, cache,
2268 			sourceArea->cache_offset, name, sourceArea->Size(),
2269 			sourceArea->wiring, protection, sourceArea->protection_max,
2270 			mapping, 0, &addressRestrictions,
2271 			kernel, &newArea, address);
2272 	}
2273 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2274 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2275 		// to create a new cache, and has therefore already acquired a reference
2276 		// to the source cache - but otherwise it has no idea that we need
2277 		// one.
2278 		cache->AcquireRefLocked();
2279 	}
2280 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2281 		// we need to map in everything at this point
2282 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2283 			// we don't have actual pages to map but a physical area
2284 			VMTranslationMap* map
2285 				= sourceArea->address_space->TranslationMap();
2286 			map->Lock();
2287 
2288 			phys_addr_t physicalAddress;
2289 			uint32 oldProtection;
2290 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2291 
2292 			map->Unlock();
2293 
2294 			map = targetAddressSpace->TranslationMap();
2295 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2296 				newArea->Base() + (newArea->Size() - 1));
2297 
2298 			vm_page_reservation reservation;
2299 			vm_page_reserve_pages(&reservation, reservePages,
2300 				targetAddressSpace == VMAddressSpace::Kernel()
2301 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2302 			map->Lock();
2303 
2304 			for (addr_t offset = 0; offset < newArea->Size();
2305 					offset += B_PAGE_SIZE) {
2306 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2307 					protection, newArea->MemoryType(), &reservation);
2308 			}
2309 
2310 			map->Unlock();
2311 			vm_page_unreserve_pages(&reservation);
2312 		} else {
2313 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2314 			size_t reservePages = map->MaxPagesNeededToMap(
2315 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2316 			vm_page_reservation reservation;
2317 			vm_page_reserve_pages(&reservation, reservePages,
2318 				targetAddressSpace == VMAddressSpace::Kernel()
2319 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2320 
2321 			// map in all pages from source
2322 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2323 					vm_page* page  = it.Next();) {
2324 				if (!page->busy) {
2325 					DEBUG_PAGE_ACCESS_START(page);
2326 					map_page(newArea, page,
2327 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2328 							- newArea->cache_offset),
2329 						protection, &reservation);
2330 					DEBUG_PAGE_ACCESS_END(page);
2331 				}
2332 			}
2333 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2334 			// ensuring that!
2335 
2336 			vm_page_unreserve_pages(&reservation);
2337 		}
2338 	}
2339 	if (status == B_OK)
2340 		newArea->cache_type = sourceArea->cache_type;
2341 
2342 	vm_area_put_locked_cache(cache);
2343 
2344 	if (status < B_OK)
2345 		return status;
2346 
2347 	return newArea->id;
2348 }
2349 
2350 
2351 /*!	Deletes the specified area of the given address space.
2352 
2353 	The address space must be write-locked.
2354 	The caller must ensure that the area does not have any wired ranges.
2355 
2356 	\param addressSpace The address space containing the area.
2357 	\param area The area to be deleted.
2358 	\param deletingAddressSpace \c true, if the address space is in the process
2359 		of being deleted.
2360 */
2361 static void
2362 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2363 	bool deletingAddressSpace)
2364 {
2365 	ASSERT(!area->IsWired());
2366 
2367 	VMAreaHash::Remove(area);
2368 
2369 	// At this point the area is removed from the global hash table, but
2370 	// still exists in the area list.
2371 
2372 	// Unmap the virtual address space the area occupied.
2373 	{
2374 		// We need to lock the complete cache chain.
2375 		VMCache* topCache = vm_area_get_locked_cache(area);
2376 		VMCacheChainLocker cacheChainLocker(topCache);
2377 		cacheChainLocker.LockAllSourceCaches();
2378 
2379 		// If the area's top cache is a temporary cache and the area is the only
2380 		// one referencing it (besides us currently holding a second reference),
2381 		// the unmapping code doesn't need to care about preserving the accessed
2382 		// and dirty flags of the top cache page mappings.
2383 		bool ignoreTopCachePageFlags
2384 			= topCache->temporary && topCache->RefCount() == 2;
2385 
2386 		area->address_space->TranslationMap()->UnmapArea(area,
2387 			deletingAddressSpace, ignoreTopCachePageFlags);
2388 	}
2389 
2390 	if (!area->cache->temporary)
2391 		area->cache->WriteModified();
2392 
2393 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2394 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2395 
2396 	arch_vm_unset_memory_type(area);
2397 	addressSpace->RemoveArea(area, allocationFlags);
2398 	addressSpace->Put();
2399 
2400 	area->cache->RemoveArea(area);
2401 	area->cache->ReleaseRef();
2402 
2403 	addressSpace->DeleteArea(area, allocationFlags);
2404 }
2405 
2406 
2407 status_t
2408 vm_delete_area(team_id team, area_id id, bool kernel)
2409 {
2410 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2411 		team, id));
2412 
2413 	// lock the address space and make sure the area isn't wired
2414 	AddressSpaceWriteLocker locker;
2415 	VMArea* area;
2416 	AreaCacheLocker cacheLocker;
2417 
2418 	do {
2419 		status_t status = locker.SetFromArea(team, id, area);
2420 		if (status != B_OK)
2421 			return status;
2422 
2423 		cacheLocker.SetTo(area);
2424 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2425 
2426 	cacheLocker.Unlock();
2427 
2428 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2429 		return B_NOT_ALLOWED;
2430 
2431 	delete_area(locker.AddressSpace(), area, false);
2432 	return B_OK;
2433 }
2434 
2435 
2436 /*!	Creates a new cache on top of given cache, moves all areas from
2437 	the old cache to the new one, and changes the protection of all affected
2438 	areas' pages to read-only. If requested, wired pages are moved up to the
2439 	new cache and copies are added to the old cache in their place.
2440 	Preconditions:
2441 	- The given cache must be locked.
2442 	- All of the cache's areas' address spaces must be read locked.
2443 	- Either the cache must not have any wired ranges or a page reservation for
2444 	  all wired pages must be provided, so they can be copied.
2445 
2446 	\param lowerCache The cache on top of which a new cache shall be created.
2447 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2448 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2449 		has wired page. The wired pages are copied in this case.
2450 */
2451 static status_t
2452 vm_copy_on_write_area(VMCache* lowerCache,
2453 	vm_page_reservation* wiredPagesReservation)
2454 {
2455 	VMCache* upperCache;
2456 
2457 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2458 
2459 	// We need to separate the cache from its areas. The cache goes one level
2460 	// deeper and we create a new cache inbetween.
2461 
2462 	// create an anonymous cache
2463 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2464 		lowerCache->GuardSize() / B_PAGE_SIZE,
2465 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2466 		VM_PRIORITY_USER);
2467 	if (status != B_OK)
2468 		return status;
2469 
2470 	upperCache->Lock();
2471 
2472 	upperCache->temporary = 1;
2473 	upperCache->virtual_base = lowerCache->virtual_base;
2474 	upperCache->virtual_end = lowerCache->virtual_end;
2475 
2476 	// transfer the lower cache areas to the upper cache
2477 	rw_lock_write_lock(&sAreaCacheLock);
2478 	upperCache->TransferAreas(lowerCache);
2479 	rw_lock_write_unlock(&sAreaCacheLock);
2480 
2481 	lowerCache->AddConsumer(upperCache);
2482 
2483 	// We now need to remap all pages from all of the cache's areas read-only,
2484 	// so that a copy will be created on next write access. If there are wired
2485 	// pages, we keep their protection, move them to the upper cache and create
2486 	// copies for the lower cache.
2487 	if (wiredPagesReservation != NULL) {
2488 		// We need to handle wired pages -- iterate through the cache's pages.
2489 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2490 				vm_page* page = it.Next();) {
2491 			if (page->WiredCount() > 0) {
2492 				// allocate a new page and copy the wired one
2493 				vm_page* copiedPage = vm_page_allocate_page(
2494 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2495 
2496 				vm_memcpy_physical_page(
2497 					copiedPage->physical_page_number * B_PAGE_SIZE,
2498 					page->physical_page_number * B_PAGE_SIZE);
2499 
2500 				// move the wired page to the upper cache (note: removing is OK
2501 				// with the SplayTree iterator) and insert the copy
2502 				upperCache->MovePage(page);
2503 				lowerCache->InsertPage(copiedPage,
2504 					page->cache_offset * B_PAGE_SIZE);
2505 
2506 				DEBUG_PAGE_ACCESS_END(copiedPage);
2507 			} else {
2508 				// Change the protection of this page in all areas.
2509 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2510 						tempArea = tempArea->cache_next) {
2511 					// The area must be readable in the same way it was
2512 					// previously writable.
2513 					uint32 protection = B_KERNEL_READ_AREA;
2514 					if ((tempArea->protection & B_READ_AREA) != 0)
2515 						protection |= B_READ_AREA;
2516 
2517 					VMTranslationMap* map
2518 						= tempArea->address_space->TranslationMap();
2519 					map->Lock();
2520 					map->ProtectPage(tempArea,
2521 						virtual_page_address(tempArea, page), protection);
2522 					map->Unlock();
2523 				}
2524 			}
2525 		}
2526 	} else {
2527 		ASSERT(lowerCache->WiredPagesCount() == 0);
2528 
2529 		// just change the protection of all areas
2530 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2531 				tempArea = tempArea->cache_next) {
2532 			// The area must be readable in the same way it was previously
2533 			// writable.
2534 			uint32 protection = B_KERNEL_READ_AREA;
2535 			if ((tempArea->protection & B_READ_AREA) != 0)
2536 				protection |= B_READ_AREA;
2537 
2538 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2539 			map->Lock();
2540 			map->ProtectArea(tempArea, protection);
2541 			map->Unlock();
2542 		}
2543 	}
2544 
2545 	vm_area_put_locked_cache(upperCache);
2546 
2547 	return B_OK;
2548 }
2549 
2550 
2551 area_id
2552 vm_copy_area(team_id team, const char* name, void** _address,
2553 	uint32 addressSpec, area_id sourceID)
2554 {
2555 	// Do the locking: target address space, all address spaces associated with
2556 	// the source cache, and the cache itself.
2557 	MultiAddressSpaceLocker locker;
2558 	VMAddressSpace* targetAddressSpace;
2559 	VMCache* cache;
2560 	VMArea* source;
2561 	AreaCacheLocker cacheLocker;
2562 	status_t status;
2563 	bool sharedArea;
2564 
2565 	page_num_t wiredPages = 0;
2566 	vm_page_reservation wiredPagesReservation;
2567 
2568 	bool restart;
2569 	do {
2570 		restart = false;
2571 
2572 		locker.Unset();
2573 		status = locker.AddTeam(team, true, &targetAddressSpace);
2574 		if (status == B_OK) {
2575 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2576 				&cache);
2577 		}
2578 		if (status != B_OK)
2579 			return status;
2580 
2581 		cacheLocker.SetTo(cache, true);	// already locked
2582 
2583 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2584 
2585 		page_num_t oldWiredPages = wiredPages;
2586 		wiredPages = 0;
2587 
2588 		// If the source area isn't shared, count the number of wired pages in
2589 		// the cache and reserve as many pages.
2590 		if (!sharedArea) {
2591 			wiredPages = cache->WiredPagesCount();
2592 
2593 			if (wiredPages > oldWiredPages) {
2594 				cacheLocker.Unlock();
2595 				locker.Unlock();
2596 
2597 				if (oldWiredPages > 0)
2598 					vm_page_unreserve_pages(&wiredPagesReservation);
2599 
2600 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2601 					VM_PRIORITY_USER);
2602 
2603 				restart = true;
2604 			}
2605 		} else if (oldWiredPages > 0)
2606 			vm_page_unreserve_pages(&wiredPagesReservation);
2607 	} while (restart);
2608 
2609 	// unreserve pages later
2610 	struct PagesUnreserver {
2611 		PagesUnreserver(vm_page_reservation* reservation)
2612 			:
2613 			fReservation(reservation)
2614 		{
2615 		}
2616 
2617 		~PagesUnreserver()
2618 		{
2619 			if (fReservation != NULL)
2620 				vm_page_unreserve_pages(fReservation);
2621 		}
2622 
2623 	private:
2624 		vm_page_reservation*	fReservation;
2625 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2626 
2627 	bool writableCopy
2628 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2629 	uint8* targetPageProtections = NULL;
2630 
2631 	if (source->page_protections != NULL) {
2632 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2633 		targetPageProtections = (uint8*)malloc_etc(bytes,
2634 			(source->address_space == VMAddressSpace::Kernel()
2635 					|| targetAddressSpace == VMAddressSpace::Kernel())
2636 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2637 		if (targetPageProtections == NULL)
2638 			return B_NO_MEMORY;
2639 
2640 		memcpy(targetPageProtections, source->page_protections, bytes);
2641 
2642 		if (!writableCopy) {
2643 			for (size_t i = 0; i < bytes; i++) {
2644 				if ((targetPageProtections[i]
2645 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2646 					writableCopy = true;
2647 					break;
2648 				}
2649 			}
2650 		}
2651 	}
2652 
2653 	if (addressSpec == B_CLONE_ADDRESS) {
2654 		addressSpec = B_EXACT_ADDRESS;
2655 		*_address = (void*)source->Base();
2656 	}
2657 
2658 	// First, create a cache on top of the source area, respectively use the
2659 	// existing one, if this is a shared area.
2660 
2661 	VMArea* target;
2662 	virtual_address_restrictions addressRestrictions = {};
2663 	addressRestrictions.address = *_address;
2664 	addressRestrictions.address_specification = addressSpec;
2665 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2666 		name, source->Size(), source->wiring, source->protection,
2667 		source->protection_max,
2668 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2669 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2670 		&addressRestrictions, true, &target, _address);
2671 	if (status < B_OK) {
2672 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2673 		return status;
2674 	}
2675 
2676 	if (targetPageProtections != NULL)
2677 		target->page_protections = targetPageProtections;
2678 
2679 	if (sharedArea) {
2680 		// The new area uses the old area's cache, but map_backing_store()
2681 		// hasn't acquired a ref. So we have to do that now.
2682 		cache->AcquireRefLocked();
2683 	}
2684 
2685 	// If the source area is writable, we need to move it one layer up as well
2686 
2687 	if (!sharedArea) {
2688 		if (writableCopy) {
2689 			// TODO: do something more useful if this fails!
2690 			if (vm_copy_on_write_area(cache,
2691 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2692 				panic("vm_copy_on_write_area() failed!\n");
2693 			}
2694 		}
2695 	}
2696 
2697 	// we return the ID of the newly created area
2698 	return target->id;
2699 }
2700 
2701 
2702 status_t
2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2704 	bool kernel)
2705 {
2706 	fix_protection(&newProtection);
2707 
2708 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2709 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2710 
2711 	if (!arch_vm_supports_protection(newProtection))
2712 		return B_NOT_SUPPORTED;
2713 
2714 	bool becomesWritable
2715 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2716 
2717 	// lock address spaces and cache
2718 	MultiAddressSpaceLocker locker;
2719 	VMCache* cache;
2720 	VMArea* area;
2721 	status_t status;
2722 	AreaCacheLocker cacheLocker;
2723 	bool isWritable;
2724 
2725 	bool restart;
2726 	do {
2727 		restart = false;
2728 
2729 		locker.Unset();
2730 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2731 		if (status != B_OK)
2732 			return status;
2733 
2734 		cacheLocker.SetTo(cache, true);	// already locked
2735 
2736 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2737 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2738 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2739 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2740 				" (%s)\n", team, newProtection, areaID, area->name);
2741 			return B_NOT_ALLOWED;
2742 		}
2743 		if (!kernel && area->protection_max != 0
2744 			&& (newProtection & area->protection_max)
2745 				!= (newProtection & B_USER_PROTECTION)) {
2746 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2747 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2748 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2749 				area->protection_max, areaID, area->name);
2750 			return B_NOT_ALLOWED;
2751 		}
2752 
2753 		if (area->protection == newProtection)
2754 			return B_OK;
2755 
2756 		if (team != VMAddressSpace::KernelID()
2757 			&& area->address_space->ID() != team) {
2758 			// unless you're the kernel, you are only allowed to set
2759 			// the protection of your own areas
2760 			return B_NOT_ALLOWED;
2761 		}
2762 
2763 		isWritable
2764 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2765 
2766 		// Make sure the area (respectively, if we're going to call
2767 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2768 		// wired ranges.
2769 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2770 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2771 					otherArea = otherArea->cache_next) {
2772 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2773 					restart = true;
2774 					break;
2775 				}
2776 			}
2777 		} else {
2778 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2779 				restart = true;
2780 		}
2781 	} while (restart);
2782 
2783 	bool changePageProtection = true;
2784 	bool changeTopCachePagesOnly = false;
2785 
2786 	if (isWritable && !becomesWritable) {
2787 		// writable -> !writable
2788 
2789 		if (cache->source != NULL && cache->temporary) {
2790 			if (cache->CountWritableAreas(area) == 0) {
2791 				// Since this cache now lives from the pages in its source cache,
2792 				// we can change the cache's commitment to take only those pages
2793 				// into account that really are in this cache.
2794 
2795 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2796 					team == VMAddressSpace::KernelID()
2797 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2798 
2799 				// TODO: we may be able to join with our source cache, if
2800 				// count == 0
2801 			}
2802 		}
2803 
2804 		// If only the writability changes, we can just remap the pages of the
2805 		// top cache, since the pages of lower caches are mapped read-only
2806 		// anyway. That's advantageous only, if the number of pages in the cache
2807 		// is significantly smaller than the number of pages in the area,
2808 		// though.
2809 		if (newProtection
2810 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2811 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2812 			changeTopCachePagesOnly = true;
2813 		}
2814 	} else if (!isWritable && becomesWritable) {
2815 		// !writable -> writable
2816 
2817 		if (!cache->consumers.IsEmpty()) {
2818 			// There are consumers -- we have to insert a new cache. Fortunately
2819 			// vm_copy_on_write_area() does everything that's needed.
2820 			changePageProtection = false;
2821 			status = vm_copy_on_write_area(cache, NULL);
2822 		} else {
2823 			// No consumers, so we don't need to insert a new one.
2824 			if (cache->source != NULL && cache->temporary) {
2825 				// the cache's commitment must contain all possible pages
2826 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2827 					team == VMAddressSpace::KernelID()
2828 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2829 			}
2830 
2831 			if (status == B_OK && cache->source != NULL) {
2832 				// There's a source cache, hence we can't just change all pages'
2833 				// protection or we might allow writing into pages belonging to
2834 				// a lower cache.
2835 				changeTopCachePagesOnly = true;
2836 			}
2837 		}
2838 	} else {
2839 		// we don't have anything special to do in all other cases
2840 	}
2841 
2842 	if (status == B_OK) {
2843 		// remap existing pages in this cache
2844 		if (changePageProtection) {
2845 			VMTranslationMap* map = area->address_space->TranslationMap();
2846 			map->Lock();
2847 
2848 			if (changeTopCachePagesOnly) {
2849 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2850 				page_num_t lastPageOffset
2851 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2852 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2853 						vm_page* page = it.Next();) {
2854 					if (page->cache_offset >= firstPageOffset
2855 						&& page->cache_offset <= lastPageOffset) {
2856 						addr_t address = virtual_page_address(area, page);
2857 						map->ProtectPage(area, address, newProtection);
2858 					}
2859 				}
2860 			} else
2861 				map->ProtectArea(area, newProtection);
2862 
2863 			map->Unlock();
2864 		}
2865 
2866 		area->protection = newProtection;
2867 	}
2868 
2869 	return status;
2870 }
2871 
2872 
2873 status_t
2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2875 {
2876 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2877 	if (addressSpace == NULL)
2878 		return B_BAD_TEAM_ID;
2879 
2880 	VMTranslationMap* map = addressSpace->TranslationMap();
2881 
2882 	map->Lock();
2883 	uint32 dummyFlags;
2884 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2885 	map->Unlock();
2886 
2887 	addressSpace->Put();
2888 	return status;
2889 }
2890 
2891 
2892 /*!	The page's cache must be locked.
2893 */
2894 bool
2895 vm_test_map_modification(vm_page* page)
2896 {
2897 	if (page->modified)
2898 		return true;
2899 
2900 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2901 	vm_page_mapping* mapping;
2902 	while ((mapping = iterator.Next()) != NULL) {
2903 		VMArea* area = mapping->area;
2904 		VMTranslationMap* map = area->address_space->TranslationMap();
2905 
2906 		phys_addr_t physicalAddress;
2907 		uint32 flags;
2908 		map->Lock();
2909 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2910 		map->Unlock();
2911 
2912 		if ((flags & PAGE_MODIFIED) != 0)
2913 			return true;
2914 	}
2915 
2916 	return false;
2917 }
2918 
2919 
2920 /*!	The page's cache must be locked.
2921 */
2922 void
2923 vm_clear_map_flags(vm_page* page, uint32 flags)
2924 {
2925 	if ((flags & PAGE_ACCESSED) != 0)
2926 		page->accessed = false;
2927 	if ((flags & PAGE_MODIFIED) != 0)
2928 		page->modified = false;
2929 
2930 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2931 	vm_page_mapping* mapping;
2932 	while ((mapping = iterator.Next()) != NULL) {
2933 		VMArea* area = mapping->area;
2934 		VMTranslationMap* map = area->address_space->TranslationMap();
2935 
2936 		map->Lock();
2937 		map->ClearFlags(virtual_page_address(area, page), flags);
2938 		map->Unlock();
2939 	}
2940 }
2941 
2942 
2943 /*!	Removes all mappings from a page.
2944 	After you've called this function, the page is unmapped from memory and
2945 	the page's \c accessed and \c modified flags have been updated according
2946 	to the state of the mappings.
2947 	The page's cache must be locked.
2948 */
2949 void
2950 vm_remove_all_page_mappings(vm_page* page)
2951 {
2952 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2953 		VMArea* area = mapping->area;
2954 		VMTranslationMap* map = area->address_space->TranslationMap();
2955 		addr_t address = virtual_page_address(area, page);
2956 		map->UnmapPage(area, address, false);
2957 	}
2958 }
2959 
2960 
2961 int32
2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2963 {
2964 	int32 count = 0;
2965 
2966 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2967 	vm_page_mapping* mapping;
2968 	while ((mapping = iterator.Next()) != NULL) {
2969 		VMArea* area = mapping->area;
2970 		VMTranslationMap* map = area->address_space->TranslationMap();
2971 
2972 		bool modified;
2973 		if (map->ClearAccessedAndModified(area,
2974 				virtual_page_address(area, page), false, modified)) {
2975 			count++;
2976 		}
2977 
2978 		page->modified |= modified;
2979 	}
2980 
2981 
2982 	if (page->accessed) {
2983 		count++;
2984 		page->accessed = false;
2985 	}
2986 
2987 	return count;
2988 }
2989 
2990 
2991 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2992 	mappings.
2993 	The function iterates through the page mappings and removes them until
2994 	encountering one that has been accessed. From then on it will continue to
2995 	iterate, but only clear the accessed flag of the mapping. The page's
2996 	\c modified bit will be updated accordingly, the \c accessed bit will be
2997 	cleared.
2998 	\return The number of mapping accessed bits encountered, including the
2999 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3000 		of the page have been removed.
3001 */
3002 int32
3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3004 {
3005 	ASSERT(page->WiredCount() == 0);
3006 
3007 	if (page->accessed)
3008 		return vm_clear_page_mapping_accessed_flags(page);
3009 
3010 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3011 		VMArea* area = mapping->area;
3012 		VMTranslationMap* map = area->address_space->TranslationMap();
3013 		addr_t address = virtual_page_address(area, page);
3014 		bool modified = false;
3015 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3016 			page->accessed = true;
3017 			page->modified |= modified;
3018 			return vm_clear_page_mapping_accessed_flags(page);
3019 		}
3020 		page->modified |= modified;
3021 	}
3022 
3023 	return 0;
3024 }
3025 
3026 
3027 static int
3028 display_mem(int argc, char** argv)
3029 {
3030 	bool physical = false;
3031 	addr_t copyAddress;
3032 	int32 displayWidth;
3033 	int32 itemSize;
3034 	int32 num = -1;
3035 	addr_t address;
3036 	int i = 1, j;
3037 
3038 	if (argc > 1 && argv[1][0] == '-') {
3039 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3040 			physical = true;
3041 			i++;
3042 		} else
3043 			i = 99;
3044 	}
3045 
3046 	if (argc < i + 1 || argc > i + 2) {
3047 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3048 			"\tdl - 8 bytes\n"
3049 			"\tdw - 4 bytes\n"
3050 			"\tds - 2 bytes\n"
3051 			"\tdb - 1 byte\n"
3052 			"\tstring - a whole string\n"
3053 			"  -p or --physical only allows memory from a single page to be "
3054 			"displayed.\n");
3055 		return 0;
3056 	}
3057 
3058 	address = parse_expression(argv[i]);
3059 
3060 	if (argc > i + 1)
3061 		num = parse_expression(argv[i + 1]);
3062 
3063 	// build the format string
3064 	if (strcmp(argv[0], "db") == 0) {
3065 		itemSize = 1;
3066 		displayWidth = 16;
3067 	} else if (strcmp(argv[0], "ds") == 0) {
3068 		itemSize = 2;
3069 		displayWidth = 8;
3070 	} else if (strcmp(argv[0], "dw") == 0) {
3071 		itemSize = 4;
3072 		displayWidth = 4;
3073 	} else if (strcmp(argv[0], "dl") == 0) {
3074 		itemSize = 8;
3075 		displayWidth = 2;
3076 	} else if (strcmp(argv[0], "string") == 0) {
3077 		itemSize = 1;
3078 		displayWidth = -1;
3079 	} else {
3080 		kprintf("display_mem called in an invalid way!\n");
3081 		return 0;
3082 	}
3083 
3084 	if (num <= 0)
3085 		num = displayWidth;
3086 
3087 	void* physicalPageHandle = NULL;
3088 
3089 	if (physical) {
3090 		int32 offset = address & (B_PAGE_SIZE - 1);
3091 		if (num * itemSize + offset > B_PAGE_SIZE) {
3092 			num = (B_PAGE_SIZE - offset) / itemSize;
3093 			kprintf("NOTE: number of bytes has been cut to page size\n");
3094 		}
3095 
3096 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3097 
3098 		if (vm_get_physical_page_debug(address, &copyAddress,
3099 				&physicalPageHandle) != B_OK) {
3100 			kprintf("getting the hardware page failed.");
3101 			return 0;
3102 		}
3103 
3104 		address += offset;
3105 		copyAddress += offset;
3106 	} else
3107 		copyAddress = address;
3108 
3109 	if (!strcmp(argv[0], "string")) {
3110 		kprintf("%p \"", (char*)copyAddress);
3111 
3112 		// string mode
3113 		for (i = 0; true; i++) {
3114 			char c;
3115 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3116 					!= B_OK
3117 				|| c == '\0') {
3118 				break;
3119 			}
3120 
3121 			if (c == '\n')
3122 				kprintf("\\n");
3123 			else if (c == '\t')
3124 				kprintf("\\t");
3125 			else {
3126 				if (!isprint(c))
3127 					c = '.';
3128 
3129 				kprintf("%c", c);
3130 			}
3131 		}
3132 
3133 		kprintf("\"\n");
3134 	} else {
3135 		// number mode
3136 		for (i = 0; i < num; i++) {
3137 			uint64 value;
3138 
3139 			if ((i % displayWidth) == 0) {
3140 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3141 				if (i != 0)
3142 					kprintf("\n");
3143 
3144 				kprintf("[0x%lx]  ", address + i * itemSize);
3145 
3146 				for (j = 0; j < displayed; j++) {
3147 					char c;
3148 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3149 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3150 						displayed = j;
3151 						break;
3152 					}
3153 					if (!isprint(c))
3154 						c = '.';
3155 
3156 					kprintf("%c", c);
3157 				}
3158 				if (num > displayWidth) {
3159 					// make sure the spacing in the last line is correct
3160 					for (j = displayed; j < displayWidth * itemSize; j++)
3161 						kprintf(" ");
3162 				}
3163 				kprintf("  ");
3164 			}
3165 
3166 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3167 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3168 				kprintf("read fault");
3169 				break;
3170 			}
3171 
3172 			switch (itemSize) {
3173 				case 1:
3174 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3175 					break;
3176 				case 2:
3177 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3178 					break;
3179 				case 4:
3180 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3181 					break;
3182 				case 8:
3183 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3184 					break;
3185 			}
3186 		}
3187 
3188 		kprintf("\n");
3189 	}
3190 
3191 	if (physical) {
3192 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3193 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3194 	}
3195 	return 0;
3196 }
3197 
3198 
3199 static void
3200 dump_cache_tree_recursively(VMCache* cache, int level,
3201 	VMCache* highlightCache)
3202 {
3203 	// print this cache
3204 	for (int i = 0; i < level; i++)
3205 		kprintf("  ");
3206 	if (cache == highlightCache)
3207 		kprintf("%p <--\n", cache);
3208 	else
3209 		kprintf("%p\n", cache);
3210 
3211 	// recursively print its consumers
3212 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3213 			VMCache* consumer = it.Next();) {
3214 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3215 	}
3216 }
3217 
3218 
3219 static int
3220 dump_cache_tree(int argc, char** argv)
3221 {
3222 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3223 		kprintf("usage: %s <address>\n", argv[0]);
3224 		return 0;
3225 	}
3226 
3227 	addr_t address = parse_expression(argv[1]);
3228 	if (address == 0)
3229 		return 0;
3230 
3231 	VMCache* cache = (VMCache*)address;
3232 	VMCache* root = cache;
3233 
3234 	// find the root cache (the transitive source)
3235 	while (root->source != NULL)
3236 		root = root->source;
3237 
3238 	dump_cache_tree_recursively(root, 0, cache);
3239 
3240 	return 0;
3241 }
3242 
3243 
3244 const char*
3245 vm_cache_type_to_string(int32 type)
3246 {
3247 	switch (type) {
3248 		case CACHE_TYPE_RAM:
3249 			return "RAM";
3250 		case CACHE_TYPE_DEVICE:
3251 			return "device";
3252 		case CACHE_TYPE_VNODE:
3253 			return "vnode";
3254 		case CACHE_TYPE_NULL:
3255 			return "null";
3256 
3257 		default:
3258 			return "unknown";
3259 	}
3260 }
3261 
3262 
3263 #if DEBUG_CACHE_LIST
3264 
3265 static void
3266 update_cache_info_recursively(VMCache* cache, cache_info& info)
3267 {
3268 	info.page_count += cache->page_count;
3269 	if (cache->type == CACHE_TYPE_RAM)
3270 		info.committed += cache->committed_size;
3271 
3272 	// recurse
3273 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3274 			VMCache* consumer = it.Next();) {
3275 		update_cache_info_recursively(consumer, info);
3276 	}
3277 }
3278 
3279 
3280 static int
3281 cache_info_compare_page_count(const void* _a, const void* _b)
3282 {
3283 	const cache_info* a = (const cache_info*)_a;
3284 	const cache_info* b = (const cache_info*)_b;
3285 	if (a->page_count == b->page_count)
3286 		return 0;
3287 	return a->page_count < b->page_count ? 1 : -1;
3288 }
3289 
3290 
3291 static int
3292 cache_info_compare_committed(const void* _a, const void* _b)
3293 {
3294 	const cache_info* a = (const cache_info*)_a;
3295 	const cache_info* b = (const cache_info*)_b;
3296 	if (a->committed == b->committed)
3297 		return 0;
3298 	return a->committed < b->committed ? 1 : -1;
3299 }
3300 
3301 
3302 static void
3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3304 {
3305 	for (int i = 0; i < level; i++)
3306 		kprintf("  ");
3307 
3308 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3309 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3310 		cache->virtual_base, cache->virtual_end, cache->page_count);
3311 
3312 	if (level == 0)
3313 		kprintf("/%lu", info.page_count);
3314 
3315 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3316 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3317 
3318 		if (level == 0)
3319 			kprintf("/%lu", info.committed);
3320 	}
3321 
3322 	// areas
3323 	if (cache->areas != NULL) {
3324 		VMArea* area = cache->areas;
3325 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3326 			area->name, area->address_space->ID());
3327 
3328 		while (area->cache_next != NULL) {
3329 			area = area->cache_next;
3330 			kprintf(", %" B_PRId32, area->id);
3331 		}
3332 	}
3333 
3334 	kputs("\n");
3335 
3336 	// recurse
3337 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3338 			VMCache* consumer = it.Next();) {
3339 		dump_caches_recursively(consumer, info, level + 1);
3340 	}
3341 }
3342 
3343 
3344 static int
3345 dump_caches(int argc, char** argv)
3346 {
3347 	if (sCacheInfoTable == NULL) {
3348 		kprintf("No cache info table!\n");
3349 		return 0;
3350 	}
3351 
3352 	bool sortByPageCount = true;
3353 
3354 	for (int32 i = 1; i < argc; i++) {
3355 		if (strcmp(argv[i], "-c") == 0) {
3356 			sortByPageCount = false;
3357 		} else {
3358 			print_debugger_command_usage(argv[0]);
3359 			return 0;
3360 		}
3361 	}
3362 
3363 	uint32 totalCount = 0;
3364 	uint32 rootCount = 0;
3365 	off_t totalCommitted = 0;
3366 	page_num_t totalPages = 0;
3367 
3368 	VMCache* cache = gDebugCacheList;
3369 	while (cache) {
3370 		totalCount++;
3371 		if (cache->source == NULL) {
3372 			cache_info stackInfo;
3373 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3374 				? sCacheInfoTable[rootCount] : stackInfo;
3375 			rootCount++;
3376 			info.cache = cache;
3377 			info.page_count = 0;
3378 			info.committed = 0;
3379 			update_cache_info_recursively(cache, info);
3380 			totalCommitted += info.committed;
3381 			totalPages += info.page_count;
3382 		}
3383 
3384 		cache = cache->debug_next;
3385 	}
3386 
3387 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3388 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3389 			sortByPageCount
3390 				? &cache_info_compare_page_count
3391 				: &cache_info_compare_committed);
3392 	}
3393 
3394 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3395 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3396 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3397 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3398 			"page count" : "committed size");
3399 
3400 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3401 		for (uint32 i = 0; i < rootCount; i++) {
3402 			cache_info& info = sCacheInfoTable[i];
3403 			dump_caches_recursively(info.cache, info, 0);
3404 		}
3405 	} else
3406 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3407 
3408 	return 0;
3409 }
3410 
3411 #endif	// DEBUG_CACHE_LIST
3412 
3413 
3414 static int
3415 dump_cache(int argc, char** argv)
3416 {
3417 	VMCache* cache;
3418 	bool showPages = false;
3419 	int i = 1;
3420 
3421 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3422 		kprintf("usage: %s [-ps] <address>\n"
3423 			"  if -p is specified, all pages are shown, if -s is used\n"
3424 			"  only the cache info is shown respectively.\n", argv[0]);
3425 		return 0;
3426 	}
3427 	while (argv[i][0] == '-') {
3428 		char* arg = argv[i] + 1;
3429 		while (arg[0]) {
3430 			if (arg[0] == 'p')
3431 				showPages = true;
3432 			arg++;
3433 		}
3434 		i++;
3435 	}
3436 	if (argv[i] == NULL) {
3437 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	addr_t address = parse_expression(argv[i]);
3442 	if (address == 0)
3443 		return 0;
3444 
3445 	cache = (VMCache*)address;
3446 
3447 	cache->Dump(showPages);
3448 
3449 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3450 
3451 	return 0;
3452 }
3453 
3454 
3455 static void
3456 dump_area_struct(VMArea* area, bool mappings)
3457 {
3458 	kprintf("AREA: %p\n", area);
3459 	kprintf("name:\t\t'%s'\n", area->name);
3460 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3461 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3462 	kprintf("base:\t\t0x%lx\n", area->Base());
3463 	kprintf("size:\t\t0x%lx\n", area->Size());
3464 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3465 	kprintf("page_protection:%p\n", area->page_protections);
3466 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3467 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3468 	kprintf("cache:\t\t%p\n", area->cache);
3469 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3470 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3471 	kprintf("cache_next:\t%p\n", area->cache_next);
3472 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3473 
3474 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3475 	if (mappings) {
3476 		kprintf("page mappings:\n");
3477 		while (iterator.HasNext()) {
3478 			vm_page_mapping* mapping = iterator.Next();
3479 			kprintf("  %p", mapping->page);
3480 		}
3481 		kprintf("\n");
3482 	} else {
3483 		uint32 count = 0;
3484 		while (iterator.Next() != NULL) {
3485 			count++;
3486 		}
3487 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3488 	}
3489 }
3490 
3491 
3492 static int
3493 dump_area(int argc, char** argv)
3494 {
3495 	bool mappings = false;
3496 	bool found = false;
3497 	int32 index = 1;
3498 	VMArea* area;
3499 	addr_t num;
3500 
3501 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3502 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3503 			"All areas matching either id/address/name are listed. You can\n"
3504 			"force to check only a specific item by prefixing the specifier\n"
3505 			"with the id/contains/address/name keywords.\n"
3506 			"-m shows the area's mappings as well.\n");
3507 		return 0;
3508 	}
3509 
3510 	if (!strcmp(argv[1], "-m")) {
3511 		mappings = true;
3512 		index++;
3513 	}
3514 
3515 	int32 mode = 0xf;
3516 	if (!strcmp(argv[index], "id"))
3517 		mode = 1;
3518 	else if (!strcmp(argv[index], "contains"))
3519 		mode = 2;
3520 	else if (!strcmp(argv[index], "name"))
3521 		mode = 4;
3522 	else if (!strcmp(argv[index], "address"))
3523 		mode = 0;
3524 	if (mode != 0xf)
3525 		index++;
3526 
3527 	if (index >= argc) {
3528 		kprintf("No area specifier given.\n");
3529 		return 0;
3530 	}
3531 
3532 	num = parse_expression(argv[index]);
3533 
3534 	if (mode == 0) {
3535 		dump_area_struct((struct VMArea*)num, mappings);
3536 	} else {
3537 		// walk through the area list, looking for the arguments as a name
3538 
3539 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3540 		while ((area = it.Next()) != NULL) {
3541 			if (((mode & 4) != 0
3542 					&& !strcmp(argv[index], area->name))
3543 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3544 					|| (((mode & 2) != 0 && area->Base() <= num
3545 						&& area->Base() + area->Size() > num))))) {
3546 				dump_area_struct(area, mappings);
3547 				found = true;
3548 			}
3549 		}
3550 
3551 		if (!found)
3552 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3553 	}
3554 
3555 	return 0;
3556 }
3557 
3558 
3559 static int
3560 dump_area_list(int argc, char** argv)
3561 {
3562 	VMArea* area;
3563 	const char* name = NULL;
3564 	int32 id = 0;
3565 
3566 	if (argc > 1) {
3567 		id = parse_expression(argv[1]);
3568 		if (id == 0)
3569 			name = argv[1];
3570 	}
3571 
3572 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3573 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3574 		B_PRINTF_POINTER_WIDTH, "size");
3575 
3576 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3577 	while ((area = it.Next()) != NULL) {
3578 		if ((id != 0 && area->address_space->ID() != id)
3579 			|| (name != NULL && strstr(area->name, name) == NULL))
3580 			continue;
3581 
3582 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3583 			area->id, (void*)area->Base(), (void*)area->Size(),
3584 			area->protection, area->wiring, area->name);
3585 	}
3586 	return 0;
3587 }
3588 
3589 
3590 static int
3591 dump_available_memory(int argc, char** argv)
3592 {
3593 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3594 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3595 	return 0;
3596 }
3597 
3598 
3599 static int
3600 dump_mapping_info(int argc, char** argv)
3601 {
3602 	bool reverseLookup = false;
3603 	bool pageLookup = false;
3604 
3605 	int argi = 1;
3606 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3607 		const char* arg = argv[argi];
3608 		if (strcmp(arg, "-r") == 0) {
3609 			reverseLookup = true;
3610 		} else if (strcmp(arg, "-p") == 0) {
3611 			reverseLookup = true;
3612 			pageLookup = true;
3613 		} else {
3614 			print_debugger_command_usage(argv[0]);
3615 			return 0;
3616 		}
3617 	}
3618 
3619 	// We need at least one argument, the address. Optionally a thread ID can be
3620 	// specified.
3621 	if (argi >= argc || argi + 2 < argc) {
3622 		print_debugger_command_usage(argv[0]);
3623 		return 0;
3624 	}
3625 
3626 	uint64 addressValue;
3627 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3628 		return 0;
3629 
3630 	Team* team = NULL;
3631 	if (argi < argc) {
3632 		uint64 threadID;
3633 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3634 			return 0;
3635 
3636 		Thread* thread = Thread::GetDebug(threadID);
3637 		if (thread == NULL) {
3638 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3639 			return 0;
3640 		}
3641 
3642 		team = thread->team;
3643 	}
3644 
3645 	if (reverseLookup) {
3646 		phys_addr_t physicalAddress;
3647 		if (pageLookup) {
3648 			vm_page* page = (vm_page*)(addr_t)addressValue;
3649 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3650 		} else {
3651 			physicalAddress = (phys_addr_t)addressValue;
3652 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3653 		}
3654 
3655 		kprintf("    Team     Virtual Address      Area\n");
3656 		kprintf("--------------------------------------\n");
3657 
3658 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3659 			Callback()
3660 				:
3661 				fAddressSpace(NULL)
3662 			{
3663 			}
3664 
3665 			void SetAddressSpace(VMAddressSpace* addressSpace)
3666 			{
3667 				fAddressSpace = addressSpace;
3668 			}
3669 
3670 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3671 			{
3672 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3673 					virtualAddress);
3674 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3675 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3676 				else
3677 					kprintf("\n");
3678 				return false;
3679 			}
3680 
3681 		private:
3682 			VMAddressSpace*	fAddressSpace;
3683 		} callback;
3684 
3685 		if (team != NULL) {
3686 			// team specified -- get its address space
3687 			VMAddressSpace* addressSpace = team->address_space;
3688 			if (addressSpace == NULL) {
3689 				kprintf("Failed to get address space!\n");
3690 				return 0;
3691 			}
3692 
3693 			callback.SetAddressSpace(addressSpace);
3694 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3695 				physicalAddress, callback);
3696 		} else {
3697 			// no team specified -- iterate through all address spaces
3698 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3699 				addressSpace != NULL;
3700 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3701 				callback.SetAddressSpace(addressSpace);
3702 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3703 					physicalAddress, callback);
3704 			}
3705 		}
3706 	} else {
3707 		// get the address space
3708 		addr_t virtualAddress = (addr_t)addressValue;
3709 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3710 		VMAddressSpace* addressSpace;
3711 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3712 			addressSpace = VMAddressSpace::Kernel();
3713 		} else if (team != NULL) {
3714 			addressSpace = team->address_space;
3715 		} else {
3716 			Thread* thread = debug_get_debugged_thread();
3717 			if (thread == NULL || thread->team == NULL) {
3718 				kprintf("Failed to get team!\n");
3719 				return 0;
3720 			}
3721 
3722 			addressSpace = thread->team->address_space;
3723 		}
3724 
3725 		if (addressSpace == NULL) {
3726 			kprintf("Failed to get address space!\n");
3727 			return 0;
3728 		}
3729 
3730 		// let the translation map implementation do the job
3731 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3732 	}
3733 
3734 	return 0;
3735 }
3736 
3737 
3738 /*!	Deletes all areas and reserved regions in the given address space.
3739 
3740 	The caller must ensure that none of the areas has any wired ranges.
3741 
3742 	\param addressSpace The address space.
3743 	\param deletingAddressSpace \c true, if the address space is in the process
3744 		of being deleted.
3745 */
3746 void
3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3748 {
3749 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3750 		addressSpace->ID()));
3751 
3752 	addressSpace->WriteLock();
3753 
3754 	// remove all reserved areas in this address space
3755 	addressSpace->UnreserveAllAddressRanges(0);
3756 
3757 	// delete all the areas in this address space
3758 	while (VMArea* area = addressSpace->FirstArea()) {
3759 		ASSERT(!area->IsWired());
3760 		delete_area(addressSpace, area, deletingAddressSpace);
3761 	}
3762 
3763 	addressSpace->WriteUnlock();
3764 }
3765 
3766 
3767 static area_id
3768 vm_area_for(addr_t address, bool kernel)
3769 {
3770 	team_id team;
3771 	if (IS_USER_ADDRESS(address)) {
3772 		// we try the user team address space, if any
3773 		team = VMAddressSpace::CurrentID();
3774 		if (team < 0)
3775 			return team;
3776 	} else
3777 		team = VMAddressSpace::KernelID();
3778 
3779 	AddressSpaceReadLocker locker(team);
3780 	if (!locker.IsLocked())
3781 		return B_BAD_TEAM_ID;
3782 
3783 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3784 	if (area != NULL) {
3785 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3786 			return B_ERROR;
3787 
3788 		return area->id;
3789 	}
3790 
3791 	return B_ERROR;
3792 }
3793 
3794 
3795 /*!	Frees physical pages that were used during the boot process.
3796 	\a end is inclusive.
3797 */
3798 static void
3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3800 {
3801 	// free all physical pages in the specified range
3802 
3803 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3804 		phys_addr_t physicalAddress;
3805 		uint32 flags;
3806 
3807 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3808 			&& (flags & PAGE_PRESENT) != 0) {
3809 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3810 			if (page != NULL && page->State() != PAGE_STATE_FREE
3811 					&& page->State() != PAGE_STATE_CLEAR
3812 					&& page->State() != PAGE_STATE_UNUSED) {
3813 				DEBUG_PAGE_ACCESS_START(page);
3814 				vm_page_set_state(page, PAGE_STATE_FREE);
3815 			}
3816 		}
3817 	}
3818 
3819 	// unmap the memory
3820 	map->Unmap(start, end);
3821 }
3822 
3823 
3824 void
3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3826 {
3827 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3828 	addr_t end = start + (size - 1);
3829 	addr_t lastEnd = start;
3830 
3831 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3832 		(void*)start, (void*)end));
3833 
3834 	// The areas are sorted in virtual address space order, so
3835 	// we just have to find the holes between them that fall
3836 	// into the area we should dispose
3837 
3838 	map->Lock();
3839 
3840 	for (VMAddressSpace::AreaIterator it
3841 				= VMAddressSpace::Kernel()->GetAreaIterator();
3842 			VMArea* area = it.Next();) {
3843 		addr_t areaStart = area->Base();
3844 		addr_t areaEnd = areaStart + (area->Size() - 1);
3845 
3846 		if (areaEnd < start)
3847 			continue;
3848 
3849 		if (areaStart > end) {
3850 			// we are done, the area is already beyond of what we have to free
3851 			break;
3852 		}
3853 
3854 		if (areaStart > lastEnd) {
3855 			// this is something we can free
3856 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3857 				(void*)areaStart));
3858 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3859 		}
3860 
3861 		if (areaEnd >= end) {
3862 			lastEnd = areaEnd;
3863 				// no +1 to prevent potential overflow
3864 			break;
3865 		}
3866 
3867 		lastEnd = areaEnd + 1;
3868 	}
3869 
3870 	if (lastEnd < end) {
3871 		// we can also get rid of some space at the end of the area
3872 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3873 			(void*)end));
3874 		unmap_and_free_physical_pages(map, lastEnd, end);
3875 	}
3876 
3877 	map->Unlock();
3878 }
3879 
3880 
3881 static void
3882 create_preloaded_image_areas(struct preloaded_image* _image)
3883 {
3884 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3885 	char name[B_OS_NAME_LENGTH];
3886 	void* address;
3887 	int32 length;
3888 
3889 	// use file name to create a good area name
3890 	char* fileName = strrchr(image->name, '/');
3891 	if (fileName == NULL)
3892 		fileName = image->name;
3893 	else
3894 		fileName++;
3895 
3896 	length = strlen(fileName);
3897 	// make sure there is enough space for the suffix
3898 	if (length > 25)
3899 		length = 25;
3900 
3901 	memcpy(name, fileName, length);
3902 	strcpy(name + length, "_text");
3903 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3904 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3905 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3906 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3907 		// this will later be remapped read-only/executable by the
3908 		// ELF initialization code
3909 
3910 	strcpy(name + length, "_data");
3911 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3912 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3913 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3914 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3915 }
3916 
3917 
3918 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3919 	Any boot loader resources contained in that arguments must not be accessed
3920 	anymore past this point.
3921 */
3922 void
3923 vm_free_kernel_args(kernel_args* args)
3924 {
3925 	uint32 i;
3926 
3927 	TRACE(("vm_free_kernel_args()\n"));
3928 
3929 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3930 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3931 		if (area >= B_OK)
3932 			delete_area(area);
3933 	}
3934 }
3935 
3936 
3937 static void
3938 allocate_kernel_args(kernel_args* args)
3939 {
3940 	TRACE(("allocate_kernel_args()\n"));
3941 
3942 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3943 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3944 
3945 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3946 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3947 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3948 	}
3949 }
3950 
3951 
3952 static void
3953 unreserve_boot_loader_ranges(kernel_args* args)
3954 {
3955 	TRACE(("unreserve_boot_loader_ranges()\n"));
3956 
3957 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3958 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3959 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3960 			args->virtual_allocated_range[i].size);
3961 	}
3962 }
3963 
3964 
3965 static void
3966 reserve_boot_loader_ranges(kernel_args* args)
3967 {
3968 	TRACE(("reserve_boot_loader_ranges()\n"));
3969 
3970 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3971 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3972 
3973 		// If the address is no kernel address, we just skip it. The
3974 		// architecture specific code has to deal with it.
3975 		if (!IS_KERNEL_ADDRESS(address)) {
3976 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3977 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3978 			continue;
3979 		}
3980 
3981 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3982 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3983 		if (status < B_OK)
3984 			panic("could not reserve boot loader ranges\n");
3985 	}
3986 }
3987 
3988 
3989 static addr_t
3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3991 {
3992 	size = PAGE_ALIGN(size);
3993 
3994 	// find a slot in the virtual allocation addr range
3995 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3996 		// check to see if the space between this one and the last is big enough
3997 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3998 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3999 			+ args->virtual_allocated_range[i - 1].size;
4000 
4001 		addr_t base = alignment > 0
4002 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4003 
4004 		if (base >= KERNEL_BASE && base < rangeStart
4005 				&& rangeStart - base >= size) {
4006 			args->virtual_allocated_range[i - 1].size
4007 				+= base + size - previousRangeEnd;
4008 			return base;
4009 		}
4010 	}
4011 
4012 	// we hadn't found one between allocation ranges. this is ok.
4013 	// see if there's a gap after the last one
4014 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4015 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4016 		+ args->virtual_allocated_range[lastEntryIndex].size;
4017 	addr_t base = alignment > 0
4018 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4019 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4020 		args->virtual_allocated_range[lastEntryIndex].size
4021 			+= base + size - lastRangeEnd;
4022 		return base;
4023 	}
4024 
4025 	// see if there's a gap before the first one
4026 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4027 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4028 		base = rangeStart - size;
4029 		if (alignment > 0)
4030 			base = ROUNDDOWN(base, alignment);
4031 
4032 		if (base >= KERNEL_BASE) {
4033 			args->virtual_allocated_range[0].start = base;
4034 			args->virtual_allocated_range[0].size += rangeStart - base;
4035 			return base;
4036 		}
4037 	}
4038 
4039 	return 0;
4040 }
4041 
4042 
4043 static bool
4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4045 {
4046 	// TODO: horrible brute-force method of determining if the page can be
4047 	// allocated
4048 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4049 		if (address >= args->physical_memory_range[i].start
4050 			&& address < args->physical_memory_range[i].start
4051 				+ args->physical_memory_range[i].size)
4052 			return true;
4053 	}
4054 	return false;
4055 }
4056 
4057 
4058 page_num_t
4059 vm_allocate_early_physical_page(kernel_args* args)
4060 {
4061 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4062 		phys_addr_t nextPage;
4063 
4064 		nextPage = args->physical_allocated_range[i].start
4065 			+ args->physical_allocated_range[i].size;
4066 		// see if the page after the next allocated paddr run can be allocated
4067 		if (i + 1 < args->num_physical_allocated_ranges
4068 			&& args->physical_allocated_range[i + 1].size != 0) {
4069 			// see if the next page will collide with the next allocated range
4070 			if (nextPage >= args->physical_allocated_range[i+1].start)
4071 				continue;
4072 		}
4073 		// see if the next physical page fits in the memory block
4074 		if (is_page_in_physical_memory_range(args, nextPage)) {
4075 			// we got one!
4076 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4077 			return nextPage / B_PAGE_SIZE;
4078 		}
4079 	}
4080 
4081 	// Expanding upwards didn't work, try going downwards.
4082 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4083 		phys_addr_t nextPage;
4084 
4085 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4086 		// see if the page after the prev allocated paddr run can be allocated
4087 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4088 			// see if the next page will collide with the next allocated range
4089 			if (nextPage < args->physical_allocated_range[i-1].start
4090 				+ args->physical_allocated_range[i-1].size)
4091 				continue;
4092 		}
4093 		// see if the next physical page fits in the memory block
4094 		if (is_page_in_physical_memory_range(args, nextPage)) {
4095 			// we got one!
4096 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4097 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4098 			return nextPage / B_PAGE_SIZE;
4099 		}
4100 	}
4101 
4102 	return 0;
4103 		// could not allocate a block
4104 }
4105 
4106 
4107 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4108 	allocate some pages before the VM is completely up.
4109 */
4110 addr_t
4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4112 	uint32 attributes, addr_t alignment)
4113 {
4114 	if (physicalSize > virtualSize)
4115 		physicalSize = virtualSize;
4116 
4117 	// find the vaddr to allocate at
4118 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4119 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4120 	if (virtualBase == 0) {
4121 		panic("vm_allocate_early: could not allocate virtual address\n");
4122 		return 0;
4123 	}
4124 
4125 	// map the pages
4126 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4127 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4128 		if (physicalAddress == 0)
4129 			panic("error allocating early page!\n");
4130 
4131 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4132 
4133 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4134 			physicalAddress * B_PAGE_SIZE, attributes,
4135 			&vm_allocate_early_physical_page);
4136 	}
4137 
4138 	return virtualBase;
4139 }
4140 
4141 
4142 /*!	The main entrance point to initialize the VM. */
4143 status_t
4144 vm_init(kernel_args* args)
4145 {
4146 	struct preloaded_image* image;
4147 	void* address;
4148 	status_t err = 0;
4149 	uint32 i;
4150 
4151 	TRACE(("vm_init: entry\n"));
4152 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4153 	err = arch_vm_init(args);
4154 
4155 	// initialize some globals
4156 	vm_page_init_num_pages(args);
4157 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4158 
4159 	slab_init(args);
4160 
4161 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4162 	off_t heapSize = INITIAL_HEAP_SIZE;
4163 	// try to accomodate low memory systems
4164 	while (heapSize > sAvailableMemory / 8)
4165 		heapSize /= 2;
4166 	if (heapSize < 1024 * 1024)
4167 		panic("vm_init: go buy some RAM please.");
4168 
4169 	// map in the new heap and initialize it
4170 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4171 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4172 	TRACE(("heap at 0x%lx\n", heapBase));
4173 	heap_init(heapBase, heapSize);
4174 #endif
4175 
4176 	// initialize the free page list and physical page mapper
4177 	vm_page_init(args);
4178 
4179 	// initialize the cache allocators
4180 	vm_cache_init(args);
4181 
4182 	{
4183 		status_t error = VMAreaHash::Init();
4184 		if (error != B_OK)
4185 			panic("vm_init: error initializing area hash table\n");
4186 	}
4187 
4188 	VMAddressSpace::Init();
4189 	reserve_boot_loader_ranges(args);
4190 
4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4192 	heap_init_post_area();
4193 #endif
4194 
4195 	// Do any further initialization that the architecture dependant layers may
4196 	// need now
4197 	arch_vm_translation_map_init_post_area(args);
4198 	arch_vm_init_post_area(args);
4199 	vm_page_init_post_area(args);
4200 	slab_init_post_area();
4201 
4202 	// allocate areas to represent stuff that already exists
4203 
4204 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4205 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4206 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4207 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4208 #endif
4209 
4210 	allocate_kernel_args(args);
4211 
4212 	create_preloaded_image_areas(args->kernel_image);
4213 
4214 	// allocate areas for preloaded images
4215 	for (image = args->preloaded_images; image != NULL; image = image->next)
4216 		create_preloaded_image_areas(image);
4217 
4218 	// allocate kernel stacks
4219 	for (i = 0; i < args->num_cpus; i++) {
4220 		char name[64];
4221 
4222 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4223 		address = (void*)args->cpu_kstack[i].start;
4224 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4225 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4226 	}
4227 
4228 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4229 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4230 
4231 #if PARANOID_KERNEL_MALLOC
4232 	vm_block_address_range("uninitialized heap memory",
4233 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4234 #endif
4235 #if PARANOID_KERNEL_FREE
4236 	vm_block_address_range("freed heap memory",
4237 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4238 #endif
4239 
4240 	// create the object cache for the page mappings
4241 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4242 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4243 		NULL, NULL);
4244 	if (gPageMappingsObjectCache == NULL)
4245 		panic("failed to create page mappings object cache");
4246 
4247 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4248 
4249 #if DEBUG_CACHE_LIST
4250 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4251 		virtual_address_restrictions virtualRestrictions = {};
4252 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4253 		physical_address_restrictions physicalRestrictions = {};
4254 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4255 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4256 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4257 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4258 			&physicalRestrictions, (void**)&sCacheInfoTable);
4259 	}
4260 #endif	// DEBUG_CACHE_LIST
4261 
4262 	// add some debugger commands
4263 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4264 	add_debugger_command("area", &dump_area,
4265 		"Dump info about a particular area");
4266 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4267 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4268 #if DEBUG_CACHE_LIST
4269 	if (sCacheInfoTable != NULL) {
4270 		add_debugger_command_etc("caches", &dump_caches,
4271 			"List all VMCache trees",
4272 			"[ \"-c\" ]\n"
4273 			"All cache trees are listed sorted in decreasing order by number "
4274 				"of\n"
4275 			"used pages or, if \"-c\" is specified, by size of committed "
4276 				"memory.\n",
4277 			0);
4278 	}
4279 #endif
4280 	add_debugger_command("avail", &dump_available_memory,
4281 		"Dump available memory");
4282 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4283 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4284 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4285 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4286 	add_debugger_command("string", &display_mem, "dump strings");
4287 
4288 	add_debugger_command_etc("mapping", &dump_mapping_info,
4289 		"Print address mapping information",
4290 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4291 		"Prints low-level page mapping information for a given address. If\n"
4292 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4293 		"address that is looked up in the translation map of the current\n"
4294 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4295 		"\"-r\" is specified, <address> is a physical address that is\n"
4296 		"searched in the translation map of all teams, respectively the team\n"
4297 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4298 		"<address> is the address of a vm_page structure. The behavior is\n"
4299 		"equivalent to specifying \"-r\" with the physical address of that\n"
4300 		"page.\n",
4301 		0);
4302 
4303 	TRACE(("vm_init: exit\n"));
4304 
4305 	vm_cache_init_post_heap();
4306 
4307 	return err;
4308 }
4309 
4310 
4311 status_t
4312 vm_init_post_sem(kernel_args* args)
4313 {
4314 	// This frees all unused boot loader resources and makes its space available
4315 	// again
4316 	arch_vm_init_end(args);
4317 	unreserve_boot_loader_ranges(args);
4318 
4319 	// fill in all of the semaphores that were not allocated before
4320 	// since we're still single threaded and only the kernel address space
4321 	// exists, it isn't that hard to find all of the ones we need to create
4322 
4323 	arch_vm_translation_map_init_post_sem(args);
4324 
4325 	slab_init_post_sem();
4326 
4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4328 	heap_init_post_sem();
4329 #endif
4330 
4331 	return B_OK;
4332 }
4333 
4334 
4335 status_t
4336 vm_init_post_thread(kernel_args* args)
4337 {
4338 	vm_page_init_post_thread(args);
4339 	slab_init_post_thread();
4340 	return heap_init_post_thread();
4341 }
4342 
4343 
4344 status_t
4345 vm_init_post_modules(kernel_args* args)
4346 {
4347 	return arch_vm_init_post_modules(args);
4348 }
4349 
4350 
4351 void
4352 permit_page_faults(void)
4353 {
4354 	Thread* thread = thread_get_current_thread();
4355 	if (thread != NULL)
4356 		atomic_add(&thread->page_faults_allowed, 1);
4357 }
4358 
4359 
4360 void
4361 forbid_page_faults(void)
4362 {
4363 	Thread* thread = thread_get_current_thread();
4364 	if (thread != NULL)
4365 		atomic_add(&thread->page_faults_allowed, -1);
4366 }
4367 
4368 
4369 status_t
4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4371 	bool isUser, addr_t* newIP)
4372 {
4373 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4374 		faultAddress));
4375 
4376 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4377 
4378 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4379 	VMAddressSpace* addressSpace = NULL;
4380 
4381 	status_t status = B_OK;
4382 	*newIP = 0;
4383 	atomic_add((int32*)&sPageFaults, 1);
4384 
4385 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4386 		addressSpace = VMAddressSpace::GetKernel();
4387 	} else if (IS_USER_ADDRESS(pageAddress)) {
4388 		addressSpace = VMAddressSpace::GetCurrent();
4389 		if (addressSpace == NULL) {
4390 			if (!isUser) {
4391 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4392 					"memory!\n");
4393 				status = B_BAD_ADDRESS;
4394 				TPF(PageFaultError(-1,
4395 					VMPageFaultTracing
4396 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4397 			} else {
4398 				// XXX weird state.
4399 				panic("vm_page_fault: non kernel thread accessing user memory "
4400 					"that doesn't exist!\n");
4401 				status = B_BAD_ADDRESS;
4402 			}
4403 		}
4404 	} else {
4405 		// the hit was probably in the 64k DMZ between kernel and user space
4406 		// this keeps a user space thread from passing a buffer that crosses
4407 		// into kernel space
4408 		status = B_BAD_ADDRESS;
4409 		TPF(PageFaultError(-1,
4410 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4411 	}
4412 
4413 	if (status == B_OK) {
4414 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4415 			isUser, NULL);
4416 	}
4417 
4418 	if (status < B_OK) {
4419 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4420 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4421 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4422 			thread_get_current_thread_id());
4423 		if (!isUser) {
4424 			Thread* thread = thread_get_current_thread();
4425 			if (thread != NULL && thread->fault_handler != 0) {
4426 				// this will cause the arch dependant page fault handler to
4427 				// modify the IP on the interrupt frame or whatever to return
4428 				// to this address
4429 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4430 			} else {
4431 				// unhandled page fault in the kernel
4432 				panic("vm_page_fault: unhandled page fault in kernel space at "
4433 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4434 			}
4435 		} else {
4436 			Thread* thread = thread_get_current_thread();
4437 
4438 #ifdef TRACE_FAULTS
4439 			VMArea* area = NULL;
4440 			if (addressSpace != NULL) {
4441 				addressSpace->ReadLock();
4442 				area = addressSpace->LookupArea(faultAddress);
4443 			}
4444 
4445 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4446 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4447 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4448 				thread->team->Name(), thread->team->id,
4449 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4450 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4451 					area->Base() : 0x0));
4452 
4453 			if (addressSpace != NULL)
4454 				addressSpace->ReadUnlock();
4455 #endif
4456 
4457 			// If the thread has a signal handler for SIGSEGV, we simply
4458 			// send it the signal. Otherwise we notify the user debugger
4459 			// first.
4460 			struct sigaction action;
4461 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4462 					&& action.sa_handler != SIG_DFL
4463 					&& action.sa_handler != SIG_IGN)
4464 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4465 					SIGSEGV)) {
4466 				Signal signal(SIGSEGV,
4467 					status == B_PERMISSION_DENIED
4468 						? SEGV_ACCERR : SEGV_MAPERR,
4469 					EFAULT, thread->team->id);
4470 				signal.SetAddress((void*)address);
4471 				send_signal_to_thread(thread, signal, 0);
4472 			}
4473 		}
4474 	}
4475 
4476 	if (addressSpace != NULL)
4477 		addressSpace->Put();
4478 
4479 	return B_HANDLED_INTERRUPT;
4480 }
4481 
4482 
4483 struct PageFaultContext {
4484 	AddressSpaceReadLocker	addressSpaceLocker;
4485 	VMCacheChainLocker		cacheChainLocker;
4486 
4487 	VMTranslationMap*		map;
4488 	VMCache*				topCache;
4489 	off_t					cacheOffset;
4490 	vm_page_reservation		reservation;
4491 	bool					isWrite;
4492 
4493 	// return values
4494 	vm_page*				page;
4495 	bool					restart;
4496 	bool					pageAllocated;
4497 
4498 
4499 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4500 		:
4501 		addressSpaceLocker(addressSpace, true),
4502 		map(addressSpace->TranslationMap()),
4503 		isWrite(isWrite)
4504 	{
4505 	}
4506 
4507 	~PageFaultContext()
4508 	{
4509 		UnlockAll();
4510 		vm_page_unreserve_pages(&reservation);
4511 	}
4512 
4513 	void Prepare(VMCache* topCache, off_t cacheOffset)
4514 	{
4515 		this->topCache = topCache;
4516 		this->cacheOffset = cacheOffset;
4517 		page = NULL;
4518 		restart = false;
4519 		pageAllocated = false;
4520 
4521 		cacheChainLocker.SetTo(topCache);
4522 	}
4523 
4524 	void UnlockAll(VMCache* exceptCache = NULL)
4525 	{
4526 		topCache = NULL;
4527 		addressSpaceLocker.Unlock();
4528 		cacheChainLocker.Unlock(exceptCache);
4529 	}
4530 };
4531 
4532 
4533 /*!	Gets the page that should be mapped into the area.
4534 	Returns an error code other than \c B_OK, if the page couldn't be found or
4535 	paged in. The locking state of the address space and the caches is undefined
4536 	in that case.
4537 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4538 	had to unlock the address space and all caches and is supposed to be called
4539 	again.
4540 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4541 	found. It is returned in \c context.page. The address space will still be
4542 	locked as well as all caches starting from the top cache to at least the
4543 	cache the page lives in.
4544 */
4545 static status_t
4546 fault_get_page(PageFaultContext& context)
4547 {
4548 	VMCache* cache = context.topCache;
4549 	VMCache* lastCache = NULL;
4550 	vm_page* page = NULL;
4551 
4552 	while (cache != NULL) {
4553 		// We already hold the lock of the cache at this point.
4554 
4555 		lastCache = cache;
4556 
4557 		page = cache->LookupPage(context.cacheOffset);
4558 		if (page != NULL && page->busy) {
4559 			// page must be busy -- wait for it to become unbusy
4560 			context.UnlockAll(cache);
4561 			cache->ReleaseRefLocked();
4562 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4563 
4564 			// restart the whole process
4565 			context.restart = true;
4566 			return B_OK;
4567 		}
4568 
4569 		if (page != NULL)
4570 			break;
4571 
4572 		// The current cache does not contain the page we're looking for.
4573 
4574 		// see if the backing store has it
4575 		if (cache->HasPage(context.cacheOffset)) {
4576 			// insert a fresh page and mark it busy -- we're going to read it in
4577 			page = vm_page_allocate_page(&context.reservation,
4578 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4579 			cache->InsertPage(page, context.cacheOffset);
4580 
4581 			// We need to unlock all caches and the address space while reading
4582 			// the page in. Keep a reference to the cache around.
4583 			cache->AcquireRefLocked();
4584 			context.UnlockAll();
4585 
4586 			// read the page in
4587 			generic_io_vec vec;
4588 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4589 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4590 
4591 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4592 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4593 
4594 			cache->Lock();
4595 
4596 			if (status < B_OK) {
4597 				// on error remove and free the page
4598 				dprintf("reading page from cache %p returned: %s!\n",
4599 					cache, strerror(status));
4600 
4601 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4602 				cache->RemovePage(page);
4603 				vm_page_set_state(page, PAGE_STATE_FREE);
4604 
4605 				cache->ReleaseRefAndUnlock();
4606 				return status;
4607 			}
4608 
4609 			// mark the page unbusy again
4610 			cache->MarkPageUnbusy(page);
4611 
4612 			DEBUG_PAGE_ACCESS_END(page);
4613 
4614 			// Since we needed to unlock everything temporarily, the area
4615 			// situation might have changed. So we need to restart the whole
4616 			// process.
4617 			cache->ReleaseRefAndUnlock();
4618 			context.restart = true;
4619 			return B_OK;
4620 		}
4621 
4622 		cache = context.cacheChainLocker.LockSourceCache();
4623 	}
4624 
4625 	if (page == NULL) {
4626 		// There was no adequate page, determine the cache for a clean one.
4627 		// Read-only pages come in the deepest cache, only the top most cache
4628 		// may have direct write access.
4629 		cache = context.isWrite ? context.topCache : lastCache;
4630 
4631 		// allocate a clean page
4632 		page = vm_page_allocate_page(&context.reservation,
4633 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4634 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4635 			page->physical_page_number));
4636 
4637 		// insert the new page into our cache
4638 		cache->InsertPage(page, context.cacheOffset);
4639 		context.pageAllocated = true;
4640 	} else if (page->Cache() != context.topCache && context.isWrite) {
4641 		// We have a page that has the data we want, but in the wrong cache
4642 		// object so we need to copy it and stick it into the top cache.
4643 		vm_page* sourcePage = page;
4644 
4645 		// TODO: If memory is low, it might be a good idea to steal the page
4646 		// from our source cache -- if possible, that is.
4647 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4648 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4649 
4650 		// To not needlessly kill concurrency we unlock all caches but the top
4651 		// one while copying the page. Lacking another mechanism to ensure that
4652 		// the source page doesn't disappear, we mark it busy.
4653 		sourcePage->busy = true;
4654 		context.cacheChainLocker.UnlockKeepRefs(true);
4655 
4656 		// copy the page
4657 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4658 			sourcePage->physical_page_number * B_PAGE_SIZE);
4659 
4660 		context.cacheChainLocker.RelockCaches(true);
4661 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4662 
4663 		// insert the new page into our cache
4664 		context.topCache->InsertPage(page, context.cacheOffset);
4665 		context.pageAllocated = true;
4666 	} else
4667 		DEBUG_PAGE_ACCESS_START(page);
4668 
4669 	context.page = page;
4670 	return B_OK;
4671 }
4672 
4673 
4674 /*!	Makes sure the address in the given address space is mapped.
4675 
4676 	\param addressSpace The address space.
4677 	\param originalAddress The address. Doesn't need to be page aligned.
4678 	\param isWrite If \c true the address shall be write-accessible.
4679 	\param isUser If \c true the access is requested by a userland team.
4680 	\param wirePage On success, if non \c NULL, the wired count of the page
4681 		mapped at the given address is incremented and the page is returned
4682 		via this parameter.
4683 	\return \c B_OK on success, another error code otherwise.
4684 */
4685 static status_t
4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4687 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4688 {
4689 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4690 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4691 		originalAddress, isWrite, isUser));
4692 
4693 	PageFaultContext context(addressSpace, isWrite);
4694 
4695 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4696 	status_t status = B_OK;
4697 
4698 	addressSpace->IncrementFaultCount();
4699 
4700 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4701 	// the pages upfront makes sure we don't have any cache locked, so that the
4702 	// page daemon/thief can do their job without problems.
4703 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4704 		originalAddress);
4705 	context.addressSpaceLocker.Unlock();
4706 	vm_page_reserve_pages(&context.reservation, reservePages,
4707 		addressSpace == VMAddressSpace::Kernel()
4708 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4709 
4710 	while (true) {
4711 		context.addressSpaceLocker.Lock();
4712 
4713 		// get the area the fault was in
4714 		VMArea* area = addressSpace->LookupArea(address);
4715 		if (area == NULL) {
4716 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4717 				"space\n", originalAddress);
4718 			TPF(PageFaultError(-1,
4719 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4720 			status = B_BAD_ADDRESS;
4721 			break;
4722 		}
4723 
4724 		// check permissions
4725 		uint32 protection = get_area_page_protection(area, address);
4726 		if (isUser && (protection & B_USER_PROTECTION) == 0
4727 				&& (area->protection & B_KERNEL_AREA) != 0) {
4728 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4729 				area->id, (void*)originalAddress);
4730 			TPF(PageFaultError(area->id,
4731 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4732 			status = B_PERMISSION_DENIED;
4733 			break;
4734 		}
4735 		if (isWrite && (protection
4736 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4737 			dprintf("write access attempted on write-protected area 0x%"
4738 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4739 			TPF(PageFaultError(area->id,
4740 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4741 			status = B_PERMISSION_DENIED;
4742 			break;
4743 		} else if (isExecute && (protection
4744 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4745 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4746 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4747 			TPF(PageFaultError(area->id,
4748 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4749 			status = B_PERMISSION_DENIED;
4750 			break;
4751 		} else if (!isWrite && !isExecute && (protection
4752 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4753 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4754 				" at %p\n", area->id, (void*)originalAddress);
4755 			TPF(PageFaultError(area->id,
4756 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4757 			status = B_PERMISSION_DENIED;
4758 			break;
4759 		}
4760 
4761 		// We have the area, it was a valid access, so let's try to resolve the
4762 		// page fault now.
4763 		// At first, the top most cache from the area is investigated.
4764 
4765 		context.Prepare(vm_area_get_locked_cache(area),
4766 			address - area->Base() + area->cache_offset);
4767 
4768 		// See if this cache has a fault handler -- this will do all the work
4769 		// for us.
4770 		{
4771 			// Note, since the page fault is resolved with interrupts enabled,
4772 			// the fault handler could be called more than once for the same
4773 			// reason -- the store must take this into account.
4774 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4775 			if (status != B_BAD_HANDLER)
4776 				break;
4777 		}
4778 
4779 		// The top most cache has no fault handler, so let's see if the cache or
4780 		// its sources already have the page we're searching for (we're going
4781 		// from top to bottom).
4782 		status = fault_get_page(context);
4783 		if (status != B_OK) {
4784 			TPF(PageFaultError(area->id, status));
4785 			break;
4786 		}
4787 
4788 		if (context.restart)
4789 			continue;
4790 
4791 		// All went fine, all there is left to do is to map the page into the
4792 		// address space.
4793 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4794 			context.page));
4795 
4796 		// If the page doesn't reside in the area's cache, we need to make sure
4797 		// it's mapped in read-only, so that we cannot overwrite someone else's
4798 		// data (copy-on-write)
4799 		uint32 newProtection = protection;
4800 		if (context.page->Cache() != context.topCache && !isWrite)
4801 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4802 
4803 		bool unmapPage = false;
4804 		bool mapPage = true;
4805 
4806 		// check whether there's already a page mapped at the address
4807 		context.map->Lock();
4808 
4809 		phys_addr_t physicalAddress;
4810 		uint32 flags;
4811 		vm_page* mappedPage = NULL;
4812 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4813 			&& (flags & PAGE_PRESENT) != 0
4814 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4815 				!= NULL) {
4816 			// Yep there's already a page. If it's ours, we can simply adjust
4817 			// its protection. Otherwise we have to unmap it.
4818 			if (mappedPage == context.page) {
4819 				context.map->ProtectPage(area, address, newProtection);
4820 					// Note: We assume that ProtectPage() is atomic (i.e.
4821 					// the page isn't temporarily unmapped), otherwise we'd have
4822 					// to make sure it isn't wired.
4823 				mapPage = false;
4824 			} else
4825 				unmapPage = true;
4826 		}
4827 
4828 		context.map->Unlock();
4829 
4830 		if (unmapPage) {
4831 			// If the page is wired, we can't unmap it. Wait until it is unwired
4832 			// again and restart. Note that the page cannot be wired for
4833 			// writing, since it it isn't in the topmost cache. So we can safely
4834 			// ignore ranges wired for writing (our own and other concurrent
4835 			// wiring attempts in progress) and in fact have to do that to avoid
4836 			// a deadlock.
4837 			VMAreaUnwiredWaiter waiter;
4838 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4839 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4840 				// unlock everything and wait
4841 				if (context.pageAllocated) {
4842 					// ... but since we allocated a page and inserted it into
4843 					// the top cache, remove and free it first. Otherwise we'd
4844 					// have a page from a lower cache mapped while an upper
4845 					// cache has a page that would shadow it.
4846 					context.topCache->RemovePage(context.page);
4847 					vm_page_free_etc(context.topCache, context.page,
4848 						&context.reservation);
4849 				} else
4850 					DEBUG_PAGE_ACCESS_END(context.page);
4851 
4852 				context.UnlockAll();
4853 				waiter.waitEntry.Wait();
4854 				continue;
4855 			}
4856 
4857 			// Note: The mapped page is a page of a lower cache. We are
4858 			// guaranteed to have that cached locked, our new page is a copy of
4859 			// that page, and the page is not busy. The logic for that guarantee
4860 			// is as follows: Since the page is mapped, it must live in the top
4861 			// cache (ruled out above) or any of its lower caches, and there is
4862 			// (was before the new page was inserted) no other page in any
4863 			// cache between the top cache and the page's cache (otherwise that
4864 			// would be mapped instead). That in turn means that our algorithm
4865 			// must have found it and therefore it cannot be busy either.
4866 			DEBUG_PAGE_ACCESS_START(mappedPage);
4867 			unmap_page(area, address);
4868 			DEBUG_PAGE_ACCESS_END(mappedPage);
4869 		}
4870 
4871 		if (mapPage) {
4872 			if (map_page(area, context.page, address, newProtection,
4873 					&context.reservation) != B_OK) {
4874 				// Mapping can only fail, when the page mapping object couldn't
4875 				// be allocated. Save for the missing mapping everything is
4876 				// fine, though. If this was a regular page fault, we'll simply
4877 				// leave and probably fault again. To make sure we'll have more
4878 				// luck then, we ensure that the minimum object reserve is
4879 				// available.
4880 				DEBUG_PAGE_ACCESS_END(context.page);
4881 
4882 				context.UnlockAll();
4883 
4884 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4885 						!= B_OK) {
4886 					// Apparently the situation is serious. Let's get ourselves
4887 					// killed.
4888 					status = B_NO_MEMORY;
4889 				} else if (wirePage != NULL) {
4890 					// The caller expects us to wire the page. Since
4891 					// object_cache_reserve() succeeded, we should now be able
4892 					// to allocate a mapping structure. Restart.
4893 					continue;
4894 				}
4895 
4896 				break;
4897 			}
4898 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4899 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4900 
4901 		// also wire the page, if requested
4902 		if (wirePage != NULL && status == B_OK) {
4903 			increment_page_wired_count(context.page);
4904 			*wirePage = context.page;
4905 		}
4906 
4907 		DEBUG_PAGE_ACCESS_END(context.page);
4908 
4909 		break;
4910 	}
4911 
4912 	return status;
4913 }
4914 
4915 
4916 status_t
4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4918 {
4919 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4920 }
4921 
4922 status_t
4923 vm_put_physical_page(addr_t vaddr, void* handle)
4924 {
4925 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4926 }
4927 
4928 
4929 status_t
4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4931 	void** _handle)
4932 {
4933 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4934 }
4935 
4936 status_t
4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4938 {
4939 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4940 }
4941 
4942 
4943 status_t
4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4945 {
4946 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4947 }
4948 
4949 status_t
4950 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4951 {
4952 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4953 }
4954 
4955 
4956 void
4957 vm_get_info(system_info* info)
4958 {
4959 	swap_get_info(info);
4960 
4961 	MutexLocker locker(sAvailableMemoryLock);
4962 	info->needed_memory = sNeededMemory;
4963 	info->free_memory = sAvailableMemory;
4964 }
4965 
4966 
4967 uint32
4968 vm_num_page_faults(void)
4969 {
4970 	return sPageFaults;
4971 }
4972 
4973 
4974 off_t
4975 vm_available_memory(void)
4976 {
4977 	MutexLocker locker(sAvailableMemoryLock);
4978 	return sAvailableMemory;
4979 }
4980 
4981 
4982 off_t
4983 vm_available_not_needed_memory(void)
4984 {
4985 	MutexLocker locker(sAvailableMemoryLock);
4986 	return sAvailableMemory - sNeededMemory;
4987 }
4988 
4989 
4990 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4991 	debugger.
4992 */
4993 off_t
4994 vm_available_not_needed_memory_debug(void)
4995 {
4996 	return sAvailableMemory - sNeededMemory;
4997 }
4998 
4999 
5000 size_t
5001 vm_kernel_address_space_left(void)
5002 {
5003 	return VMAddressSpace::Kernel()->FreeSpace();
5004 }
5005 
5006 
5007 void
5008 vm_unreserve_memory(size_t amount)
5009 {
5010 	mutex_lock(&sAvailableMemoryLock);
5011 
5012 	sAvailableMemory += amount;
5013 
5014 	mutex_unlock(&sAvailableMemoryLock);
5015 }
5016 
5017 
5018 status_t
5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5020 {
5021 	size_t reserve = kMemoryReserveForPriority[priority];
5022 
5023 	MutexLocker locker(sAvailableMemoryLock);
5024 
5025 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5026 
5027 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5028 		sAvailableMemory -= amount;
5029 		return B_OK;
5030 	}
5031 
5032 	if (timeout <= 0)
5033 		return B_NO_MEMORY;
5034 
5035 	// turn timeout into an absolute timeout
5036 	timeout += system_time();
5037 
5038 	// loop until we've got the memory or the timeout occurs
5039 	do {
5040 		sNeededMemory += amount;
5041 
5042 		// call the low resource manager
5043 		locker.Unlock();
5044 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5045 			B_ABSOLUTE_TIMEOUT, timeout);
5046 		locker.Lock();
5047 
5048 		sNeededMemory -= amount;
5049 
5050 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5051 			sAvailableMemory -= amount;
5052 			return B_OK;
5053 		}
5054 	} while (timeout > system_time());
5055 
5056 	return B_NO_MEMORY;
5057 }
5058 
5059 
5060 status_t
5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5062 {
5063 	// NOTE: The caller is responsible for synchronizing calls to this function!
5064 
5065 	AddressSpaceReadLocker locker;
5066 	VMArea* area;
5067 	status_t status = locker.SetFromArea(id, area);
5068 	if (status != B_OK)
5069 		return status;
5070 
5071 	// nothing to do, if the type doesn't change
5072 	uint32 oldType = area->MemoryType();
5073 	if (type == oldType)
5074 		return B_OK;
5075 
5076 	// set the memory type of the area and the mapped pages
5077 	VMTranslationMap* map = area->address_space->TranslationMap();
5078 	map->Lock();
5079 	area->SetMemoryType(type);
5080 	map->ProtectArea(area, area->protection);
5081 	map->Unlock();
5082 
5083 	// set the physical memory type
5084 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5085 	if (error != B_OK) {
5086 		// reset the memory type of the area and the mapped pages
5087 		map->Lock();
5088 		area->SetMemoryType(oldType);
5089 		map->ProtectArea(area, area->protection);
5090 		map->Unlock();
5091 		return error;
5092 	}
5093 
5094 	return B_OK;
5095 
5096 }
5097 
5098 
5099 /*!	This function enforces some protection properties:
5100 	 - kernel areas must be W^X (after kernel startup)
5101 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5102 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5103 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5104 	   and B_KERNEL_WRITE_AREA.
5105 */
5106 static void
5107 fix_protection(uint32* protection)
5108 {
5109 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5110 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5111 			|| (*protection & B_WRITE_AREA) != 0)
5112 		&& !gKernelStartup)
5113 		panic("kernel areas cannot be both writable and executable!");
5114 
5115 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5116 		if ((*protection & B_USER_PROTECTION) == 0
5117 			|| (*protection & B_WRITE_AREA) != 0)
5118 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5119 		else
5120 			*protection |= B_KERNEL_READ_AREA;
5121 	}
5122 }
5123 
5124 
5125 static void
5126 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5127 {
5128 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5129 	info->area = area->id;
5130 	info->address = (void*)area->Base();
5131 	info->size = area->Size();
5132 	info->protection = area->protection;
5133 	info->lock = area->wiring;
5134 	info->team = area->address_space->ID();
5135 	info->copy_count = 0;
5136 	info->in_count = 0;
5137 	info->out_count = 0;
5138 		// TODO: retrieve real values here!
5139 
5140 	VMCache* cache = vm_area_get_locked_cache(area);
5141 
5142 	// Note, this is a simplification; the cache could be larger than this area
5143 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5144 
5145 	vm_area_put_locked_cache(cache);
5146 }
5147 
5148 
5149 static status_t
5150 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5151 {
5152 	// is newSize a multiple of B_PAGE_SIZE?
5153 	if (newSize & (B_PAGE_SIZE - 1))
5154 		return B_BAD_VALUE;
5155 
5156 	// lock all affected address spaces and the cache
5157 	VMArea* area;
5158 	VMCache* cache;
5159 
5160 	MultiAddressSpaceLocker locker;
5161 	AreaCacheLocker cacheLocker;
5162 
5163 	status_t status;
5164 	size_t oldSize;
5165 	bool anyKernelArea;
5166 	bool restart;
5167 
5168 	do {
5169 		anyKernelArea = false;
5170 		restart = false;
5171 
5172 		locker.Unset();
5173 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5174 		if (status != B_OK)
5175 			return status;
5176 		cacheLocker.SetTo(cache, true);	// already locked
5177 
5178 		// enforce restrictions
5179 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5180 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5181 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5182 				"resize kernel area %" B_PRId32 " (%s)\n",
5183 				team_get_current_team_id(), areaID, area->name);
5184 			return B_NOT_ALLOWED;
5185 		}
5186 		// TODO: Enforce all restrictions (team, etc.)!
5187 
5188 		oldSize = area->Size();
5189 		if (newSize == oldSize)
5190 			return B_OK;
5191 
5192 		if (cache->type != CACHE_TYPE_RAM)
5193 			return B_NOT_ALLOWED;
5194 
5195 		if (oldSize < newSize) {
5196 			// We need to check if all areas of this cache can be resized.
5197 			for (VMArea* current = cache->areas; current != NULL;
5198 					current = current->cache_next) {
5199 				if (!current->address_space->CanResizeArea(current, newSize))
5200 					return B_ERROR;
5201 				anyKernelArea
5202 					|= current->address_space == VMAddressSpace::Kernel();
5203 			}
5204 		} else {
5205 			// We're shrinking the areas, so we must make sure the affected
5206 			// ranges are not wired.
5207 			for (VMArea* current = cache->areas; current != NULL;
5208 					current = current->cache_next) {
5209 				anyKernelArea
5210 					|= current->address_space == VMAddressSpace::Kernel();
5211 
5212 				if (wait_if_area_range_is_wired(current,
5213 						current->Base() + newSize, oldSize - newSize, &locker,
5214 						&cacheLocker)) {
5215 					restart = true;
5216 					break;
5217 				}
5218 			}
5219 		}
5220 	} while (restart);
5221 
5222 	// Okay, looks good so far, so let's do it
5223 
5224 	int priority = kernel && anyKernelArea
5225 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5226 	uint32 allocationFlags = kernel && anyKernelArea
5227 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5228 
5229 	if (oldSize < newSize) {
5230 		// Growing the cache can fail, so we do it first.
5231 		status = cache->Resize(cache->virtual_base + newSize, priority);
5232 		if (status != B_OK)
5233 			return status;
5234 	}
5235 
5236 	for (VMArea* current = cache->areas; current != NULL;
5237 			current = current->cache_next) {
5238 		status = current->address_space->ResizeArea(current, newSize,
5239 			allocationFlags);
5240 		if (status != B_OK)
5241 			break;
5242 
5243 		// We also need to unmap all pages beyond the new size, if the area has
5244 		// shrunk
5245 		if (newSize < oldSize) {
5246 			VMCacheChainLocker cacheChainLocker(cache);
5247 			cacheChainLocker.LockAllSourceCaches();
5248 
5249 			unmap_pages(current, current->Base() + newSize,
5250 				oldSize - newSize);
5251 
5252 			cacheChainLocker.Unlock(cache);
5253 		}
5254 	}
5255 
5256 	if (status == B_OK) {
5257 		// Shrink or grow individual page protections if in use.
5258 		if (area->page_protections != NULL) {
5259 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5260 			uint8* newProtections
5261 				= (uint8*)realloc(area->page_protections, bytes);
5262 			if (newProtections == NULL)
5263 				status = B_NO_MEMORY;
5264 			else {
5265 				area->page_protections = newProtections;
5266 
5267 				if (oldSize < newSize) {
5268 					// init the additional page protections to that of the area
5269 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5270 					uint32 areaProtection = area->protection
5271 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5272 					memset(area->page_protections + offset,
5273 						areaProtection | (areaProtection << 4), bytes - offset);
5274 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5275 						uint8& entry = area->page_protections[offset - 1];
5276 						entry = (entry & 0x0f) | (areaProtection << 4);
5277 					}
5278 				}
5279 			}
5280 		}
5281 	}
5282 
5283 	// shrinking the cache can't fail, so we do it now
5284 	if (status == B_OK && newSize < oldSize)
5285 		status = cache->Resize(cache->virtual_base + newSize, priority);
5286 
5287 	if (status != B_OK) {
5288 		// Something failed -- resize the areas back to their original size.
5289 		// This can fail, too, in which case we're seriously screwed.
5290 		for (VMArea* current = cache->areas; current != NULL;
5291 				current = current->cache_next) {
5292 			if (current->address_space->ResizeArea(current, oldSize,
5293 					allocationFlags) != B_OK) {
5294 				panic("vm_resize_area(): Failed and not being able to restore "
5295 					"original state.");
5296 			}
5297 		}
5298 
5299 		cache->Resize(cache->virtual_base + oldSize, priority);
5300 	}
5301 
5302 	// TODO: we must honour the lock restrictions of this area
5303 	return status;
5304 }
5305 
5306 
5307 status_t
5308 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5309 {
5310 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5311 }
5312 
5313 
5314 status_t
5315 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5316 {
5317 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5318 }
5319 
5320 
5321 status_t
5322 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5323 	bool user)
5324 {
5325 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5326 }
5327 
5328 
5329 void
5330 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5331 {
5332 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5333 }
5334 
5335 
5336 /*!	Copies a range of memory directly from/to a page that might not be mapped
5337 	at the moment.
5338 
5339 	For \a unsafeMemory the current mapping (if any is ignored). The function
5340 	walks through the respective area's cache chain to find the physical page
5341 	and copies from/to it directly.
5342 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5343 	must not cross a page boundary.
5344 
5345 	\param teamID The team ID identifying the address space \a unsafeMemory is
5346 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5347 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5348 		is passed, the address space of the thread returned by
5349 		debug_get_debugged_thread() is used.
5350 	\param unsafeMemory The start of the unsafe memory range to be copied
5351 		from/to.
5352 	\param buffer A safely accessible kernel buffer to be copied from/to.
5353 	\param size The number of bytes to be copied.
5354 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5355 		\a unsafeMemory, the other way around otherwise.
5356 */
5357 status_t
5358 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5359 	size_t size, bool copyToUnsafe)
5360 {
5361 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5362 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5363 		return B_BAD_VALUE;
5364 	}
5365 
5366 	// get the address space for the debugged thread
5367 	VMAddressSpace* addressSpace;
5368 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5369 		addressSpace = VMAddressSpace::Kernel();
5370 	} else if (teamID == B_CURRENT_TEAM) {
5371 		Thread* thread = debug_get_debugged_thread();
5372 		if (thread == NULL || thread->team == NULL)
5373 			return B_BAD_ADDRESS;
5374 
5375 		addressSpace = thread->team->address_space;
5376 	} else
5377 		addressSpace = VMAddressSpace::DebugGet(teamID);
5378 
5379 	if (addressSpace == NULL)
5380 		return B_BAD_ADDRESS;
5381 
5382 	// get the area
5383 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5384 	if (area == NULL)
5385 		return B_BAD_ADDRESS;
5386 
5387 	// search the page
5388 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5389 		+ area->cache_offset;
5390 	VMCache* cache = area->cache;
5391 	vm_page* page = NULL;
5392 	while (cache != NULL) {
5393 		page = cache->DebugLookupPage(cacheOffset);
5394 		if (page != NULL)
5395 			break;
5396 
5397 		// Page not found in this cache -- if it is paged out, we must not try
5398 		// to get it from lower caches.
5399 		if (cache->DebugHasPage(cacheOffset))
5400 			break;
5401 
5402 		cache = cache->source;
5403 	}
5404 
5405 	if (page == NULL)
5406 		return B_UNSUPPORTED;
5407 
5408 	// copy from/to physical memory
5409 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5410 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5411 
5412 	if (copyToUnsafe) {
5413 		if (page->Cache() != area->cache)
5414 			return B_UNSUPPORTED;
5415 
5416 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5417 	}
5418 
5419 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5420 }
5421 
5422 
5423 /** Validate that a memory range is either fully in kernel space, or fully in
5424  *  userspace */
5425 static inline bool
5426 validate_memory_range(const void* addr, size_t size)
5427 {
5428 	addr_t address = (addr_t)addr;
5429 
5430 	// Check for overflows on all addresses.
5431 	if ((address + size) < address)
5432 		return false;
5433 
5434 	// Validate that the address range does not cross the kernel/user boundary.
5435 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5436 }
5437 
5438 
5439 /** Validate that a memory range is fully in userspace. */
5440 static inline bool
5441 validate_user_memory_range(const void* addr, size_t size)
5442 {
5443 	addr_t address = (addr_t)addr;
5444 
5445 	// Check for overflows on all addresses.
5446 	if ((address + size) < address)
5447 		return false;
5448 
5449 	// Validate that both the start and end address are in userspace
5450 	return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1);
5451 }
5452 
5453 
5454 //	#pragma mark - kernel public API
5455 
5456 
5457 status_t
5458 user_memcpy(void* to, const void* from, size_t size)
5459 {
5460 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5461 		return B_BAD_ADDRESS;
5462 
5463 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5464 		return B_BAD_ADDRESS;
5465 
5466 	return B_OK;
5467 }
5468 
5469 
5470 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5471 	the string in \a to, NULL-terminating the result.
5472 
5473 	\param to Pointer to the destination C-string.
5474 	\param from Pointer to the source C-string.
5475 	\param size Size in bytes of the string buffer pointed to by \a to.
5476 
5477 	\return strlen(\a from).
5478 */
5479 ssize_t
5480 user_strlcpy(char* to, const char* from, size_t size)
5481 {
5482 	if (to == NULL && size != 0)
5483 		return B_BAD_VALUE;
5484 	if (from == NULL)
5485 		return B_BAD_ADDRESS;
5486 
5487 	// Protect the source address from overflows.
5488 	size_t maxSize = size;
5489 	if ((addr_t)from + maxSize < (addr_t)from)
5490 		maxSize -= (addr_t)from + maxSize;
5491 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5492 		maxSize = USER_TOP - (addr_t)from;
5493 
5494 	if (!validate_memory_range(to, maxSize))
5495 		return B_BAD_ADDRESS;
5496 
5497 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5498 	if (result < 0)
5499 		return result;
5500 
5501 	// If we hit the address overflow boundary, fail.
5502 	if ((size_t)result >= maxSize && maxSize < size)
5503 		return B_BAD_ADDRESS;
5504 
5505 	return result;
5506 }
5507 
5508 
5509 status_t
5510 user_memset(void* s, char c, size_t count)
5511 {
5512 	if (!validate_memory_range(s, count))
5513 		return B_BAD_ADDRESS;
5514 
5515 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5516 		return B_BAD_ADDRESS;
5517 
5518 	return B_OK;
5519 }
5520 
5521 
5522 /*!	Wires a single page at the given address.
5523 
5524 	\param team The team whose address space the address belongs to. Supports
5525 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5526 		parameter is ignored.
5527 	\param address address The virtual address to wire down. Does not need to
5528 		be page aligned.
5529 	\param writable If \c true the page shall be writable.
5530 	\param info On success the info is filled in, among other things
5531 		containing the physical address the given virtual one translates to.
5532 	\return \c B_OK, when the page could be wired, another error code otherwise.
5533 */
5534 status_t
5535 vm_wire_page(team_id team, addr_t address, bool writable,
5536 	VMPageWiringInfo* info)
5537 {
5538 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5539 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5540 
5541 	// compute the page protection that is required
5542 	bool isUser = IS_USER_ADDRESS(address);
5543 	uint32 requiredProtection = PAGE_PRESENT
5544 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5545 	if (writable)
5546 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5547 
5548 	// get and read lock the address space
5549 	VMAddressSpace* addressSpace = NULL;
5550 	if (isUser) {
5551 		if (team == B_CURRENT_TEAM)
5552 			addressSpace = VMAddressSpace::GetCurrent();
5553 		else
5554 			addressSpace = VMAddressSpace::Get(team);
5555 	} else
5556 		addressSpace = VMAddressSpace::GetKernel();
5557 	if (addressSpace == NULL)
5558 		return B_ERROR;
5559 
5560 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5561 
5562 	VMTranslationMap* map = addressSpace->TranslationMap();
5563 	status_t error = B_OK;
5564 
5565 	// get the area
5566 	VMArea* area = addressSpace->LookupArea(pageAddress);
5567 	if (area == NULL) {
5568 		addressSpace->Put();
5569 		return B_BAD_ADDRESS;
5570 	}
5571 
5572 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5573 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5574 
5575 	// mark the area range wired
5576 	area->Wire(&info->range);
5577 
5578 	// Lock the area's cache chain and the translation map. Needed to look
5579 	// up the page and play with its wired count.
5580 	cacheChainLocker.LockAllSourceCaches();
5581 	map->Lock();
5582 
5583 	phys_addr_t physicalAddress;
5584 	uint32 flags;
5585 	vm_page* page;
5586 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5587 		&& (flags & requiredProtection) == requiredProtection
5588 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5589 			!= NULL) {
5590 		// Already mapped with the correct permissions -- just increment
5591 		// the page's wired count.
5592 		increment_page_wired_count(page);
5593 
5594 		map->Unlock();
5595 		cacheChainLocker.Unlock();
5596 		addressSpaceLocker.Unlock();
5597 	} else {
5598 		// Let vm_soft_fault() map the page for us, if possible. We need
5599 		// to fully unlock to avoid deadlocks. Since we have already
5600 		// wired the area itself, nothing disturbing will happen with it
5601 		// in the meantime.
5602 		map->Unlock();
5603 		cacheChainLocker.Unlock();
5604 		addressSpaceLocker.Unlock();
5605 
5606 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5607 			isUser, &page);
5608 
5609 		if (error != B_OK) {
5610 			// The page could not be mapped -- clean up.
5611 			VMCache* cache = vm_area_get_locked_cache(area);
5612 			area->Unwire(&info->range);
5613 			cache->ReleaseRefAndUnlock();
5614 			addressSpace->Put();
5615 			return error;
5616 		}
5617 	}
5618 
5619 	info->physicalAddress
5620 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5621 			+ address % B_PAGE_SIZE;
5622 	info->page = page;
5623 
5624 	return B_OK;
5625 }
5626 
5627 
5628 /*!	Unwires a single page previously wired via vm_wire_page().
5629 
5630 	\param info The same object passed to vm_wire_page() before.
5631 */
5632 void
5633 vm_unwire_page(VMPageWiringInfo* info)
5634 {
5635 	// lock the address space
5636 	VMArea* area = info->range.area;
5637 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5638 		// takes over our reference
5639 
5640 	// lock the top cache
5641 	VMCache* cache = vm_area_get_locked_cache(area);
5642 	VMCacheChainLocker cacheChainLocker(cache);
5643 
5644 	if (info->page->Cache() != cache) {
5645 		// The page is not in the top cache, so we lock the whole cache chain
5646 		// before touching the page's wired count.
5647 		cacheChainLocker.LockAllSourceCaches();
5648 	}
5649 
5650 	decrement_page_wired_count(info->page);
5651 
5652 	// remove the wired range from the range
5653 	area->Unwire(&info->range);
5654 
5655 	cacheChainLocker.Unlock();
5656 }
5657 
5658 
5659 /*!	Wires down the given address range in the specified team's address space.
5660 
5661 	If successful the function
5662 	- acquires a reference to the specified team's address space,
5663 	- adds respective wired ranges to all areas that intersect with the given
5664 	  address range,
5665 	- makes sure all pages in the given address range are mapped with the
5666 	  requested access permissions and increments their wired count.
5667 
5668 	It fails, when \a team doesn't specify a valid address space, when any part
5669 	of the specified address range is not covered by areas, when the concerned
5670 	areas don't allow mapping with the requested permissions, or when mapping
5671 	failed for another reason.
5672 
5673 	When successful the call must be balanced by a unlock_memory_etc() call with
5674 	the exact same parameters.
5675 
5676 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5677 		supported.
5678 	\param address The start of the address range to be wired.
5679 	\param numBytes The size of the address range to be wired.
5680 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5681 		requests that the range must be wired writable ("read from device
5682 		into memory").
5683 	\return \c B_OK on success, another error code otherwise.
5684 */
5685 status_t
5686 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5687 {
5688 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5689 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5690 
5691 	// compute the page protection that is required
5692 	bool isUser = IS_USER_ADDRESS(address);
5693 	bool writable = (flags & B_READ_DEVICE) == 0;
5694 	uint32 requiredProtection = PAGE_PRESENT
5695 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5696 	if (writable)
5697 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5698 
5699 	uint32 mallocFlags = isUser
5700 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5701 
5702 	// get and read lock the address space
5703 	VMAddressSpace* addressSpace = NULL;
5704 	if (isUser) {
5705 		if (team == B_CURRENT_TEAM)
5706 			addressSpace = VMAddressSpace::GetCurrent();
5707 		else
5708 			addressSpace = VMAddressSpace::Get(team);
5709 	} else
5710 		addressSpace = VMAddressSpace::GetKernel();
5711 	if (addressSpace == NULL)
5712 		return B_ERROR;
5713 
5714 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5715 		// We get a new address space reference here. The one we got above will
5716 		// be freed by unlock_memory_etc().
5717 
5718 	VMTranslationMap* map = addressSpace->TranslationMap();
5719 	status_t error = B_OK;
5720 
5721 	// iterate through all concerned areas
5722 	addr_t nextAddress = lockBaseAddress;
5723 	while (nextAddress != lockEndAddress) {
5724 		// get the next area
5725 		VMArea* area = addressSpace->LookupArea(nextAddress);
5726 		if (area == NULL) {
5727 			error = B_BAD_ADDRESS;
5728 			break;
5729 		}
5730 
5731 		addr_t areaStart = nextAddress;
5732 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5733 
5734 		// allocate the wired range (do that before locking the cache to avoid
5735 		// deadlocks)
5736 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5737 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5738 		if (range == NULL) {
5739 			error = B_NO_MEMORY;
5740 			break;
5741 		}
5742 
5743 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5744 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5745 
5746 		// mark the area range wired
5747 		area->Wire(range);
5748 
5749 		// Depending on the area cache type and the wiring, we may not need to
5750 		// look at the individual pages.
5751 		if (area->cache_type == CACHE_TYPE_NULL
5752 			|| area->cache_type == CACHE_TYPE_DEVICE
5753 			|| area->wiring == B_FULL_LOCK
5754 			|| area->wiring == B_CONTIGUOUS) {
5755 			nextAddress = areaEnd;
5756 			continue;
5757 		}
5758 
5759 		// Lock the area's cache chain and the translation map. Needed to look
5760 		// up pages and play with their wired count.
5761 		cacheChainLocker.LockAllSourceCaches();
5762 		map->Lock();
5763 
5764 		// iterate through the pages and wire them
5765 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5766 			phys_addr_t physicalAddress;
5767 			uint32 flags;
5768 
5769 			vm_page* page;
5770 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5771 				&& (flags & requiredProtection) == requiredProtection
5772 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5773 					!= NULL) {
5774 				// Already mapped with the correct permissions -- just increment
5775 				// the page's wired count.
5776 				increment_page_wired_count(page);
5777 			} else {
5778 				// Let vm_soft_fault() map the page for us, if possible. We need
5779 				// to fully unlock to avoid deadlocks. Since we have already
5780 				// wired the area itself, nothing disturbing will happen with it
5781 				// in the meantime.
5782 				map->Unlock();
5783 				cacheChainLocker.Unlock();
5784 				addressSpaceLocker.Unlock();
5785 
5786 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5787 					false, isUser, &page);
5788 
5789 				addressSpaceLocker.Lock();
5790 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5791 				cacheChainLocker.LockAllSourceCaches();
5792 				map->Lock();
5793 			}
5794 
5795 			if (error != B_OK)
5796 				break;
5797 		}
5798 
5799 		map->Unlock();
5800 
5801 		if (error == B_OK) {
5802 			cacheChainLocker.Unlock();
5803 		} else {
5804 			// An error occurred, so abort right here. If the current address
5805 			// is the first in this area, unwire the area, since we won't get
5806 			// to it when reverting what we've done so far.
5807 			if (nextAddress == areaStart) {
5808 				area->Unwire(range);
5809 				cacheChainLocker.Unlock();
5810 				range->~VMAreaWiredRange();
5811 				free_etc(range, mallocFlags);
5812 			} else
5813 				cacheChainLocker.Unlock();
5814 
5815 			break;
5816 		}
5817 	}
5818 
5819 	if (error != B_OK) {
5820 		// An error occurred, so unwire all that we've already wired. Note that
5821 		// even if not a single page was wired, unlock_memory_etc() is called
5822 		// to put the address space reference.
5823 		addressSpaceLocker.Unlock();
5824 		unlock_memory_etc(team, (void*)lockBaseAddress,
5825 			nextAddress - lockBaseAddress, flags);
5826 	}
5827 
5828 	return error;
5829 }
5830 
5831 
5832 status_t
5833 lock_memory(void* address, size_t numBytes, uint32 flags)
5834 {
5835 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5836 }
5837 
5838 
5839 /*!	Unwires an address range previously wired with lock_memory_etc().
5840 
5841 	Note that a call to this function must balance a previous lock_memory_etc()
5842 	call with exactly the same parameters.
5843 */
5844 status_t
5845 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5846 {
5847 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5848 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5849 
5850 	// compute the page protection that is required
5851 	bool isUser = IS_USER_ADDRESS(address);
5852 	bool writable = (flags & B_READ_DEVICE) == 0;
5853 	uint32 requiredProtection = PAGE_PRESENT
5854 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5855 	if (writable)
5856 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5857 
5858 	uint32 mallocFlags = isUser
5859 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5860 
5861 	// get and read lock the address space
5862 	VMAddressSpace* addressSpace = NULL;
5863 	if (isUser) {
5864 		if (team == B_CURRENT_TEAM)
5865 			addressSpace = VMAddressSpace::GetCurrent();
5866 		else
5867 			addressSpace = VMAddressSpace::Get(team);
5868 	} else
5869 		addressSpace = VMAddressSpace::GetKernel();
5870 	if (addressSpace == NULL)
5871 		return B_ERROR;
5872 
5873 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5874 		// Take over the address space reference. We don't unlock until we're
5875 		// done.
5876 
5877 	VMTranslationMap* map = addressSpace->TranslationMap();
5878 	status_t error = B_OK;
5879 
5880 	// iterate through all concerned areas
5881 	addr_t nextAddress = lockBaseAddress;
5882 	while (nextAddress != lockEndAddress) {
5883 		// get the next area
5884 		VMArea* area = addressSpace->LookupArea(nextAddress);
5885 		if (area == NULL) {
5886 			error = B_BAD_ADDRESS;
5887 			break;
5888 		}
5889 
5890 		addr_t areaStart = nextAddress;
5891 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5892 
5893 		// Lock the area's top cache. This is a requirement for
5894 		// VMArea::Unwire().
5895 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5896 
5897 		// Depending on the area cache type and the wiring, we may not need to
5898 		// look at the individual pages.
5899 		if (area->cache_type == CACHE_TYPE_NULL
5900 			|| area->cache_type == CACHE_TYPE_DEVICE
5901 			|| area->wiring == B_FULL_LOCK
5902 			|| area->wiring == B_CONTIGUOUS) {
5903 			// unwire the range (to avoid deadlocks we delete the range after
5904 			// unlocking the cache)
5905 			nextAddress = areaEnd;
5906 			VMAreaWiredRange* range = area->Unwire(areaStart,
5907 				areaEnd - areaStart, writable);
5908 			cacheChainLocker.Unlock();
5909 			if (range != NULL) {
5910 				range->~VMAreaWiredRange();
5911 				free_etc(range, mallocFlags);
5912 			}
5913 			continue;
5914 		}
5915 
5916 		// Lock the area's cache chain and the translation map. Needed to look
5917 		// up pages and play with their wired count.
5918 		cacheChainLocker.LockAllSourceCaches();
5919 		map->Lock();
5920 
5921 		// iterate through the pages and unwire them
5922 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5923 			phys_addr_t physicalAddress;
5924 			uint32 flags;
5925 
5926 			vm_page* page;
5927 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5928 				&& (flags & PAGE_PRESENT) != 0
5929 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5930 					!= NULL) {
5931 				// Already mapped with the correct permissions -- just increment
5932 				// the page's wired count.
5933 				decrement_page_wired_count(page);
5934 			} else {
5935 				panic("unlock_memory_etc(): Failed to unwire page: address "
5936 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5937 					nextAddress);
5938 				error = B_BAD_VALUE;
5939 				break;
5940 			}
5941 		}
5942 
5943 		map->Unlock();
5944 
5945 		// All pages are unwired. Remove the area's wired range as well (to
5946 		// avoid deadlocks we delete the range after unlocking the cache).
5947 		VMAreaWiredRange* range = area->Unwire(areaStart,
5948 			areaEnd - areaStart, writable);
5949 
5950 		cacheChainLocker.Unlock();
5951 
5952 		if (range != NULL) {
5953 			range->~VMAreaWiredRange();
5954 			free_etc(range, mallocFlags);
5955 		}
5956 
5957 		if (error != B_OK)
5958 			break;
5959 	}
5960 
5961 	// get rid of the address space reference lock_memory_etc() acquired
5962 	addressSpace->Put();
5963 
5964 	return error;
5965 }
5966 
5967 
5968 status_t
5969 unlock_memory(void* address, size_t numBytes, uint32 flags)
5970 {
5971 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5972 }
5973 
5974 
5975 /*!	Similar to get_memory_map(), but also allows to specify the address space
5976 	for the memory in question and has a saner semantics.
5977 	Returns \c B_OK when the complete range could be translated or
5978 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5979 	case the actual number of entries is written to \c *_numEntries. Any other
5980 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5981 	in this case.
5982 */
5983 status_t
5984 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5985 	physical_entry* table, uint32* _numEntries)
5986 {
5987 	uint32 numEntries = *_numEntries;
5988 	*_numEntries = 0;
5989 
5990 	VMAddressSpace* addressSpace;
5991 	addr_t virtualAddress = (addr_t)address;
5992 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5993 	phys_addr_t physicalAddress;
5994 	status_t status = B_OK;
5995 	int32 index = -1;
5996 	addr_t offset = 0;
5997 	bool interrupts = are_interrupts_enabled();
5998 
5999 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6000 		"entries)\n", team, address, numBytes, numEntries));
6001 
6002 	if (numEntries == 0 || numBytes == 0)
6003 		return B_BAD_VALUE;
6004 
6005 	// in which address space is the address to be found?
6006 	if (IS_USER_ADDRESS(virtualAddress)) {
6007 		if (team == B_CURRENT_TEAM)
6008 			addressSpace = VMAddressSpace::GetCurrent();
6009 		else
6010 			addressSpace = VMAddressSpace::Get(team);
6011 	} else
6012 		addressSpace = VMAddressSpace::GetKernel();
6013 
6014 	if (addressSpace == NULL)
6015 		return B_ERROR;
6016 
6017 	VMTranslationMap* map = addressSpace->TranslationMap();
6018 
6019 	if (interrupts)
6020 		map->Lock();
6021 
6022 	while (offset < numBytes) {
6023 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6024 		uint32 flags;
6025 
6026 		if (interrupts) {
6027 			status = map->Query((addr_t)address + offset, &physicalAddress,
6028 				&flags);
6029 		} else {
6030 			status = map->QueryInterrupt((addr_t)address + offset,
6031 				&physicalAddress, &flags);
6032 		}
6033 		if (status < B_OK)
6034 			break;
6035 		if ((flags & PAGE_PRESENT) == 0) {
6036 			panic("get_memory_map() called on unmapped memory!");
6037 			return B_BAD_ADDRESS;
6038 		}
6039 
6040 		if (index < 0 && pageOffset > 0) {
6041 			physicalAddress += pageOffset;
6042 			if (bytes > B_PAGE_SIZE - pageOffset)
6043 				bytes = B_PAGE_SIZE - pageOffset;
6044 		}
6045 
6046 		// need to switch to the next physical_entry?
6047 		if (index < 0 || table[index].address
6048 				!= physicalAddress - table[index].size) {
6049 			if ((uint32)++index + 1 > numEntries) {
6050 				// table to small
6051 				break;
6052 			}
6053 			table[index].address = physicalAddress;
6054 			table[index].size = bytes;
6055 		} else {
6056 			// page does fit in current entry
6057 			table[index].size += bytes;
6058 		}
6059 
6060 		offset += bytes;
6061 	}
6062 
6063 	if (interrupts)
6064 		map->Unlock();
6065 
6066 	if (status != B_OK)
6067 		return status;
6068 
6069 	if ((uint32)index + 1 > numEntries) {
6070 		*_numEntries = index;
6071 		return B_BUFFER_OVERFLOW;
6072 	}
6073 
6074 	*_numEntries = index + 1;
6075 	return B_OK;
6076 }
6077 
6078 
6079 /*!	According to the BeBook, this function should always succeed.
6080 	This is no longer the case.
6081 */
6082 extern "C" int32
6083 __get_memory_map_haiku(const void* address, size_t numBytes,
6084 	physical_entry* table, int32 numEntries)
6085 {
6086 	uint32 entriesRead = numEntries;
6087 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6088 		table, &entriesRead);
6089 	if (error != B_OK)
6090 		return error;
6091 
6092 	// close the entry list
6093 
6094 	// if it's only one entry, we will silently accept the missing ending
6095 	if (numEntries == 1)
6096 		return B_OK;
6097 
6098 	if (entriesRead + 1 > (uint32)numEntries)
6099 		return B_BUFFER_OVERFLOW;
6100 
6101 	table[entriesRead].address = 0;
6102 	table[entriesRead].size = 0;
6103 
6104 	return B_OK;
6105 }
6106 
6107 
6108 area_id
6109 area_for(void* address)
6110 {
6111 	return vm_area_for((addr_t)address, true);
6112 }
6113 
6114 
6115 area_id
6116 find_area(const char* name)
6117 {
6118 	return VMAreaHash::Find(name);
6119 }
6120 
6121 
6122 status_t
6123 _get_area_info(area_id id, area_info* info, size_t size)
6124 {
6125 	if (size != sizeof(area_info) || info == NULL)
6126 		return B_BAD_VALUE;
6127 
6128 	AddressSpaceReadLocker locker;
6129 	VMArea* area;
6130 	status_t status = locker.SetFromArea(id, area);
6131 	if (status != B_OK)
6132 		return status;
6133 
6134 	fill_area_info(area, info, size);
6135 	return B_OK;
6136 }
6137 
6138 
6139 status_t
6140 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6141 {
6142 	addr_t nextBase = *(addr_t*)cookie;
6143 
6144 	// we're already through the list
6145 	if (nextBase == (addr_t)-1)
6146 		return B_ENTRY_NOT_FOUND;
6147 
6148 	if (team == B_CURRENT_TEAM)
6149 		team = team_get_current_team_id();
6150 
6151 	AddressSpaceReadLocker locker(team);
6152 	if (!locker.IsLocked())
6153 		return B_BAD_TEAM_ID;
6154 
6155 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6156 	if (area == NULL) {
6157 		nextBase = (addr_t)-1;
6158 		return B_ENTRY_NOT_FOUND;
6159 	}
6160 
6161 	fill_area_info(area, info, size);
6162 	*cookie = (ssize_t)(area->Base() + 1);
6163 
6164 	return B_OK;
6165 }
6166 
6167 
6168 status_t
6169 set_area_protection(area_id area, uint32 newProtection)
6170 {
6171 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6172 		newProtection, true);
6173 }
6174 
6175 
6176 status_t
6177 resize_area(area_id areaID, size_t newSize)
6178 {
6179 	return vm_resize_area(areaID, newSize, true);
6180 }
6181 
6182 
6183 /*!	Transfers the specified area to a new team. The caller must be the owner
6184 	of the area.
6185 */
6186 area_id
6187 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6188 	bool kernel)
6189 {
6190 	area_info info;
6191 	status_t status = get_area_info(id, &info);
6192 	if (status != B_OK)
6193 		return status;
6194 
6195 	if (info.team != thread_get_current_thread()->team->id)
6196 		return B_PERMISSION_DENIED;
6197 
6198 	// We need to mark the area cloneable so the following operations work.
6199 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6200 	if (status != B_OK)
6201 		return status;
6202 
6203 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6204 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6205 	if (clonedArea < 0)
6206 		return clonedArea;
6207 
6208 	status = vm_delete_area(info.team, id, kernel);
6209 	if (status != B_OK) {
6210 		vm_delete_area(target, clonedArea, kernel);
6211 		return status;
6212 	}
6213 
6214 	// Now we can reset the protection to whatever it was before.
6215 	set_area_protection(clonedArea, info.protection);
6216 
6217 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6218 
6219 	return clonedArea;
6220 }
6221 
6222 
6223 extern "C" area_id
6224 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6225 	size_t numBytes, uint32 addressSpec, uint32 protection,
6226 	void** _virtualAddress)
6227 {
6228 	if (!arch_vm_supports_protection(protection))
6229 		return B_NOT_SUPPORTED;
6230 
6231 	fix_protection(&protection);
6232 
6233 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6234 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6235 		false);
6236 }
6237 
6238 
6239 area_id
6240 clone_area(const char* name, void** _address, uint32 addressSpec,
6241 	uint32 protection, area_id source)
6242 {
6243 	if ((protection & B_KERNEL_PROTECTION) == 0)
6244 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6245 
6246 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6247 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6248 }
6249 
6250 
6251 area_id
6252 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6253 	uint32 protection, uint32 flags, uint32 guardSize,
6254 	const virtual_address_restrictions* virtualAddressRestrictions,
6255 	const physical_address_restrictions* physicalAddressRestrictions,
6256 	void** _address)
6257 {
6258 	fix_protection(&protection);
6259 
6260 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6261 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6262 		true, _address);
6263 }
6264 
6265 
6266 extern "C" area_id
6267 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6268 	size_t size, uint32 lock, uint32 protection)
6269 {
6270 	fix_protection(&protection);
6271 
6272 	virtual_address_restrictions virtualRestrictions = {};
6273 	virtualRestrictions.address = *_address;
6274 	virtualRestrictions.address_specification = addressSpec;
6275 	physical_address_restrictions physicalRestrictions = {};
6276 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6277 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6278 		true, _address);
6279 }
6280 
6281 
6282 status_t
6283 delete_area(area_id area)
6284 {
6285 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6286 }
6287 
6288 
6289 //	#pragma mark - Userland syscalls
6290 
6291 
6292 status_t
6293 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6294 	addr_t size)
6295 {
6296 	// filter out some unavailable values (for userland)
6297 	switch (addressSpec) {
6298 		case B_ANY_KERNEL_ADDRESS:
6299 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6300 			return B_BAD_VALUE;
6301 	}
6302 
6303 	addr_t address;
6304 
6305 	if (!IS_USER_ADDRESS(userAddress)
6306 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6307 		return B_BAD_ADDRESS;
6308 
6309 	status_t status = vm_reserve_address_range(
6310 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6311 		RESERVED_AVOID_BASE);
6312 	if (status != B_OK)
6313 		return status;
6314 
6315 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6316 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6317 			(void*)address, size);
6318 		return B_BAD_ADDRESS;
6319 	}
6320 
6321 	return B_OK;
6322 }
6323 
6324 
6325 status_t
6326 _user_unreserve_address_range(addr_t address, addr_t size)
6327 {
6328 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6329 		(void*)address, size);
6330 }
6331 
6332 
6333 area_id
6334 _user_area_for(void* address)
6335 {
6336 	return vm_area_for((addr_t)address, false);
6337 }
6338 
6339 
6340 area_id
6341 _user_find_area(const char* userName)
6342 {
6343 	char name[B_OS_NAME_LENGTH];
6344 
6345 	if (!IS_USER_ADDRESS(userName)
6346 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6347 		return B_BAD_ADDRESS;
6348 
6349 	return find_area(name);
6350 }
6351 
6352 
6353 status_t
6354 _user_get_area_info(area_id area, area_info* userInfo)
6355 {
6356 	if (!IS_USER_ADDRESS(userInfo))
6357 		return B_BAD_ADDRESS;
6358 
6359 	area_info info;
6360 	status_t status = get_area_info(area, &info);
6361 	if (status < B_OK)
6362 		return status;
6363 
6364 	// TODO: do we want to prevent userland from seeing kernel protections?
6365 	//info.protection &= B_USER_PROTECTION;
6366 
6367 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6368 		return B_BAD_ADDRESS;
6369 
6370 	return status;
6371 }
6372 
6373 
6374 status_t
6375 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6376 {
6377 	ssize_t cookie;
6378 
6379 	if (!IS_USER_ADDRESS(userCookie)
6380 		|| !IS_USER_ADDRESS(userInfo)
6381 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6382 		return B_BAD_ADDRESS;
6383 
6384 	area_info info;
6385 	status_t status = _get_next_area_info(team, &cookie, &info,
6386 		sizeof(area_info));
6387 	if (status != B_OK)
6388 		return status;
6389 
6390 	//info.protection &= B_USER_PROTECTION;
6391 
6392 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6393 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6394 		return B_BAD_ADDRESS;
6395 
6396 	return status;
6397 }
6398 
6399 
6400 status_t
6401 _user_set_area_protection(area_id area, uint32 newProtection)
6402 {
6403 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6404 		return B_BAD_VALUE;
6405 
6406 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6407 		newProtection, false);
6408 }
6409 
6410 
6411 status_t
6412 _user_resize_area(area_id area, size_t newSize)
6413 {
6414 	// TODO: Since we restrict deleting of areas to those owned by the team,
6415 	// we should also do that for resizing (check other functions, too).
6416 	return vm_resize_area(area, newSize, false);
6417 }
6418 
6419 
6420 area_id
6421 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6422 	team_id target)
6423 {
6424 	// filter out some unavailable values (for userland)
6425 	switch (addressSpec) {
6426 		case B_ANY_KERNEL_ADDRESS:
6427 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6428 			return B_BAD_VALUE;
6429 	}
6430 
6431 	void* address;
6432 	if (!IS_USER_ADDRESS(userAddress)
6433 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6434 		return B_BAD_ADDRESS;
6435 
6436 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6437 	if (newArea < B_OK)
6438 		return newArea;
6439 
6440 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6441 		return B_BAD_ADDRESS;
6442 
6443 	return newArea;
6444 }
6445 
6446 
6447 area_id
6448 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6449 	uint32 protection, area_id sourceArea)
6450 {
6451 	char name[B_OS_NAME_LENGTH];
6452 	void* address;
6453 
6454 	// filter out some unavailable values (for userland)
6455 	switch (addressSpec) {
6456 		case B_ANY_KERNEL_ADDRESS:
6457 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6458 			return B_BAD_VALUE;
6459 	}
6460 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6461 		return B_BAD_VALUE;
6462 
6463 	if (!IS_USER_ADDRESS(userName)
6464 		|| !IS_USER_ADDRESS(userAddress)
6465 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6466 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6467 		return B_BAD_ADDRESS;
6468 
6469 	fix_protection(&protection);
6470 
6471 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6472 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6473 		false);
6474 	if (clonedArea < B_OK)
6475 		return clonedArea;
6476 
6477 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6478 		delete_area(clonedArea);
6479 		return B_BAD_ADDRESS;
6480 	}
6481 
6482 	return clonedArea;
6483 }
6484 
6485 
6486 area_id
6487 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6488 	size_t size, uint32 lock, uint32 protection)
6489 {
6490 	char name[B_OS_NAME_LENGTH];
6491 	void* address;
6492 
6493 	// filter out some unavailable values (for userland)
6494 	switch (addressSpec) {
6495 		case B_ANY_KERNEL_ADDRESS:
6496 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6497 			return B_BAD_VALUE;
6498 	}
6499 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6500 		return B_BAD_VALUE;
6501 
6502 	if (!IS_USER_ADDRESS(userName)
6503 		|| !IS_USER_ADDRESS(userAddress)
6504 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6505 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6506 		return B_BAD_ADDRESS;
6507 
6508 	if (addressSpec == B_EXACT_ADDRESS
6509 		&& IS_KERNEL_ADDRESS(address))
6510 		return B_BAD_VALUE;
6511 
6512 	if (addressSpec == B_ANY_ADDRESS)
6513 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6514 	if (addressSpec == B_BASE_ADDRESS)
6515 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6516 
6517 	fix_protection(&protection);
6518 
6519 	virtual_address_restrictions virtualRestrictions = {};
6520 	virtualRestrictions.address = address;
6521 	virtualRestrictions.address_specification = addressSpec;
6522 	physical_address_restrictions physicalRestrictions = {};
6523 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6524 		size, lock, protection, 0, 0, &virtualRestrictions,
6525 		&physicalRestrictions, false, &address);
6526 
6527 	if (area >= B_OK
6528 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6529 		delete_area(area);
6530 		return B_BAD_ADDRESS;
6531 	}
6532 
6533 	return area;
6534 }
6535 
6536 
6537 status_t
6538 _user_delete_area(area_id area)
6539 {
6540 	// Unlike the BeOS implementation, you can now only delete areas
6541 	// that you have created yourself from userland.
6542 	// The documentation to delete_area() explicitly states that this
6543 	// will be restricted in the future, and so it will.
6544 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6545 }
6546 
6547 
6548 // TODO: create a BeOS style call for this!
6549 
6550 area_id
6551 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6552 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6553 	int fd, off_t offset)
6554 {
6555 	char name[B_OS_NAME_LENGTH];
6556 	void* address;
6557 	area_id area;
6558 
6559 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6560 		return B_BAD_VALUE;
6561 
6562 	fix_protection(&protection);
6563 
6564 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6565 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6566 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6567 		return B_BAD_ADDRESS;
6568 
6569 	if (addressSpec == B_EXACT_ADDRESS) {
6570 		if ((addr_t)address + size < (addr_t)address
6571 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6572 			return B_BAD_VALUE;
6573 		}
6574 		if (!IS_USER_ADDRESS(address)
6575 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6576 			return B_BAD_ADDRESS;
6577 		}
6578 	}
6579 
6580 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6581 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6582 		false);
6583 	if (area < B_OK)
6584 		return area;
6585 
6586 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6587 		return B_BAD_ADDRESS;
6588 
6589 	return area;
6590 }
6591 
6592 
6593 status_t
6594 _user_unmap_memory(void* _address, size_t size)
6595 {
6596 	addr_t address = (addr_t)_address;
6597 
6598 	// check params
6599 	if (size == 0 || (addr_t)address + size < (addr_t)address
6600 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6601 		return B_BAD_VALUE;
6602 	}
6603 
6604 	if (!IS_USER_ADDRESS(address)
6605 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6606 		return B_BAD_ADDRESS;
6607 	}
6608 
6609 	// Write lock the address space and ensure the address range is not wired.
6610 	AddressSpaceWriteLocker locker;
6611 	do {
6612 		status_t status = locker.SetTo(team_get_current_team_id());
6613 		if (status != B_OK)
6614 			return status;
6615 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6616 			size, &locker));
6617 
6618 	// unmap
6619 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6620 }
6621 
6622 
6623 status_t
6624 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6625 {
6626 	// check address range
6627 	addr_t address = (addr_t)_address;
6628 	size = PAGE_ALIGN(size);
6629 
6630 	if ((address % B_PAGE_SIZE) != 0)
6631 		return B_BAD_VALUE;
6632 	if (!validate_user_memory_range(_address, size)) {
6633 		// weird error code required by POSIX
6634 		return ENOMEM;
6635 	}
6636 
6637 	// extend and check protection
6638 	if ((protection & ~B_USER_PROTECTION) != 0)
6639 		return B_BAD_VALUE;
6640 
6641 	fix_protection(&protection);
6642 
6643 	// We need to write lock the address space, since we're going to play with
6644 	// the areas. Also make sure that none of the areas is wired and that we're
6645 	// actually allowed to change the protection.
6646 	AddressSpaceWriteLocker locker;
6647 
6648 	bool restart;
6649 	do {
6650 		restart = false;
6651 
6652 		status_t status = locker.SetTo(team_get_current_team_id());
6653 		if (status != B_OK)
6654 			return status;
6655 
6656 		// First round: Check whether the whole range is covered by areas and we
6657 		// are allowed to modify them.
6658 		addr_t currentAddress = address;
6659 		size_t sizeLeft = size;
6660 		while (sizeLeft > 0) {
6661 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6662 			if (area == NULL)
6663 				return B_NO_MEMORY;
6664 
6665 			if ((area->protection & B_KERNEL_AREA) != 0)
6666 				return B_NOT_ALLOWED;
6667 			if (area->protection_max != 0
6668 				&& (protection & area->protection_max) != protection) {
6669 				return B_NOT_ALLOWED;
6670 			}
6671 
6672 			addr_t offset = currentAddress - area->Base();
6673 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6674 
6675 			AreaCacheLocker cacheLocker(area);
6676 
6677 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6678 					&locker, &cacheLocker)) {
6679 				restart = true;
6680 				break;
6681 			}
6682 
6683 			cacheLocker.Unlock();
6684 
6685 			currentAddress += rangeSize;
6686 			sizeLeft -= rangeSize;
6687 		}
6688 	} while (restart);
6689 
6690 	// Second round: If the protections differ from that of the area, create a
6691 	// page protection array and re-map mapped pages.
6692 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6693 	addr_t currentAddress = address;
6694 	size_t sizeLeft = size;
6695 	while (sizeLeft > 0) {
6696 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6697 		if (area == NULL)
6698 			return B_NO_MEMORY;
6699 
6700 		addr_t offset = currentAddress - area->Base();
6701 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6702 
6703 		currentAddress += rangeSize;
6704 		sizeLeft -= rangeSize;
6705 
6706 		if (area->page_protections == NULL) {
6707 			if (area->protection == protection)
6708 				continue;
6709 			if (offset == 0 && rangeSize == area->Size()) {
6710 				status_t status = vm_set_area_protection(area->address_space->ID(),
6711 					area->id, protection, false);
6712 				if (status != B_OK)
6713 					return status;
6714 				continue;
6715 			}
6716 
6717 			status_t status = allocate_area_page_protections(area);
6718 			if (status != B_OK)
6719 				return status;
6720 		}
6721 
6722 		// We need to lock the complete cache chain, since we potentially unmap
6723 		// pages of lower caches.
6724 		VMCache* topCache = vm_area_get_locked_cache(area);
6725 		VMCacheChainLocker cacheChainLocker(topCache);
6726 		cacheChainLocker.LockAllSourceCaches();
6727 
6728 		for (addr_t pageAddress = area->Base() + offset;
6729 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6730 			map->Lock();
6731 
6732 			set_area_page_protection(area, pageAddress, protection);
6733 
6734 			phys_addr_t physicalAddress;
6735 			uint32 flags;
6736 
6737 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6738 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6739 				map->Unlock();
6740 				continue;
6741 			}
6742 
6743 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6744 			if (page == NULL) {
6745 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6746 					"\n", area, physicalAddress);
6747 				map->Unlock();
6748 				return B_ERROR;
6749 			}
6750 
6751 			// If the page is not in the topmost cache and write access is
6752 			// requested, we have to unmap it. Otherwise we can re-map it with
6753 			// the new protection.
6754 			bool unmapPage = page->Cache() != topCache
6755 				&& (protection & B_WRITE_AREA) != 0;
6756 
6757 			if (!unmapPage)
6758 				map->ProtectPage(area, pageAddress, protection);
6759 
6760 			map->Unlock();
6761 
6762 			if (unmapPage) {
6763 				DEBUG_PAGE_ACCESS_START(page);
6764 				unmap_page(area, pageAddress);
6765 				DEBUG_PAGE_ACCESS_END(page);
6766 			}
6767 		}
6768 	}
6769 
6770 	return B_OK;
6771 }
6772 
6773 
6774 status_t
6775 _user_sync_memory(void* _address, size_t size, uint32 flags)
6776 {
6777 	addr_t address = (addr_t)_address;
6778 	size = PAGE_ALIGN(size);
6779 
6780 	// check params
6781 	if ((address % B_PAGE_SIZE) != 0)
6782 		return B_BAD_VALUE;
6783 	if (!validate_user_memory_range(_address, size)) {
6784 		// weird error code required by POSIX
6785 		return ENOMEM;
6786 	}
6787 
6788 	bool writeSync = (flags & MS_SYNC) != 0;
6789 	bool writeAsync = (flags & MS_ASYNC) != 0;
6790 	if (writeSync && writeAsync)
6791 		return B_BAD_VALUE;
6792 
6793 	if (size == 0 || (!writeSync && !writeAsync))
6794 		return B_OK;
6795 
6796 	// iterate through the range and sync all concerned areas
6797 	while (size > 0) {
6798 		// read lock the address space
6799 		AddressSpaceReadLocker locker;
6800 		status_t error = locker.SetTo(team_get_current_team_id());
6801 		if (error != B_OK)
6802 			return error;
6803 
6804 		// get the first area
6805 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6806 		if (area == NULL)
6807 			return B_NO_MEMORY;
6808 
6809 		uint32 offset = address - area->Base();
6810 		size_t rangeSize = min_c(area->Size() - offset, size);
6811 		offset += area->cache_offset;
6812 
6813 		// lock the cache
6814 		AreaCacheLocker cacheLocker(area);
6815 		if (!cacheLocker)
6816 			return B_BAD_VALUE;
6817 		VMCache* cache = area->cache;
6818 
6819 		locker.Unlock();
6820 
6821 		uint32 firstPage = offset >> PAGE_SHIFT;
6822 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6823 
6824 		// write the pages
6825 		if (cache->type == CACHE_TYPE_VNODE) {
6826 			if (writeSync) {
6827 				// synchronous
6828 				error = vm_page_write_modified_page_range(cache, firstPage,
6829 					endPage);
6830 				if (error != B_OK)
6831 					return error;
6832 			} else {
6833 				// asynchronous
6834 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6835 				// TODO: This is probably not quite what is supposed to happen.
6836 				// Especially when a lot has to be written, it might take ages
6837 				// until it really hits the disk.
6838 			}
6839 		}
6840 
6841 		address += rangeSize;
6842 		size -= rangeSize;
6843 	}
6844 
6845 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6846 	// synchronize multiple mappings of the same file. In our VM they never get
6847 	// out of sync, though, so we don't have to do anything.
6848 
6849 	return B_OK;
6850 }
6851 
6852 
6853 status_t
6854 _user_memory_advice(void* _address, size_t size, uint32 advice)
6855 {
6856 	addr_t address = (addr_t)_address;
6857 	if ((address % B_PAGE_SIZE) != 0)
6858 		return B_BAD_VALUE;
6859 
6860 	size = PAGE_ALIGN(size);
6861 	if (!validate_user_memory_range(_address, size)) {
6862 		// weird error code required by POSIX
6863 		return B_NO_MEMORY;
6864 	}
6865 
6866 	switch (advice) {
6867 		case MADV_NORMAL:
6868 		case MADV_SEQUENTIAL:
6869 		case MADV_RANDOM:
6870 		case MADV_WILLNEED:
6871 		case MADV_DONTNEED:
6872 			// TODO: Implement!
6873 			break;
6874 
6875 		case MADV_FREE:
6876 		{
6877 			AddressSpaceWriteLocker locker;
6878 			do {
6879 				status_t status = locker.SetTo(team_get_current_team_id());
6880 				if (status != B_OK)
6881 					return status;
6882 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6883 					address, size, &locker));
6884 
6885 			discard_address_range(locker.AddressSpace(), address, size, false);
6886 			break;
6887 		}
6888 
6889 		default:
6890 			return B_BAD_VALUE;
6891 	}
6892 
6893 	return B_OK;
6894 }
6895 
6896 
6897 status_t
6898 _user_get_memory_properties(team_id teamID, const void* address,
6899 	uint32* _protected, uint32* _lock)
6900 {
6901 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6902 		return B_BAD_ADDRESS;
6903 
6904 	AddressSpaceReadLocker locker;
6905 	status_t error = locker.SetTo(teamID);
6906 	if (error != B_OK)
6907 		return error;
6908 
6909 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6910 	if (area == NULL)
6911 		return B_NO_MEMORY;
6912 
6913 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6914 	uint32 wiring = area->wiring;
6915 
6916 	locker.Unlock();
6917 
6918 	error = user_memcpy(_protected, &protection, sizeof(protection));
6919 	if (error != B_OK)
6920 		return error;
6921 
6922 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6923 
6924 	return error;
6925 }
6926 
6927 
6928 // An ordered list of non-overlapping ranges to track mlock/munlock locking.
6929 // It is allowed to call mlock/munlock in unbalanced ways (lock a range
6930 // multiple times, unlock a part of it, lock several consecutive ranges and
6931 // unlock them in one go, etc). However the low level lock_memory and
6932 // unlock_memory calls require the locks/unlocks to be balanced (you lock a
6933 // fixed range, and then unlock exactly the same range). This list allows to
6934 // keep track of what was locked exactly so we can unlock the correct things.
6935 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> {
6936 	addr_t start;
6937 	addr_t end;
6938 
6939 	status_t LockMemory()
6940 	{
6941 		return lock_memory((void*)start, end - start, 0);
6942 	}
6943 
6944 	status_t UnlockMemory()
6945 	{
6946 		return unlock_memory((void*)start, end - start, 0);
6947 	}
6948 
6949 	status_t Move(addr_t start, addr_t end)
6950 	{
6951 		status_t result = lock_memory((void*)start, end - start, 0);
6952 		if (result != B_OK)
6953 			return result;
6954 
6955 		result = UnlockMemory();
6956 
6957 		if (result != B_OK) {
6958 			// What can we do if the unlock fails?
6959 			panic("Failed to unlock memory: %s", strerror(result));
6960 			return result;
6961 		}
6962 
6963 		this->start = start;
6964 		this->end = end;
6965 
6966 		return B_OK;
6967 	}
6968 };
6969 
6970 
6971 status_t
6972 _user_mlock(const void* _address, size_t size)
6973 {
6974 	// check address range
6975 	addr_t address = (addr_t)_address;
6976 	size = PAGE_ALIGN(size);
6977 
6978 	if ((address % B_PAGE_SIZE) != 0)
6979 		return EINVAL;
6980 	if (!validate_user_memory_range(_address, size))
6981 		return EINVAL;
6982 
6983 	addr_t endAddress = address + size;
6984 
6985 	// Pre-allocate a linked list element we may need (it's simpler to do it
6986 	// now than run out of memory in the midle of changing things)
6987 	LockedPages* newRange = new(std::nothrow) LockedPages();
6988 	if (newRange == NULL)
6989 		return ENOMEM;
6990 	ObjectDeleter<LockedPages> newRangeDeleter(newRange);
6991 
6992 	// Get and lock the team
6993 	Team* team = thread_get_current_thread()->team;
6994 	TeamLocker teamLocker(team);
6995 	teamLocker.Lock();
6996 
6997 	status_t error = B_OK;
6998 	LockedPagesList* lockedPages = &team->locked_pages_list;
6999 
7000 	// Locate the first locked range possibly overlapping ours
7001 	LockedPages* currentRange = lockedPages->Head();
7002 	while (currentRange != NULL && currentRange->end <= address)
7003 		currentRange = lockedPages->GetNext(currentRange);
7004 
7005 	if (currentRange == NULL || currentRange->start >= endAddress) {
7006 		// No existing range is overlapping with ours. We can just lock our
7007 		// range and stop here.
7008 		newRange->start = address;
7009 		newRange->end = endAddress;
7010 		error = newRange->LockMemory();
7011 		if (error != B_OK)
7012 			return error;
7013 
7014 		lockedPages->InsertBefore(currentRange, newRange);
7015 		newRangeDeleter.Detach();
7016 		return B_OK;
7017 	}
7018 
7019 	// We get here when there is at least one existing overlapping range.
7020 
7021 	if (currentRange->start <= address) {
7022 		if (currentRange->end >= endAddress) {
7023 			// An existing range is already fully covering the pages we need to
7024 			// lock. Nothing to do then.
7025 			return B_OK;
7026 		} else {
7027 			// An existing range covers the start of the area we want to lock.
7028 			// Advance our start address to avoid it.
7029 			address = currentRange->end;
7030 
7031 			// Move on to the next range for the next step
7032 			currentRange = lockedPages->GetNext(currentRange);
7033 		}
7034 	}
7035 
7036 	// First, lock the new range
7037 	newRange->start = address;
7038 	newRange->end = endAddress;
7039 	error = newRange->LockMemory();
7040 	if (error != B_OK)
7041 		return error;
7042 
7043 	// Unlock all ranges fully overlapping with the area we need to lock
7044 	while (currentRange != NULL && currentRange->end < endAddress) {
7045 		// The existing range is fully contained inside the new one we're
7046 		// trying to lock. Delete/unlock it, and replace it with a new one
7047 		// (this limits fragmentation of the range list, and is simpler to
7048 		// manage)
7049 		error = currentRange->UnlockMemory();
7050 		if (error != B_OK) {
7051 			panic("Failed to unlock a memory range: %s", strerror(error));
7052 			newRange->UnlockMemory();
7053 			return error;
7054 		}
7055 		LockedPages* temp = currentRange;
7056 		currentRange = lockedPages->GetNext(currentRange);
7057 		lockedPages->Remove(temp);
7058 		delete temp;
7059 	}
7060 
7061 	if (currentRange != NULL) {
7062 		// One last range may cover the end of the area we're trying to lock
7063 
7064 		if (currentRange->start == address) {
7065 			// In case two overlapping ranges (one at the start and the other
7066 			// at the end) already cover the area we're after, there's nothing
7067 			// more to do. So we destroy our new extra allocation
7068 			error = newRange->UnlockMemory();
7069 			return error;
7070 		}
7071 
7072 		if (currentRange->start < endAddress) {
7073 			// Make sure the last range is not overlapping, by moving its start
7074 			error = currentRange->Move(endAddress, currentRange->end);
7075 			if (error != B_OK) {
7076 				panic("Failed to move a memory range: %s", strerror(error));
7077 				newRange->UnlockMemory();
7078 				return error;
7079 			}
7080 		}
7081 	}
7082 
7083 	// Finally, store the new range in the locked list
7084 	lockedPages->InsertBefore(currentRange, newRange);
7085 	newRangeDeleter.Detach();
7086 	return B_OK;
7087 }
7088 
7089 
7090 status_t
7091 _user_munlock(const void* _address, size_t size)
7092 {
7093 	// check address range
7094 	addr_t address = (addr_t)_address;
7095 	size = PAGE_ALIGN(size);
7096 
7097 	if ((address % B_PAGE_SIZE) != 0)
7098 		return EINVAL;
7099 	if (!validate_user_memory_range(_address, size))
7100 		return EINVAL;
7101 
7102 	addr_t endAddress = address + size;
7103 
7104 	// Get and lock the team
7105 	Team* team = thread_get_current_thread()->team;
7106 	TeamLocker teamLocker(team);
7107 	teamLocker.Lock();
7108 	LockedPagesList* lockedPages = &team->locked_pages_list;
7109 
7110 	status_t error = B_OK;
7111 
7112 	// Locate the first locked range possibly overlapping ours
7113 	LockedPages* currentRange = lockedPages->Head();
7114 	while (currentRange != NULL && currentRange->end <= address)
7115 		currentRange = lockedPages->GetNext(currentRange);
7116 
7117 	if (currentRange == NULL || currentRange->start >= endAddress) {
7118 		// No range is intersecting, nothing to unlock
7119 		return B_OK;
7120 	}
7121 
7122 	if (currentRange->start < address) {
7123 		if (currentRange->end > endAddress) {
7124 			// There is a range fully covering the area we want to unlock,
7125 			// and it extends on both sides. We need to split it in two
7126 			LockedPages* newRange = new(std::nothrow) LockedPages();
7127 			if (newRange == NULL)
7128 				return ENOMEM;
7129 
7130 			newRange->start = endAddress;
7131 			newRange->end = currentRange->end;
7132 
7133 			error = newRange->LockMemory();
7134 			if (error != B_OK) {
7135 				delete newRange;
7136 				return error;
7137 			}
7138 
7139 			error = currentRange->Move(currentRange->start, address);
7140 			if (error != B_OK) {
7141 				delete newRange;
7142 				return error;
7143 			}
7144 
7145 			lockedPages->InsertAfter(currentRange, newRange);
7146 			return B_OK;
7147 		} else {
7148 			// There is a range that overlaps and extends before the one we
7149 			// want to unlock, we need to shrink it
7150 			error = currentRange->Move(currentRange->start, address);
7151 			if (error != B_OK)
7152 				return error;
7153 		}
7154 	}
7155 
7156 	while (currentRange != NULL && currentRange->end <= endAddress) {
7157 		// Unlock all fully overlapping ranges
7158 		error = currentRange->UnlockMemory();
7159 		if (error != B_OK)
7160 			return error;
7161 		LockedPages* temp = currentRange;
7162 		currentRange = lockedPages->GetNext(currentRange);
7163 		lockedPages->Remove(temp);
7164 		delete temp;
7165 	}
7166 
7167 	// Finally split the last partially overlapping range if any
7168 	if (currentRange != NULL && currentRange->start < endAddress) {
7169 		error = currentRange->Move(endAddress, currentRange->end);
7170 		if (error != B_OK)
7171 			return error;
7172 	}
7173 
7174 	return B_OK;
7175 }
7176 
7177 
7178 // #pragma mark -- compatibility
7179 
7180 
7181 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7182 
7183 
7184 struct physical_entry_beos {
7185 	uint32	address;
7186 	uint32	size;
7187 };
7188 
7189 
7190 /*!	The physical_entry structure has changed. We need to translate it to the
7191 	old one.
7192 */
7193 extern "C" int32
7194 __get_memory_map_beos(const void* _address, size_t numBytes,
7195 	physical_entry_beos* table, int32 numEntries)
7196 {
7197 	if (numEntries <= 0)
7198 		return B_BAD_VALUE;
7199 
7200 	const uint8* address = (const uint8*)_address;
7201 
7202 	int32 count = 0;
7203 	while (numBytes > 0 && count < numEntries) {
7204 		physical_entry entry;
7205 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7206 		if (result < 0) {
7207 			if (result != B_BUFFER_OVERFLOW)
7208 				return result;
7209 		}
7210 
7211 		if (entry.address >= (phys_addr_t)1 << 32) {
7212 			panic("get_memory_map(): Address is greater 4 GB!");
7213 			return B_ERROR;
7214 		}
7215 
7216 		table[count].address = entry.address;
7217 		table[count++].size = entry.size;
7218 
7219 		address += entry.size;
7220 		numBytes -= entry.size;
7221 	}
7222 
7223 	// null-terminate the table, if possible
7224 	if (count < numEntries) {
7225 		table[count].address = 0;
7226 		table[count].size = 0;
7227 	}
7228 
7229 	return B_OK;
7230 }
7231 
7232 
7233 /*!	The type of the \a physicalAddress parameter has changed from void* to
7234 	phys_addr_t.
7235 */
7236 extern "C" area_id
7237 __map_physical_memory_beos(const char* name, void* physicalAddress,
7238 	size_t numBytes, uint32 addressSpec, uint32 protection,
7239 	void** _virtualAddress)
7240 {
7241 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7242 		addressSpec, protection, _virtualAddress);
7243 }
7244 
7245 
7246 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7247 	we meddle with the \a lock parameter to force 32 bit.
7248 */
7249 extern "C" area_id
7250 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7251 	size_t size, uint32 lock, uint32 protection)
7252 {
7253 	switch (lock) {
7254 		case B_NO_LOCK:
7255 			break;
7256 		case B_FULL_LOCK:
7257 		case B_LAZY_LOCK:
7258 			lock = B_32_BIT_FULL_LOCK;
7259 			break;
7260 		case B_CONTIGUOUS:
7261 			lock = B_32_BIT_CONTIGUOUS;
7262 			break;
7263 	}
7264 
7265 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7266 		protection);
7267 }
7268 
7269 
7270 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7271 	"BASE");
7272 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7273 	"map_physical_memory@", "BASE");
7274 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7275 	"BASE");
7276 
7277 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7278 	"get_memory_map@@", "1_ALPHA3");
7279 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7280 	"map_physical_memory@@", "1_ALPHA3");
7281 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7282 	"1_ALPHA3");
7283 
7284 
7285 #else
7286 
7287 
7288 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7289 	"get_memory_map@@", "BASE");
7290 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7291 	"map_physical_memory@@", "BASE");
7292 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7293 	"BASE");
7294 
7295 
7296 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7297