xref: /haiku/src/system/kernel/vm/vm.cpp (revision 02354704729d38c3b078c696adc1bbbd33cbcf72)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
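
// AreaCacheLocker adopts an already locked cache reference, typically the one
// returned by vm_area_get_locked_cache(), and releases it again via
// vm_area_put_locked_cache() when it goes out of scope. Illustrative usage
// sketch (assuming "area" is a valid VMArea):
//
//	AreaCacheLocker cacheLocker(area);
//		// area's cache is now locked and referenced
//	// ... operate on the cache ...
//	// the cache is unlocked and the reference released at end of scope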
116 
117 
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligible for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
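
// The chain is locked starting from the (already locked) top cache and walking
// toward the bottom via LockSourceCache(); each newly locked source stores a
// pointer back to its consumer in its UserData() field. Unlock() then releases
// the locks in source -> consumer order, which keeps the locking order
// consistent when caches are merged.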
234 
235 } // namespace
236 
237 
238 // The memory reserve an allocation of a certain priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
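
// The table is indexed by the allocation priority (VM_PRIORITY_USER,
// VM_PRIORITY_SYSTEM, VM_PRIORITY_VIP), as the comments above indicate; VIP
// allocations have no reserve to respect and may use up memory completely.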
244 
245 
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
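
// Both helpers above keep gMappedPagesCount in sync: a page counts as mapped
// while it has at least one area mapping or a non-zero wired count, so the
// counter is only adjusted when IsMapped() changes across the increment or
// decrement.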
435 
436 
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreaHash::ReadLock();
450 
451 	VMArea* area = VMAreaHash::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreaHash::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
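
// Illustrative example of the nibble packing used above: page_protections
// stores two entries per byte, the even page index in the low nibble and the
// odd one in the high nibble. For pageIndex 5 the entry therefore lives in
// page_protections[2], and reading it back amounts to
// (page_protections[2] >> 4) & 0x0f.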
493 
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	// If this is a kernel area we translate the user flags to kernel flags.
509 	if (area->address_space == VMAddressSpace::Kernel()) {
510 		uint32 kernelProtection = 0;
511 		if ((protection & B_READ_AREA) != 0)
512 			kernelProtection |= B_KERNEL_READ_AREA;
513 		if ((protection & B_WRITE_AREA) != 0)
514 			kernelProtection |= B_KERNEL_WRITE_AREA;
515 
516 		return kernelProtection;
517 	}
518 
519 	return protection | B_KERNEL_READ_AREA
520 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
521 }
522 
523 
524 /*!	The caller must have reserved enough pages that the translation map
525 	implementation might need to map this page.
526 	The page's cache must be locked.
527 */
528 static status_t
529 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
530 	vm_page_reservation* reservation)
531 {
532 	VMTranslationMap* map = area->address_space->TranslationMap();
533 
534 	bool wasMapped = page->IsMapped();
535 
536 	if (area->wiring == B_NO_LOCK) {
537 		DEBUG_PAGE_ACCESS_CHECK(page);
538 
539 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
540 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
541 			gPageMappingsObjectCache,
542 			CACHE_DONT_WAIT_FOR_MEMORY
543 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
544 		if (mapping == NULL)
545 			return B_NO_MEMORY;
546 
547 		mapping->page = page;
548 		mapping->area = area;
549 
550 		map->Lock();
551 
552 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
553 			area->MemoryType(), reservation);
554 
555 		// insert mapping into lists
556 		if (!page->IsMapped())
557 			atomic_add(&gMappedPagesCount, 1);
558 
559 		page->mappings.Add(mapping);
560 		area->mappings.Add(mapping);
561 
562 		map->Unlock();
563 	} else {
564 		DEBUG_PAGE_ACCESS_CHECK(page);
565 
566 		map->Lock();
567 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
568 			area->MemoryType(), reservation);
569 		map->Unlock();
570 
571 		increment_page_wired_count(page);
572 	}
573 
574 	if (!wasMapped) {
575 		// The page is mapped now, so we must not remain in the cached queue.
576 		// It also makes sense to move it from the inactive to the active, since
577 		// otherwise the page daemon wouldn't come to keep track of it (in idle
578 		// mode) -- if the page isn't touched, it will be deactivated after a
579 		// full iteration through the queue at the latest.
580 		if (page->State() == PAGE_STATE_CACHED
581 				|| page->State() == PAGE_STATE_INACTIVE) {
582 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
583 		}
584 	}
585 
586 	return B_OK;
587 }
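
// In short: for B_NO_LOCK areas map_page() allocates a vm_page_mapping from
// gPageMappingsObjectCache and links it into both the page's and the area's
// mapping lists, so the page daemon can later find and unmap the page; for all
// other wiring types no mapping object is needed and only the page's wired
// count is bumped.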
588 
589 
590 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
591 	page's cache.
592 */
593 static inline bool
594 unmap_page(VMArea* area, addr_t virtualAddress)
595 {
596 	return area->address_space->TranslationMap()->UnmapPage(area,
597 		virtualAddress, true);
598 }
599 
600 
601 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
602 	mapped pages' caches.
603 */
604 static inline void
605 unmap_pages(VMArea* area, addr_t base, size_t size)
606 {
607 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
608 }
609 
610 
611 static inline bool
612 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
613 {
614 	if (address < area->Base()) {
615 		offset = area->Base() - address;
616 		if (offset >= size)
617 			return false;
618 
619 		address = area->Base();
620 		size -= offset;
621 		offset = 0;
622 		if (size > area->Size())
623 			size = area->Size();
624 
625 		return true;
626 	}
627 
628 	offset = address - area->Base();
629 	if (offset >= area->Size())
630 		return false;
631 
632 	if (size >= area->Size() - offset)
633 		size = area->Size() - offset;
634 
635 	return true;
636 }
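
// Worked example (illustrative): for an area at base 0x1000 with size 0x4000,
// a request for address 0x0 and size 0x2000 is clipped to address 0x1000,
// size 0x1000, offset 0; a request for address 0x2000 and size 0x4000 yields
// offset 0x1000 and size 0x3000 (clamped to the end of the area).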
637 
638 
639 /*!	Cuts a piece out of an area. If the given cut range covers the complete
640 	area, it is deleted. If it covers the beginning or the end, the area is
641 	resized accordingly. If the range covers some part in the middle of the
642 	area, it is split in two; in this case the second area is returned via
643 	\a _secondArea (the variable is left untouched in the other cases).
644 	The address space must be write locked.
645 	The caller must ensure that no part of the given range is wired.
646 */
647 static status_t
648 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
649 	addr_t size, VMArea** _secondArea, bool kernel)
650 {
651 	addr_t offset;
652 	if (!intersect_area(area, address, size, offset))
653 		return B_OK;
654 
655 	// Is the area fully covered?
656 	if (address == area->Base() && size == area->Size()) {
657 		delete_area(addressSpace, area, false);
658 		return B_OK;
659 	}
660 
661 	int priority;
662 	uint32 allocationFlags;
663 	if (addressSpace == VMAddressSpace::Kernel()) {
664 		priority = VM_PRIORITY_SYSTEM;
665 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
666 			| HEAP_DONT_LOCK_KERNEL_SPACE;
667 	} else {
668 		priority = VM_PRIORITY_USER;
669 		allocationFlags = 0;
670 	}
671 
672 	VMCache* cache = vm_area_get_locked_cache(area);
673 	VMCacheChainLocker cacheChainLocker(cache);
674 	cacheChainLocker.LockAllSourceCaches();
675 
676 	// If no one else uses the area's cache and it's an anonymous cache, we can
677 	// resize or split it, too.
678 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
679 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
680 
681 	// Cut the end only?
682 	if (offset > 0 && size == area->Size() - offset) {
683 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
684 			allocationFlags);
685 		if (error != B_OK)
686 			return error;
687 
688 		// unmap pages
689 		unmap_pages(area, address, size);
690 
691 		if (onlyCacheUser) {
692 			// Since VMCache::Resize() can temporarily drop the lock, we must
693 			// unlock all lower caches to prevent locking order inversion.
694 			cacheChainLocker.Unlock(cache);
695 			cache->Resize(cache->virtual_base + offset, priority);
696 			cache->ReleaseRefAndUnlock();
697 		}
698 
699 		return B_OK;
700 	}
701 
702 	// Cut the beginning only?
703 	if (area->Base() == address) {
704 		// resize the area
705 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
706 			allocationFlags);
707 		if (error != B_OK)
708 			return error;
709 
710 		// unmap pages
711 		unmap_pages(area, address, size);
712 
713 		if (onlyCacheUser) {
714 			// Since VMCache::Rebase() can temporarily drop the lock, we must
715 			// unlock all lower caches to prevent locking order inversion.
716 			cacheChainLocker.Unlock(cache);
717 			cache->Rebase(cache->virtual_base + size, priority);
718 			cache->ReleaseRefAndUnlock();
719 		}
720 		area->cache_offset += size;
721 
722 		return B_OK;
723 	}
724 
725 	// The tough part -- cut a piece out of the middle of the area.
726 	// We do that by shrinking the area to the beginning section and creating a
727 	// new area for the end section.
728 	addr_t firstNewSize = offset;
729 	addr_t secondBase = address + size;
730 	addr_t secondSize = area->Size() - offset - size;
731 
732 	// unmap pages
733 	unmap_pages(area, address, area->Size() - firstNewSize);
734 
735 	// resize the area
736 	addr_t oldSize = area->Size();
737 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
738 		allocationFlags);
739 	if (error != B_OK)
740 		return error;
741 
742 	virtual_address_restrictions addressRestrictions = {};
743 	addressRestrictions.address = (void*)secondBase;
744 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
745 	VMArea* secondArea;
746 
747 	if (onlyCacheUser) {
748 		// Create a new cache for the second area.
749 		VMCache* secondCache;
750 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
751 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
752 		if (error != B_OK) {
753 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
754 			return error;
755 		}
756 
757 		secondCache->Lock();
758 		secondCache->temporary = cache->temporary;
759 		secondCache->virtual_base = area->cache_offset;
760 		secondCache->virtual_end = area->cache_offset + secondSize;
761 
762 		// Transfer the concerned pages from the first cache.
763 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
764 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
765 			area->cache_offset);
766 
767 		if (error == B_OK) {
768 			// Since VMCache::Resize() can temporarily drop the lock, we must
769 			// unlock all lower caches to prevent locking order inversion.
770 			cacheChainLocker.Unlock(cache);
771 			cache->Resize(cache->virtual_base + firstNewSize, priority);
772 			// Don't unlock the cache yet because we might have to resize it
773 			// back.
774 
775 			// Map the second area.
776 			error = map_backing_store(addressSpace, secondCache,
777 				area->cache_offset, area->name, secondSize, area->wiring,
778 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
779 				&addressRestrictions, kernel, &secondArea, NULL);
780 		}
781 
782 		if (error != B_OK) {
783 			// Restore the original cache.
784 			cache->Resize(cache->virtual_base + oldSize, priority);
785 
786 			// Move the pages back.
787 			status_t readoptStatus = cache->Adopt(secondCache,
788 				area->cache_offset, secondSize, adoptOffset);
789 			if (readoptStatus != B_OK) {
790 				// Some (swap) pages have not been moved back and will be lost
791 				// once the second cache is deleted.
792 				panic("failed to restore cache range: %s",
793 					strerror(readoptStatus));
794 
795 				// TODO: Handle out of memory cases by freeing memory and
796 				// retrying.
797 			}
798 
799 			cache->ReleaseRefAndUnlock();
800 			secondCache->ReleaseRefAndUnlock();
801 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
802 			return error;
803 		}
804 
805 		// Now we can unlock it.
806 		cache->ReleaseRefAndUnlock();
807 		secondCache->Unlock();
808 	} else {
809 		error = map_backing_store(addressSpace, cache, area->cache_offset
810 			+ (secondBase - area->Base()),
811 			area->name, secondSize, area->wiring, area->protection,
812 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
813 			&addressRestrictions, kernel, &secondArea, NULL);
814 		if (error != B_OK) {
815 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
816 			return error;
817 		}
818 		// We need a cache reference for the new area.
819 		cache->AcquireRefLocked();
820 	}
821 
822 	if (_secondArea != NULL)
823 		*_secondArea = secondArea;
824 
825 	return B_OK;
826 }
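
// In the middle-cut case above, when the area is the sole user of an anonymous
// cache, the pages belonging to the tail are moved into a freshly created
// anonymous cache via VMCache::Adopt(), the original cache is shrunk with
// Resize(), and on failure the pages are adopted back and the area restored to
// its old size.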
827 
828 
829 /*!	Deletes or cuts all areas in the given address range.
830 	The address space must be write-locked.
831 	The caller must ensure that no part of the given range is wired.
832 */
833 static status_t
834 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
835 	bool kernel)
836 {
837 	size = PAGE_ALIGN(size);
838 
839 	// Check whether the caller is allowed to modify the concerned areas.
840 	if (!kernel) {
841 		for (VMAddressSpace::AreaRangeIterator it
842 				= addressSpace->GetAreaRangeIterator(address, size);
843 			VMArea* area = it.Next();) {
844 
845 			if ((area->protection & B_KERNEL_AREA) != 0) {
846 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
847 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
848 					team_get_current_team_id(), area->id, area->name);
849 				return B_NOT_ALLOWED;
850 			}
851 		}
852 	}
853 
854 	for (VMAddressSpace::AreaRangeIterator it
855 			= addressSpace->GetAreaRangeIterator(address, size);
856 		VMArea* area = it.Next();) {
857 
858 		status_t error = cut_area(addressSpace, area, address, size, NULL,
859 			kernel);
860 		if (error != B_OK)
861 			return error;
862 			// Failing after already messing with areas is ugly, but we
863 			// can't do anything about it.
864 	}
865 
866 	return B_OK;
867 }
868 
869 
870 static status_t
871 discard_area_range(VMArea* area, addr_t address, addr_t size)
872 {
873 	addr_t offset;
874 	if (!intersect_area(area, address, size, offset))
875 		return B_OK;
876 
877 	// If someone else uses the area's cache or it's not an anonymous cache, we
878 	// can't discard.
879 	VMCache* cache = vm_area_get_locked_cache(area);
880 	if (cache->areas != area || area->cache_next != NULL
881 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
882 		return B_OK;
883 	}
884 
885 	VMCacheChainLocker cacheChainLocker(cache);
886 	cacheChainLocker.LockAllSourceCaches();
887 
888 	unmap_pages(area, address, size);
889 
890 	// Since VMCache::Discard() can temporarily drop the lock, we must
891 	// unlock all lower caches to prevent locking order inversion.
892 	cacheChainLocker.Unlock(cache);
893 	cache->Discard(cache->virtual_base + offset, size);
894 	cache->ReleaseRefAndUnlock();
895 
896 	return B_OK;
897 }
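
// Like cut_area(), discarding is only attempted when the area is the sole user
// of an anonymous (CACHE_TYPE_RAM) cache; otherwise the range is silently left
// alone and B_OK is returned.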
898 
899 
900 static status_t
901 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
902 	bool kernel)
903 {
904 	for (VMAddressSpace::AreaRangeIterator it
905 		= addressSpace->GetAreaRangeIterator(address, size);
906 			VMArea* area = it.Next();) {
907 		status_t error = discard_area_range(area, address, size);
908 		if (error != B_OK)
909 			return error;
910 	}
911 
912 	return B_OK;
913 }
914 
915 
916 /*! You need to hold the lock of the cache and the write lock of the address
917 	space when calling this function.
918 	Note that in case of error the cache will be temporarily unlocked.
919 	If \a addressSpec is \c B_EXACT_ADDRESS and the
920 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
921 	that no part of the specified address range (base \c *_virtualAddress, size
922 	\a size) is wired.
923 */
924 static status_t
925 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
926 	const char* areaName, addr_t size, int wiring, int protection,
927 	int protectionMax, int mapping,
928 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
929 	bool kernel, VMArea** _area, void** _virtualAddress)
930 {
931 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
932 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
933 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
934 		addressSpace, cache, addressRestrictions->address, offset, size,
935 		addressRestrictions->address_specification, wiring, protection,
936 		protectionMax, _area, areaName));
937 	cache->AssertLocked();
938 
939 	if (size == 0) {
940 #if KDEBUG
941 		panic("map_backing_store(): called with size=0 for area '%s'!",
942 			areaName);
943 #endif
944 		return B_BAD_VALUE;
945 	}
946 
947 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
948 		| HEAP_DONT_LOCK_KERNEL_SPACE;
949 	int priority;
950 	if (addressSpace != VMAddressSpace::Kernel()) {
951 		priority = VM_PRIORITY_USER;
952 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
953 		priority = VM_PRIORITY_VIP;
954 		allocationFlags |= HEAP_PRIORITY_VIP;
955 	} else
956 		priority = VM_PRIORITY_SYSTEM;
957 
958 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
959 		allocationFlags);
960 	if (area == NULL)
961 		return B_NO_MEMORY;
962 	if (mapping != REGION_PRIVATE_MAP)
963 		area->protection_max = protectionMax & B_USER_PROTECTION;
964 
965 	status_t status;
966 
967 	// if this is a private map, we need to create a new cache
968 	// to handle the private copies of pages as they are written to
969 	VMCache* sourceCache = cache;
970 	if (mapping == REGION_PRIVATE_MAP) {
971 		VMCache* newCache;
972 
973 		// create an anonymous cache
974 		status = VMCacheFactory::CreateAnonymousCache(newCache,
975 			(protection & B_STACK_AREA) != 0
976 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
977 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
978 		if (status != B_OK)
979 			goto err1;
980 
981 		newCache->Lock();
982 		newCache->temporary = 1;
983 		newCache->virtual_base = offset;
984 		newCache->virtual_end = offset + size;
985 
986 		cache->AddConsumer(newCache);
987 
988 		cache = newCache;
989 	}
990 
991 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
992 		status = cache->SetMinimalCommitment(size, priority);
993 		if (status != B_OK)
994 			goto err2;
995 	}
996 
997 	// check to see if this address space has entered DELETE state
998 	if (addressSpace->IsBeingDeleted()) {
999 		// okay, someone is trying to delete this address space now, so we can't
1000 		// insert the area and have to back out
1001 		status = B_BAD_TEAM_ID;
1002 		goto err2;
1003 	}
1004 
1005 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1006 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1007 		status = unmap_address_range(addressSpace,
1008 			(addr_t)addressRestrictions->address, size, kernel);
1009 		if (status != B_OK)
1010 			goto err2;
1011 	}
1012 
1013 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1014 		allocationFlags, _virtualAddress);
1015 	if (status == B_NO_MEMORY
1016 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1017 		// TODO: At present, there is no way to notify the low_resource monitor
1018 		// that kernel address space is fragmented, nor does it check for this
1019 		// automatically. Due to how many locks are held, we cannot wait here
1020 		// for space to be freed up, but it would be good to at least notify
1021 		// that we tried and failed to allocate some amount.
1022 	}
1023 	if (status != B_OK)
1024 		goto err2;
1025 
1026 	// attach the cache to the area
1027 	area->cache = cache;
1028 	area->cache_offset = offset;
1029 
1030 	// point the cache back to the area
1031 	cache->InsertAreaLocked(area);
1032 	if (mapping == REGION_PRIVATE_MAP)
1033 		cache->Unlock();
1034 
1035 	// insert the area in the global area hash table
1036 	VMAreaHash::Insert(area);
1037 
1038 	// grab a ref to the address space (the area holds this)
1039 	addressSpace->Get();
1040 
1041 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1042 //		cache, sourceCache, areaName, area);
1043 
1044 	*_area = area;
1045 	return B_OK;
1046 
1047 err2:
1048 	if (mapping == REGION_PRIVATE_MAP) {
1049 		// We created this cache, so we must delete it again. Note that we
1050 		// need to temporarily unlock the source cache or we'll otherwise
1051 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1052 		sourceCache->Unlock();
1053 		cache->ReleaseRefAndUnlock();
1054 		sourceCache->Lock();
1055 	}
1056 err1:
1057 	addressSpace->DeleteArea(area, allocationFlags);
1058 	return status;
1059 }
1060 
1061 
1062 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1063 	  locker1, locker2).
1064 */
1065 template<typename LockerType1, typename LockerType2>
1066 static inline bool
1067 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1068 {
1069 	area->cache->AssertLocked();
1070 
1071 	VMAreaUnwiredWaiter waiter;
1072 	if (!area->AddWaiterIfWired(&waiter))
1073 		return false;
1074 
1075 	// unlock everything and wait
1076 	if (locker1 != NULL)
1077 		locker1->Unlock();
1078 	if (locker2 != NULL)
1079 		locker2->Unlock();
1080 
1081 	waiter.waitEntry.Wait();
1082 
1083 	return true;
1084 }
1085 
1086 
1087 /*!	Checks whether the given area has any wired ranges intersecting with the
1088 	specified range and waits, if so.
1089 
1090 	When it has to wait, the function calls \c Unlock() on both \a locker1
1091 	and \a locker2, if given.
1092 	The area's top cache must be locked and must be unlocked as a side effect
1093 	of calling \c Unlock() on either \a locker1 or \a locker2.
1094 
1095 	If the function does not have to wait it does not modify or unlock any
1096 	object.
1097 
1098 	\param area The area to be checked.
1099 	\param base The base address of the range to check.
1100 	\param size The size of the address range to check.
1101 	\param locker1 An object to be unlocked before starting to wait (may
1102 		be \c NULL).
1103 	\param locker2 An object to be unlocked before starting to wait (may
1104 		be \c NULL).
1105 	\return \c true, if the function had to wait, \c false otherwise.
1106 */
1107 template<typename LockerType1, typename LockerType2>
1108 static inline bool
1109 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1110 	LockerType1* locker1, LockerType2* locker2)
1111 {
1112 	area->cache->AssertLocked();
1113 
1114 	VMAreaUnwiredWaiter waiter;
1115 	if (!area->AddWaiterIfWired(&waiter, base, size))
1116 		return false;
1117 
1118 	// unlock everything and wait
1119 	if (locker1 != NULL)
1120 		locker1->Unlock();
1121 	if (locker2 != NULL)
1122 		locker2->Unlock();
1123 
1124 	waiter.waitEntry.Wait();
1125 
1126 	return true;
1127 }
1128 
1129 
1130 /*!	Checks whether the given address space has any wired ranges intersecting
1131 	with the specified range and waits, if so.
1132 
1133 	Similar to wait_if_area_range_is_wired(), with the following differences:
1134 	- All areas intersecting with the range are checked (respectively all until
1135 	  one is found that contains a wired range intersecting with the given
1136 	  range).
1137 	- The given address space must at least be read-locked and must be unlocked
1138 	  when \c Unlock() is called on \a locker.
1139 	- None of the areas' caches are allowed to be locked.
1140 */
1141 template<typename LockerType>
1142 static inline bool
1143 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1144 	size_t size, LockerType* locker)
1145 {
1146 	for (VMAddressSpace::AreaRangeIterator it
1147 		= addressSpace->GetAreaRangeIterator(base, size);
1148 			VMArea* area = it.Next();) {
1149 
1150 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1151 
1152 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1153 			return true;
1154 	}
1155 
1156 	return false;
1157 }
1158 
1159 
1160 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1161 	It must be called in a situation where the kernel address space may be
1162 	locked.
1163 */
1164 status_t
1165 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1166 {
1167 	AddressSpaceReadLocker locker;
1168 	VMArea* area;
1169 	status_t status = locker.SetFromArea(id, area);
1170 	if (status != B_OK)
1171 		return status;
1172 
1173 	if (area->page_protections == NULL) {
1174 		status = allocate_area_page_protections(area);
1175 		if (status != B_OK)
1176 			return status;
1177 	}
1178 
1179 	*cookie = (void*)area;
1180 	return B_OK;
1181 }
1182 
1183 
1184 /*!	This is a debug helper function that can only be used in very specific
1185 	use cases.
1186 	Sets protection for the given address range to the protection specified.
1187 	If \a protection is 0 then the involved pages will be marked non-present
1188 	in the translation map to cause a fault on access. The pages aren't
1189 	actually unmapped however so that they can be marked present again with
1190 	additional calls to this function. For this to work the area must be
1191 	fully locked in memory so that the pages aren't otherwise touched.
1192 	This function does not lock the kernel address space and needs to be
1193 	supplied with a \a cookie retrieved from a successful call to
1194 	vm_prepare_kernel_area_debug_protection().
1195 */
1196 status_t
1197 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1198 	uint32 protection)
1199 {
1200 	// check address range
1201 	addr_t address = (addr_t)_address;
1202 	size = PAGE_ALIGN(size);
1203 
1204 	if ((address % B_PAGE_SIZE) != 0
1205 		|| (addr_t)address + size < (addr_t)address
1206 		|| !IS_KERNEL_ADDRESS(address)
1207 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1208 		return B_BAD_VALUE;
1209 	}
1210 
1211 	// Translate the kernel protection to user protection as we only store that.
1212 	if ((protection & B_KERNEL_READ_AREA) != 0)
1213 		protection |= B_READ_AREA;
1214 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1215 		protection |= B_WRITE_AREA;
1216 
1217 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1218 	VMTranslationMap* map = addressSpace->TranslationMap();
1219 	VMArea* area = (VMArea*)cookie;
1220 
1221 	addr_t offset = address - area->Base();
1222 	if (area->Size() - offset < size) {
1223 		panic("protect range not fully within supplied area");
1224 		return B_BAD_VALUE;
1225 	}
1226 
1227 	if (area->page_protections == NULL) {
1228 		panic("area has no page protections");
1229 		return B_BAD_VALUE;
1230 	}
1231 
1232 	// Invalidate the mapping entries so any access to them will fault or
1233 	// restore the mapping entries unchanged so that lookup will succeed again.
1234 	map->Lock();
1235 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1236 	map->Unlock();
1237 
1238 	// And set the proper page protections so that the fault case will actually
1239 	// fail and not simply try to map a new page.
1240 	for (addr_t pageAddress = address; pageAddress < address + size;
1241 			pageAddress += B_PAGE_SIZE) {
1242 		set_area_page_protection(area, pageAddress, protection);
1243 	}
1244 
1245 	return B_OK;
1246 }
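
// Illustrative usage sketch (hypothetical caller, variable names assumed): the
// cookie is obtained once while locking the kernel address space is still
// allowed, and the protection is later toggled without taking that lock:
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
//		// make the range fault on any access
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//		// ... later, make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}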
1247 
1248 
1249 status_t
1250 vm_block_address_range(const char* name, void* address, addr_t size)
1251 {
1252 	if (!arch_vm_supports_protection(0))
1253 		return B_NOT_SUPPORTED;
1254 
1255 	AddressSpaceWriteLocker locker;
1256 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1257 	if (status != B_OK)
1258 		return status;
1259 
1260 	VMAddressSpace* addressSpace = locker.AddressSpace();
1261 
1262 	// create an anonymous cache
1263 	VMCache* cache;
1264 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1265 		VM_PRIORITY_SYSTEM);
1266 	if (status != B_OK)
1267 		return status;
1268 
1269 	cache->temporary = 1;
1270 	cache->virtual_end = size;
1271 	cache->Lock();
1272 
1273 	VMArea* area;
1274 	virtual_address_restrictions addressRestrictions = {};
1275 	addressRestrictions.address = address;
1276 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1277 	status = map_backing_store(addressSpace, cache, 0, name, size,
1278 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1279 		true, &area, NULL);
1280 	if (status != B_OK) {
1281 		cache->ReleaseRefAndUnlock();
1282 		return status;
1283 	}
1284 
1285 	cache->Unlock();
1286 	area->cache_type = CACHE_TYPE_RAM;
1287 	return area->id;
1288 }
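
// Hypothetical usage sketch: blocking a range ensures nothing can ever be
// mapped there, e.g. to guard an address hole (the name and range below are
// made up):
//
//	vm_block_address_range("reserved hole", (void*)0xdeadb000, B_PAGE_SIZE);
//
// The call requires arch_vm_supports_protection(0), since the blocking area is
// created with no access protection at all.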
1289 
1290 
1291 status_t
1292 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1293 {
1294 	AddressSpaceWriteLocker locker(team);
1295 	if (!locker.IsLocked())
1296 		return B_BAD_TEAM_ID;
1297 
1298 	VMAddressSpace* addressSpace = locker.AddressSpace();
1299 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1300 		addressSpace == VMAddressSpace::Kernel()
1301 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1302 }
1303 
1304 
1305 status_t
1306 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1307 	addr_t size, uint32 flags)
1308 {
1309 	if (size == 0)
1310 		return B_BAD_VALUE;
1311 
1312 	AddressSpaceWriteLocker locker(team);
1313 	if (!locker.IsLocked())
1314 		return B_BAD_TEAM_ID;
1315 
1316 	virtual_address_restrictions addressRestrictions = {};
1317 	addressRestrictions.address = *_address;
1318 	addressRestrictions.address_specification = addressSpec;
1319 	VMAddressSpace* addressSpace = locker.AddressSpace();
1320 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1321 		addressSpace == VMAddressSpace::Kernel()
1322 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1323 		_address);
1324 }
1325 
1326 
1327 area_id
1328 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1329 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1330 	const virtual_address_restrictions* virtualAddressRestrictions,
1331 	const physical_address_restrictions* physicalAddressRestrictions,
1332 	bool kernel, void** _address)
1333 {
1334 	VMArea* area;
1335 	VMCache* cache;
1336 	vm_page* page = NULL;
1337 	bool isStack = (protection & B_STACK_AREA) != 0;
1338 	page_num_t guardPages;
1339 	bool canOvercommit = false;
1340 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1341 		? VM_PAGE_ALLOC_CLEAR : 0;
1342 
1343 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1344 		team, name, size));
1345 
1346 	size = PAGE_ALIGN(size);
1347 	guardSize = PAGE_ALIGN(guardSize);
1348 	guardPages = guardSize / B_PAGE_SIZE;
1349 
1350 	if (size == 0 || size < guardSize)
1351 		return B_BAD_VALUE;
1352 	if (!arch_vm_supports_protection(protection))
1353 		return B_NOT_SUPPORTED;
1354 
1355 	if (team == B_CURRENT_TEAM)
1356 		team = VMAddressSpace::CurrentID();
1357 	if (team < 0)
1358 		return B_BAD_TEAM_ID;
1359 
1360 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1361 		canOvercommit = true;
1362 
1363 #ifdef DEBUG_KERNEL_STACKS
1364 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1365 		isStack = true;
1366 #endif
1367 
1368 	// check parameters
1369 	switch (virtualAddressRestrictions->address_specification) {
1370 		case B_ANY_ADDRESS:
1371 		case B_EXACT_ADDRESS:
1372 		case B_BASE_ADDRESS:
1373 		case B_ANY_KERNEL_ADDRESS:
1374 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1375 		case B_RANDOMIZED_ANY_ADDRESS:
1376 		case B_RANDOMIZED_BASE_ADDRESS:
1377 			break;
1378 
1379 		default:
1380 			return B_BAD_VALUE;
1381 	}
1382 
1383 	// If low or high physical address restrictions are given, we force
1384 	// B_CONTIGUOUS wiring, since only then we'll use
1385 	// vm_page_allocate_page_run() which deals with those restrictions.
1386 	if (physicalAddressRestrictions->low_address != 0
1387 		|| physicalAddressRestrictions->high_address != 0) {
1388 		wiring = B_CONTIGUOUS;
1389 	}
1390 
1391 	physical_address_restrictions stackPhysicalRestrictions;
1392 	bool doReserveMemory = false;
1393 	switch (wiring) {
1394 		case B_NO_LOCK:
1395 			break;
1396 		case B_FULL_LOCK:
1397 		case B_LAZY_LOCK:
1398 		case B_CONTIGUOUS:
1399 			doReserveMemory = true;
1400 			break;
1401 		case B_ALREADY_WIRED:
1402 			break;
1403 		case B_LOMEM:
1404 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1405 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1406 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1407 			wiring = B_CONTIGUOUS;
1408 			doReserveMemory = true;
1409 			break;
1410 		case B_32_BIT_FULL_LOCK:
1411 			if (B_HAIKU_PHYSICAL_BITS <= 32
1412 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1413 				wiring = B_FULL_LOCK;
1414 				doReserveMemory = true;
1415 				break;
1416 			}
1417 			// TODO: We don't really support this mode efficiently. Just fall
1418 			// through for now ...
1419 		case B_32_BIT_CONTIGUOUS:
1420 			#if B_HAIKU_PHYSICAL_BITS > 32
1421 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1422 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1423 					stackPhysicalRestrictions.high_address
1424 						= (phys_addr_t)1 << 32;
1425 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1426 				}
1427 			#endif
1428 			wiring = B_CONTIGUOUS;
1429 			doReserveMemory = true;
1430 			break;
1431 		default:
1432 			return B_BAD_VALUE;
1433 	}
1434 
1435 	// Optimization: For a single-page contiguous allocation without low/high
1436 	// memory restriction B_FULL_LOCK wiring suffices.
1437 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1438 		&& physicalAddressRestrictions->low_address == 0
1439 		&& physicalAddressRestrictions->high_address == 0) {
1440 		wiring = B_FULL_LOCK;
1441 	}
1442 
1443 	// For full lock or contiguous areas we're also going to map the pages and
1444 	// thus need to reserve pages for the mapping backend upfront.
1445 	addr_t reservedMapPages = 0;
1446 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1447 		AddressSpaceWriteLocker locker;
1448 		status_t status = locker.SetTo(team);
1449 		if (status != B_OK)
1450 			return status;
1451 
1452 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1453 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1454 	}
1455 
1456 	int priority;
1457 	if (team != VMAddressSpace::KernelID())
1458 		priority = VM_PRIORITY_USER;
1459 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1460 		priority = VM_PRIORITY_VIP;
1461 	else
1462 		priority = VM_PRIORITY_SYSTEM;
1463 
1464 	// Reserve memory before acquiring the address space lock. This reduces the
1465 	// chances of failure, since while holding the write lock to the address
1466 	// space (if it is the kernel address space that is), the low memory handler
1467 	// won't be able to free anything for us.
1468 	addr_t reservedMemory = 0;
1469 	if (doReserveMemory) {
1470 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1471 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1472 			return B_NO_MEMORY;
1473 		reservedMemory = size;
1474 		// TODO: We don't reserve the memory for the pages for the page
1475 		// directories/tables. We actually need to, since we currently don't
1476 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1477 		// there are actually fewer physical pages than there should be, which
1478 		// can get the VM into trouble in low memory situations.
1479 	}
1480 
1481 	AddressSpaceWriteLocker locker;
1482 	VMAddressSpace* addressSpace;
1483 	status_t status;
1484 
1485 	// For full lock areas reserve the pages before locking the address
1486 	// space. E.g. block caches can't release their memory while we hold the
1487 	// address space lock.
1488 	page_num_t reservedPages = reservedMapPages;
1489 	if (wiring == B_FULL_LOCK)
1490 		reservedPages += size / B_PAGE_SIZE;
1491 
1492 	vm_page_reservation reservation;
1493 	if (reservedPages > 0) {
1494 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1495 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1496 					priority)) {
1497 				reservedPages = 0;
1498 				status = B_WOULD_BLOCK;
1499 				goto err0;
1500 			}
1501 		} else
1502 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1503 	}
1504 
1505 	if (wiring == B_CONTIGUOUS) {
1506 		// we try to allocate the page run here upfront as this may easily
1507 		// fail for obvious reasons
1508 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1509 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1510 		if (page == NULL) {
1511 			status = B_NO_MEMORY;
1512 			goto err0;
1513 		}
1514 	}
1515 
1516 	// Lock the address space and, if B_EXACT_ADDRESS and
1517 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1518 	// is not wired.
1519 	do {
1520 		status = locker.SetTo(team);
1521 		if (status != B_OK)
1522 			goto err1;
1523 
1524 		addressSpace = locker.AddressSpace();
1525 	} while (virtualAddressRestrictions->address_specification
1526 			== B_EXACT_ADDRESS
1527 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1528 		&& wait_if_address_range_is_wired(addressSpace,
1529 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1530 
1531 	// create an anonymous cache
1532 	// if it's a stack, make sure that at least two pages are available
1533 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1534 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1535 		wiring == B_NO_LOCK, priority);
1536 	if (status != B_OK)
1537 		goto err1;
1538 
1539 	cache->temporary = 1;
1540 	cache->virtual_end = size;
1541 	cache->committed_size = reservedMemory;
1542 		// TODO: This should be done via a method.
1543 	reservedMemory = 0;
1544 
1545 	cache->Lock();
1546 
1547 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1548 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1549 		virtualAddressRestrictions, kernel, &area, _address);
1550 
1551 	if (status != B_OK) {
1552 		cache->ReleaseRefAndUnlock();
1553 		goto err1;
1554 	}
1555 
1556 	locker.DegradeToReadLock();
1557 
1558 	switch (wiring) {
1559 		case B_NO_LOCK:
1560 		case B_LAZY_LOCK:
1561 			// do nothing - the pages are mapped in as needed
1562 			break;
1563 
1564 		case B_FULL_LOCK:
1565 		{
1566 			// Allocate and map all pages for this area
1567 
1568 			off_t offset = 0;
1569 			for (addr_t address = area->Base();
1570 					address < area->Base() + (area->Size() - 1);
1571 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1572 #ifdef DEBUG_KERNEL_STACKS
1573 #	ifdef STACK_GROWS_DOWNWARDS
1574 				if (isStack && address < area->Base()
1575 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1576 #	else
1577 				if (isStack && address >= area->Base() + area->Size()
1578 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1579 #	endif
1580 					continue;
1581 #endif
1582 				vm_page* page = vm_page_allocate_page(&reservation,
1583 					PAGE_STATE_WIRED | pageAllocFlags);
1584 				cache->InsertPage(page, offset);
1585 				map_page(area, page, address, protection, &reservation);
1586 
1587 				DEBUG_PAGE_ACCESS_END(page);
1588 			}
1589 
1590 			break;
1591 		}
1592 
1593 		case B_ALREADY_WIRED:
1594 		{
1595 			// The pages should already be mapped. This is only really useful
1596 			// during boot time. Find the appropriate vm_page objects and stick
1597 			// them in the cache object.
1598 			VMTranslationMap* map = addressSpace->TranslationMap();
1599 			off_t offset = 0;
1600 
1601 			if (!gKernelStartup)
1602 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1603 
1604 			map->Lock();
1605 
1606 			for (addr_t virtualAddress = area->Base();
1607 					virtualAddress < area->Base() + (area->Size() - 1);
1608 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1609 				phys_addr_t physicalAddress;
1610 				uint32 flags;
1611 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1612 				if (status < B_OK) {
1613 					panic("looking up mapping failed for va 0x%lx\n",
1614 						virtualAddress);
1615 				}
1616 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1617 				if (page == NULL) {
1618 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1619 						"\n", physicalAddress);
1620 				}
1621 
1622 				DEBUG_PAGE_ACCESS_START(page);
1623 
1624 				cache->InsertPage(page, offset);
1625 				increment_page_wired_count(page);
1626 				vm_page_set_state(page, PAGE_STATE_WIRED);
1627 				page->busy = false;
1628 
1629 				DEBUG_PAGE_ACCESS_END(page);
1630 			}
1631 
1632 			map->Unlock();
1633 			break;
1634 		}
1635 
1636 		case B_CONTIGUOUS:
1637 		{
1638 			// We have already allocated our contiguous page run, so we can now
1639 			// just map them in the address space
1640 			VMTranslationMap* map = addressSpace->TranslationMap();
1641 			phys_addr_t physicalAddress
1642 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1643 			addr_t virtualAddress = area->Base();
1644 			off_t offset = 0;
1645 
1646 			map->Lock();
1647 
1648 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1649 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1650 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1651 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1652 				if (page == NULL)
1653 					panic("couldn't lookup physical page just allocated\n");
1654 
1655 				status = map->Map(virtualAddress, physicalAddress, protection,
1656 					area->MemoryType(), &reservation);
1657 				if (status < B_OK)
1658 					panic("couldn't map physical page in page run\n");
1659 
1660 				cache->InsertPage(page, offset);
1661 				increment_page_wired_count(page);
1662 
1663 				DEBUG_PAGE_ACCESS_END(page);
1664 			}
1665 
1666 			map->Unlock();
1667 			break;
1668 		}
1669 
1670 		default:
1671 			break;
1672 	}
1673 
1674 	cache->Unlock();
1675 
1676 	if (reservedPages > 0)
1677 		vm_page_unreserve_pages(&reservation);
1678 
1679 	TRACE(("vm_create_anonymous_area: done\n"));
1680 
1681 	area->cache_type = CACHE_TYPE_RAM;
1682 	return area->id;
1683 
1684 err1:
1685 	if (wiring == B_CONTIGUOUS) {
1686 		// we had allocated the page run upfront -- free it again
1687 		phys_addr_t pageNumber = page->physical_page_number;
1688 		int32 i;
1689 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1690 			page = vm_lookup_page(pageNumber);
1691 			if (page == NULL)
1692 				panic("couldn't lookup physical page just allocated\n");
1693 
1694 			vm_page_set_state(page, PAGE_STATE_FREE);
1695 		}
1696 	}
1697 
1698 err0:
1699 	if (reservedPages > 0)
1700 		vm_page_unreserve_pages(&reservation);
1701 	if (reservedMemory > 0)
1702 		vm_unreserve_memory(reservedMemory);
1703 
1704 	return status;
1705 }
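
// Hypothetical usage sketch (all values made up): creating a fully locked
// kernel area of four pages with default restrictions:
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id id = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"example buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, true, &address);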
1706 
1707 
1708 area_id
1709 vm_map_physical_memory(team_id team, const char* name, void** _address,
1710 	uint32 addressSpec, addr_t size, uint32 protection,
1711 	phys_addr_t physicalAddress, bool alreadyWired)
1712 {
1713 	VMArea* area;
1714 	VMCache* cache;
1715 	addr_t mapOffset;
1716 
1717 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1718 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1719 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1720 		addressSpec, size, protection, physicalAddress));
1721 
1722 	if (!arch_vm_supports_protection(protection))
1723 		return B_NOT_SUPPORTED;
1724 
1725 	AddressSpaceWriteLocker locker(team);
1726 	if (!locker.IsLocked())
1727 		return B_BAD_TEAM_ID;
1728 
1729 	// if the physical address is not page aligned,
1730 	// move the actual area down to align on a page boundary
1731 	mapOffset = physicalAddress % B_PAGE_SIZE;
1732 	size += mapOffset;
1733 	physicalAddress -= mapOffset;
1734 
1735 	size = PAGE_ALIGN(size);
1736 
1737 	// create a device cache
1738 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1739 	if (status != B_OK)
1740 		return status;
1741 
1742 	cache->virtual_end = size;
1743 
1744 	cache->Lock();
1745 
1746 	virtual_address_restrictions addressRestrictions = {};
1747 	addressRestrictions.address = *_address;
1748 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1749 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1750 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1751 		true, &area, _address);
1752 
1753 	if (status < B_OK)
1754 		cache->ReleaseRefLocked();
1755 
1756 	cache->Unlock();
1757 
1758 	if (status == B_OK) {
1759 		// set requested memory type -- use uncached, if not given
1760 		uint32 memoryType = addressSpec & B_MTR_MASK;
1761 		if (memoryType == 0)
1762 			memoryType = B_MTR_UC;
1763 
1764 		area->SetMemoryType(memoryType);
1765 
1766 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1767 		if (status != B_OK)
1768 			delete_area(locker.AddressSpace(), area, false);
1769 	}
1770 
1771 	if (status != B_OK)
1772 		return status;
1773 
1774 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1775 
1776 	if (alreadyWired) {
1777 		// The area is already mapped, but possibly not with the right
1778 		// memory type.
1779 		map->Lock();
1780 		map->ProtectArea(area, area->protection);
1781 		map->Unlock();
1782 	} else {
1783 		// Map the area completely.
1784 
1785 		// reserve pages needed for the mapping
1786 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1787 			area->Base() + (size - 1));
1788 		vm_page_reservation reservation;
1789 		vm_page_reserve_pages(&reservation, reservePages,
1790 			team == VMAddressSpace::KernelID()
1791 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1792 
1793 		map->Lock();
1794 
1795 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1796 			map->Map(area->Base() + offset, physicalAddress + offset,
1797 				protection, area->MemoryType(), &reservation);
1798 		}
1799 
1800 		map->Unlock();
1801 
1802 		vm_page_unreserve_pages(&reservation);
1803 	}
1804 
1805 	// offset the returned pointer into the new area by the same amount
1806 	// the passed-in physical address was offset from the page boundary
1807 	*_address = (void*)((addr_t)*_address + mapOffset);
1808 
1809 	area->cache_type = CACHE_TYPE_DEVICE;
1810 	return area->id;
1811 }
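// Illustrative usage sketch (editor's addition, not part of the original
// source): a driver could map a device's register window into the kernel
// address space roughly like this; "physicalBase" and "kRegsSize" are
// placeholder names for values obtained elsewhere.
//
//	void* virtualBase = NULL;
//	area_id regsArea = vm_map_physical_memory(VMAddressSpace::KernelID(),
//		"device registers", &virtualBase, B_ANY_KERNEL_ADDRESS, kRegsSize,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase, false);
//	if (regsArea < 0)
//		return regsArea;
//	// the registers can now be accessed through virtualBase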
1812 
1813 
1814 /*!	Don't use!
1815 	TODO: This function was introduced to map physical page vecs to
1816 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1817 	use a device cache and does not track vm_page::wired_count!
1818 */
1819 area_id
1820 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1821 	uint32 addressSpec, addr_t* _size, uint32 protection,
1822 	struct generic_io_vec* vecs, uint32 vecCount)
1823 {
1824 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1825 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1826 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1827 		addressSpec, _size, protection, vecs, vecCount));
1828 
1829 	if (!arch_vm_supports_protection(protection)
1830 		|| (addressSpec & B_MTR_MASK) != 0) {
1831 		return B_NOT_SUPPORTED;
1832 	}
1833 
1834 	AddressSpaceWriteLocker locker(team);
1835 	if (!locker.IsLocked())
1836 		return B_BAD_TEAM_ID;
1837 
1838 	if (vecCount == 0)
1839 		return B_BAD_VALUE;
1840 
1841 	addr_t size = 0;
1842 	for (uint32 i = 0; i < vecCount; i++) {
1843 		if (vecs[i].base % B_PAGE_SIZE != 0
1844 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1845 			return B_BAD_VALUE;
1846 		}
1847 
1848 		size += vecs[i].length;
1849 	}
1850 
1851 	// create a device cache
1852 	VMCache* cache;
1853 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1854 	if (result != B_OK)
1855 		return result;
1856 
1857 	cache->virtual_end = size;
1858 
1859 	cache->Lock();
1860 
1861 	VMArea* area;
1862 	virtual_address_restrictions addressRestrictions = {};
1863 	addressRestrictions.address = *_address;
1864 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1865 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1866 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1867 		&addressRestrictions, true, &area, _address);
1868 
1869 	if (result != B_OK)
1870 		cache->ReleaseRefLocked();
1871 
1872 	cache->Unlock();
1873 
1874 	if (result != B_OK)
1875 		return result;
1876 
1877 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1878 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1879 		area->Base() + (size - 1));
1880 
1881 	vm_page_reservation reservation;
1882 	vm_page_reserve_pages(&reservation, reservePages,
1883 			team == VMAddressSpace::KernelID()
1884 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1885 	map->Lock();
1886 
1887 	uint32 vecIndex = 0;
1888 	size_t vecOffset = 0;
1889 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1890 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1891 			vecOffset = 0;
1892 			vecIndex++;
1893 		}
1894 
1895 		if (vecIndex >= vecCount)
1896 			break;
1897 
1898 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1899 			protection, area->MemoryType(), &reservation);
1900 
1901 		vecOffset += B_PAGE_SIZE;
1902 	}
1903 
1904 	map->Unlock();
1905 	vm_page_unreserve_pages(&reservation);
1906 
1907 	if (_size != NULL)
1908 		*_size = size;
1909 
1910 	area->cache_type = CACHE_TYPE_DEVICE;
1911 	return area->id;
1912 }
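// Worked example (editor's addition): with two vecs of two pages each, the
// mapping loop above advances through the vecs as their lengths are consumed:
//
//	virtual offset 0               -> vecs[0].base
//	virtual offset 1 * B_PAGE_SIZE -> vecs[0].base + B_PAGE_SIZE
//	virtual offset 2 * B_PAGE_SIZE -> vecs[1].base
//	virtual offset 3 * B_PAGE_SIZE -> vecs[1].base + B_PAGE_SIZE
//
// i.e. physically scattered page runs end up contiguous in virtual memory.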
1913 
1914 
1915 area_id
1916 vm_create_null_area(team_id team, const char* name, void** address,
1917 	uint32 addressSpec, addr_t size, uint32 flags)
1918 {
1919 	size = PAGE_ALIGN(size);
1920 
1921 	// Lock the address space and, if B_EXACT_ADDRESS and
1922 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1923 	// is not wired.
1924 	AddressSpaceWriteLocker locker;
1925 	do {
1926 		if (locker.SetTo(team) != B_OK)
1927 			return B_BAD_TEAM_ID;
1928 	} while (addressSpec == B_EXACT_ADDRESS
1929 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1930 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1931 			(addr_t)*address, size, &locker));
1932 
1933 	// create a null cache
1934 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1935 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1936 	VMCache* cache;
1937 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1938 	if (status != B_OK)
1939 		return status;
1940 
1941 	cache->temporary = 1;
1942 	cache->virtual_end = size;
1943 
1944 	cache->Lock();
1945 
1946 	VMArea* area;
1947 	virtual_address_restrictions addressRestrictions = {};
1948 	addressRestrictions.address = *address;
1949 	addressRestrictions.address_specification = addressSpec;
1950 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1951 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1952 		REGION_NO_PRIVATE_MAP, flags,
1953 		&addressRestrictions, true, &area, address);
1954 
1955 	if (status < B_OK) {
1956 		cache->ReleaseRefAndUnlock();
1957 		return status;
1958 	}
1959 
1960 	cache->Unlock();
1961 
1962 	area->cache_type = CACHE_TYPE_NULL;
1963 	return area->id;
1964 }
1965 
1966 
1967 /*!	Creates the vnode cache for the specified \a vnode.
1968 	The vnode has to be marked busy when calling this function.
1969 */
1970 status_t
1971 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1972 {
1973 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1974 }
1975 
1976 
1977 /*!	\a cache must be locked. The area's address space must be read-locked.
1978 */
1979 static void
1980 pre_map_area_pages(VMArea* area, VMCache* cache,
1981 	vm_page_reservation* reservation)
1982 {
1983 	addr_t baseAddress = area->Base();
1984 	addr_t cacheOffset = area->cache_offset;
1985 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1986 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1987 
1988 	for (VMCachePagesTree::Iterator it
1989 				= cache->pages.GetIterator(firstPage, true, true);
1990 			vm_page* page = it.Next();) {
1991 		if (page->cache_offset >= endPage)
1992 			break;
1993 
1994 		// skip busy and inactive pages
1995 		if (page->busy || page->usage_count == 0)
1996 			continue;
1997 
1998 		DEBUG_PAGE_ACCESS_START(page);
1999 		map_page(area, page,
2000 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2001 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2002 		DEBUG_PAGE_ACCESS_END(page);
2003 	}
2004 }
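// Editor's note: map_page() above translates a page's position in the cache
// back into the area's address space. Assuming 4 KiB pages, an area mapped at
// base 0x80000000 with cache_offset 0x2000 maps the page at cache offset
// 0x3000 (page->cache_offset == 3) to 0x80000000 + (0x3000 - 0x2000)
// == 0x80001000.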
2005 
2006 
2007 /*!	Will map the file specified by \a fd to an area in memory.
2008 	The file will be mirrored beginning at the specified \a offset. The
2009 	\a offset and \a size arguments have to be page aligned.
2010 */
2011 static area_id
2012 _vm_map_file(team_id team, const char* name, void** _address,
2013 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2014 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2015 {
2016 	// TODO: for binary files, we want to make sure that they get a
2017 	//	snapshot of the file at mapping time, i.e. later changes to the
2018 	//	file should not show up in the mapped copy -- doing this in a nice
2019 	//	way will need quite some changes
2020 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2021 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2022 
2023 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2024 	size = PAGE_ALIGN(size);
2025 
2026 	if (mapping == REGION_NO_PRIVATE_MAP)
2027 		protection |= B_SHARED_AREA;
2028 	if (addressSpec != B_EXACT_ADDRESS)
2029 		unmapAddressRange = false;
2030 
2031 	if (fd < 0) {
2032 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2033 		virtual_address_restrictions virtualRestrictions = {};
2034 		virtualRestrictions.address = *_address;
2035 		virtualRestrictions.address_specification = addressSpec;
2036 		physical_address_restrictions physicalRestrictions = {};
2037 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2038 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2039 			_address);
2040 	}
2041 
2042 	// get the open flags of the FD
2043 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2044 	if (descriptor == NULL)
2045 		return EBADF;
2046 	int32 openMode = descriptor->open_mode;
2047 	put_fd(descriptor);
2048 
2049 	// The FD must be open for reading in any case. For a shared mapping
2050 	// with write access, the FD additionally has to be open for writing.
2051 	if ((openMode & O_ACCMODE) == O_WRONLY
2052 		|| (mapping == REGION_NO_PRIVATE_MAP
2053 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2054 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2055 		return EACCES;
2056 	}
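	// (Editor's note) For example, a shared (REGION_NO_PRIVATE_MAP) mapping
	// with write access on a descriptor opened O_RDONLY is rejected here,
	// while a private copy-on-write mapping of the same descriptor is
	// allowed, since its writes never reach the file.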
2057 
2058 	uint32 protectionMax = 0;
2059 	if (mapping != REGION_PRIVATE_MAP) {
2060 		protectionMax = protection | B_READ_AREA;
2061 		if ((openMode & O_ACCMODE) == O_RDWR)
2062 			protectionMax |= B_WRITE_AREA;
2063 	}
2064 
2065 	// get the vnode for the object, this also grabs a ref to it
2066 	struct vnode* vnode = NULL;
2067 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2068 	if (status < B_OK)
2069 		return status;
2070 	VnodePutter vnodePutter(vnode);
2071 
2072 	// If we're going to pre-map pages, we need to reserve the pages needed by
2073 	// the mapping backend upfront.
2074 	page_num_t reservedPreMapPages = 0;
2075 	vm_page_reservation reservation;
2076 	if ((protection & B_READ_AREA) != 0) {
2077 		AddressSpaceWriteLocker locker;
2078 		status = locker.SetTo(team);
2079 		if (status != B_OK)
2080 			return status;
2081 
2082 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2083 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2084 
2085 		locker.Unlock();
2086 
2087 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2088 			team == VMAddressSpace::KernelID()
2089 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2090 	}
2091 
2092 	struct PageUnreserver {
2093 		PageUnreserver(vm_page_reservation* reservation)
2094 			:
2095 			fReservation(reservation)
2096 		{
2097 		}
2098 
2099 		~PageUnreserver()
2100 		{
2101 			if (fReservation != NULL)
2102 				vm_page_unreserve_pages(fReservation);
2103 		}
2104 
2105 		vm_page_reservation* fReservation;
2106 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2107 
2108 	// Lock the address space and, if the specified address range shall be
2109 	// unmapped, ensure it is not wired.
2110 	AddressSpaceWriteLocker locker;
2111 	do {
2112 		if (locker.SetTo(team) != B_OK)
2113 			return B_BAD_TEAM_ID;
2114 	} while (unmapAddressRange
2115 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2116 			(addr_t)*_address, size, &locker));
2117 
2118 	// TODO: this only works for file systems that use the file cache
2119 	VMCache* cache;
2120 	status = vfs_get_vnode_cache(vnode, &cache, false);
2121 	if (status < B_OK)
2122 		return status;
2123 
2124 	cache->Lock();
2125 
2126 	VMArea* area;
2127 	virtual_address_restrictions addressRestrictions = {};
2128 	addressRestrictions.address = *_address;
2129 	addressRestrictions.address_specification = addressSpec;
2130 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2131 		0, protection, protectionMax, mapping,
2132 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2133 		&addressRestrictions, kernel, &area, _address);
2134 
2135 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2136 		// map_backing_store() cannot know we no longer need the ref
2137 		cache->ReleaseRefLocked();
2138 	}
2139 
2140 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2141 		pre_map_area_pages(area, cache, &reservation);
2142 
2143 	cache->Unlock();
2144 
2145 	if (status == B_OK) {
2146 		// TODO: this probably deserves a smarter solution, i.e. don't always
2147 		// prefetch, and probably don't trigger it at this place either.
2148 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2149 			// prefetches at max 10 MB starting from "offset"
2150 	}
2151 
2152 	if (status != B_OK)
2153 		return status;
2154 
2155 	area->cache_type = CACHE_TYPE_VNODE;
2156 	return area->id;
2157 }
2158 
2159 
2160 area_id
2161 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2162 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2163 	int fd, off_t offset)
2164 {
2165 	if (!arch_vm_supports_protection(protection))
2166 		return B_NOT_SUPPORTED;
2167 
2168 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2169 		mapping, unmapAddressRange, fd, offset, true);
2170 }
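// Illustrative usage sketch (editor's addition): mapping the first 64 KB of
// an already opened file read-only into a team's address space; "teamID" and
// "fd" are placeholders for values obtained elsewhere.
//
//	void* address = NULL;
//	area_id mappedArea = vm_map_file(teamID, "mapped file", &address,
//		B_ANY_ADDRESS, 64 * 1024, B_READ_AREA | B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, false, fd, 0);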
2171 
2172 
2173 VMCache*
2174 vm_area_get_locked_cache(VMArea* area)
2175 {
2176 	rw_lock_read_lock(&sAreaCacheLock);
2177 
2178 	while (true) {
2179 		VMCache* cache = area->cache;
2180 
2181 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2182 			// cache has been deleted
2183 			rw_lock_read_lock(&sAreaCacheLock);
2184 			continue;
2185 		}
2186 
2187 		rw_lock_read_lock(&sAreaCacheLock);
2188 
2189 		if (cache == area->cache) {
2190 			cache->AcquireRefLocked();
2191 			rw_lock_read_unlock(&sAreaCacheLock);
2192 			return cache;
2193 		}
2194 
2195 		// the cache changed in the meantime
2196 		cache->Unlock();
2197 	}
2198 }
2199 
2200 
2201 void
2202 vm_area_put_locked_cache(VMCache* cache)
2203 {
2204 	cache->ReleaseRefAndUnlock();
2205 }
2206 
2207 
2208 area_id
2209 vm_clone_area(team_id team, const char* name, void** address,
2210 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2211 	bool kernel)
2212 {
2213 	VMArea* newArea = NULL;
2214 	VMArea* sourceArea;
2215 
2216 	// Check whether the source area exists and is cloneable. If so, mark it
2217 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2218 	{
2219 		AddressSpaceWriteLocker locker;
2220 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2221 		if (status != B_OK)
2222 			return status;
2223 
2224 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2225 			return B_NOT_ALLOWED;
2226 
2227 		sourceArea->protection |= B_SHARED_AREA;
2228 		protection |= B_SHARED_AREA;
2229 	}
2230 
2231 	// Now lock both address spaces and actually do the cloning.
2232 
2233 	MultiAddressSpaceLocker locker;
2234 	VMAddressSpace* sourceAddressSpace;
2235 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2236 	if (status != B_OK)
2237 		return status;
2238 
2239 	VMAddressSpace* targetAddressSpace;
2240 	status = locker.AddTeam(team, true, &targetAddressSpace);
2241 	if (status != B_OK)
2242 		return status;
2243 
2244 	status = locker.Lock();
2245 	if (status != B_OK)
2246 		return status;
2247 
2248 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2249 	if (sourceArea == NULL)
2250 		return B_BAD_VALUE;
2251 
2252 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2253 		return B_NOT_ALLOWED;
2254 
2255 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2256 
2257 	if (!kernel && sourceAddressSpace != targetAddressSpace
2258 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2259 #if KDEBUG
2260 		Team* team = thread_get_current_thread()->team;
2261 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2262 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2263 #endif
2264 		status = B_NOT_ALLOWED;
2265 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2266 		status = B_NOT_ALLOWED;
2267 	} else {
2268 		virtual_address_restrictions addressRestrictions = {};
2269 		addressRestrictions.address = *address;
2270 		addressRestrictions.address_specification = addressSpec;
2271 		status = map_backing_store(targetAddressSpace, cache,
2272 			sourceArea->cache_offset, name, sourceArea->Size(),
2273 			sourceArea->wiring, protection, sourceArea->protection_max,
2274 			mapping, 0, &addressRestrictions,
2275 			kernel, &newArea, address);
2276 	}
2277 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2278 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2279 		// to create a new cache, and has therefore already acquired a reference
2280 		// to the source cache - but otherwise it has no idea that we need
2281 		// one.
2282 		cache->AcquireRefLocked();
2283 	}
2284 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2285 		// we need to map in everything at this point
2286 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2287 			// we don't have actual pages to map but a physical area
2288 			VMTranslationMap* map
2289 				= sourceArea->address_space->TranslationMap();
2290 			map->Lock();
2291 
2292 			phys_addr_t physicalAddress;
2293 			uint32 oldProtection;
2294 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2295 
2296 			map->Unlock();
2297 
2298 			map = targetAddressSpace->TranslationMap();
2299 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2300 				newArea->Base() + (newArea->Size() - 1));
2301 
2302 			vm_page_reservation reservation;
2303 			vm_page_reserve_pages(&reservation, reservePages,
2304 				targetAddressSpace == VMAddressSpace::Kernel()
2305 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2306 			map->Lock();
2307 
2308 			for (addr_t offset = 0; offset < newArea->Size();
2309 					offset += B_PAGE_SIZE) {
2310 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2311 					protection, newArea->MemoryType(), &reservation);
2312 			}
2313 
2314 			map->Unlock();
2315 			vm_page_unreserve_pages(&reservation);
2316 		} else {
2317 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2318 			size_t reservePages = map->MaxPagesNeededToMap(
2319 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2320 			vm_page_reservation reservation;
2321 			vm_page_reserve_pages(&reservation, reservePages,
2322 				targetAddressSpace == VMAddressSpace::Kernel()
2323 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2324 
2325 			// map in all pages from source
2326 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2327 					vm_page* page  = it.Next();) {
2328 				if (!page->busy) {
2329 					DEBUG_PAGE_ACCESS_START(page);
2330 					map_page(newArea, page,
2331 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2332 							- newArea->cache_offset),
2333 						protection, &reservation);
2334 					DEBUG_PAGE_ACCESS_END(page);
2335 				}
2336 			}
2337 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2338 			// ensuring that!
2339 
2340 			vm_page_unreserve_pages(&reservation);
2341 		}
2342 	}
2343 	if (status == B_OK)
2344 		newArea->cache_type = sourceArea->cache_type;
2345 
2346 	vm_area_put_locked_cache(cache);
2347 
2348 	if (status < B_OK)
2349 		return status;
2350 
2351 	return newArea->id;
2352 }
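// Illustrative usage sketch (editor's addition): cloning an existing area
// (for instance a frame buffer set up by another team) into the target
// team's address space; "targetTeam" and "sourceArea" are placeholders.
//
//	void* address = NULL;
//	area_id clonedArea = vm_clone_area(targetTeam, "frame buffer clone",
//		&address, B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA,
//		REGION_NO_PRIVATE_MAP, sourceArea, false);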
2353 
2354 
2355 /*!	Deletes the specified area of the given address space.
2356 
2357 	The address space must be write-locked.
2358 	The caller must ensure that the area does not have any wired ranges.
2359 
2360 	\param addressSpace The address space containing the area.
2361 	\param area The area to be deleted.
2362 	\param deletingAddressSpace \c true, if the address space is in the process
2363 		of being deleted.
2364 */
2365 static void
2366 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2367 	bool deletingAddressSpace)
2368 {
2369 	ASSERT(!area->IsWired());
2370 
2371 	VMAreaHash::Remove(area);
2372 
2373 	// At this point the area is removed from the global hash table, but
2374 	// still exists in the area list.
2375 
2376 	// Unmap the virtual address space the area occupied.
2377 	{
2378 		// We need to lock the complete cache chain.
2379 		VMCache* topCache = vm_area_get_locked_cache(area);
2380 		VMCacheChainLocker cacheChainLocker(topCache);
2381 		cacheChainLocker.LockAllSourceCaches();
2382 
2383 		// If the area's top cache is a temporary cache and the area is the only
2384 		// one referencing it (besides us currently holding a second reference),
2385 		// the unmapping code doesn't need to care about preserving the accessed
2386 		// and dirty flags of the top cache page mappings.
2387 		bool ignoreTopCachePageFlags
2388 			= topCache->temporary && topCache->RefCount() == 2;
2389 
2390 		area->address_space->TranslationMap()->UnmapArea(area,
2391 			deletingAddressSpace, ignoreTopCachePageFlags);
2392 	}
2393 
2394 	if (!area->cache->temporary)
2395 		area->cache->WriteModified();
2396 
2397 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2398 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2399 
2400 	arch_vm_unset_memory_type(area);
2401 	addressSpace->RemoveArea(area, allocationFlags);
2402 	addressSpace->Put();
2403 
2404 	area->cache->RemoveArea(area);
2405 	area->cache->ReleaseRef();
2406 
2407 	addressSpace->DeleteArea(area, allocationFlags);
2408 }
2409 
2410 
2411 status_t
2412 vm_delete_area(team_id team, area_id id, bool kernel)
2413 {
2414 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2415 		team, id));
2416 
2417 	// lock the address space and make sure the area isn't wired
2418 	AddressSpaceWriteLocker locker;
2419 	VMArea* area;
2420 	AreaCacheLocker cacheLocker;
2421 
2422 	do {
2423 		status_t status = locker.SetFromArea(team, id, area);
2424 		if (status != B_OK)
2425 			return status;
2426 
2427 		cacheLocker.SetTo(area);
2428 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2429 
2430 	cacheLocker.Unlock();
2431 
2432 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2433 		return B_NOT_ALLOWED;
2434 
2435 	delete_area(locker.AddressSpace(), area, false);
2436 	return B_OK;
2437 }
2438 
2439 
2440 /*!	Creates a new cache on top of the given cache, moves all areas from
2441 	the old cache to the new one, and changes the protection of all affected
2442 	areas' pages to read-only. If requested, wired pages are moved up to the
2443 	new cache and copies are added to the old cache in their place.
2444 	Preconditions:
2445 	- The given cache must be locked.
2446 	- All of the cache's areas' address spaces must be read locked.
2447 	- Either the cache must not have any wired ranges or a page reservation for
2448 	  all wired pages must be provided, so they can be copied.
2449 
2450 	\param lowerCache The cache on top of which a new cache shall be created.
2451 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2452 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2453 		has wired pages. The wired pages are copied in this case.
2454 */
2455 static status_t
2456 vm_copy_on_write_area(VMCache* lowerCache,
2457 	vm_page_reservation* wiredPagesReservation)
2458 {
2459 	VMCache* upperCache;
2460 
2461 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2462 
2463 	// We need to separate the cache from its areas. The cache goes one level
2464 	// deeper and we create a new cache in between.
2465 
2466 	// create an anonymous cache
2467 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2468 		lowerCache->GuardSize() / B_PAGE_SIZE,
2469 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2470 		VM_PRIORITY_USER);
2471 	if (status != B_OK)
2472 		return status;
2473 
2474 	upperCache->Lock();
2475 
2476 	upperCache->temporary = 1;
2477 	upperCache->virtual_base = lowerCache->virtual_base;
2478 	upperCache->virtual_end = lowerCache->virtual_end;
2479 
2480 	// transfer the lower cache areas to the upper cache
2481 	rw_lock_write_lock(&sAreaCacheLock);
2482 	upperCache->TransferAreas(lowerCache);
2483 	rw_lock_write_unlock(&sAreaCacheLock);
2484 
2485 	lowerCache->AddConsumer(upperCache);
2486 
2487 	// We now need to remap all pages from all of the cache's areas read-only,
2488 	// so that a copy will be created on next write access. If there are wired
2489 	// pages, we keep their protection, move them to the upper cache and create
2490 	// copies for the lower cache.
2491 	if (wiredPagesReservation != NULL) {
2492 		// We need to handle wired pages -- iterate through the cache's pages.
2493 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2494 				vm_page* page = it.Next();) {
2495 			if (page->WiredCount() > 0) {
2496 				// allocate a new page and copy the wired one
2497 				vm_page* copiedPage = vm_page_allocate_page(
2498 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2499 
2500 				vm_memcpy_physical_page(
2501 					copiedPage->physical_page_number * B_PAGE_SIZE,
2502 					page->physical_page_number * B_PAGE_SIZE);
2503 
2504 				// move the wired page to the upper cache (note: removing is OK
2505 				// with the SplayTree iterator) and insert the copy
2506 				upperCache->MovePage(page);
2507 				lowerCache->InsertPage(copiedPage,
2508 					page->cache_offset * B_PAGE_SIZE);
2509 
2510 				DEBUG_PAGE_ACCESS_END(copiedPage);
2511 			} else {
2512 				// Change the protection of this page in all areas.
2513 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2514 						tempArea = tempArea->cache_next) {
2515 					// The area must be readable in the same way it was
2516 					// previously writable.
2517 					uint32 protection = B_KERNEL_READ_AREA;
2518 					if ((tempArea->protection & B_READ_AREA) != 0)
2519 						protection |= B_READ_AREA;
2520 
2521 					VMTranslationMap* map
2522 						= tempArea->address_space->TranslationMap();
2523 					map->Lock();
2524 					map->ProtectPage(tempArea,
2525 						virtual_page_address(tempArea, page), protection);
2526 					map->Unlock();
2527 				}
2528 			}
2529 		}
2530 	} else {
2531 		ASSERT(lowerCache->WiredPagesCount() == 0);
2532 
2533 		// just change the protection of all areas
2534 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2535 				tempArea = tempArea->cache_next) {
2536 			// The area must be readable in the same way it was previously
2537 			// writable.
2538 			uint32 protection = B_KERNEL_READ_AREA;
2539 			if ((tempArea->protection & B_READ_AREA) != 0)
2540 				protection |= B_READ_AREA;
2541 
2542 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2543 			map->Lock();
2544 			map->ProtectArea(tempArea, protection);
2545 			map->Unlock();
2546 		}
2547 	}
2548 
2549 	vm_area_put_locked_cache(upperCache);
2550 
2551 	return B_OK;
2552 }
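// Editor's note: schematically, the effect of vm_copy_on_write_area() on the
// cache chain is
//
//	before:  area(s) -> lowerCache -> (source ...)
//	after:   area(s) -> upperCache -> lowerCache -> (source ...)
//
// All pages stay in lowerCache (except wired ones, which are moved up and
// replaced by copies), the areas are re-protected read-only, and the next
// write fault on a page creates a private copy in the upper (anonymous)
// cache.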
2553 
2554 
2555 area_id
2556 vm_copy_area(team_id team, const char* name, void** _address,
2557 	uint32 addressSpec, area_id sourceID)
2558 {
2559 	// Do the locking: target address space, all address spaces associated with
2560 	// the source cache, and the cache itself.
2561 	MultiAddressSpaceLocker locker;
2562 	VMAddressSpace* targetAddressSpace;
2563 	VMCache* cache;
2564 	VMArea* source;
2565 	AreaCacheLocker cacheLocker;
2566 	status_t status;
2567 	bool sharedArea;
2568 
2569 	page_num_t wiredPages = 0;
2570 	vm_page_reservation wiredPagesReservation;
2571 
2572 	bool restart;
2573 	do {
2574 		restart = false;
2575 
2576 		locker.Unset();
2577 		status = locker.AddTeam(team, true, &targetAddressSpace);
2578 		if (status == B_OK) {
2579 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2580 				&cache);
2581 		}
2582 		if (status != B_OK)
2583 			return status;
2584 
2585 		cacheLocker.SetTo(cache, true);	// already locked
2586 
2587 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2588 
2589 		page_num_t oldWiredPages = wiredPages;
2590 		wiredPages = 0;
2591 
2592 		// If the source area isn't shared, count the number of wired pages in
2593 		// the cache and reserve as many pages.
2594 		if (!sharedArea) {
2595 			wiredPages = cache->WiredPagesCount();
2596 
2597 			if (wiredPages > oldWiredPages) {
2598 				cacheLocker.Unlock();
2599 				locker.Unlock();
2600 
2601 				if (oldWiredPages > 0)
2602 					vm_page_unreserve_pages(&wiredPagesReservation);
2603 
2604 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2605 					VM_PRIORITY_USER);
2606 
2607 				restart = true;
2608 			}
2609 		} else if (oldWiredPages > 0)
2610 			vm_page_unreserve_pages(&wiredPagesReservation);
2611 	} while (restart);
2612 
2613 	// unreserve pages later
2614 	struct PagesUnreserver {
2615 		PagesUnreserver(vm_page_reservation* reservation)
2616 			:
2617 			fReservation(reservation)
2618 		{
2619 		}
2620 
2621 		~PagesUnreserver()
2622 		{
2623 			if (fReservation != NULL)
2624 				vm_page_unreserve_pages(fReservation);
2625 		}
2626 
2627 	private:
2628 		vm_page_reservation*	fReservation;
2629 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2630 
2631 	bool writableCopy
2632 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2633 	uint8* targetPageProtections = NULL;
2634 
2635 	if (source->page_protections != NULL) {
2636 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2637 		targetPageProtections = (uint8*)malloc_etc(bytes,
2638 			(source->address_space == VMAddressSpace::Kernel()
2639 					|| targetAddressSpace == VMAddressSpace::Kernel())
2640 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2641 		if (targetPageProtections == NULL)
2642 			return B_NO_MEMORY;
2643 
2644 		memcpy(targetPageProtections, source->page_protections, bytes);
2645 
2646 		if (!writableCopy) {
2647 			for (size_t i = 0; i < bytes; i++) {
2648 				if ((targetPageProtections[i]
2649 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2650 					writableCopy = true;
2651 					break;
2652 				}
2653 			}
2654 		}
2655 	}
2656 
2657 	if (addressSpec == B_CLONE_ADDRESS) {
2658 		addressSpec = B_EXACT_ADDRESS;
2659 		*_address = (void*)source->Base();
2660 	}
2661 
2662 	// First, create a cache on top of the source area, or use the existing
2663 	// one if this is a shared area.
2664 
2665 	VMArea* target;
2666 	virtual_address_restrictions addressRestrictions = {};
2667 	addressRestrictions.address = *_address;
2668 	addressRestrictions.address_specification = addressSpec;
2669 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2670 		name, source->Size(), source->wiring, source->protection,
2671 		source->protection_max,
2672 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2673 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2674 		&addressRestrictions, true, &target, _address);
2675 	if (status < B_OK) {
2676 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2677 		return status;
2678 	}
2679 
2680 	if (targetPageProtections != NULL)
2681 		target->page_protections = targetPageProtections;
2682 
2683 	if (sharedArea) {
2684 		// The new area uses the old area's cache, but map_backing_store()
2685 		// hasn't acquired a ref. So we have to do that now.
2686 		cache->AcquireRefLocked();
2687 	}
2688 
2689 	// If the source area is writable, we need to move it one layer up as well
2690 
2691 	if (!sharedArea) {
2692 		if (writableCopy) {
2693 			// TODO: do something more useful if this fails!
2694 			if (vm_copy_on_write_area(cache,
2695 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2696 				panic("vm_copy_on_write_area() failed!\n");
2697 			}
2698 		}
2699 	}
2700 
2701 	// we return the ID of the newly created area
2702 	return target->id;
2703 }
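// Illustrative usage sketch (editor's addition): creating a copy-on-write
// duplicate of an existing area in another team's address space; "targetTeam"
// and "sourceArea" are placeholders.
//
//	void* address = NULL;
//	area_id copiedArea = vm_copy_area(targetTeam, "area copy", &address,
//		B_ANY_ADDRESS, sourceArea);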
2704 
2705 
2706 status_t
2707 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2708 	bool kernel)
2709 {
2710 	fix_protection(&newProtection);
2711 
2712 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2713 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2714 
2715 	if (!arch_vm_supports_protection(newProtection))
2716 		return B_NOT_SUPPORTED;
2717 
2718 	bool becomesWritable
2719 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2720 
2721 	// lock address spaces and cache
2722 	MultiAddressSpaceLocker locker;
2723 	VMCache* cache;
2724 	VMArea* area;
2725 	status_t status;
2726 	AreaCacheLocker cacheLocker;
2727 	bool isWritable;
2728 
2729 	bool restart;
2730 	do {
2731 		restart = false;
2732 
2733 		locker.Unset();
2734 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2735 		if (status != B_OK)
2736 			return status;
2737 
2738 		cacheLocker.SetTo(cache, true);	// already locked
2739 
2740 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2741 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2742 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2743 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2744 				" (%s)\n", team, newProtection, areaID, area->name);
2745 			return B_NOT_ALLOWED;
2746 		}
2747 		if (!kernel && area->protection_max != 0
2748 			&& (newProtection & area->protection_max)
2749 				!= (newProtection & B_USER_PROTECTION)) {
2750 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2751 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on "
2752 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2753 				area->protection_max, areaID, area->name);
2754 			return B_NOT_ALLOWED;
2755 		}
2756 
2757 		if (area->protection == newProtection)
2758 			return B_OK;
2759 
2760 		if (team != VMAddressSpace::KernelID()
2761 			&& area->address_space->ID() != team) {
2762 			// unless you're the kernel, you are only allowed to set
2763 			// the protection of your own areas
2764 			return B_NOT_ALLOWED;
2765 		}
2766 
2767 		isWritable
2768 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2769 
2770 		// Make sure the area (or, if we're going to call
2771 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2772 		// wired ranges.
2773 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2774 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2775 					otherArea = otherArea->cache_next) {
2776 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2777 					restart = true;
2778 					break;
2779 				}
2780 			}
2781 		} else {
2782 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2783 				restart = true;
2784 		}
2785 	} while (restart);
2786 
2787 	bool changePageProtection = true;
2788 	bool changeTopCachePagesOnly = false;
2789 
2790 	if (isWritable && !becomesWritable) {
2791 		// writable -> !writable
2792 
2793 		if (cache->source != NULL && cache->temporary) {
2794 			if (cache->CountWritableAreas(area) == 0) {
2795 				// Since this cache is no longer written to (missing pages come
2796 				// from its source cache), its commitment can be reduced to cover
2797 				// only those pages that really are in this cache.
2798 
2799 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2800 					team == VMAddressSpace::KernelID()
2801 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2802 
2803 				// TODO: we may be able to join with our source cache, if
2804 				// count == 0
2805 			}
2806 		}
2807 
2808 		// If only the writability changes, we can just remap the pages of the
2809 		// top cache, since the pages of lower caches are mapped read-only
2810 		// anyway. That is only advantageous if the number of pages in the
2811 		// cache is significantly smaller than the number of pages in the
2812 		// area, though.
2813 		if (newProtection
2814 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2815 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2816 			changeTopCachePagesOnly = true;
2817 		}
2818 	} else if (!isWritable && becomesWritable) {
2819 		// !writable -> writable
2820 
2821 		if (!cache->consumers.IsEmpty()) {
2822 			// There are consumers -- we have to insert a new cache. Fortunately
2823 			// vm_copy_on_write_area() does everything that's needed.
2824 			changePageProtection = false;
2825 			status = vm_copy_on_write_area(cache, NULL);
2826 		} else {
2827 			// No consumers, so we don't need to insert a new one.
2828 			if (cache->source != NULL && cache->temporary) {
2829 				// the cache's commitment must contain all possible pages
2830 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2831 					team == VMAddressSpace::KernelID()
2832 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2833 			}
2834 
2835 			if (status == B_OK && cache->source != NULL) {
2836 				// There's a source cache, hence we can't just change all pages'
2837 				// protection or we might allow writing into pages belonging to
2838 				// a lower cache.
2839 				changeTopCachePagesOnly = true;
2840 			}
2841 		}
2842 	} else {
2843 		// we don't have anything special to do in all other cases
2844 	}
2845 
2846 	if (status == B_OK) {
2847 		// remap existing pages in this cache
2848 		if (changePageProtection) {
2849 			VMTranslationMap* map = area->address_space->TranslationMap();
2850 			map->Lock();
2851 
2852 			if (changeTopCachePagesOnly) {
2853 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2854 				page_num_t endPageOffset
2855 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2856 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2857 						vm_page* page = it.Next();) {
2858 					if (page->cache_offset >= firstPageOffset
2859 						&& page->cache_offset < endPageOffset) {
2860 						addr_t address = virtual_page_address(area, page);
2861 						map->ProtectPage(area, address, newProtection);
2862 					}
2863 				}
2864 			} else
2865 				map->ProtectArea(area, newProtection);
2866 
2867 			map->Unlock();
2868 		}
2869 
2870 		area->protection = newProtection;
2871 	}
2872 
2873 	return status;
2874 }
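// Editor's note, illustrative only: revoking write access from an area (and
// thereby taking the "writable -> !writable" path above) could look like
//
//	status_t error = vm_set_area_protection(team, areaID,
//		B_READ_AREA | B_KERNEL_READ_AREA, false);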
2875 
2876 
2877 status_t
2878 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2879 {
2880 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2881 	if (addressSpace == NULL)
2882 		return B_BAD_TEAM_ID;
2883 
2884 	VMTranslationMap* map = addressSpace->TranslationMap();
2885 
2886 	map->Lock();
2887 	uint32 dummyFlags;
2888 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2889 	map->Unlock();
2890 
2891 	addressSpace->Put();
2892 	return status;
2893 }
2894 
2895 
2896 /*!	The page's cache must be locked.
2897 */
2898 bool
2899 vm_test_map_modification(vm_page* page)
2900 {
2901 	if (page->modified)
2902 		return true;
2903 
2904 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2905 	vm_page_mapping* mapping;
2906 	while ((mapping = iterator.Next()) != NULL) {
2907 		VMArea* area = mapping->area;
2908 		VMTranslationMap* map = area->address_space->TranslationMap();
2909 
2910 		phys_addr_t physicalAddress;
2911 		uint32 flags;
2912 		map->Lock();
2913 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2914 		map->Unlock();
2915 
2916 		if ((flags & PAGE_MODIFIED) != 0)
2917 			return true;
2918 	}
2919 
2920 	return false;
2921 }
2922 
2923 
2924 /*!	The page's cache must be locked.
2925 */
2926 void
2927 vm_clear_map_flags(vm_page* page, uint32 flags)
2928 {
2929 	if ((flags & PAGE_ACCESSED) != 0)
2930 		page->accessed = false;
2931 	if ((flags & PAGE_MODIFIED) != 0)
2932 		page->modified = false;
2933 
2934 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2935 	vm_page_mapping* mapping;
2936 	while ((mapping = iterator.Next()) != NULL) {
2937 		VMArea* area = mapping->area;
2938 		VMTranslationMap* map = area->address_space->TranslationMap();
2939 
2940 		map->Lock();
2941 		map->ClearFlags(virtual_page_address(area, page), flags);
2942 		map->Unlock();
2943 	}
2944 }
2945 
2946 
2947 /*!	Removes all mappings from a page.
2948 	After you've called this function, the page is unmapped from memory and
2949 	the page's \c accessed and \c modified flags have been updated according
2950 	to the state of the mappings.
2951 	The page's cache must be locked.
2952 */
2953 void
2954 vm_remove_all_page_mappings(vm_page* page)
2955 {
2956 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2957 		VMArea* area = mapping->area;
2958 		VMTranslationMap* map = area->address_space->TranslationMap();
2959 		addr_t address = virtual_page_address(area, page);
2960 		map->UnmapPage(area, address, false);
2961 	}
2962 }
2963 
2964 
2965 int32
2966 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2967 {
2968 	int32 count = 0;
2969 
2970 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2971 	vm_page_mapping* mapping;
2972 	while ((mapping = iterator.Next()) != NULL) {
2973 		VMArea* area = mapping->area;
2974 		VMTranslationMap* map = area->address_space->TranslationMap();
2975 
2976 		bool modified;
2977 		if (map->ClearAccessedAndModified(area,
2978 				virtual_page_address(area, page), false, modified)) {
2979 			count++;
2980 		}
2981 
2982 		page->modified |= modified;
2983 	}
2984 
2985 
2986 	if (page->accessed) {
2987 		count++;
2988 		page->accessed = false;
2989 	}
2990 
2991 	return count;
2992 }
2993 
2994 
2995 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2996 	mappings.
2997 	The function iterates through the page mappings and removes them until
2998 	encountering one that has been accessed. From then on it will continue to
2999 	iterate, but only clear the accessed flag of the mapping. The page's
3000 	\c modified bit will be updated accordingly, the \c accessed bit will be
3001 	cleared.
3002 	\return The number of mapping accessed bits encountered, including the
3003 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3004 		of the page have been removed.
3005 */
3006 int32
3007 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3008 {
3009 	ASSERT(page->WiredCount() == 0);
3010 
3011 	if (page->accessed)
3012 		return vm_clear_page_mapping_accessed_flags(page);
3013 
3014 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3015 		VMArea* area = mapping->area;
3016 		VMTranslationMap* map = area->address_space->TranslationMap();
3017 		addr_t address = virtual_page_address(area, page);
3018 		bool modified = false;
3019 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3020 			page->accessed = true;
3021 			page->modified |= modified;
3022 			return vm_clear_page_mapping_accessed_flags(page);
3023 		}
3024 		page->modified |= modified;
3025 	}
3026 
3027 	return 0;
3028 }
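// Editor's note: the return value lets the caller (presumably the page
// daemon's page aging code) distinguish "page was completely unmapped" (0)
// from "page is still being used" (> 0) in a single pass over the mappings.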
3029 
3030 
3031 static int
3032 display_mem(int argc, char** argv)
3033 {
3034 	bool physical = false;
3035 	addr_t copyAddress;
3036 	int32 displayWidth;
3037 	int32 itemSize;
3038 	int32 num = -1;
3039 	addr_t address;
3040 	int i = 1, j;
3041 
3042 	if (argc > 1 && argv[1][0] == '-') {
3043 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3044 			physical = true;
3045 			i++;
3046 		} else
3047 			i = 99;
3048 	}
3049 
3050 	if (argc < i + 1 || argc > i + 2) {
3051 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3052 			"\tdl - 8 bytes\n"
3053 			"\tdw - 4 bytes\n"
3054 			"\tds - 2 bytes\n"
3055 			"\tdb - 1 byte\n"
3056 			"\tstring - a whole string\n"
3057 			"  -p or --physical only allows memory from a single page to be "
3058 			"displayed.\n");
3059 		return 0;
3060 	}
3061 
3062 	address = parse_expression(argv[i]);
3063 
3064 	if (argc > i + 1)
3065 		num = parse_expression(argv[i + 1]);
3066 
3067 	// build the format string
3068 	if (strcmp(argv[0], "db") == 0) {
3069 		itemSize = 1;
3070 		displayWidth = 16;
3071 	} else if (strcmp(argv[0], "ds") == 0) {
3072 		itemSize = 2;
3073 		displayWidth = 8;
3074 	} else if (strcmp(argv[0], "dw") == 0) {
3075 		itemSize = 4;
3076 		displayWidth = 4;
3077 	} else if (strcmp(argv[0], "dl") == 0) {
3078 		itemSize = 8;
3079 		displayWidth = 2;
3080 	} else if (strcmp(argv[0], "string") == 0) {
3081 		itemSize = 1;
3082 		displayWidth = -1;
3083 	} else {
3084 		kprintf("display_mem called in an invalid way!\n");
3085 		return 0;
3086 	}
3087 
3088 	if (num <= 0)
3089 		num = displayWidth;
3090 
3091 	void* physicalPageHandle = NULL;
3092 
3093 	if (physical) {
3094 		int32 offset = address & (B_PAGE_SIZE - 1);
3095 		if (num * itemSize + offset > B_PAGE_SIZE) {
3096 			num = (B_PAGE_SIZE - offset) / itemSize;
3097 			kprintf("NOTE: number of bytes has been cut to page size\n");
3098 		}
3099 
3100 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3101 
3102 		if (vm_get_physical_page_debug(address, &copyAddress,
3103 				&physicalPageHandle) != B_OK) {
3104 			kprintf("getting the hardware page failed.\n");
3105 			return 0;
3106 		}
3107 
3108 		address += offset;
3109 		copyAddress += offset;
3110 	} else
3111 		copyAddress = address;
3112 
3113 	if (!strcmp(argv[0], "string")) {
3114 		kprintf("%p \"", (char*)copyAddress);
3115 
3116 		// string mode
3117 		for (i = 0; true; i++) {
3118 			char c;
3119 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3120 					!= B_OK
3121 				|| c == '\0') {
3122 				break;
3123 			}
3124 
3125 			if (c == '\n')
3126 				kprintf("\\n");
3127 			else if (c == '\t')
3128 				kprintf("\\t");
3129 			else {
3130 				if (!isprint(c))
3131 					c = '.';
3132 
3133 				kprintf("%c", c);
3134 			}
3135 		}
3136 
3137 		kprintf("\"\n");
3138 	} else {
3139 		// number mode
3140 		for (i = 0; i < num; i++) {
3141 			uint64 value;
3142 
3143 			if ((i % displayWidth) == 0) {
3144 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3145 				if (i != 0)
3146 					kprintf("\n");
3147 
3148 				kprintf("[0x%lx]  ", address + i * itemSize);
3149 
3150 				for (j = 0; j < displayed; j++) {
3151 					char c;
3152 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3153 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3154 						displayed = j;
3155 						break;
3156 					}
3157 					if (!isprint(c))
3158 						c = '.';
3159 
3160 					kprintf("%c", c);
3161 				}
3162 				if (num > displayWidth) {
3163 					// make sure the spacing in the last line is correct
3164 					for (j = displayed; j < displayWidth * itemSize; j++)
3165 						kprintf(" ");
3166 				}
3167 				kprintf("  ");
3168 			}
3169 
3170 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3171 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3172 				kprintf("read fault");
3173 				break;
3174 			}
3175 
3176 			switch (itemSize) {
3177 				case 1:
3178 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3179 					break;
3180 				case 2:
3181 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3182 					break;
3183 				case 4:
3184 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3185 					break;
3186 				case 8:
3187 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3188 					break;
3189 			}
3190 		}
3191 
3192 		kprintf("\n");
3193 	}
3194 
3195 	if (physical) {
3196 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3197 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3198 	}
3199 	return 0;
3200 }
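// Editor's note, example invocations of the KDL command above (the addresses
// are placeholders):
//
//	dw 0x80125000 8       prints eight 32-bit values starting at that address
//	db -p 0x9f000 16      dumps 16 bytes from the physical page at 0x9f000
//	string 0x80125000     prints the NUL-terminated string at that address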
3201 
3202 
3203 static void
3204 dump_cache_tree_recursively(VMCache* cache, int level,
3205 	VMCache* highlightCache)
3206 {
3207 	// print this cache
3208 	for (int i = 0; i < level; i++)
3209 		kprintf("  ");
3210 	if (cache == highlightCache)
3211 		kprintf("%p <--\n", cache);
3212 	else
3213 		kprintf("%p\n", cache);
3214 
3215 	// recursively print its consumers
3216 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3217 			VMCache* consumer = it.Next();) {
3218 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3219 	}
3220 }
3221 
3222 
3223 static int
3224 dump_cache_tree(int argc, char** argv)
3225 {
3226 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3227 		kprintf("usage: %s <address>\n", argv[0]);
3228 		return 0;
3229 	}
3230 
3231 	addr_t address = parse_expression(argv[1]);
3232 	if (address == 0)
3233 		return 0;
3234 
3235 	VMCache* cache = (VMCache*)address;
3236 	VMCache* root = cache;
3237 
3238 	// find the root cache (the transitive source)
3239 	while (root->source != NULL)
3240 		root = root->source;
3241 
3242 	dump_cache_tree_recursively(root, 0, cache);
3243 
3244 	return 0;
3245 }
3246 
3247 
3248 const char*
3249 vm_cache_type_to_string(int32 type)
3250 {
3251 	switch (type) {
3252 		case CACHE_TYPE_RAM:
3253 			return "RAM";
3254 		case CACHE_TYPE_DEVICE:
3255 			return "device";
3256 		case CACHE_TYPE_VNODE:
3257 			return "vnode";
3258 		case CACHE_TYPE_NULL:
3259 			return "null";
3260 
3261 		default:
3262 			return "unknown";
3263 	}
3264 }
3265 
3266 
3267 #if DEBUG_CACHE_LIST
3268 
3269 static void
3270 update_cache_info_recursively(VMCache* cache, cache_info& info)
3271 {
3272 	info.page_count += cache->page_count;
3273 	if (cache->type == CACHE_TYPE_RAM)
3274 		info.committed += cache->committed_size;
3275 
3276 	// recurse
3277 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3278 			VMCache* consumer = it.Next();) {
3279 		update_cache_info_recursively(consumer, info);
3280 	}
3281 }
3282 
3283 
3284 static int
3285 cache_info_compare_page_count(const void* _a, const void* _b)
3286 {
3287 	const cache_info* a = (const cache_info*)_a;
3288 	const cache_info* b = (const cache_info*)_b;
3289 	if (a->page_count == b->page_count)
3290 		return 0;
3291 	return a->page_count < b->page_count ? 1 : -1;
3292 }
3293 
3294 
3295 static int
3296 cache_info_compare_committed(const void* _a, const void* _b)
3297 {
3298 	const cache_info* a = (const cache_info*)_a;
3299 	const cache_info* b = (const cache_info*)_b;
3300 	if (a->committed == b->committed)
3301 		return 0;
3302 	return a->committed < b->committed ? 1 : -1;
3303 }
3304 
3305 
3306 static void
3307 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3308 {
3309 	for (int i = 0; i < level; i++)
3310 		kprintf("  ");
3311 
3312 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3313 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3314 		cache->virtual_base, cache->virtual_end, cache->page_count);
3315 
3316 	if (level == 0)
3317 		kprintf("/%lu", info.page_count);
3318 
3319 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3320 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3321 
3322 		if (level == 0)
3323 			kprintf("/%lu", info.committed);
3324 	}
3325 
3326 	// areas
3327 	if (cache->areas != NULL) {
3328 		VMArea* area = cache->areas;
3329 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3330 			area->name, area->address_space->ID());
3331 
3332 		while (area->cache_next != NULL) {
3333 			area = area->cache_next;
3334 			kprintf(", %" B_PRId32, area->id);
3335 		}
3336 	}
3337 
3338 	kputs("\n");
3339 
3340 	// recurse
3341 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3342 			VMCache* consumer = it.Next();) {
3343 		dump_caches_recursively(consumer, info, level + 1);
3344 	}
3345 }
3346 
3347 
3348 static int
3349 dump_caches(int argc, char** argv)
3350 {
3351 	if (sCacheInfoTable == NULL) {
3352 		kprintf("No cache info table!\n");
3353 		return 0;
3354 	}
3355 
3356 	bool sortByPageCount = true;
3357 
3358 	for (int32 i = 1; i < argc; i++) {
3359 		if (strcmp(argv[i], "-c") == 0) {
3360 			sortByPageCount = false;
3361 		} else {
3362 			print_debugger_command_usage(argv[0]);
3363 			return 0;
3364 		}
3365 	}
3366 
3367 	uint32 totalCount = 0;
3368 	uint32 rootCount = 0;
3369 	off_t totalCommitted = 0;
3370 	page_num_t totalPages = 0;
3371 
3372 	VMCache* cache = gDebugCacheList;
3373 	while (cache) {
3374 		totalCount++;
3375 		if (cache->source == NULL) {
3376 			cache_info stackInfo;
3377 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3378 				? sCacheInfoTable[rootCount] : stackInfo;
3379 			rootCount++;
3380 			info.cache = cache;
3381 			info.page_count = 0;
3382 			info.committed = 0;
3383 			update_cache_info_recursively(cache, info);
3384 			totalCommitted += info.committed;
3385 			totalPages += info.page_count;
3386 		}
3387 
3388 		cache = cache->debug_next;
3389 	}
3390 
3391 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3392 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3393 			sortByPageCount
3394 				? &cache_info_compare_page_count
3395 				: &cache_info_compare_committed);
3396 	}
3397 
3398 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3399 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3400 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3401 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3402 			"page count" : "committed size");
3403 
3404 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3405 		for (uint32 i = 0; i < rootCount; i++) {
3406 			cache_info& info = sCacheInfoTable[i];
3407 			dump_caches_recursively(info.cache, info, 0);
3408 		}
3409 	} else
3410 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3411 
3412 	return 0;
3413 }
3414 
3415 #endif	// DEBUG_CACHE_LIST
3416 
3417 
3418 static int
3419 dump_cache(int argc, char** argv)
3420 {
3421 	VMCache* cache;
3422 	bool showPages = false;
3423 	int i = 1;
3424 
3425 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3426 		kprintf("usage: %s [-ps] <address>\n"
3427 			"  if -p is specified, all pages are shown; if -s is used,\n"
3428 			"  only the cache info is shown.\n", argv[0]);
3429 		return 0;
3430 	}
3431 	while (argv[i][0] == '-') {
3432 		char* arg = argv[i] + 1;
3433 		while (arg[0]) {
3434 			if (arg[0] == 'p')
3435 				showPages = true;
3436 			arg++;
3437 		}
3438 		i++;
3439 	}
3440 	if (argv[i] == NULL) {
3441 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3442 		return 0;
3443 	}
3444 
3445 	addr_t address = parse_expression(argv[i]);
3446 	if (address == 0)
3447 		return 0;
3448 
3449 	cache = (VMCache*)address;
3450 
3451 	cache->Dump(showPages);
3452 
3453 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3454 
3455 	return 0;
3456 }
3457 
3458 
3459 static void
3460 dump_area_struct(VMArea* area, bool mappings)
3461 {
3462 	kprintf("AREA: %p\n", area);
3463 	kprintf("name:\t\t'%s'\n", area->name);
3464 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3465 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3466 	kprintf("base:\t\t0x%lx\n", area->Base());
3467 	kprintf("size:\t\t0x%lx\n", area->Size());
3468 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3469 	kprintf("page_protection:%p\n", area->page_protections);
3470 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3471 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3472 	kprintf("cache:\t\t%p\n", area->cache);
3473 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3474 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3475 	kprintf("cache_next:\t%p\n", area->cache_next);
3476 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3477 
3478 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3479 	if (mappings) {
3480 		kprintf("page mappings:\n");
3481 		while (iterator.HasNext()) {
3482 			vm_page_mapping* mapping = iterator.Next();
3483 			kprintf("  %p", mapping->page);
3484 		}
3485 		kprintf("\n");
3486 	} else {
3487 		uint32 count = 0;
3488 		while (iterator.Next() != NULL) {
3489 			count++;
3490 		}
3491 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3492 	}
3493 }
3494 
3495 
3496 static int
3497 dump_area(int argc, char** argv)
3498 {
3499 	bool mappings = false;
3500 	bool found = false;
3501 	int32 index = 1;
3502 	VMArea* area;
3503 	addr_t num;
3504 
3505 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3506 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3507 			"All areas matching either id/address/name are listed. You can\n"
3508 			"restrict the check to a single attribute by prefixing the specifier\n"
3509 			"with the id/contains/address/name keywords.\n"
3510 			"-m shows the area's mappings as well.\n");
3511 		return 0;
3512 	}
3513 
3514 	if (!strcmp(argv[1], "-m")) {
3515 		mappings = true;
3516 		index++;
3517 	}
3518 
3519 	int32 mode = 0xf;
3520 	if (!strcmp(argv[index], "id"))
3521 		mode = 1;
3522 	else if (!strcmp(argv[index], "contains"))
3523 		mode = 2;
3524 	else if (!strcmp(argv[index], "name"))
3525 		mode = 4;
3526 	else if (!strcmp(argv[index], "address"))
3527 		mode = 0;
3528 	if (mode != 0xf)
3529 		index++;
3530 
3531 	if (index >= argc) {
3532 		kprintf("No area specifier given.\n");
3533 		return 0;
3534 	}
3535 
3536 	num = parse_expression(argv[index]);
3537 
3538 	if (mode == 0) {
3539 		dump_area_struct((struct VMArea*)num, mappings);
3540 	} else {
3541 		// walk through the area list, looking for the arguments as a name
3542 
3543 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3544 		while ((area = it.Next()) != NULL) {
3545 			if (((mode & 4) != 0
3546 					&& !strcmp(argv[index], area->name))
3547 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3548 					|| (((mode & 2) != 0 && area->Base() <= num
3549 						&& area->Base() + area->Size() > num))))) {
3550 				dump_area_struct(area, mappings);
3551 				found = true;
3552 			}
3553 		}
3554 
3555 		if (!found)
3556 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3557 	}
3558 
3559 	return 0;
3560 }
3561 
3562 
3563 static int
3564 dump_area_list(int argc, char** argv)
3565 {
3566 	VMArea* area;
3567 	const char* name = NULL;
3568 	int32 id = 0;
3569 
3570 	if (argc > 1) {
3571 		id = parse_expression(argv[1]);
3572 		if (id == 0)
3573 			name = argv[1];
3574 	}
3575 
3576 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3577 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3578 		B_PRINTF_POINTER_WIDTH, "size");
3579 
3580 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3581 	while ((area = it.Next()) != NULL) {
3582 		if ((id != 0 && area->address_space->ID() != id)
3583 			|| (name != NULL && strstr(area->name, name) == NULL))
3584 			continue;
3585 
3586 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3587 			area->id, (void*)area->Base(), (void*)area->Size(),
3588 			area->protection, area->wiring, area->name);
3589 	}
3590 	return 0;
3591 }
3592 
3593 
3594 static int
3595 dump_available_memory(int argc, char** argv)
3596 {
3597 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3598 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3599 	return 0;
3600 }
3601 
3602 
3603 static int
3604 dump_mapping_info(int argc, char** argv)
3605 {
3606 	bool reverseLookup = false;
3607 	bool pageLookup = false;
3608 
3609 	int argi = 1;
3610 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3611 		const char* arg = argv[argi];
3612 		if (strcmp(arg, "-r") == 0) {
3613 			reverseLookup = true;
3614 		} else if (strcmp(arg, "-p") == 0) {
3615 			reverseLookup = true;
3616 			pageLookup = true;
3617 		} else {
3618 			print_debugger_command_usage(argv[0]);
3619 			return 0;
3620 		}
3621 	}
3622 
3623 	// We need at least one argument, the address. Optionally a thread ID can be
3624 	// specified.
3625 	if (argi >= argc || argi + 2 < argc) {
3626 		print_debugger_command_usage(argv[0]);
3627 		return 0;
3628 	}
3629 
3630 	uint64 addressValue;
3631 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3632 		return 0;
3633 
3634 	Team* team = NULL;
3635 	if (argi < argc) {
3636 		uint64 threadID;
3637 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3638 			return 0;
3639 
3640 		Thread* thread = Thread::GetDebug(threadID);
3641 		if (thread == NULL) {
3642 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3643 			return 0;
3644 		}
3645 
3646 		team = thread->team;
3647 	}
3648 
3649 	if (reverseLookup) {
3650 		phys_addr_t physicalAddress;
3651 		if (pageLookup) {
3652 			vm_page* page = (vm_page*)(addr_t)addressValue;
3653 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3654 		} else {
3655 			physicalAddress = (phys_addr_t)addressValue;
3656 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3657 		}
3658 
3659 		kprintf("    Team     Virtual Address      Area\n");
3660 		kprintf("--------------------------------------\n");
3661 
3662 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3663 			Callback()
3664 				:
3665 				fAddressSpace(NULL)
3666 			{
3667 			}
3668 
3669 			void SetAddressSpace(VMAddressSpace* addressSpace)
3670 			{
3671 				fAddressSpace = addressSpace;
3672 			}
3673 
3674 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3675 			{
3676 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3677 					virtualAddress);
3678 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3679 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3680 				else
3681 					kprintf("\n");
3682 				return false;
3683 			}
3684 
3685 		private:
3686 			VMAddressSpace*	fAddressSpace;
3687 		} callback;
3688 
3689 		if (team != NULL) {
3690 			// team specified -- get its address space
3691 			VMAddressSpace* addressSpace = team->address_space;
3692 			if (addressSpace == NULL) {
3693 				kprintf("Failed to get address space!\n");
3694 				return 0;
3695 			}
3696 
3697 			callback.SetAddressSpace(addressSpace);
3698 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3699 				physicalAddress, callback);
3700 		} else {
3701 			// no team specified -- iterate through all address spaces
3702 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3703 				addressSpace != NULL;
3704 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3705 				callback.SetAddressSpace(addressSpace);
3706 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3707 					physicalAddress, callback);
3708 			}
3709 		}
3710 	} else {
3711 		// get the address space
3712 		addr_t virtualAddress = (addr_t)addressValue;
3713 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3714 		VMAddressSpace* addressSpace;
3715 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3716 			addressSpace = VMAddressSpace::Kernel();
3717 		} else if (team != NULL) {
3718 			addressSpace = team->address_space;
3719 		} else {
3720 			Thread* thread = debug_get_debugged_thread();
3721 			if (thread == NULL || thread->team == NULL) {
3722 				kprintf("Failed to get team!\n");
3723 				return 0;
3724 			}
3725 
3726 			addressSpace = thread->team->address_space;
3727 		}
3728 
3729 		if (addressSpace == NULL) {
3730 			kprintf("Failed to get address space!\n");
3731 			return 0;
3732 		}
3733 
3734 		// let the translation map implementation do the job
3735 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3736 	}
3737 
3738 	return 0;
3739 }
3740 
3741 
3742 /*!	Deletes all areas and reserved regions in the given address space.
3743 
3744 	The caller must ensure that none of the areas has any wired ranges.
3745 
3746 	\param addressSpace The address space.
3747 	\param deletingAddressSpace \c true, if the address space is in the process
3748 		of being deleted.
3749 */
3750 void
3751 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3752 {
3753 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3754 		addressSpace->ID()));
3755 
3756 	addressSpace->WriteLock();
3757 
3758 	// remove all reserved areas in this address space
3759 	addressSpace->UnreserveAllAddressRanges(0);
3760 
3761 	// delete all the areas in this address space
3762 	while (VMArea* area = addressSpace->FirstArea()) {
3763 		ASSERT(!area->IsWired());
3764 		delete_area(addressSpace, area, deletingAddressSpace);
3765 	}
3766 
3767 	addressSpace->WriteUnlock();
3768 }
3769 
3770 
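/*!	Returns the ID of the area covering the given address. For a userland
	address the current team's address space is searched, the kernel address
	space otherwise. If \a kernel is \c false, the area must additionally be
	readable or writable by userland code.
*/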
3771 static area_id
3772 vm_area_for(addr_t address, bool kernel)
3773 {
3774 	team_id team;
3775 	if (IS_USER_ADDRESS(address)) {
3776 		// we try the user team address space, if any
3777 		team = VMAddressSpace::CurrentID();
3778 		if (team < 0)
3779 			return team;
3780 	} else
3781 		team = VMAddressSpace::KernelID();
3782 
3783 	AddressSpaceReadLocker locker(team);
3784 	if (!locker.IsLocked())
3785 		return B_BAD_TEAM_ID;
3786 
3787 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3788 	if (area != NULL) {
3789 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3790 			return B_ERROR;
3791 
3792 		return area->id;
3793 	}
3794 
3795 	return B_ERROR;
3796 }
3797 
3798 
3799 /*!	Frees physical pages that were used during the boot process.
3800 	\a end is inclusive.
3801 */
3802 static void
3803 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3804 {
3805 	// free all physical pages in the specified range
3806 
3807 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3808 		phys_addr_t physicalAddress;
3809 		uint32 flags;
3810 
3811 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3812 			&& (flags & PAGE_PRESENT) != 0) {
3813 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3814 			if (page != NULL && page->State() != PAGE_STATE_FREE
3815 					&& page->State() != PAGE_STATE_CLEAR
3816 					&& page->State() != PAGE_STATE_UNUSED) {
3817 				DEBUG_PAGE_ACCESS_START(page);
3818 				vm_page_set_state(page, PAGE_STATE_FREE);
3819 			}
3820 		}
3821 	}
3822 
3823 	// unmap the memory
3824 	map->Unmap(start, end);
3825 }
3826 
3827 
3828 void
3829 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3830 {
3831 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3832 	addr_t end = start + (size - 1);
3833 	addr_t lastEnd = start;
3834 
3835 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3836 		(void*)start, (void*)end));
3837 
3838 	// The areas are sorted in virtual address space order, so
3839 	// we just have to find the holes between them that fall
3840 	// into the range we should dispose of
3841 
3842 	map->Lock();
3843 
3844 	for (VMAddressSpace::AreaIterator it
3845 				= VMAddressSpace::Kernel()->GetAreaIterator();
3846 			VMArea* area = it.Next();) {
3847 		addr_t areaStart = area->Base();
3848 		addr_t areaEnd = areaStart + (area->Size() - 1);
3849 
3850 		if (areaEnd < start)
3851 			continue;
3852 
3853 		if (areaStart > end) {
3854 			// we are done, the area is already beyond what we have to free
3855 			break;
3856 		}
3857 
3858 		if (areaStart > lastEnd) {
3859 			// this is something we can free
3860 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3861 				(void*)areaStart));
3862 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3863 		}
3864 
3865 		if (areaEnd >= end) {
3866 			lastEnd = areaEnd;
3867 				// no +1 to prevent potential overflow
3868 			break;
3869 		}
3870 
3871 		lastEnd = areaEnd + 1;
3872 	}
3873 
3874 	if (lastEnd < end) {
3875 		// we can also get rid of some space at the end of the area
3876 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3877 			(void*)end));
3878 		unmap_and_free_physical_pages(map, lastEnd, end);
3879 	}
3880 
3881 	map->Unlock();
3882 }
3883 
3884 
3885 static void
3886 create_preloaded_image_areas(struct preloaded_image* _image)
3887 {
3888 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3889 	char name[B_OS_NAME_LENGTH];
3890 	void* address;
3891 	int32 length;
3892 
3893 	// use file name to create a good area name
3894 	char* fileName = strrchr(image->name, '/');
3895 	if (fileName == NULL)
3896 		fileName = image->name;
3897 	else
3898 		fileName++;
3899 
3900 	length = strlen(fileName);
3901 	// make sure there is enough space for the suffix
3902 	if (length > 25)
3903 		length = 25;
3904 
3905 	memcpy(name, fileName, length);
3906 	strcpy(name + length, "_text");
3907 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3908 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3909 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3910 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3911 		// this will later be remapped read-only/executable by the
3912 		// ELF initialization code
3913 
3914 	strcpy(name + length, "_data");
3915 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3916 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3917 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3918 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3919 }
3920 
3921 
3922 /*!	Frees all kernel args areas that were previously allocated from the
3923 	kernel_args structure. Any boot loader resources contained in those
3924 	arguments must not be accessed anymore past this point.
3925 */
3926 void
3927 vm_free_kernel_args(kernel_args* args)
3928 {
3929 	uint32 i;
3930 
3931 	TRACE(("vm_free_kernel_args()\n"));
3932 
3933 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3934 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3935 		if (area >= B_OK)
3936 			delete_area(area);
3937 	}
3938 }
3939 
3940 
3941 static void
3942 allocate_kernel_args(kernel_args* args)
3943 {
3944 	TRACE(("allocate_kernel_args()\n"));
3945 
3946 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3947 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3948 
3949 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3950 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3951 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3952 	}
3953 }
3954 
3955 
3956 static void
3957 unreserve_boot_loader_ranges(kernel_args* args)
3958 {
3959 	TRACE(("unreserve_boot_loader_ranges()\n"));
3960 
3961 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3962 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3963 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3964 			args->virtual_allocated_range[i].size);
3965 	}
3966 }
3967 
3968 
3969 static void
3970 reserve_boot_loader_ranges(kernel_args* args)
3971 {
3972 	TRACE(("reserve_boot_loader_ranges()\n"));
3973 
3974 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3975 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3976 
3977 		// If the address is not a kernel address, we just skip it. The
3978 		// architecture specific code has to deal with it.
3979 		if (!IS_KERNEL_ADDRESS(address)) {
3980 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3981 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3982 			continue;
3983 		}
3984 
3985 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3986 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3987 		if (status < B_OK)
3988 			panic("could not reserve boot loader ranges\n");
3989 	}
3990 }
3991 
3992 
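/*!	Carves \a size bytes out of the kernel's virtual address space by extending
	one of the kernel_args' virtual_allocated_range entries. The gaps between
	consecutive ranges are tried first, then the space after the last range,
	and finally the space before the first one. Returns the allocated base
	address, or 0 if no suitable gap was found.
*/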
3993 static addr_t
3994 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3995 {
3996 	size = PAGE_ALIGN(size);
3997 
3998 	// find a slot in the virtual allocation addr range
3999 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4000 		// check to see if the space between this one and the last is big enough
4001 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4002 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4003 			+ args->virtual_allocated_range[i - 1].size;
4004 
4005 		addr_t base = alignment > 0
4006 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4007 
4008 		if (base >= KERNEL_BASE && base < rangeStart
4009 				&& rangeStart - base >= size) {
4010 			args->virtual_allocated_range[i - 1].size
4011 				+= base + size - previousRangeEnd;
4012 			return base;
4013 		}
4014 	}
4015 
4016 	// We didn't find a suitable gap between the allocated ranges. That's OK;
4017 	// see if there's a gap after the last one.
4018 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4019 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4020 		+ args->virtual_allocated_range[lastEntryIndex].size;
4021 	addr_t base = alignment > 0
4022 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4023 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4024 		args->virtual_allocated_range[lastEntryIndex].size
4025 			+= base + size - lastRangeEnd;
4026 		return base;
4027 	}
4028 
4029 	// see if there's a gap before the first one
4030 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4031 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4032 		base = rangeStart - size;
4033 		if (alignment > 0)
4034 			base = ROUNDDOWN(base, alignment);
4035 
4036 		if (base >= KERNEL_BASE) {
4037 			args->virtual_allocated_range[0].start = base;
4038 			args->virtual_allocated_range[0].size += rangeStart - base;
4039 			return base;
4040 		}
4041 	}
4042 
4043 	return 0;
4044 }
4045 
4046 
4047 static bool
4048 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4049 {
4050 	// TODO: horrible brute-force method of determining if the page can be
4051 	// allocated
4052 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4053 		if (address >= args->physical_memory_range[i].start
4054 			&& address < args->physical_memory_range[i].start
4055 				+ args->physical_memory_range[i].size)
4056 			return true;
4057 	}
4058 	return false;
4059 }
4060 
4061 
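/*!	Allocates a single physical page before the VM is fully initialized, by
	extending one of the kernel_args' physical_allocated_range entries --
	first trying to grow a range upwards, then downwards. Returns the physical
	page number, or 0 if no page could be allocated.
*/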
4062 page_num_t
4063 vm_allocate_early_physical_page(kernel_args* args)
4064 {
4065 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4066 		phys_addr_t nextPage;
4067 
4068 		nextPage = args->physical_allocated_range[i].start
4069 			+ args->physical_allocated_range[i].size;
4070 		// see if the page after this allocated paddr run can be allocated
4071 		if (i + 1 < args->num_physical_allocated_ranges
4072 			&& args->physical_allocated_range[i + 1].size != 0) {
4073 			// see if the next page will collide with the next allocated range
4074 			if (nextPage >= args->physical_allocated_range[i+1].start)
4075 				continue;
4076 		}
4077 		// see if the next physical page fits in the memory block
4078 		if (is_page_in_physical_memory_range(args, nextPage)) {
4079 			// we got one!
4080 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4081 			return nextPage / B_PAGE_SIZE;
4082 		}
4083 	}
4084 
4085 	// Expanding upwards didn't work, try going downwards.
4086 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4087 		phys_addr_t nextPage;
4088 
4089 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4090 		// see if the page before this allocated paddr run can be allocated
4091 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4092 			// see if this page would collide with the previous allocated range
4093 			if (nextPage < args->physical_allocated_range[i-1].start
4094 				+ args->physical_allocated_range[i-1].size)
4095 				continue;
4096 		}
4097 		// see if the next physical page fits in the memory block
4098 		if (is_page_in_physical_memory_range(args, nextPage)) {
4099 			// we got one!
4100 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4101 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4102 			return nextPage / B_PAGE_SIZE;
4103 		}
4104 	}
4105 
4106 	return 0;
4107 		// could not allocate a block
4108 }
4109 
4110 
4111 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4112 	allocate some pages before the VM is completely up.
4113 */
4114 addr_t
4115 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4116 	uint32 attributes, addr_t alignment)
4117 {
4118 	if (physicalSize > virtualSize)
4119 		physicalSize = virtualSize;
4120 
4121 	// find the vaddr to allocate at
4122 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4123 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4124 	if (virtualBase == 0) {
4125 		panic("vm_allocate_early: could not allocate virtual address\n");
4126 		return 0;
4127 	}
4128 
4129 	// map the pages
4130 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4131 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4132 		if (physicalAddress == 0)
4133 			panic("error allocating early page!\n");
4134 
4135 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4136 
4137 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4138 			physicalAddress * B_PAGE_SIZE, attributes,
4139 			&vm_allocate_early_physical_page);
4140 	}
4141 
4142 	return virtualBase;
4143 }
4144 
4145 
4146 /*!	The main entry point to initialize the VM. */
4147 status_t
4148 vm_init(kernel_args* args)
4149 {
4150 	struct preloaded_image* image;
4151 	void* address;
4152 	status_t err = 0;
4153 	uint32 i;
4154 
4155 	TRACE(("vm_init: entry\n"));
4156 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4157 	err = arch_vm_init(args);
4158 
4159 	// initialize some globals
4160 	vm_page_init_num_pages(args);
4161 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4162 
4163 	slab_init(args);
4164 
4165 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4166 	off_t heapSize = INITIAL_HEAP_SIZE;
4167 	// try to accommodate low-memory systems
4168 	while (heapSize > sAvailableMemory / 8)
4169 		heapSize /= 2;
4170 	if (heapSize < 1024 * 1024)
4171 		panic("vm_init: go buy some RAM please.");
4172 
4173 	// map in the new heap and initialize it
4174 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4175 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4176 	TRACE(("heap at 0x%lx\n", heapBase));
4177 	heap_init(heapBase, heapSize);
4178 #endif
4179 
4180 	// initialize the free page list and physical page mapper
4181 	vm_page_init(args);
4182 
4183 	// initialize the cache allocators
4184 	vm_cache_init(args);
4185 
4186 	{
4187 		status_t error = VMAreaHash::Init();
4188 		if (error != B_OK)
4189 			panic("vm_init: error initializing area hash table\n");
4190 	}
4191 
4192 	VMAddressSpace::Init();
4193 	reserve_boot_loader_ranges(args);
4194 
4195 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4196 	heap_init_post_area();
4197 #endif
4198 
4199 	// Do any further initialization that the architecture dependent layers may
4200 	// need now
4201 	arch_vm_translation_map_init_post_area(args);
4202 	arch_vm_init_post_area(args);
4203 	vm_page_init_post_area(args);
4204 	slab_init_post_area();
4205 
4206 	// allocate areas to represent stuff that already exists
4207 
4208 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4209 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4210 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4211 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4212 #endif
4213 
4214 	allocate_kernel_args(args);
4215 
4216 	create_preloaded_image_areas(args->kernel_image);
4217 
4218 	// allocate areas for preloaded images
4219 	for (image = args->preloaded_images; image != NULL; image = image->next)
4220 		create_preloaded_image_areas(image);
4221 
4222 	// allocate kernel stacks
4223 	for (i = 0; i < args->num_cpus; i++) {
4224 		char name[64];
4225 
4226 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4227 		address = (void*)args->cpu_kstack[i].start;
4228 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4229 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4230 	}
4231 
4232 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4233 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4234 
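	// Block the address ranges around the debug heap fill patterns
	// (0xcccccccc for uninitialized and 0xdeadbeef for freed memory), so that
	// dereferencing such a poisoned pointer faults instead of hitting
	// arbitrary mapped memory.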
4235 #if PARANOID_KERNEL_MALLOC
4236 	vm_block_address_range("uninitialized heap memory",
4237 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4238 #endif
4239 #if PARANOID_KERNEL_FREE
4240 	vm_block_address_range("freed heap memory",
4241 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4242 #endif
4243 
4244 	// create the object cache for the page mappings
4245 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4246 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4247 		NULL, NULL);
4248 	if (gPageMappingsObjectCache == NULL)
4249 		panic("failed to create page mappings object cache");
4250 
4251 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4252 
4253 #if DEBUG_CACHE_LIST
4254 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4255 		virtual_address_restrictions virtualRestrictions = {};
4256 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4257 		physical_address_restrictions physicalRestrictions = {};
4258 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4259 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4260 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4261 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4262 			&physicalRestrictions, (void**)&sCacheInfoTable);
4263 	}
4264 #endif	// DEBUG_CACHE_LIST
4265 
4266 	// add some debugger commands
4267 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4268 	add_debugger_command("area", &dump_area,
4269 		"Dump info about a particular area");
4270 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4271 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4272 #if DEBUG_CACHE_LIST
4273 	if (sCacheInfoTable != NULL) {
4274 		add_debugger_command_etc("caches", &dump_caches,
4275 			"List all VMCache trees",
4276 			"[ \"-c\" ]\n"
4277 			"All cache trees are listed sorted in decreasing order by number "
4278 				"of\n"
4279 			"used pages or, if \"-c\" is specified, by size of committed "
4280 				"memory.\n",
4281 			0);
4282 	}
4283 #endif
4284 	add_debugger_command("avail", &dump_available_memory,
4285 		"Dump available memory");
4286 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4287 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4288 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4289 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4290 	add_debugger_command("string", &display_mem, "dump strings");
4291 
4292 	add_debugger_command_etc("mapping", &dump_mapping_info,
4293 		"Print address mapping information",
4294 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4295 		"Prints low-level page mapping information for a given address. If\n"
4296 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4297 		"address that is looked up in the translation map of the current\n"
4298 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4299 		"\"-r\" is specified, <address> is a physical address that is\n"
4300 		"searched in the translation map of all teams, respectively the team\n"
4301 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4302 		"<address> is the address of a vm_page structure. The behavior is\n"
4303 		"equivalent to specifying \"-r\" with the physical address of that\n"
4304 		"page.\n",
4305 		0);
4306 
4307 	TRACE(("vm_init: exit\n"));
4308 
4309 	vm_cache_init_post_heap();
4310 
4311 	return err;
4312 }
4313 
4314 
4315 status_t
4316 vm_init_post_sem(kernel_args* args)
4317 {
4318 	// This frees all unused boot loader resources and makes their space
4319 	// available again
4320 	arch_vm_init_end(args);
4321 	unreserve_boot_loader_ranges(args);
4322 
4323 	// fill in all of the semaphores that were not allocated before
4324 	// since we're still single threaded and only the kernel address space
4325 	// exists, it isn't that hard to find all of the ones we need to create
4326 
4327 	arch_vm_translation_map_init_post_sem(args);
4328 
4329 	slab_init_post_sem();
4330 
4331 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4332 	heap_init_post_sem();
4333 #endif
4334 
4335 	return B_OK;
4336 }
4337 
4338 
4339 status_t
4340 vm_init_post_thread(kernel_args* args)
4341 {
4342 	vm_page_init_post_thread(args);
4343 	slab_init_post_thread();
4344 	return heap_init_post_thread();
4345 }
4346 
4347 
4348 status_t
4349 vm_init_post_modules(kernel_args* args)
4350 {
4351 	return arch_vm_init_post_modules(args);
4352 }
4353 
4354 
4355 void
4356 permit_page_faults(void)
4357 {
4358 	Thread* thread = thread_get_current_thread();
4359 	if (thread != NULL)
4360 		atomic_add(&thread->page_faults_allowed, 1);
4361 }
4362 
4363 
4364 void
4365 forbid_page_faults(void)
4366 {
4367 	Thread* thread = thread_get_current_thread();
4368 	if (thread != NULL)
4369 		atomic_add(&thread->page_faults_allowed, -1);
4370 }
4371 
4372 
4373 status_t
4374 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4375 	bool isUser, addr_t* newIP)
4376 {
4377 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4378 		faultAddress));
4379 
4380 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4381 
4382 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4383 	VMAddressSpace* addressSpace = NULL;
4384 
4385 	status_t status = B_OK;
4386 	*newIP = 0;
4387 	atomic_add((int32*)&sPageFaults, 1);
4388 
4389 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4390 		addressSpace = VMAddressSpace::GetKernel();
4391 	} else if (IS_USER_ADDRESS(pageAddress)) {
4392 		addressSpace = VMAddressSpace::GetCurrent();
4393 		if (addressSpace == NULL) {
4394 			if (!isUser) {
4395 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4396 					"memory!\n");
4397 				status = B_BAD_ADDRESS;
4398 				TPF(PageFaultError(-1,
4399 					VMPageFaultTracing
4400 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4401 			} else {
4402 				// XXX weird state.
4403 				panic("vm_page_fault: non kernel thread accessing user memory "
4404 					"that doesn't exist!\n");
4405 				status = B_BAD_ADDRESS;
4406 			}
4407 		}
4408 	} else {
4409 		// The hit was probably in the 64k DMZ between kernel and user space;
4410 		// this keeps a user space thread from passing a buffer that crosses
4411 		// into kernel space
4412 		status = B_BAD_ADDRESS;
4413 		TPF(PageFaultError(-1,
4414 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4415 	}
4416 
4417 	if (status == B_OK) {
4418 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4419 			isUser, NULL);
4420 	}
4421 
4422 	if (status < B_OK) {
4423 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4424 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4425 			strerror(status), address, faultAddress, isWrite, isUser,
4426 			thread_get_current_thread_id());
4427 		if (!isUser) {
4428 			Thread* thread = thread_get_current_thread();
4429 			if (thread != NULL && thread->fault_handler != 0) {
4430 				// this will cause the arch dependent page fault handler to
4431 				// modify the IP on the interrupt frame or whatever to return
4432 				// to this address
4433 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4434 			} else {
4435 				// unhandled page fault in the kernel
4436 				panic("vm_page_fault: unhandled page fault in kernel space at "
4437 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4438 			}
4439 		} else {
4440 			Thread* thread = thread_get_current_thread();
4441 
4442 #ifdef TRACE_FAULTS
4443 			VMArea* area = NULL;
4444 			if (addressSpace != NULL) {
4445 				addressSpace->ReadLock();
4446 				area = addressSpace->LookupArea(faultAddress);
4447 			}
4448 
4449 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4450 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4451 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4452 				thread->team->Name(), thread->team->id,
4453 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4454 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4455 					area->Base() : 0x0));
4456 
4457 			if (addressSpace != NULL)
4458 				addressSpace->ReadUnlock();
4459 #endif
4460 
4461 			// If the thread has a signal handler for SIGSEGV, we simply
4462 			// send it the signal. Otherwise we notify the user debugger
4463 			// first.
4464 			struct sigaction action;
4465 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4466 					&& action.sa_handler != SIG_DFL
4467 					&& action.sa_handler != SIG_IGN)
4468 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4469 					SIGSEGV)) {
4470 				Signal signal(SIGSEGV,
4471 					status == B_PERMISSION_DENIED
4472 						? SEGV_ACCERR : SEGV_MAPERR,
4473 					EFAULT, thread->team->id);
4474 				signal.SetAddress((void*)address);
4475 				send_signal_to_thread(thread, signal, 0);
4476 			}
4477 		}
4478 	}
4479 
4480 	if (addressSpace != NULL)
4481 		addressSpace->Put();
4482 
4483 	return B_HANDLED_INTERRUPT;
4484 }
4485 
4486 
4487 struct PageFaultContext {
4488 	AddressSpaceReadLocker	addressSpaceLocker;
4489 	VMCacheChainLocker		cacheChainLocker;
4490 
4491 	VMTranslationMap*		map;
4492 	VMCache*				topCache;
4493 	off_t					cacheOffset;
4494 	vm_page_reservation		reservation;
4495 	bool					isWrite;
4496 
4497 	// return values
4498 	vm_page*				page;
4499 	bool					restart;
4500 	bool					pageAllocated;
4501 
4502 
4503 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4504 		:
4505 		addressSpaceLocker(addressSpace, true),
4506 		map(addressSpace->TranslationMap()),
4507 		isWrite(isWrite)
4508 	{
4509 	}
4510 
4511 	~PageFaultContext()
4512 	{
4513 		UnlockAll();
4514 		vm_page_unreserve_pages(&reservation);
4515 	}
4516 
4517 	void Prepare(VMCache* topCache, off_t cacheOffset)
4518 	{
4519 		this->topCache = topCache;
4520 		this->cacheOffset = cacheOffset;
4521 		page = NULL;
4522 		restart = false;
4523 		pageAllocated = false;
4524 
4525 		cacheChainLocker.SetTo(topCache);
4526 	}
4527 
4528 	void UnlockAll(VMCache* exceptCache = NULL)
4529 	{
4530 		topCache = NULL;
4531 		addressSpaceLocker.Unlock();
4532 		cacheChainLocker.Unlock(exceptCache);
4533 	}
4534 };
4535 
4536 
4537 /*!	Gets the page that should be mapped into the area.
4538 	Returns an error code other than \c B_OK, if the page couldn't be found or
4539 	paged in. The locking state of the address space and the caches is undefined
4540 	in that case.
4541 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4542 	Returns \c B_OK with \c context.restart set to \c true, if the function
4543 	again.
4544 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4545 	found. It is returned in \c context.page. The address space will still be
4546 	locked as well as all caches starting from the top cache to at least the
4547 	cache the page lives in.
4548 */
4549 static status_t
4550 fault_get_page(PageFaultContext& context)
4551 {
4552 	VMCache* cache = context.topCache;
4553 	VMCache* lastCache = NULL;
4554 	vm_page* page = NULL;
4555 
4556 	while (cache != NULL) {
4557 		// We already hold the lock of the cache at this point.
4558 
4559 		lastCache = cache;
4560 
4561 		page = cache->LookupPage(context.cacheOffset);
4562 		if (page != NULL && page->busy) {
4563 			// the page is busy -- wait for it to become unbusy
4564 			context.UnlockAll(cache);
4565 			cache->ReleaseRefLocked();
4566 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4567 
4568 			// restart the whole process
4569 			context.restart = true;
4570 			return B_OK;
4571 		}
4572 
4573 		if (page != NULL)
4574 			break;
4575 
4576 		// The current cache does not contain the page we're looking for.
4577 
4578 		// see if the backing store has it
4579 		if (cache->HasPage(context.cacheOffset)) {
4580 			// insert a fresh page and mark it busy -- we're going to read it in
4581 			page = vm_page_allocate_page(&context.reservation,
4582 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4583 			cache->InsertPage(page, context.cacheOffset);
4584 
4585 			// We need to unlock all caches and the address space while reading
4586 			// the page in. Keep a reference to the cache around.
4587 			cache->AcquireRefLocked();
4588 			context.UnlockAll();
4589 
4590 			// read the page in
4591 			generic_io_vec vec;
4592 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4593 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4594 
4595 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4596 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4597 
4598 			cache->Lock();
4599 
4600 			if (status < B_OK) {
4601 				// on error remove and free the page
4602 				dprintf("reading page from cache %p returned: %s!\n",
4603 					cache, strerror(status));
4604 
4605 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4606 				cache->RemovePage(page);
4607 				vm_page_set_state(page, PAGE_STATE_FREE);
4608 
4609 				cache->ReleaseRefAndUnlock();
4610 				return status;
4611 			}
4612 
4613 			// mark the page unbusy again
4614 			cache->MarkPageUnbusy(page);
4615 
4616 			DEBUG_PAGE_ACCESS_END(page);
4617 
4618 			// Since we needed to unlock everything temporarily, the area
4619 			// situation might have changed. So we need to restart the whole
4620 			// process.
4621 			cache->ReleaseRefAndUnlock();
4622 			context.restart = true;
4623 			return B_OK;
4624 		}
4625 
4626 		cache = context.cacheChainLocker.LockSourceCache();
4627 	}
4628 
4629 	if (page == NULL) {
4630 		// There was no adequate page, determine the cache for a clean one.
4631 		// Read-only pages go into the deepest cache; only the topmost cache
4632 		// may have direct write access.
4633 		cache = context.isWrite ? context.topCache : lastCache;
4634 
4635 		// allocate a clean page
4636 		page = vm_page_allocate_page(&context.reservation,
4637 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4638 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4639 			page->physical_page_number));
4640 
4641 		// insert the new page into our cache
4642 		cache->InsertPage(page, context.cacheOffset);
4643 		context.pageAllocated = true;
4644 	} else if (page->Cache() != context.topCache && context.isWrite) {
4645 		// We have a page that has the data we want, but in the wrong cache
4646 		// object so we need to copy it and stick it into the top cache.
4647 		vm_page* sourcePage = page;
4648 
4649 		// TODO: If memory is low, it might be a good idea to steal the page
4650 		// from our source cache -- if possible, that is.
4651 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4652 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4653 
4654 		// To not needlessly kill concurrency we unlock all caches but the top
4655 		// one while copying the page. Lacking another mechanism to ensure that
4656 		// the source page doesn't disappear, we mark it busy.
4657 		sourcePage->busy = true;
4658 		context.cacheChainLocker.UnlockKeepRefs(true);
4659 
4660 		// copy the page
4661 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4662 			sourcePage->physical_page_number * B_PAGE_SIZE);
4663 
4664 		context.cacheChainLocker.RelockCaches(true);
4665 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4666 
4667 		// insert the new page into our cache
4668 		context.topCache->InsertPage(page, context.cacheOffset);
4669 		context.pageAllocated = true;
4670 	} else
4671 		DEBUG_PAGE_ACCESS_START(page);
4672 
4673 	context.page = page;
4674 	return B_OK;
4675 }
4676 
4677 
4678 /*!	Makes sure the address in the given address space is mapped.
4679 
4680 	\param addressSpace The address space.
4681 	\param originalAddress The address. Doesn't need to be page aligned.
4682 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4683 	\param isUser If \c true the access is requested by a userland team.
4684 	\param wirePage On success, if non \c NULL, the wired count of the page
4685 		mapped at the given address is incremented and the page is returned
4686 		via this parameter.
4687 	\return \c B_OK on success, another error code otherwise.
4688 */
4689 static status_t
4690 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4691 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4692 {
4693 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4694 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4695 		originalAddress, isWrite, isUser));
4696 
4697 	PageFaultContext context(addressSpace, isWrite);
4698 
4699 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4700 	status_t status = B_OK;
4701 
4702 	addressSpace->IncrementFaultCount();
4703 
4704 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4705 	// the pages upfront makes sure we don't have any cache locked, so that the
4706 	// page daemon/thief can do their job without problems.
4707 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4708 		originalAddress);
4709 	context.addressSpaceLocker.Unlock();
4710 	vm_page_reserve_pages(&context.reservation, reservePages,
4711 		addressSpace == VMAddressSpace::Kernel()
4712 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4713 
4714 	while (true) {
4715 		context.addressSpaceLocker.Lock();
4716 
4717 		// get the area the fault was in
4718 		VMArea* area = addressSpace->LookupArea(address);
4719 		if (area == NULL) {
4720 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4721 				"space\n", originalAddress);
4722 			TPF(PageFaultError(-1,
4723 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4724 			status = B_BAD_ADDRESS;
4725 			break;
4726 		}
4727 
4728 		// check permissions
4729 		uint32 protection = get_area_page_protection(area, address);
4730 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4731 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4732 				area->id, (void*)originalAddress);
4733 			TPF(PageFaultError(area->id,
4734 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4735 			status = B_PERMISSION_DENIED;
4736 			break;
4737 		}
4738 		if (isWrite && (protection
4739 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4740 			dprintf("write access attempted on write-protected area 0x%"
4741 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4742 			TPF(PageFaultError(area->id,
4743 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4744 			status = B_PERMISSION_DENIED;
4745 			break;
4746 		} else if (isExecute && (protection
4747 				& (B_EXECUTE_AREA
4748 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4749 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4750 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4751 			TPF(PageFaultError(area->id,
4752 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4753 			status = B_PERMISSION_DENIED;
4754 			break;
4755 		} else if (!isWrite && !isExecute && (protection
4756 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4757 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4758 				" at %p\n", area->id, (void*)originalAddress);
4759 			TPF(PageFaultError(area->id,
4760 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4761 			status = B_PERMISSION_DENIED;
4762 			break;
4763 		}
4764 
4765 		// We have the area, it was a valid access, so let's try to resolve the
4766 		// page fault now.
4767 		// First, the topmost cache of the area is investigated.
4768 
4769 		context.Prepare(vm_area_get_locked_cache(area),
4770 			address - area->Base() + area->cache_offset);
4771 
4772 		// See if this cache has a fault handler -- this will do all the work
4773 		// for us.
4774 		{
4775 			// Note, since the page fault is resolved with interrupts enabled,
4776 			// the fault handler could be called more than once for the same
4777 			// reason -- the store must take this into account.
4778 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4779 			if (status != B_BAD_HANDLER)
4780 				break;
4781 		}
4782 
4783 		// The top most cache has no fault handler, so let's see if the cache or
4784 		// its sources already have the page we're searching for (we're going
4785 		// from top to bottom).
4786 		status = fault_get_page(context);
4787 		if (status != B_OK) {
4788 			TPF(PageFaultError(area->id, status));
4789 			break;
4790 		}
4791 
4792 		if (context.restart)
4793 			continue;
4794 
4795 		// All went fine, all there is left to do is to map the page into the
4796 		// address space.
4797 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4798 			context.page));
4799 
4800 		// If the page doesn't reside in the area's cache, we need to make sure
4801 		// it's mapped in read-only, so that we cannot overwrite someone else's
4802 		// data (copy-on-write)
4803 		uint32 newProtection = protection;
4804 		if (context.page->Cache() != context.topCache && !isWrite)
4805 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4806 
4807 		bool unmapPage = false;
4808 		bool mapPage = true;
4809 
4810 		// check whether there's already a page mapped at the address
4811 		context.map->Lock();
4812 
4813 		phys_addr_t physicalAddress;
4814 		uint32 flags;
4815 		vm_page* mappedPage = NULL;
4816 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4817 			&& (flags & PAGE_PRESENT) != 0
4818 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4819 				!= NULL) {
4820 			// Yep there's already a page. If it's ours, we can simply adjust
4821 			// its protection. Otherwise we have to unmap it.
4822 			if (mappedPage == context.page) {
4823 				context.map->ProtectPage(area, address, newProtection);
4824 					// Note: We assume that ProtectPage() is atomic (i.e.
4825 					// the page isn't temporarily unmapped), otherwise we'd have
4826 					// to make sure it isn't wired.
4827 				mapPage = false;
4828 			} else
4829 				unmapPage = true;
4830 		}
4831 
4832 		context.map->Unlock();
4833 
4834 		if (unmapPage) {
4835 			// If the page is wired, we can't unmap it. Wait until it is unwired
4836 			// again and restart. Note that the page cannot be wired for
4837 			// writing, since it isn't in the topmost cache. So we can safely
4838 			// ignore ranges wired for writing (our own and other concurrent
4839 			// wiring attempts in progress) and in fact have to do that to avoid
4840 			// a deadlock.
4841 			VMAreaUnwiredWaiter waiter;
4842 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4843 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4844 				// unlock everything and wait
4845 				if (context.pageAllocated) {
4846 					// ... but since we allocated a page and inserted it into
4847 					// the top cache, remove and free it first. Otherwise we'd
4848 					// have a page from a lower cache mapped while an upper
4849 					// cache has a page that would shadow it.
4850 					context.topCache->RemovePage(context.page);
4851 					vm_page_free_etc(context.topCache, context.page,
4852 						&context.reservation);
4853 				} else
4854 					DEBUG_PAGE_ACCESS_END(context.page);
4855 
4856 				context.UnlockAll();
4857 				waiter.waitEntry.Wait();
4858 				continue;
4859 			}
4860 
4861 			// Note: The mapped page is a page of a lower cache. We are
4862 			// guaranteed to have that cache locked, our new page is a copy of
4863 			// that page, and the page is not busy. The logic for that guarantee
4864 			// is as follows: Since the page is mapped, it must live in the top
4865 			// cache (ruled out above) or any of its lower caches, and there is
4866 			// (was before the new page was inserted) no other page in any
4867 			// cache between the top cache and the page's cache (otherwise that
4868 			// would be mapped instead). That in turn means that our algorithm
4869 			// must have found it and therefore it cannot be busy either.
4870 			DEBUG_PAGE_ACCESS_START(mappedPage);
4871 			unmap_page(area, address);
4872 			DEBUG_PAGE_ACCESS_END(mappedPage);
4873 		}
4874 
4875 		if (mapPage) {
4876 			if (map_page(area, context.page, address, newProtection,
4877 					&context.reservation) != B_OK) {
4878 				// Mapping can only fail, when the page mapping object couldn't
4879 				// Mapping can only fail when the page mapping object couldn't
4880 				// fine, though. If this was a regular page fault, we'll simply
4881 				// leave and probably fault again. To make sure we'll have more
4882 				// luck then, we ensure that the minimum object reserve is
4883 				// available.
4884 				DEBUG_PAGE_ACCESS_END(context.page);
4885 
4886 				context.UnlockAll();
4887 
4888 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4889 						!= B_OK) {
4890 					// Apparently the situation is serious. Let's get ourselves
4891 					// killed.
4892 					status = B_NO_MEMORY;
4893 				} else if (wirePage != NULL) {
4894 					// The caller expects us to wire the page. Since
4895 					// object_cache_reserve() succeeded, we should now be able
4896 					// to allocate a mapping structure. Restart.
4897 					continue;
4898 				}
4899 
4900 				break;
4901 			}
4902 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4903 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4904 
4905 		// also wire the page, if requested
4906 		if (wirePage != NULL && status == B_OK) {
4907 			increment_page_wired_count(context.page);
4908 			*wirePage = context.page;
4909 		}
4910 
4911 		DEBUG_PAGE_ACCESS_END(context.page);
4912 
4913 		break;
4914 	}
4915 
4916 	return status;
4917 }
4918 
4919 
4920 status_t
4921 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4922 {
4923 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4924 }
4925 
4926 status_t
4927 vm_put_physical_page(addr_t vaddr, void* handle)
4928 {
4929 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4930 }
4931 
4932 
4933 status_t
4934 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4935 	void** _handle)
4936 {
4937 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4938 }
4939 
4940 status_t
4941 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4942 {
4943 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4944 }
4945 
4946 
4947 status_t
4948 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4949 {
4950 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4951 }
4952 
4953 status_t
4954 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4955 {
4956 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4957 }
4958 
4959 
4960 void
4961 vm_get_info(system_info* info)
4962 {
4963 	swap_get_info(info);
4964 
4965 	MutexLocker locker(sAvailableMemoryLock);
4966 	info->needed_memory = sNeededMemory;
4967 	info->free_memory = sAvailableMemory;
4968 }
4969 
4970 
4971 uint32
4972 vm_num_page_faults(void)
4973 {
4974 	return sPageFaults;
4975 }
4976 
4977 
4978 off_t
4979 vm_available_memory(void)
4980 {
4981 	MutexLocker locker(sAvailableMemoryLock);
4982 	return sAvailableMemory;
4983 }
4984 
4985 
4986 off_t
4987 vm_available_not_needed_memory(void)
4988 {
4989 	MutexLocker locker(sAvailableMemoryLock);
4990 	return sAvailableMemory - sNeededMemory;
4991 }
4992 
4993 
4994 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4995 	debugger.
4996 */
4997 off_t
4998 vm_available_not_needed_memory_debug(void)
4999 {
5000 	return sAvailableMemory - sNeededMemory;
5001 }
5002 
5003 
5004 size_t
5005 vm_kernel_address_space_left(void)
5006 {
5007 	return VMAddressSpace::Kernel()->FreeSpace();
5008 }
5009 
5010 
5011 void
5012 vm_unreserve_memory(size_t amount)
5013 {
5014 	mutex_lock(&sAvailableMemoryLock);
5015 
5016 	sAvailableMemory += amount;
5017 
5018 	mutex_unlock(&sAvailableMemoryLock);
5019 }
5020 
5021 
5022 status_t
5023 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5024 {
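	// kMemoryReserveForPriority defines how much memory must remain available
	// after the reservation for the given priority; the request succeeds
	// immediately only if sAvailableMemory covers both the requested amount
	// and that reserve.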
5025 	size_t reserve = kMemoryReserveForPriority[priority];
5026 
5027 	MutexLocker locker(sAvailableMemoryLock);
5028 
5029 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5030 
5031 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5032 		sAvailableMemory -= amount;
5033 		return B_OK;
5034 	}
5035 
5036 	if (timeout <= 0)
5037 		return B_NO_MEMORY;
5038 
5039 	// turn timeout into an absolute timeout
5040 	timeout += system_time();
5041 
5042 	// loop until we've got the memory or the timeout occurs
5043 	do {
5044 		sNeededMemory += amount;
5045 
5046 		// call the low resource manager
5047 		locker.Unlock();
5048 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5049 			B_ABSOLUTE_TIMEOUT, timeout);
5050 		locker.Lock();
5051 
5052 		sNeededMemory -= amount;
5053 
5054 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5055 			sAvailableMemory -= amount;
5056 			return B_OK;
5057 		}
5058 	} while (timeout > system_time());
5059 
5060 	return B_NO_MEMORY;
5061 }
5062 
5063 
5064 status_t
5065 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5066 {
5067 	// NOTE: The caller is responsible for synchronizing calls to this function!
5068 
5069 	AddressSpaceReadLocker locker;
5070 	VMArea* area;
5071 	status_t status = locker.SetFromArea(id, area);
5072 	if (status != B_OK)
5073 		return status;
5074 
5075 	// nothing to do, if the type doesn't change
5076 	uint32 oldType = area->MemoryType();
5077 	if (type == oldType)
5078 		return B_OK;
5079 
5080 	// set the memory type of the area and the mapped pages
5081 	VMTranslationMap* map = area->address_space->TranslationMap();
5082 	map->Lock();
5083 	area->SetMemoryType(type);
5084 	map->ProtectArea(area, area->protection);
5085 	map->Unlock();
5086 
5087 	// set the physical memory type
5088 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5089 	if (error != B_OK) {
5090 		// reset the memory type of the area and the mapped pages
5091 		map->Lock();
5092 		area->SetMemoryType(oldType);
5093 		map->ProtectArea(area, area->protection);
5094 		map->Unlock();
5095 		return error;
5096 	}
5097 
5098 	return B_OK;
5100 }
5101 
5102 
5103 /*!	This function enforces some protection properties:
5104 	 - kernel areas must be W^X (after kernel startup)
5105 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5106 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5107 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5108 	   and B_KERNEL_WRITE_AREA.
5109 */
5110 static void
5111 fix_protection(uint32* protection)
5112 {
5113 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5114 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5115 			|| (*protection & B_WRITE_AREA) != 0)
5116 		&& !gKernelStartup)
5117 		panic("kernel areas cannot be both writable and executable!");
5118 
5119 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5120 		if ((*protection & B_USER_PROTECTION) == 0
5121 			|| (*protection & B_WRITE_AREA) != 0)
5122 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5123 		else
5124 			*protection |= B_KERNEL_READ_AREA;
5125 	}
5126 }
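

/*	A few examples of what fix_protection() produces, derived from the rules
	documented above (illustrative only):
		B_READ_AREA                -> B_READ_AREA | B_KERNEL_READ_AREA
		B_READ_AREA | B_WRITE_AREA -> B_READ_AREA | B_WRITE_AREA
		                                | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
		0                          -> B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
*/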
5127 
5128 
5129 static void
5130 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5131 {
5132 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5133 	info->area = area->id;
5134 	info->address = (void*)area->Base();
5135 	info->size = area->Size();
5136 	info->protection = area->protection;
5137 	info->lock = area->wiring;
5138 	info->team = area->address_space->ID();
5139 	info->copy_count = 0;
5140 	info->in_count = 0;
5141 	info->out_count = 0;
5142 		// TODO: retrieve real values here!
5143 
5144 	VMCache* cache = vm_area_get_locked_cache(area);
5145 
5146 	// Note, this is a simplification; the cache could be larger than this area
5147 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5148 
5149 	vm_area_put_locked_cache(cache);
5150 }
5151 
5152 
5153 static status_t
5154 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5155 {
5156 	// is newSize a multiple of B_PAGE_SIZE?
5157 	if (newSize & (B_PAGE_SIZE - 1))
5158 		return B_BAD_VALUE;
5159 
5160 	// lock all affected address spaces and the cache
5161 	VMArea* area;
5162 	VMCache* cache;
5163 
5164 	MultiAddressSpaceLocker locker;
5165 	AreaCacheLocker cacheLocker;
5166 
5167 	status_t status;
5168 	size_t oldSize;
5169 	bool anyKernelArea;
5170 	bool restart;
5171 
5172 	do {
5173 		anyKernelArea = false;
5174 		restart = false;
5175 
5176 		locker.Unset();
5177 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5178 		if (status != B_OK)
5179 			return status;
5180 		cacheLocker.SetTo(cache, true);	// already locked
5181 
5182 		// enforce restrictions
5183 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5184 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5185 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5186 				"resize kernel area %" B_PRId32 " (%s)\n",
5187 				team_get_current_team_id(), areaID, area->name);
5188 			return B_NOT_ALLOWED;
5189 		}
5190 		// TODO: Enforce all restrictions (team, etc.)!
5191 
5192 		oldSize = area->Size();
5193 		if (newSize == oldSize)
5194 			return B_OK;
5195 
5196 		if (cache->type != CACHE_TYPE_RAM)
5197 			return B_NOT_ALLOWED;
5198 
5199 		if (oldSize < newSize) {
5200 			// We need to check if all areas of this cache can be resized.
5201 			for (VMArea* current = cache->areas; current != NULL;
5202 					current = current->cache_next) {
5203 				if (!current->address_space->CanResizeArea(current, newSize))
5204 					return B_ERROR;
5205 				anyKernelArea
5206 					|= current->address_space == VMAddressSpace::Kernel();
5207 			}
5208 		} else {
5209 			// We're shrinking the areas, so we must make sure the affected
5210 			// ranges are not wired.
5211 			for (VMArea* current = cache->areas; current != NULL;
5212 					current = current->cache_next) {
5213 				anyKernelArea
5214 					|= current->address_space == VMAddressSpace::Kernel();
5215 
5216 				if (wait_if_area_range_is_wired(current,
5217 						current->Base() + newSize, oldSize - newSize, &locker,
5218 						&cacheLocker)) {
5219 					restart = true;
5220 					break;
5221 				}
5222 			}
5223 		}
5224 	} while (restart);
5225 
5226 	// Okay, looks good so far, so let's do it
5227 
5228 	int priority = kernel && anyKernelArea
5229 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5230 	uint32 allocationFlags = kernel && anyKernelArea
5231 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5232 
5233 	if (oldSize < newSize) {
5234 		// Growing the cache can fail, so we do it first.
5235 		status = cache->Resize(cache->virtual_base + newSize, priority);
5236 		if (status != B_OK)
5237 			return status;
5238 	}
5239 
5240 	for (VMArea* current = cache->areas; current != NULL;
5241 			current = current->cache_next) {
5242 		status = current->address_space->ResizeArea(current, newSize,
5243 			allocationFlags);
5244 		if (status != B_OK)
5245 			break;
5246 
5247 		// We also need to unmap all pages beyond the new size, if the area has
5248 		// shrunk
5249 		if (newSize < oldSize) {
5250 			VMCacheChainLocker cacheChainLocker(cache);
5251 			cacheChainLocker.LockAllSourceCaches();
5252 
5253 			unmap_pages(current, current->Base() + newSize,
5254 				oldSize - newSize);
5255 
5256 			cacheChainLocker.Unlock(cache);
5257 		}
5258 	}
5259 
5260 	if (status == B_OK) {
5261 		// Shrink or grow individual page protections if in use.
5262 		if (area->page_protections != NULL) {
5263 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5264 			uint8* newProtections
5265 				= (uint8*)realloc(area->page_protections, bytes);
5266 			if (newProtections == NULL)
5267 				status = B_NO_MEMORY;
5268 			else {
5269 				area->page_protections = newProtections;
5270 
5271 				if (oldSize < newSize) {
5272 					// init the additional page protections to that of the area
5273 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5274 					uint32 areaProtection = area->protection
5275 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5276 					memset(area->page_protections + offset,
5277 						areaProtection | (areaProtection << 4), bytes - offset);
5278 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5279 						uint8& entry = area->page_protections[offset - 1];
5280 						entry = (entry & 0x0f) | (areaProtection << 4);
5281 					}
5282 				}
5283 			}
5284 		}
5285 	}
5286 
5287 	// shrinking the cache can't fail, so we do it now
5288 	if (status == B_OK && newSize < oldSize)
5289 		status = cache->Resize(cache->virtual_base + newSize, priority);
5290 
5291 	if (status != B_OK) {
5292 		// Something failed -- resize the areas back to their original size.
5293 		// This can fail, too, in which case we're seriously screwed.
5294 		for (VMArea* current = cache->areas; current != NULL;
5295 				current = current->cache_next) {
5296 			if (current->address_space->ResizeArea(current, oldSize,
5297 					allocationFlags) != B_OK) {
5298 				panic("vm_resize_area(): Failed, and unable to restore the "
5299 					"original state.");
5300 			}
5301 		}
5302 
5303 		cache->Resize(cache->virtual_base + oldSize, priority);
5304 	}
5305 
5306 	// TODO: we must honour the lock restrictions of this area
5307 	return status;
5308 }
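
// Editor's note (illustration, not in the original source): the per-page
// protection array used above stores one 4-bit entry per page, two pages per
// byte -- even-numbered pages (0-based) in the low nibble, odd-numbered pages
// in the high nibble -- which is why it is sized as (pageCount + 1) / 2 bytes.
// For example, growing an area from 3 pages to 5 pages reallocates the array
// from 2 to (5 + 1) / 2 == 3 bytes and initializes the entries for the new
// pages 3 and 4 (the high nibble of byte 1 and byte 2) to the area's base
// protection.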
5309 
5310 
5311 status_t
5312 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5313 {
5314 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5315 }
5316 
5317 
5318 status_t
5319 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5320 {
5321 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5322 }
5323 
5324 
5325 status_t
5326 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5327 	bool user)
5328 {
5329 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5330 }
5331 
5332 
5333 void
5334 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5335 {
5336 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5337 }
5338 
5339 
5340 /*!	Copies a range of memory directly from/to a page that might not be mapped
5341 	at the moment.
5342 
5343 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5344 	walks through the respective area's cache chain to find the physical page
5345 	and copies from/to it directly.
5346 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5347 	must not cross a page boundary.
5348 
5349 	\param teamID The team ID identifying the address space \a unsafeMemory is
5350 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5351 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5352 		is passed, the address space of the thread returned by
5353 		debug_get_debugged_thread() is used.
5354 	\param unsafeMemory The start of the unsafe memory range to be copied
5355 		from/to.
5356 	\param buffer A safely accessible kernel buffer to be copied from/to.
5357 	\param size The number of bytes to be copied.
5358 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5359 		\a unsafeMemory, the other way around otherwise.
5360 */
5361 status_t
5362 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5363 	size_t size, bool copyToUnsafe)
5364 {
5365 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5366 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5367 		return B_BAD_VALUE;
5368 	}
5369 
5370 	// get the address space for the debugged thread
5371 	VMAddressSpace* addressSpace;
5372 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5373 		addressSpace = VMAddressSpace::Kernel();
5374 	} else if (teamID == B_CURRENT_TEAM) {
5375 		Thread* thread = debug_get_debugged_thread();
5376 		if (thread == NULL || thread->team == NULL)
5377 			return B_BAD_ADDRESS;
5378 
5379 		addressSpace = thread->team->address_space;
5380 	} else
5381 		addressSpace = VMAddressSpace::DebugGet(teamID);
5382 
5383 	if (addressSpace == NULL)
5384 		return B_BAD_ADDRESS;
5385 
5386 	// get the area
5387 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5388 	if (area == NULL)
5389 		return B_BAD_ADDRESS;
5390 
5391 	// search the page
5392 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5393 		+ area->cache_offset;
5394 	VMCache* cache = area->cache;
5395 	vm_page* page = NULL;
5396 	while (cache != NULL) {
5397 		page = cache->DebugLookupPage(cacheOffset);
5398 		if (page != NULL)
5399 			break;
5400 
5401 		// Page not found in this cache -- if it is paged out, we must not try
5402 		// to get it from lower caches.
5403 		if (cache->DebugHasPage(cacheOffset))
5404 			break;
5405 
5406 		cache = cache->source;
5407 	}
5408 
5409 	if (page == NULL)
5410 		return B_UNSUPPORTED;
5411 
5412 	// copy from/to physical memory
5413 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5414 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5415 
5416 	if (copyToUnsafe) {
5417 		if (page->Cache() != area->cache)
5418 			return B_UNSUPPORTED;
5419 
5420 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5421 	}
5422 
5423 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5424 }
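
// Editor's usage sketch (not part of the original source; "userPointer" is a
// hypothetical address inside the debugged thread's team): reading a word
// from the kernel debugger without relying on the current mappings:
//
//		uint32 value;
//		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, userPointer, &value,
//				sizeof(value), false) == B_OK) {
//			kprintf("value: %#" B_PRIx32 "\n", value);
//		}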
5425 
5426 
5427 /** Validate that a memory range is either fully in kernel space, or fully in
5428  *  userspace */
5429 static inline bool
5430 validate_memory_range(const void* addr, size_t size)
5431 {
5432 	addr_t address = (addr_t)addr;
5433 
5434 	// Check for overflows on all addresses.
5435 	if ((address + size) < address)
5436 		return false;
5437 
5438 	// Validate that the address range does not cross the kernel/user boundary.
5439 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5440 }
5441 
5442 
5443 /** Validate that a memory range is fully in userspace. */
5444 static inline bool
5445 validate_user_memory_range(const void* addr, size_t size)
5446 {
5447 	addr_t address = (addr_t)addr;
5448 
5449 	// Check for overflows on all addresses.
5450 	if ((address + size) < address)
5451 		return false;
5452 
5453 	// Validate that both the start and end address are in userspace
5454 	return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1);
5455 }
5456 
5457 
5458 //	#pragma mark - kernel public API
5459 
5460 
5461 status_t
5462 user_memcpy(void* to, const void* from, size_t size)
5463 {
5464 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5465 		return B_BAD_ADDRESS;
5466 
5467 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5468 		return B_BAD_ADDRESS;
5469 
5470 	return B_OK;
5471 }
5472 
5473 
5474 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5475 	the string in \a to, NULL-terminating the result.
5476 
5477 	\param to Pointer to the destination C-string.
5478 	\param from Pointer to the source C-string.
5479 	\param size Size in bytes of the string buffer pointed to by \a to.
5480 
5481 	\return strlen(\a from), or an error code if the copy failed.
5482 */
5483 ssize_t
5484 user_strlcpy(char* to, const char* from, size_t size)
5485 {
5486 	if (to == NULL && size != 0)
5487 		return B_BAD_VALUE;
5488 	if (from == NULL)
5489 		return B_BAD_ADDRESS;
5490 
5491 	// Protect the source address from overflows.
5492 	size_t maxSize = size;
5493 	if ((addr_t)from + maxSize < (addr_t)from)
5494 		maxSize -= (addr_t)from + maxSize;
5495 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5496 		maxSize = USER_TOP - (addr_t)from;
5497 
5498 	if (!validate_memory_range(to, maxSize))
5499 		return B_BAD_ADDRESS;
5500 
5501 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5502 	if (result < 0)
5503 		return result;
5504 
5505 	// If we hit the address overflow boundary, fail.
5506 	if ((size_t)result >= maxSize && maxSize < size)
5507 		return B_BAD_ADDRESS;
5508 
5509 	return result;
5510 }
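
// Editor's note (illustrative sketch, not in the original source): the usual
// syscall pattern for copying a name string in from userland, as used by the
// _user_*() functions further below:
//
//		char name[B_OS_NAME_LENGTH];
//		if (!IS_USER_ADDRESS(userName)
//			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
//			return B_BAD_ADDRESS;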
5511 
5512 
5513 status_t
5514 user_memset(void* s, char c, size_t count)
5515 {
5516 	if (!validate_memory_range(s, count))
5517 		return B_BAD_ADDRESS;
5518 
5519 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5520 		return B_BAD_ADDRESS;
5521 
5522 	return B_OK;
5523 }
5524 
5525 
5526 /*!	Wires a single page at the given address.
5527 
5528 	\param team The team whose address space the address belongs to. Supports
5529 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5530 		parameter is ignored.
5531 	\param address The virtual address to wire down. Does not need to
5532 		be page aligned.
5533 	\param writable If \c true the page shall be writable.
5534 	\param info On success the info is filled in, among other things
5535 		containing the physical address the given virtual one translates to.
5536 	\return \c B_OK if the page could be wired, another error code otherwise.
5537 */
5538 status_t
5539 vm_wire_page(team_id team, addr_t address, bool writable,
5540 	VMPageWiringInfo* info)
5541 {
5542 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5543 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5544 
5545 	// compute the page protection that is required
5546 	bool isUser = IS_USER_ADDRESS(address);
5547 	uint32 requiredProtection = PAGE_PRESENT
5548 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5549 	if (writable)
5550 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5551 
5552 	// get and read lock the address space
5553 	VMAddressSpace* addressSpace = NULL;
5554 	if (isUser) {
5555 		if (team == B_CURRENT_TEAM)
5556 			addressSpace = VMAddressSpace::GetCurrent();
5557 		else
5558 			addressSpace = VMAddressSpace::Get(team);
5559 	} else
5560 		addressSpace = VMAddressSpace::GetKernel();
5561 	if (addressSpace == NULL)
5562 		return B_ERROR;
5563 
5564 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5565 
5566 	VMTranslationMap* map = addressSpace->TranslationMap();
5567 	status_t error = B_OK;
5568 
5569 	// get the area
5570 	VMArea* area = addressSpace->LookupArea(pageAddress);
5571 	if (area == NULL) {
5572 		addressSpace->Put();
5573 		return B_BAD_ADDRESS;
5574 	}
5575 
5576 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5577 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5578 
5579 	// mark the area range wired
5580 	area->Wire(&info->range);
5581 
5582 	// Lock the area's cache chain and the translation map. Needed to look
5583 	// up the page and play with its wired count.
5584 	cacheChainLocker.LockAllSourceCaches();
5585 	map->Lock();
5586 
5587 	phys_addr_t physicalAddress;
5588 	uint32 flags;
5589 	vm_page* page;
5590 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5591 		&& (flags & requiredProtection) == requiredProtection
5592 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5593 			!= NULL) {
5594 		// Already mapped with the correct permissions -- just increment
5595 		// the page's wired count.
5596 		increment_page_wired_count(page);
5597 
5598 		map->Unlock();
5599 		cacheChainLocker.Unlock();
5600 		addressSpaceLocker.Unlock();
5601 	} else {
5602 		// Let vm_soft_fault() map the page for us, if possible. We need
5603 		// to fully unlock to avoid deadlocks. Since we have already
5604 		// wired the area itself, nothing disturbing will happen with it
5605 		// in the meantime.
5606 		map->Unlock();
5607 		cacheChainLocker.Unlock();
5608 		addressSpaceLocker.Unlock();
5609 
5610 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5611 			isUser, &page);
5612 
5613 		if (error != B_OK) {
5614 			// The page could not be mapped -- clean up.
5615 			VMCache* cache = vm_area_get_locked_cache(area);
5616 			area->Unwire(&info->range);
5617 			cache->ReleaseRefAndUnlock();
5618 			addressSpace->Put();
5619 			return error;
5620 		}
5621 	}
5622 
5623 	info->physicalAddress
5624 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5625 			+ address % B_PAGE_SIZE;
5626 	info->page = page;
5627 
5628 	return B_OK;
5629 }
5630 
5631 
5632 /*!	Unwires a single page previously wired via vm_wire_page().
5633 
5634 	\param info The same object passed to vm_wire_page() before.
5635 */
5636 void
5637 vm_unwire_page(VMPageWiringInfo* info)
5638 {
5639 	// lock the address space
5640 	VMArea* area = info->range.area;
5641 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5642 		// takes over our reference
5643 
5644 	// lock the top cache
5645 	VMCache* cache = vm_area_get_locked_cache(area);
5646 	VMCacheChainLocker cacheChainLocker(cache);
5647 
5648 	if (info->page->Cache() != cache) {
5649 		// The page is not in the top cache, so we lock the whole cache chain
5650 		// before touching the page's wired count.
5651 		cacheChainLocker.LockAllSourceCaches();
5652 	}
5653 
5654 	decrement_page_wired_count(info->page);
5655 
5656 	// remove the wired range from the range
5657 	area->Unwire(&info->range);
5658 
5659 	cacheChainLocker.Unlock();
5660 }
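
// Editor's usage sketch (not part of the original source; "team",
// "userAddress" and "data" are hypothetical): wiring a single user page,
// accessing it via its physical address and unwiring it again (the copy must
// stay within the wired page):
//
//		VMPageWiringInfo info;
//		if (vm_wire_page(team, userAddress, true, &info) == B_OK) {
//			vm_memcpy_to_physical(info.physicalAddress, &data, sizeof(data),
//				false);
//			vm_unwire_page(&info);
//		}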
5661 
5662 
5663 /*!	Wires down the given address range in the specified team's address space.
5664 
5665 	If successful the function
5666 	- acquires a reference to the specified team's address space,
5667 	- adds respective wired ranges to all areas that intersect with the given
5668 	  address range,
5669 	- makes sure all pages in the given address range are mapped with the
5670 	  requested access permissions and increments their wired count.
5671 
5672 	It fails when \a team doesn't specify a valid address space, when any part
5673 	of the specified address range is not covered by areas, when the concerned
5674 	areas don't allow mapping with the requested permissions, or when mapping
5675 	failed for another reason.
5676 
5677 	When successful the call must be balanced by an unlock_memory_etc() call with
5678 	the exact same parameters.
5679 
5680 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5681 		supported.
5682 	\param address The start of the address range to be wired.
5683 	\param numBytes The size of the address range to be wired.
5684 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5685 		requests that the range must be wired writable ("read from device
5686 		into memory").
5687 	\return \c B_OK on success, another error code otherwise.
5688 */
5689 status_t
5690 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5691 {
5692 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5693 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5694 
5695 	// compute the page protection that is required
5696 	bool isUser = IS_USER_ADDRESS(address);
5697 	bool writable = (flags & B_READ_DEVICE) == 0;
5698 	uint32 requiredProtection = PAGE_PRESENT
5699 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5700 	if (writable)
5701 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5702 
5703 	uint32 mallocFlags = isUser
5704 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5705 
5706 	// get and read lock the address space
5707 	VMAddressSpace* addressSpace = NULL;
5708 	if (isUser) {
5709 		if (team == B_CURRENT_TEAM)
5710 			addressSpace = VMAddressSpace::GetCurrent();
5711 		else
5712 			addressSpace = VMAddressSpace::Get(team);
5713 	} else
5714 		addressSpace = VMAddressSpace::GetKernel();
5715 	if (addressSpace == NULL)
5716 		return B_ERROR;
5717 
5718 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5719 		// We get a new address space reference here. The one we got above will
5720 		// be freed by unlock_memory_etc().
5721 
5722 	VMTranslationMap* map = addressSpace->TranslationMap();
5723 	status_t error = B_OK;
5724 
5725 	// iterate through all concerned areas
5726 	addr_t nextAddress = lockBaseAddress;
5727 	while (nextAddress != lockEndAddress) {
5728 		// get the next area
5729 		VMArea* area = addressSpace->LookupArea(nextAddress);
5730 		if (area == NULL) {
5731 			error = B_BAD_ADDRESS;
5732 			break;
5733 		}
5734 
5735 		addr_t areaStart = nextAddress;
5736 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5737 
5738 		// allocate the wired range (do that before locking the cache to avoid
5739 		// deadlocks)
5740 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5741 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5742 		if (range == NULL) {
5743 			error = B_NO_MEMORY;
5744 			break;
5745 		}
5746 
5747 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5748 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5749 
5750 		// mark the area range wired
5751 		area->Wire(range);
5752 
5753 		// Depending on the area cache type and the wiring, we may not need to
5754 		// look at the individual pages.
5755 		if (area->cache_type == CACHE_TYPE_NULL
5756 			|| area->cache_type == CACHE_TYPE_DEVICE
5757 			|| area->wiring == B_FULL_LOCK
5758 			|| area->wiring == B_CONTIGUOUS) {
5759 			nextAddress = areaEnd;
5760 			continue;
5761 		}
5762 
5763 		// Lock the area's cache chain and the translation map. Needed to look
5764 		// up pages and play with their wired count.
5765 		cacheChainLocker.LockAllSourceCaches();
5766 		map->Lock();
5767 
5768 		// iterate through the pages and wire them
5769 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5770 			phys_addr_t physicalAddress;
5771 			uint32 flags;
5772 
5773 			vm_page* page;
5774 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5775 				&& (flags & requiredProtection) == requiredProtection
5776 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5777 					!= NULL) {
5778 				// Already mapped with the correct permissions -- just increment
5779 				// the page's wired count.
5780 				increment_page_wired_count(page);
5781 			} else {
5782 				// Let vm_soft_fault() map the page for us, if possible. We need
5783 				// to fully unlock to avoid deadlocks. Since we have already
5784 				// wired the area itself, nothing disturbing will happen with it
5785 				// in the meantime.
5786 				map->Unlock();
5787 				cacheChainLocker.Unlock();
5788 				addressSpaceLocker.Unlock();
5789 
5790 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5791 					false, isUser, &page);
5792 
5793 				addressSpaceLocker.Lock();
5794 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5795 				cacheChainLocker.LockAllSourceCaches();
5796 				map->Lock();
5797 			}
5798 
5799 			if (error != B_OK)
5800 				break;
5801 		}
5802 
5803 		map->Unlock();
5804 
5805 		if (error == B_OK) {
5806 			cacheChainLocker.Unlock();
5807 		} else {
5808 			// An error occurred, so abort right here. If the current address
5809 			// is the first in this area, unwire the area, since we won't get
5810 			// to it when reverting what we've done so far.
5811 			if (nextAddress == areaStart) {
5812 				area->Unwire(range);
5813 				cacheChainLocker.Unlock();
5814 				range->~VMAreaWiredRange();
5815 				free_etc(range, mallocFlags);
5816 			} else
5817 				cacheChainLocker.Unlock();
5818 
5819 			break;
5820 		}
5821 	}
5822 
5823 	if (error != B_OK) {
5824 		// An error occurred, so unwire all that we've already wired. Note that
5825 		// even if not a single page was wired, unlock_memory_etc() is called
5826 		// to put the address space reference.
5827 		addressSpaceLocker.Unlock();
5828 		unlock_memory_etc(team, (void*)lockBaseAddress,
5829 			nextAddress - lockBaseAddress, flags);
5830 	}
5831 
5832 	return error;
5833 }
5834 
5835 
5836 status_t
5837 lock_memory(void* address, size_t numBytes, uint32 flags)
5838 {
5839 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5840 }
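
// Editor's usage sketch (not part of the original source; "team", "buffer"
// and "length" are hypothetical): wiring a user buffer writable before a
// device transfers data into it, and balancing the call afterwards:
//
//		if (lock_memory_etc(team, buffer, length, B_READ_DEVICE) == B_OK) {
//			// ... run the transfer ...
//			unlock_memory_etc(team, buffer, length, B_READ_DEVICE);
//		}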
5841 
5842 
5843 /*!	Unwires an address range previously wired with lock_memory_etc().
5844 
5845 	Note that a call to this function must balance a previous lock_memory_etc()
5846 	call with exactly the same parameters.
5847 */
5848 status_t
5849 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5850 {
5851 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5852 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5853 
5854 	// compute the page protection that is required
5855 	bool isUser = IS_USER_ADDRESS(address);
5856 	bool writable = (flags & B_READ_DEVICE) == 0;
5857 	uint32 requiredProtection = PAGE_PRESENT
5858 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5859 	if (writable)
5860 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5861 
5862 	uint32 mallocFlags = isUser
5863 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5864 
5865 	// get and read lock the address space
5866 	VMAddressSpace* addressSpace = NULL;
5867 	if (isUser) {
5868 		if (team == B_CURRENT_TEAM)
5869 			addressSpace = VMAddressSpace::GetCurrent();
5870 		else
5871 			addressSpace = VMAddressSpace::Get(team);
5872 	} else
5873 		addressSpace = VMAddressSpace::GetKernel();
5874 	if (addressSpace == NULL)
5875 		return B_ERROR;
5876 
5877 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5878 		// Take over the address space reference. We don't unlock until we're
5879 		// done.
5880 
5881 	VMTranslationMap* map = addressSpace->TranslationMap();
5882 	status_t error = B_OK;
5883 
5884 	// iterate through all concerned areas
5885 	addr_t nextAddress = lockBaseAddress;
5886 	while (nextAddress != lockEndAddress) {
5887 		// get the next area
5888 		VMArea* area = addressSpace->LookupArea(nextAddress);
5889 		if (area == NULL) {
5890 			error = B_BAD_ADDRESS;
5891 			break;
5892 		}
5893 
5894 		addr_t areaStart = nextAddress;
5895 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5896 
5897 		// Lock the area's top cache. This is a requirement for
5898 		// VMArea::Unwire().
5899 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5900 
5901 		// Depending on the area cache type and the wiring, we may not need to
5902 		// look at the individual pages.
5903 		if (area->cache_type == CACHE_TYPE_NULL
5904 			|| area->cache_type == CACHE_TYPE_DEVICE
5905 			|| area->wiring == B_FULL_LOCK
5906 			|| area->wiring == B_CONTIGUOUS) {
5907 			// unwire the range (to avoid deadlocks we delete the range after
5908 			// unlocking the cache)
5909 			nextAddress = areaEnd;
5910 			VMAreaWiredRange* range = area->Unwire(areaStart,
5911 				areaEnd - areaStart, writable);
5912 			cacheChainLocker.Unlock();
5913 			if (range != NULL) {
5914 				range->~VMAreaWiredRange();
5915 				free_etc(range, mallocFlags);
5916 			}
5917 			continue;
5918 		}
5919 
5920 		// Lock the area's cache chain and the translation map. Needed to look
5921 		// up pages and play with their wired count.
5922 		cacheChainLocker.LockAllSourceCaches();
5923 		map->Lock();
5924 
5925 		// iterate through the pages and unwire them
5926 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5927 			phys_addr_t physicalAddress;
5928 			uint32 flags;
5929 
5930 			vm_page* page;
5931 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5932 				&& (flags & PAGE_PRESENT) != 0
5933 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5934 					!= NULL) {
5935 				// The page is still mapped -- just decrement its wired
5936 				// count.
5937 				decrement_page_wired_count(page);
5938 			} else {
5939 				panic("unlock_memory_etc(): Failed to unwire page: address "
5940 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5941 					nextAddress);
5942 				error = B_BAD_VALUE;
5943 				break;
5944 			}
5945 		}
5946 
5947 		map->Unlock();
5948 
5949 		// All pages are unwired. Remove the area's wired range as well (to
5950 		// avoid deadlocks we delete the range after unlocking the cache).
5951 		VMAreaWiredRange* range = area->Unwire(areaStart,
5952 			areaEnd - areaStart, writable);
5953 
5954 		cacheChainLocker.Unlock();
5955 
5956 		if (range != NULL) {
5957 			range->~VMAreaWiredRange();
5958 			free_etc(range, mallocFlags);
5959 		}
5960 
5961 		if (error != B_OK)
5962 			break;
5963 	}
5964 
5965 	// get rid of the address space reference lock_memory_etc() acquired
5966 	addressSpace->Put();
5967 
5968 	return error;
5969 }
5970 
5971 
5972 status_t
5973 unlock_memory(void* address, size_t numBytes, uint32 flags)
5974 {
5975 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5976 }
5977 
5978 
5979 /*!	Similar to get_memory_map(), but also allows specifying the address space
5980 	for the memory in question and has saner semantics.
5981 	Returns \c B_OK when the complete range could be translated or
5982 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5983 	case the actual number of entries is written to \c *_numEntries. Any other
5984 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5985 	in this case.
5986 */
5987 status_t
5988 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5989 	physical_entry* table, uint32* _numEntries)
5990 {
5991 	uint32 numEntries = *_numEntries;
5992 	*_numEntries = 0;
5993 
5994 	VMAddressSpace* addressSpace;
5995 	addr_t virtualAddress = (addr_t)address;
5996 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5997 	phys_addr_t physicalAddress;
5998 	status_t status = B_OK;
5999 	int32 index = -1;
6000 	addr_t offset = 0;
6001 	bool interrupts = are_interrupts_enabled();
6002 
6003 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6004 		"entries)\n", team, address, numBytes, numEntries));
6005 
6006 	if (numEntries == 0 || numBytes == 0)
6007 		return B_BAD_VALUE;
6008 
6009 	// in which address space is the address to be found?
6010 	if (IS_USER_ADDRESS(virtualAddress)) {
6011 		if (team == B_CURRENT_TEAM)
6012 			addressSpace = VMAddressSpace::GetCurrent();
6013 		else
6014 			addressSpace = VMAddressSpace::Get(team);
6015 	} else
6016 		addressSpace = VMAddressSpace::GetKernel();
6017 
6018 	if (addressSpace == NULL)
6019 		return B_ERROR;
6020 
6021 	VMTranslationMap* map = addressSpace->TranslationMap();
6022 
6023 	if (interrupts)
6024 		map->Lock();
6025 
6026 	while (offset < numBytes) {
6027 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6028 		uint32 flags;
6029 
6030 		if (interrupts) {
6031 			status = map->Query((addr_t)address + offset, &physicalAddress,
6032 				&flags);
6033 		} else {
6034 			status = map->QueryInterrupt((addr_t)address + offset,
6035 				&physicalAddress, &flags);
6036 		}
6037 		if (status < B_OK)
6038 			break;
6039 		if ((flags & PAGE_PRESENT) == 0) {
6040 			panic("get_memory_map() called on unmapped memory!");
6041 			return B_BAD_ADDRESS;
6042 		}
6043 
6044 		if (index < 0 && pageOffset > 0) {
6045 			physicalAddress += pageOffset;
6046 			if (bytes > B_PAGE_SIZE - pageOffset)
6047 				bytes = B_PAGE_SIZE - pageOffset;
6048 		}
6049 
6050 		// need to switch to the next physical_entry?
6051 		if (index < 0 || table[index].address
6052 				!= physicalAddress - table[index].size) {
6053 			if ((uint32)++index + 1 > numEntries) {
6054 				// table too small
6055 				break;
6056 			}
6057 			table[index].address = physicalAddress;
6058 			table[index].size = bytes;
6059 		} else {
6060 			// page fits into the current entry -- extend it
6061 			table[index].size += bytes;
6062 		}
6063 
6064 		offset += bytes;
6065 	}
6066 
6067 	if (interrupts)
6068 		map->Unlock();
6069 
6070 	if (status != B_OK)
6071 		return status;
6072 
6073 	if ((uint32)index + 1 > numEntries) {
6074 		*_numEntries = index;
6075 		return B_BUFFER_OVERFLOW;
6076 	}
6077 
6078 	*_numEntries = index + 1;
6079 	return B_OK;
6080 }
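
// Editor's usage sketch (not part of the original source; "buffer" and
// "length" are hypothetical): translating a previously wired buffer into
// physical runs and handling a table that turned out to be too small:
//
//		physical_entry table[8];
//		uint32 count = 8;
//		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
//			table, &count);
//		if (status == B_OK || status == B_BUFFER_OVERFLOW) {
//			// "count" holds the number of valid entries in either case
//		}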
6081 
6082 
6083 /*!	According to the BeBook, this function should always succeed.
6084 	This is no longer the case.
6085 */
6086 extern "C" int32
6087 __get_memory_map_haiku(const void* address, size_t numBytes,
6088 	physical_entry* table, int32 numEntries)
6089 {
6090 	uint32 entriesRead = numEntries;
6091 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6092 		table, &entriesRead);
6093 	if (error != B_OK)
6094 		return error;
6095 
6096 	// close the entry list
6097 
6098 	// if it's only one entry, we will silently accept the missing ending
6099 	if (numEntries == 1)
6100 		return B_OK;
6101 
6102 	if (entriesRead + 1 > (uint32)numEntries)
6103 		return B_BUFFER_OVERFLOW;
6104 
6105 	table[entriesRead].address = 0;
6106 	table[entriesRead].size = 0;
6107 
6108 	return B_OK;
6109 }
6110 
6111 
6112 area_id
6113 area_for(void* address)
6114 {
6115 	return vm_area_for((addr_t)address, true);
6116 }
6117 
6118 
6119 area_id
6120 find_area(const char* name)
6121 {
6122 	return VMAreaHash::Find(name);
6123 }
6124 
6125 
6126 status_t
6127 _get_area_info(area_id id, area_info* info, size_t size)
6128 {
6129 	if (size != sizeof(area_info) || info == NULL)
6130 		return B_BAD_VALUE;
6131 
6132 	AddressSpaceReadLocker locker;
6133 	VMArea* area;
6134 	status_t status = locker.SetFromArea(id, area);
6135 	if (status != B_OK)
6136 		return status;
6137 
6138 	fill_area_info(area, info, size);
6139 	return B_OK;
6140 }
6141 
6142 
6143 status_t
6144 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6145 {
6146 	addr_t nextBase = *(addr_t*)cookie;
6147 
6148 	// we're already through the list
6149 	if (nextBase == (addr_t)-1)
6150 		return B_ENTRY_NOT_FOUND;
6151 
6152 	if (team == B_CURRENT_TEAM)
6153 		team = team_get_current_team_id();
6154 
6155 	AddressSpaceReadLocker locker(team);
6156 	if (!locker.IsLocked())
6157 		return B_BAD_TEAM_ID;
6158 
6159 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6160 	if (area == NULL) {
6161 		nextBase = (addr_t)-1;
6162 		return B_ENTRY_NOT_FOUND;
6163 	}
6164 
6165 	fill_area_info(area, info, size);
6166 	*cookie = (ssize_t)(area->Base() + 1);
6167 
6168 	return B_OK;
6169 }
6170 
6171 
6172 status_t
6173 set_area_protection(area_id area, uint32 newProtection)
6174 {
6175 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6176 		newProtection, true);
6177 }
6178 
6179 
6180 status_t
6181 resize_area(area_id areaID, size_t newSize)
6182 {
6183 	return vm_resize_area(areaID, newSize, true);
6184 }
6185 
6186 
6187 /*!	Transfers the specified area to a new team. The caller must be the owner
6188 	of the area.
6189 */
6190 area_id
6191 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6192 	bool kernel)
6193 {
6194 	area_info info;
6195 	status_t status = get_area_info(id, &info);
6196 	if (status != B_OK)
6197 		return status;
6198 
6199 	if (info.team != thread_get_current_thread()->team->id)
6200 		return B_PERMISSION_DENIED;
6201 
6202 	// We need to mark the area cloneable so the following operations work.
6203 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6204 	if (status != B_OK)
6205 		return status;
6206 
6207 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6208 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6209 	if (clonedArea < 0)
6210 		return clonedArea;
6211 
6212 	status = vm_delete_area(info.team, id, kernel);
6213 	if (status != B_OK) {
6214 		vm_delete_area(target, clonedArea, kernel);
6215 		return status;
6216 	}
6217 
6218 	// Now we can reset the protection to whatever it was before.
6219 	set_area_protection(clonedArea, info.protection);
6220 
6221 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6222 
6223 	return clonedArea;
6224 }
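
// Editor's usage sketch (not part of the original source; "id" and
// "targetTeam" are hypothetical): handing an area over to another team and
// letting the kernel pick the address in the target address space:
//
//		void* address = NULL;
//		area_id newArea = transfer_area(id, &address, B_ANY_ADDRESS,
//			targetTeam, true);
//		if (newArea < 0)
//			return newArea;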
6225 
6226 
6227 extern "C" area_id
6228 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6229 	size_t numBytes, uint32 addressSpec, uint32 protection,
6230 	void** _virtualAddress)
6231 {
6232 	if (!arch_vm_supports_protection(protection))
6233 		return B_NOT_SUPPORTED;
6234 
6235 	fix_protection(&protection);
6236 
6237 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6238 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6239 		false);
6240 }
6241 
6242 
6243 area_id
6244 clone_area(const char* name, void** _address, uint32 addressSpec,
6245 	uint32 protection, area_id source)
6246 {
6247 	if ((protection & B_KERNEL_PROTECTION) == 0)
6248 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6249 
6250 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6251 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6252 }
6253 
6254 
6255 area_id
6256 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6257 	uint32 protection, uint32 flags, uint32 guardSize,
6258 	const virtual_address_restrictions* virtualAddressRestrictions,
6259 	const physical_address_restrictions* physicalAddressRestrictions,
6260 	void** _address)
6261 {
6262 	fix_protection(&protection);
6263 
6264 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6265 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6266 		true, _address);
6267 }
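
// Editor's usage sketch (not part of the original source): how a driver might
// create a physically contiguous DMA buffer. The restriction field used here
// (high_address) follows the usual Haiku definitions; treat the details as an
// assumption:
//
//		virtual_address_restrictions virtualRestrictions = {};
//		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//		physical_address_restrictions physicalRestrictions = {};
//		physicalRestrictions.high_address = (phys_addr_t)0x100000000LL;
//			// keep the buffer below 4 GiB
//		void* address;
//		area_id area = create_area_etc(B_SYSTEM_TEAM, "dma buffer",
//			16 * B_PAGE_SIZE, B_CONTIGUOUS,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
//			&virtualRestrictions, &physicalRestrictions, &address);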
6268 
6269 
6270 extern "C" area_id
6271 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6272 	size_t size, uint32 lock, uint32 protection)
6273 {
6274 	fix_protection(&protection);
6275 
6276 	virtual_address_restrictions virtualRestrictions = {};
6277 	virtualRestrictions.address = *_address;
6278 	virtualRestrictions.address_specification = addressSpec;
6279 	physical_address_restrictions physicalRestrictions = {};
6280 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6281 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6282 		true, _address);
6283 }
6284 
6285 
6286 status_t
6287 delete_area(area_id area)
6288 {
6289 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6290 }
6291 
6292 
6293 //	#pragma mark - Userland syscalls
6294 
6295 
6296 status_t
6297 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6298 	addr_t size)
6299 {
6300 	// filter out some unavailable values (for userland)
6301 	switch (addressSpec) {
6302 		case B_ANY_KERNEL_ADDRESS:
6303 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6304 			return B_BAD_VALUE;
6305 	}
6306 
6307 	addr_t address;
6308 
6309 	if (!IS_USER_ADDRESS(userAddress)
6310 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6311 		return B_BAD_ADDRESS;
6312 
6313 	status_t status = vm_reserve_address_range(
6314 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6315 		RESERVED_AVOID_BASE);
6316 	if (status != B_OK)
6317 		return status;
6318 
6319 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6320 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6321 			(void*)address, size);
6322 		return B_BAD_ADDRESS;
6323 	}
6324 
6325 	return B_OK;
6326 }
6327 
6328 
6329 status_t
6330 _user_unreserve_address_range(addr_t address, addr_t size)
6331 {
6332 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6333 		(void*)address, size);
6334 }
6335 
6336 
6337 area_id
6338 _user_area_for(void* address)
6339 {
6340 	return vm_area_for((addr_t)address, false);
6341 }
6342 
6343 
6344 area_id
6345 _user_find_area(const char* userName)
6346 {
6347 	char name[B_OS_NAME_LENGTH];
6348 
6349 	if (!IS_USER_ADDRESS(userName)
6350 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6351 		return B_BAD_ADDRESS;
6352 
6353 	return find_area(name);
6354 }
6355 
6356 
6357 status_t
6358 _user_get_area_info(area_id area, area_info* userInfo)
6359 {
6360 	if (!IS_USER_ADDRESS(userInfo))
6361 		return B_BAD_ADDRESS;
6362 
6363 	area_info info;
6364 	status_t status = get_area_info(area, &info);
6365 	if (status < B_OK)
6366 		return status;
6367 
6368 	// TODO: do we want to prevent userland from seeing kernel protections?
6369 	//info.protection &= B_USER_PROTECTION;
6370 
6371 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6372 		return B_BAD_ADDRESS;
6373 
6374 	return status;
6375 }
6376 
6377 
6378 status_t
6379 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6380 {
6381 	ssize_t cookie;
6382 
6383 	if (!IS_USER_ADDRESS(userCookie)
6384 		|| !IS_USER_ADDRESS(userInfo)
6385 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6386 		return B_BAD_ADDRESS;
6387 
6388 	area_info info;
6389 	status_t status = _get_next_area_info(team, &cookie, &info,
6390 		sizeof(area_info));
6391 	if (status != B_OK)
6392 		return status;
6393 
6394 	//info.protection &= B_USER_PROTECTION;
6395 
6396 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6397 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6398 		return B_BAD_ADDRESS;
6399 
6400 	return status;
6401 }
6402 
6403 
6404 status_t
6405 _user_set_area_protection(area_id area, uint32 newProtection)
6406 {
6407 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6408 		return B_BAD_VALUE;
6409 
6410 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6411 		newProtection, false);
6412 }
6413 
6414 
6415 status_t
6416 _user_resize_area(area_id area, size_t newSize)
6417 {
6418 	// TODO: Since we restrict deleting of areas to those owned by the team,
6419 	// we should also do that for resizing (check other functions, too).
6420 	return vm_resize_area(area, newSize, false);
6421 }
6422 
6423 
6424 area_id
6425 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6426 	team_id target)
6427 {
6428 	// filter out some unavailable values (for userland)
6429 	switch (addressSpec) {
6430 		case B_ANY_KERNEL_ADDRESS:
6431 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6432 			return B_BAD_VALUE;
6433 	}
6434 
6435 	void* address;
6436 	if (!IS_USER_ADDRESS(userAddress)
6437 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6438 		return B_BAD_ADDRESS;
6439 
6440 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6441 	if (newArea < B_OK)
6442 		return newArea;
6443 
6444 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6445 		return B_BAD_ADDRESS;
6446 
6447 	return newArea;
6448 }
6449 
6450 
6451 area_id
6452 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6453 	uint32 protection, area_id sourceArea)
6454 {
6455 	char name[B_OS_NAME_LENGTH];
6456 	void* address;
6457 
6458 	// filter out some unavailable values (for userland)
6459 	switch (addressSpec) {
6460 		case B_ANY_KERNEL_ADDRESS:
6461 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6462 			return B_BAD_VALUE;
6463 	}
6464 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6465 		return B_BAD_VALUE;
6466 
6467 	if (!IS_USER_ADDRESS(userName)
6468 		|| !IS_USER_ADDRESS(userAddress)
6469 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6470 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6471 		return B_BAD_ADDRESS;
6472 
6473 	fix_protection(&protection);
6474 
6475 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6476 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6477 		false);
6478 	if (clonedArea < B_OK)
6479 		return clonedArea;
6480 
6481 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6482 		delete_area(clonedArea);
6483 		return B_BAD_ADDRESS;
6484 	}
6485 
6486 	return clonedArea;
6487 }
6488 
6489 
6490 area_id
6491 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6492 	size_t size, uint32 lock, uint32 protection)
6493 {
6494 	char name[B_OS_NAME_LENGTH];
6495 	void* address;
6496 
6497 	// filter out some unavailable values (for userland)
6498 	switch (addressSpec) {
6499 		case B_ANY_KERNEL_ADDRESS:
6500 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6501 			return B_BAD_VALUE;
6502 	}
6503 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6504 		return B_BAD_VALUE;
6505 
6506 	if (!IS_USER_ADDRESS(userName)
6507 		|| !IS_USER_ADDRESS(userAddress)
6508 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6509 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6510 		return B_BAD_ADDRESS;
6511 
6512 	if (addressSpec == B_EXACT_ADDRESS
6513 		&& IS_KERNEL_ADDRESS(address))
6514 		return B_BAD_VALUE;
6515 
6516 	if (addressSpec == B_ANY_ADDRESS)
6517 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6518 	if (addressSpec == B_BASE_ADDRESS)
6519 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6520 
6521 	fix_protection(&protection);
6522 
6523 	virtual_address_restrictions virtualRestrictions = {};
6524 	virtualRestrictions.address = address;
6525 	virtualRestrictions.address_specification = addressSpec;
6526 	physical_address_restrictions physicalRestrictions = {};
6527 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6528 		size, lock, protection, 0, 0, &virtualRestrictions,
6529 		&physicalRestrictions, false, &address);
6530 
6531 	if (area >= B_OK
6532 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6533 		delete_area(area);
6534 		return B_BAD_ADDRESS;
6535 	}
6536 
6537 	return area;
6538 }
6539 
6540 
6541 status_t
6542 _user_delete_area(area_id area)
6543 {
6544 	// Unlike the BeOS implementation, you can now only delete areas
6545 	// that you have created yourself from userland.
6546 	// The documentation to delete_area() explicitly states that this
6547 	// will be restricted in the future, and so it will.
6548 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6549 }
6550 
6551 
6552 // TODO: create a BeOS style call for this!
6553 
6554 area_id
6555 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6556 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6557 	int fd, off_t offset)
6558 {
6559 	char name[B_OS_NAME_LENGTH];
6560 	void* address;
6561 	area_id area;
6562 
6563 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6564 		return B_BAD_VALUE;
6565 
6566 	fix_protection(&protection);
6567 
6568 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6569 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6570 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6571 		return B_BAD_ADDRESS;
6572 
6573 	if (addressSpec == B_EXACT_ADDRESS) {
6574 		if ((addr_t)address + size < (addr_t)address
6575 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6576 			return B_BAD_VALUE;
6577 		}
6578 		if (!IS_USER_ADDRESS(address)
6579 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6580 			return B_BAD_ADDRESS;
6581 		}
6582 	}
6583 
6584 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6585 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6586 		false);
6587 	if (area < B_OK)
6588 		return area;
6589 
6590 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6591 		return B_BAD_ADDRESS;
6592 
6593 	return area;
6594 }
6595 
6596 
6597 status_t
6598 _user_unmap_memory(void* _address, size_t size)
6599 {
6600 	addr_t address = (addr_t)_address;
6601 
6602 	// check params
6603 	if (size == 0 || (addr_t)address + size < (addr_t)address
6604 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6605 		return B_BAD_VALUE;
6606 	}
6607 
6608 	if (!IS_USER_ADDRESS(address)
6609 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6610 		return B_BAD_ADDRESS;
6611 	}
6612 
6613 	// Write lock the address space and ensure the address range is not wired.
6614 	AddressSpaceWriteLocker locker;
6615 	do {
6616 		status_t status = locker.SetTo(team_get_current_team_id());
6617 		if (status != B_OK)
6618 			return status;
6619 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6620 			size, &locker));
6621 
6622 	// unmap
6623 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6624 }
6625 
6626 
6627 status_t
6628 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6629 {
6630 	// check address range
6631 	addr_t address = (addr_t)_address;
6632 	size = PAGE_ALIGN(size);
6633 
6634 	if ((address % B_PAGE_SIZE) != 0)
6635 		return B_BAD_VALUE;
6636 	if (!validate_user_memory_range(_address, size)) {
6637 		// weird error code required by POSIX
6638 		return ENOMEM;
6639 	}
6640 
6641 	// extend and check protection
6642 	if ((protection & ~B_USER_PROTECTION) != 0)
6643 		return B_BAD_VALUE;
6644 
6645 	fix_protection(&protection);
6646 
6647 	// We need to write lock the address space, since we're going to play with
6648 	// the areas. Also make sure that none of the areas is wired and that we're
6649 	// actually allowed to change the protection.
6650 	AddressSpaceWriteLocker locker;
6651 
6652 	bool restart;
6653 	do {
6654 		restart = false;
6655 
6656 		status_t status = locker.SetTo(team_get_current_team_id());
6657 		if (status != B_OK)
6658 			return status;
6659 
6660 		// First round: Check whether the whole range is covered by areas and we
6661 		// are allowed to modify them.
6662 		addr_t currentAddress = address;
6663 		size_t sizeLeft = size;
6664 		while (sizeLeft > 0) {
6665 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6666 			if (area == NULL)
6667 				return B_NO_MEMORY;
6668 
6669 			if ((area->protection & B_KERNEL_AREA) != 0)
6670 				return B_NOT_ALLOWED;
6671 			if (area->protection_max != 0
6672 				&& (protection & area->protection_max) != protection) {
6673 				return B_NOT_ALLOWED;
6674 			}
6675 
6676 			addr_t offset = currentAddress - area->Base();
6677 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6678 
6679 			AreaCacheLocker cacheLocker(area);
6680 
6681 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6682 					&locker, &cacheLocker)) {
6683 				restart = true;
6684 				break;
6685 			}
6686 
6687 			cacheLocker.Unlock();
6688 
6689 			currentAddress += rangeSize;
6690 			sizeLeft -= rangeSize;
6691 		}
6692 	} while (restart);
6693 
6694 	// Second round: If the protections differ from that of the area, create a
6695 	// page protection array and re-map mapped pages.
6696 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6697 	addr_t currentAddress = address;
6698 	size_t sizeLeft = size;
6699 	while (sizeLeft > 0) {
6700 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6701 		if (area == NULL)
6702 			return B_NO_MEMORY;
6703 
6704 		addr_t offset = currentAddress - area->Base();
6705 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6706 
6707 		currentAddress += rangeSize;
6708 		sizeLeft -= rangeSize;
6709 
6710 		if (area->page_protections == NULL) {
6711 			if (area->protection == protection)
6712 				continue;
6713 
6714 			status_t status = allocate_area_page_protections(area);
6715 			if (status != B_OK)
6716 				return status;
6717 		}
6718 
6719 		// We need to lock the complete cache chain, since we potentially unmap
6720 		// pages of lower caches.
6721 		VMCache* topCache = vm_area_get_locked_cache(area);
6722 		VMCacheChainLocker cacheChainLocker(topCache);
6723 		cacheChainLocker.LockAllSourceCaches();
6724 
6725 		for (addr_t pageAddress = area->Base() + offset;
6726 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6727 			map->Lock();
6728 
6729 			set_area_page_protection(area, pageAddress, protection);
6730 
6731 			phys_addr_t physicalAddress;
6732 			uint32 flags;
6733 
6734 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6735 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6736 				map->Unlock();
6737 				continue;
6738 			}
6739 
6740 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6741 			if (page == NULL) {
6742 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6743 					"\n", area, physicalAddress);
6744 				map->Unlock();
6745 				return B_ERROR;
6746 			}
6747 
6748 			// If the page is not in the topmost cache and write access is
6749 			// requested, we have to unmap it. Otherwise we can re-map it with
6750 			// the new protection.
6751 			bool unmapPage = page->Cache() != topCache
6752 				&& (protection & B_WRITE_AREA) != 0;
6753 
6754 			if (!unmapPage)
6755 				map->ProtectPage(area, pageAddress, protection);
6756 
6757 			map->Unlock();
6758 
6759 			if (unmapPage) {
6760 				DEBUG_PAGE_ACCESS_START(page);
6761 				unmap_page(area, pageAddress);
6762 				DEBUG_PAGE_ACCESS_END(page);
6763 			}
6764 		}
6765 	}
6766 
6767 	return B_OK;
6768 }
6769 
6770 
6771 status_t
6772 _user_sync_memory(void* _address, size_t size, uint32 flags)
6773 {
6774 	addr_t address = (addr_t)_address;
6775 	size = PAGE_ALIGN(size);
6776 
6777 	// check params
6778 	if ((address % B_PAGE_SIZE) != 0)
6779 		return B_BAD_VALUE;
6780 	if (!validate_user_memory_range(_address, size)) {
6781 		// weird error code required by POSIX
6782 		return ENOMEM;
6783 	}
6784 
6785 	bool writeSync = (flags & MS_SYNC) != 0;
6786 	bool writeAsync = (flags & MS_ASYNC) != 0;
6787 	if (writeSync && writeAsync)
6788 		return B_BAD_VALUE;
6789 
6790 	if (size == 0 || (!writeSync && !writeAsync))
6791 		return B_OK;
6792 
6793 	// iterate through the range and sync all concerned areas
6794 	while (size > 0) {
6795 		// read lock the address space
6796 		AddressSpaceReadLocker locker;
6797 		status_t error = locker.SetTo(team_get_current_team_id());
6798 		if (error != B_OK)
6799 			return error;
6800 
6801 		// get the first area
6802 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6803 		if (area == NULL)
6804 			return B_NO_MEMORY;
6805 
6806 		uint32 offset = address - area->Base();
6807 		size_t rangeSize = min_c(area->Size() - offset, size);
6808 		offset += area->cache_offset;
6809 
6810 		// lock the cache
6811 		AreaCacheLocker cacheLocker(area);
6812 		if (!cacheLocker)
6813 			return B_BAD_VALUE;
6814 		VMCache* cache = area->cache;
6815 
6816 		locker.Unlock();
6817 
6818 		uint32 firstPage = offset >> PAGE_SHIFT;
6819 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6820 
6821 		// write the pages
6822 		if (cache->type == CACHE_TYPE_VNODE) {
6823 			if (writeSync) {
6824 				// synchronous
6825 				error = vm_page_write_modified_page_range(cache, firstPage,
6826 					endPage);
6827 				if (error != B_OK)
6828 					return error;
6829 			} else {
6830 				// asynchronous
6831 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6832 				// TODO: This is probably not quite what is supposed to happen.
6833 				// Especially when a lot has to be written, it might take ages
6834 				// until it really hits the disk.
6835 			}
6836 		}
6837 
6838 		address += rangeSize;
6839 		size -= rangeSize;
6840 	}
6841 
6842 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6843 	// synchronize multiple mappings of the same file. In our VM they never get
6844 	// out of sync, though, so we don't have to do anything.
6845 
6846 	return B_OK;
6847 }
6848 
6849 
6850 status_t
6851 _user_memory_advice(void* _address, size_t size, uint32 advice)
6852 {
6853 	addr_t address = (addr_t)_address;
6854 	if ((address % B_PAGE_SIZE) != 0)
6855 		return B_BAD_VALUE;
6856 
6857 	size = PAGE_ALIGN(size);
6858 	if (!validate_user_memory_range(_address, size)) {
6859 		// weird error code required by POSIX
6860 		return B_NO_MEMORY;
6861 	}
6862 
6863 	switch (advice) {
6864 		case MADV_NORMAL:
6865 		case MADV_SEQUENTIAL:
6866 		case MADV_RANDOM:
6867 		case MADV_WILLNEED:
6868 		case MADV_DONTNEED:
6869 			// TODO: Implement!
6870 			break;
6871 
6872 		case MADV_FREE:
6873 		{
6874 			AddressSpaceWriteLocker locker;
6875 			do {
6876 				status_t status = locker.SetTo(team_get_current_team_id());
6877 				if (status != B_OK)
6878 					return status;
6879 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6880 					address, size, &locker));
6881 
6882 			discard_address_range(locker.AddressSpace(), address, size, false);
6883 			break;
6884 		}
6885 
6886 		default:
6887 			return B_BAD_VALUE;
6888 	}
6889 
6890 	return B_OK;
6891 }
6892 
6893 
6894 status_t
6895 _user_get_memory_properties(team_id teamID, const void* address,
6896 	uint32* _protected, uint32* _lock)
6897 {
6898 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6899 		return B_BAD_ADDRESS;
6900 
6901 	AddressSpaceReadLocker locker;
6902 	status_t error = locker.SetTo(teamID);
6903 	if (error != B_OK)
6904 		return error;
6905 
6906 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6907 	if (area == NULL)
6908 		return B_NO_MEMORY;
6909 
6910 
6911 	uint32 protection = area->protection;
6912 	if (area->page_protections != NULL)
6913 		protection = get_area_page_protection(area, (addr_t)address);
6914 
6915 	uint32 wiring = area->wiring;
6916 
6917 	locker.Unlock();
6918 
6919 	error = user_memcpy(_protected, &protection, sizeof(protection));
6920 	if (error != B_OK)
6921 		return error;
6922 
6923 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6924 
6925 	return error;
6926 }
6927 
6928 
6929 // An ordered list of non-overlapping ranges to track mlock/munlock locking.
6930 // It is allowed to call mlock/munlock in unbalanced ways (lock a range
6931 // multiple times, unlock a part of it, lock several consecutive ranges and
6932 // unlock them in one go, etc). However, the low-level lock_memory and
6933 // unlock_memory calls require the locks/unlocks to be balanced (you lock a
6934 // fixed range, and then unlock exactly the same range). This list keeps
6935 // track of exactly what was locked, so we can unlock the correct ranges.
6936 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> {
6937 	addr_t start;
6938 	addr_t end;
6939 
6940 	status_t LockMemory()
6941 	{
6942 		return lock_memory((void*)start, end - start, 0);
6943 	}
6944 
6945 	status_t UnlockMemory()
6946 	{
6947 		return unlock_memory((void*)start, end - start, 0);
6948 	}
6949 
6950 	status_t Move(addr_t start, addr_t end)
6951 	{
6952 		status_t result = lock_memory((void*)start, end - start, 0);
6953 		if (result != B_OK)
6954 			return result;
6955 
6956 		result = UnlockMemory();
6957 
6958 		if (result != B_OK) {
6959 			// What can we do if the unlock fails?
6960 			panic("Failed to unlock memory: %s", strerror(result));
6961 			return result;
6962 		}
6963 
6964 		this->start = start;
6965 		this->end = end;
6966 
6967 		return B_OK;
6968 	}
6969 };
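
// Editor's note (worked example, not in the original source): under the
// scheme described above, mlock()ing [0x1000, 0x5000) and later munlock()ing
// [0x2000, 0x3000) should leave two entries in the list, [0x1000, 0x2000) and
// [0x3000, 0x5000), each re-locked with exactly those bounds so that the
// underlying lock_memory()/unlock_memory() calls stay balanced.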
6970 
6971 
6972 status_t
6973 _user_mlock(const void* address, size_t size) {
6974 	// Maybe there's nothing to do, in which case, do nothing
6975 	if (size == 0)
6976 		return B_OK;
6977 
6978 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
6979 	// reject the call otherwise)
6980 	if ((addr_t)address % B_PAGE_SIZE != 0)
6981 		return EINVAL;
6982 
6983 	size = ROUNDUP(size, B_PAGE_SIZE);
6984 
6985 	addr_t endAddress = (addr_t)address + size;
6986 
	// Pre-allocate a linked list element we may need (it's simpler to do it
	// now than to run out of memory in the middle of changing things)
6989 	LockedPages* newRange = new(std::nothrow) LockedPages();
6990 	if (newRange == NULL)
6991 		return ENOMEM;
6992 
6993 	// Get and lock the team
6994 	Team* team = thread_get_current_thread()->team;
6995 	TeamLocker teamLocker(team);
6996 	teamLocker.Lock();
6997 
6998 	status_t error = B_OK;
6999 	LockedPagesList* lockedPages = &team->locked_pages_list;
7000 
7001 	// Locate the first locked range possibly overlapping ours
7002 	LockedPages* currentRange = lockedPages->Head();
7003 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
7004 		currentRange = lockedPages->GetNext(currentRange);
7005 
7006 	if (currentRange == NULL || currentRange->start >= endAddress) {
7007 		// No existing range is overlapping with ours. We can just lock our
7008 		// range and stop here.
7009 		newRange->start = (addr_t)address;
7010 		newRange->end = endAddress;
7011 		error = newRange->LockMemory();
7012 		if (error != B_OK) {
7013 			delete newRange;
7014 			return error;
7015 		}
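		// (With currentRange == NULL, InsertBefore() appends at the end of
		// the list, so the list stays ordered by address.)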
7016 		lockedPages->InsertBefore(currentRange, newRange);
7017 		return B_OK;
7018 	}
7019 
7020 	// We get here when there is at least one existing overlapping range.
7021 
7022 	if (currentRange->start <= (addr_t)address) {
7023 		if (currentRange->end >= endAddress) {
7024 			// An existing range is already fully covering the pages we need to
7025 			// lock. Nothing to do then.
7026 			delete newRange;
7027 			return B_OK;
7028 		} else {
7029 			// An existing range covers the start of the area we want to lock.
7030 			// Advance our start address to avoid it.
7031 			address = (void*)currentRange->end;
7032 
7033 			// Move on to the next range for the next step
7034 			currentRange = lockedPages->GetNext(currentRange);
7035 		}
7036 	}
7037 
7038 	// First, lock the new range
7039 	newRange->start = (addr_t)address;
7040 	newRange->end = endAddress;
7041 	error = newRange->LockMemory();
7042 	if (error != B_OK) {
7043 		delete newRange;
7044 		return error;
7045 	}
7046 
	// Unlock and remove all existing ranges fully contained in the area we
	// just locked
7048 	while (currentRange != NULL && currentRange->end < endAddress) {
		// The existing range is fully contained inside the new one we just
		// locked. Unlock and delete it; the new range replaces it (this
		// limits fragmentation of the range list and is simpler to manage).
7053 		error = currentRange->UnlockMemory();
7054 		if (error != B_OK) {
7055 			panic("Failed to unlock a memory range: %s", strerror(error));
7056 			newRange->UnlockMemory();
7057 			delete newRange;
7058 			return error;
7059 		}
7060 		LockedPages* temp = currentRange;
7061 		currentRange = lockedPages->GetNext(currentRange);
7062 		lockedPages->Remove(temp);
7063 		delete temp;
7064 	}
7065 
7066 	if (currentRange != NULL) {
7067 		// One last range may cover the end of the area we're trying to lock
7068 
7069 		if (currentRange->start == (addr_t)address) {
			// Two overlapping ranges (one at the start, this one at the end)
			// already cover the whole area we're after, so there's nothing
			// more to do: unlock and discard our extra range.
7073 			error = newRange->UnlockMemory();
7074 			delete newRange;
7075 			return error;
7076 		}
7077 
7078 		if (currentRange->start < endAddress) {
			// Make sure the last range no longer overlaps ours, by moving its start
7080 			error = currentRange->Move(endAddress, currentRange->end);
7081 			if (error != B_OK) {
7082 				panic("Failed to move a memory range: %s", strerror(error));
7083 				newRange->UnlockMemory();
7084 				delete newRange;
7085 				return error;
7086 			}
7087 		}
7088 	}
7089 
7090 	// Finally, store the new range in the locked list
7091 	lockedPages->InsertBefore(currentRange, newRange);
7092 	return B_OK;
7093 }
7094 
7095 
7096 status_t
_user_munlock(const void* address, size_t size)
{
	// Nothing to unlock if the size is 0
7099 	if (size == 0)
7100 		return B_OK;
7101 
	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
	// reject the call otherwise)
7104 	if ((addr_t)address % B_PAGE_SIZE != 0)
7105 		return EINVAL;
7106 
7107 	// Round size up to the next page
7108 	size = ROUNDUP(size, B_PAGE_SIZE);
7109 
7110 	addr_t endAddress = (addr_t)address + size;
7111 
7112 	// Get and lock the team
7113 	Team* team = thread_get_current_thread()->team;
7114 	TeamLocker teamLocker(team);
7115 	teamLocker.Lock();
7116 	LockedPagesList* lockedPages = &team->locked_pages_list;
7117 
7118 	status_t error = B_OK;
7119 
7120 	// Locate the first locked range possibly overlapping ours
7121 	LockedPages* currentRange = lockedPages->Head();
7122 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
7123 		currentRange = lockedPages->GetNext(currentRange);
7124 
7125 	if (currentRange == NULL || currentRange->start >= endAddress) {
7126 		// No range is intersecting, nothing to unlock
7127 		return B_OK;
7128 	}
7129 
7130 	if (currentRange->start < (addr_t)address) {
7131 		if (currentRange->end > endAddress) {
7132 			// There is a range fully covering the area we want to unlock,
7133 			// and it extends on both sides. We need to split it in two
7134 			LockedPages* newRange = new(std::nothrow) LockedPages();
7135 			if (newRange == NULL)
7136 				return ENOMEM;
7137 
7138 			newRange->start = endAddress;
7139 			newRange->end = currentRange->end;
7140 
7141 			error = newRange->LockMemory();
7142 			if (error != B_OK) {
7143 				delete newRange;
7144 				return error;
7145 			}
7146 
7147 			error = currentRange->Move(currentRange->start, (addr_t)address);
			if (error != B_OK) {
				// The new range was already locked above; undo that before
				// discarding it
				newRange->UnlockMemory();
				delete newRange;
				return error;
			}
7152 
7153 			lockedPages->InsertAfter(currentRange, newRange);
7154 			return B_OK;
		} else {
			// There is a range that overlaps and extends before the one we
			// want to unlock; shrink it so that it ends where the area to
			// unlock begins.
			error = currentRange->Move(currentRange->start, (addr_t)address);
			if (error != B_OK)
				return error;

			// The shrunk range must stay locked, so skip past it before
			// unlocking the fully covered ranges below.
			currentRange = lockedPages->GetNext(currentRange);
		}
7162 	}
7163 
7164 	while (currentRange != NULL && currentRange->end <= endAddress) {
7165 		// Unlock all fully overlapping ranges
7166 		error = currentRange->UnlockMemory();
7167 		if (error != B_OK)
7168 			return error;
7169 		LockedPages* temp = currentRange;
7170 		currentRange = lockedPages->GetNext(currentRange);
7171 		lockedPages->Remove(temp);
7172 		delete temp;
7173 	}
7174 
	// Finally, shrink the last partially overlapping range, if any, so that
	// it starts after the unlocked area
7176 	if (currentRange != NULL && currentRange->start < endAddress) {
7177 		error = currentRange->Move(endAddress, currentRange->end);
7178 		if (error != B_OK)
7179 			return error;
7180 	}
7181 
7182 	return B_OK;
7183 }
7184 
7185 
7186 // #pragma mark -- compatibility
7187 
7188 
7189 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7190 
7191 
7192 struct physical_entry_beos {
7193 	uint32	address;
7194 	uint32	size;
7195 };
7196 
7197 
7198 /*!	The physical_entry structure has changed. We need to translate it to the
7199 	old one.
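	The BeOS structure packs both the address and the size into 32-bit
	fields, while the current physical_entry can describe physical addresses
	beyond 4 GB; hence the per-entry translation and the overflow check below.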
7200 */
7201 extern "C" int32
7202 __get_memory_map_beos(const void* _address, size_t numBytes,
7203 	physical_entry_beos* table, int32 numEntries)
7204 {
7205 	if (numEntries <= 0)
7206 		return B_BAD_VALUE;
7207 
7208 	const uint8* address = (const uint8*)_address;
7209 
7210 	int32 count = 0;
7211 	while (numBytes > 0 && count < numEntries) {
7212 		physical_entry entry;
7213 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
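		// B_BUFFER_OVERFLOW only means that a single entry could not describe
		// the whole remaining range; the returned entry is still valid, so
		// keep iterating. Any other error is fatal.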
7214 		if (result < 0) {
7215 			if (result != B_BUFFER_OVERFLOW)
7216 				return result;
7217 		}
7218 
7219 		if (entry.address >= (phys_addr_t)1 << 32) {
			panic("get_memory_map(): Address is greater than 4 GB!");
7221 			return B_ERROR;
7222 		}
7223 
7224 		table[count].address = entry.address;
7225 		table[count++].size = entry.size;
7226 
7227 		address += entry.size;
7228 		numBytes -= entry.size;
7229 	}
7230 
7231 	// null-terminate the table, if possible
7232 	if (count < numEntries) {
7233 		table[count].address = 0;
7234 		table[count].size = 0;
7235 	}
7236 
7237 	return B_OK;
7238 }
7239 
7240 
7241 /*!	The type of the \a physicalAddress parameter has changed from void* to
7242 	phys_addr_t.
7243 */
7244 extern "C" area_id
7245 __map_physical_memory_beos(const char* name, void* physicalAddress,
7246 	size_t numBytes, uint32 addressSpec, uint32 protection,
7247 	void** _virtualAddress)
7248 {
7249 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7250 		addressSpec, protection, _virtualAddress);
7251 }
7252 
7253 
7254 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7255 	we meddle with the \a lock parameter to force 32 bit.
7256 */
7257 extern "C" area_id
7258 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7259 	size_t size, uint32 lock, uint32 protection)
7260 {
7261 	switch (lock) {
7262 		case B_NO_LOCK:
7263 			break;
7264 		case B_FULL_LOCK:
7265 		case B_LAZY_LOCK:
7266 			lock = B_32_BIT_FULL_LOCK;
7267 			break;
7268 		case B_CONTIGUOUS:
7269 			lock = B_32_BIT_CONTIGUOUS;
7270 			break;
7271 	}
7272 
7273 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7274 		protection);
7275 }
7276 
7277 
7278 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7279 	"BASE");
7280 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7281 	"map_physical_memory@", "BASE");
7282 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7283 	"BASE");
7284 
7285 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7286 	"get_memory_map@@", "1_ALPHA3");
7287 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7288 	"map_physical_memory@@", "1_ALPHA3");
7289 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7290 	"1_ALPHA3");
7291 
7292 
7293 #else
7294 
7295 
7296 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7297 	"get_memory_map@@", "BASE");
7298 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7299 	"map_physical_memory@@", "BASE");
7300 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7301 	"BASE");
7302 
7303 
7304 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7305